Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2025-12-24 15:09:59 +00:00
Compare commits
55 Commits
| SHA1 |
|---|
| 122b47210e |
| 316d843482 |
| 8c58d3f85b |
| fcacb100a2 |
| 58ada1dfef |
| f78c467a86 |
| 78303639db |
| bd1a5dc265 |
| e0a43f37d7 |
| a89840f5f9 |
| c2db970687 |
| e0525dbfeb |
| cdc9021160 |
| 702ea32538 |
| 342faa4e07 |
| 44ba131987 |
| 96b6235f25 |
| f1a4750576 |
| d973cf81f0 |
| 284a496f54 |
| 4d250ed054 |
| ec43b9183d |
| b025bed45c |
| 21694c2a1d |
| 5c66ce6e88 |
| b2b752337b |
| aa22f9c94a |
| 611a8aa2fe |
| e4c71843e6 |
| e1ad7af10c |
| b9302e4f0d |
| 2e686fe053 |
| 128d3717fa |
| 2b181e91e0 |
| d87ab06b28 |
| 5653389063 |
| c4d7b0d91d |
| f735f739e5 |
| 6070e88077 |
| 9db168875c |
| 4460af800f |
| 69a53130c2 |
| 1c94d4c506 |
| 41e51d4ab3 |
| 11ae85b1cd |
| 7551432cff |
| e16f093282 |
| 301ffc1d91 |
| d22072f68b |
| b526d159c3 |
| 7152407428 |
| b58296de22 |
| 1d80a0f2d6 |
| 286b9af661 |
| af13eeaad3 |
@@ -19,3 +19,5 @@ GT_GCS_BUCKET = GCS bucket
GT_GCS_SCOPE = GCS scope
GT_GCS_CREDENTIAL_PATH = GCS credential path
GT_GCS_ENDPOINT = GCS end point
# Settings for kafka wal test
GT_KAFKA_ENDPOINTS = localhost:9092
2 .github/workflows/apidoc.yml vendored
@@ -1,7 +1,7 @@
on:
  push:
    branches:
      - develop
      - main
    paths-ignore:
      - 'docs/**'
      - 'config/**'
32 .github/workflows/develop.yml vendored
@@ -11,7 +11,6 @@ on:
      - '.gitignore'
  push:
    branches:
      - develop
      - main
    paths-ignore:
      - 'docs/**'
@@ -105,6 +104,37 @@ jobs:
          path: ${{ runner.temp }}/greptime-*.log
          retention-days: 3

  sqlness-kafka-wal:
    name: Sqlness Test with Kafka Wal
    if: github.event.pull_request.draft == false
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ ubuntu-20.04-8-cores ]
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v3
      - uses: arduino/setup-protoc@v1
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
      - name: Rust Cache
        uses: Swatinem/rust-cache@v2
      - name: Setup kafka server
        working-directory: tests-integration/fixtures/kafka
        run: docker compose -f docker-compose-standalone.yml up -d --wait
      - name: Run sqlness
        run: cargo sqlness -w kafka -k 127.0.0.1:9092
      - name: Upload sqlness logs
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: sqlness-logs
          path: ${{ runner.temp }}/greptime-*.log
          retention-days: 3

  fmt:
    name: Rustfmt
    if: github.event.pull_request.draft == false
11 .github/workflows/doc-label.yml vendored
@@ -18,3 +18,14 @@ jobs:
          enable-versioned-regex: false
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          sync-labels: 1
      - name: create an issue in doc repo
        uses: dacbd/create-issue-action@main
        if: ${{ github.event.action == 'opened' && contains(github.event.pull_request.body, '- [ ] This PR does not require documentation updates.') }}
        with:
          owner: GreptimeTeam
          repo: docs
          token: ${{ secrets.DOCS_REPO_TOKEN }}
          title: Update docs for ${{ github.event.issue.title || github.event.pull_request.title }}
          body: |
            A document change request is generated from
            ${{ github.event.issue.html_url || github.event.pull_request.html_url }}
1 .github/workflows/docs.yml vendored
@@ -11,7 +11,6 @@ on:
      - '.gitignore'
  push:
    branches:
      - develop
      - main
    paths:
      - 'docs/**'
2 .github/workflows/license.yaml vendored
@@ -3,7 +3,7 @@ name: License checker
on:
  push:
    branches:
      - develop
      - main
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
jobs:
@@ -10,7 +10,7 @@ Follow our [README](https://github.com/GreptimeTeam/greptimedb#readme) to get th

It can feel intimidating to contribute to a complex project, but it can also be exciting and fun. These general notes will help everyone participate in this communal activity.

- Follow the [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/develop/CODE_OF_CONDUCT.md)
- Follow the [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/main/CODE_OF_CONDUCT.md)
- Small changes make huge differences. We will happily accept a PR making a single character change if it helps move forward. Don't wait to have everything working.
- Check the closed issues before opening your issue.
- Try to follow the existing style of the code.
@@ -26,7 +26,7 @@ Pull requests are great, but we accept all kinds of other help if you like. Such

## Code of Conduct

Also, there are things that we are not looking for because they don't match the goals of the product or benefit the community. Please read [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/develop/CODE_OF_CONDUCT.md); we hope everyone can keep good manners and become an honored member.
Also, there are things that we are not looking for because they don't match the goals of the product or benefit the community. Please read [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/main/CODE_OF_CONDUCT.md); we hope everyone can keep good manners and become an honored member.

## License
159 Cargo.lock generated
@@ -196,7 +196,7 @@ checksum = "8f1f8f5a6f3d50d89e3797d7593a50f96bb2aaa20ca0cc7be1fb673232c91d72"

[[package]]
name = "api"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "common-base",
 "common-decimal",
@@ -674,7 +674,7 @@ dependencies = [

[[package]]
name = "auth"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "async-trait",
@@ -847,7 +847,7 @@ dependencies = [

[[package]]
name = "benchmarks"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "arrow",
 "chrono",
@@ -1179,10 +1179,11 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"

[[package]]
name = "catalog"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "arc-swap",
 "arrow",
 "arrow-schema",
 "async-stream",
 "async-trait",
@@ -1450,7 +1451,7 @@ checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"

[[package]]
name = "client"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "arrow-flight",
@@ -1483,7 +1484,7 @@ dependencies = [
 "session",
 "snafu",
 "substrait 0.17.1",
 "substrait 0.5.0",
 "substrait 0.5.1",
 "tokio",
 "tokio-stream",
 "tonic 0.10.2",
@@ -1513,7 +1514,7 @@ dependencies = [

[[package]]
name = "cmd"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "anymap",
 "async-trait",
@@ -1564,7 +1565,7 @@ dependencies = [
 "session",
 "snafu",
 "store-api",
 "substrait 0.5.0",
 "substrait 0.5.1",
 "table",
 "temp-env",
 "tikv-jemallocator",
@@ -1597,7 +1598,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"

[[package]]
name = "common-base"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "anymap",
 "bitvec",
@@ -1612,7 +1613,7 @@ dependencies = [

[[package]]
name = "common-catalog"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "chrono",
 "common-error",
@@ -1623,7 +1624,7 @@ dependencies = [

[[package]]
name = "common-config"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "common-base",
 "humantime-serde",
@@ -1636,7 +1637,7 @@ dependencies = [

[[package]]
name = "common-datasource"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "arrow",
 "arrow-schema",
@@ -1667,7 +1668,7 @@ dependencies = [

[[package]]
name = "common-decimal"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "arrow",
 "bigdecimal",
@@ -1681,7 +1682,7 @@ dependencies = [

[[package]]
name = "common-error"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "snafu",
 "strum 0.25.0",
@@ -1689,7 +1690,7 @@ dependencies = [

[[package]]
name = "common-function"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "arc-swap",
 "build-data",
@@ -1713,7 +1714,7 @@ dependencies = [

[[package]]
name = "common-greptimedb-telemetry"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "async-trait",
 "common-error",
@@ -1732,7 +1733,7 @@ dependencies = [

[[package]]
name = "common-grpc"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "arrow-flight",
@@ -1762,7 +1763,7 @@ dependencies = [

[[package]]
name = "common-grpc-expr"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "async-trait",
@@ -1781,7 +1782,7 @@ dependencies = [

[[package]]
name = "common-macro"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "arc-swap",
 "common-query",
@@ -1796,7 +1797,7 @@ dependencies = [

[[package]]
name = "common-mem-prof"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "common-error",
 "common-macro",
@@ -1809,7 +1810,7 @@ dependencies = [

[[package]]
name = "common-meta"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "async-recursion",
@@ -1832,6 +1833,7 @@ dependencies = [
 "derive_builder 0.12.0",
 "etcd-client",
 "futures",
 "futures-util",
 "humantime-serde",
 "hyper",
 "lazy_static",
@@ -1850,11 +1852,12 @@ dependencies = [
 "tokio",
 "toml 0.8.8",
 "tonic 0.10.2",
 "uuid",
]

[[package]]
name = "common-procedure"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "async-stream",
 "async-trait",
@@ -1878,7 +1881,7 @@ dependencies = [

[[package]]
name = "common-procedure-test"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "async-trait",
 "common-procedure",
@@ -1886,7 +1889,7 @@ dependencies = [

[[package]]
name = "common-query"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "async-trait",
@@ -1909,7 +1912,7 @@ dependencies = [

[[package]]
name = "common-recordbatch"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "common-error",
 "common-macro",
@@ -1926,7 +1929,7 @@ dependencies = [

[[package]]
name = "common-runtime"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "async-trait",
 "common-error",
@@ -1946,7 +1949,7 @@ dependencies = [

[[package]]
name = "common-telemetry"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "backtrace",
 "common-error",
@@ -1972,7 +1975,7 @@ dependencies = [

[[package]]
name = "common-test-util"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "once_cell",
 "rand",
@@ -1981,7 +1984,7 @@ dependencies = [

[[package]]
name = "common-time"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "arrow",
 "chrono",
@@ -1997,7 +2000,7 @@ dependencies = [

[[package]]
name = "common-version"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "build-data",
]
@@ -2627,7 +2630,7 @@ dependencies = [

[[package]]
name = "datanode"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "arrow-flight",
@@ -2687,7 +2690,7 @@ dependencies = [
 "snafu",
 "sql",
 "store-api",
 "substrait 0.5.0",
 "substrait 0.5.1",
 "table",
 "tokio",
 "tokio-stream",
@@ -2701,7 +2704,7 @@ dependencies = [

[[package]]
name = "datatypes"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "arrow",
 "arrow-array",
@@ -3162,7 +3165,7 @@ dependencies = [

[[package]]
name = "file-engine"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "async-trait",
@@ -3293,7 +3296,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"

[[package]]
name = "frontend"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "arc-swap",
@@ -3357,7 +3360,7 @@ dependencies = [
 "sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)",
 "store-api",
 "strfmt",
 "substrait 0.5.0",
 "substrait 0.5.1",
 "table",
 "tokio",
 "toml 0.8.8",
@@ -4011,7 +4014,7 @@ dependencies = [

[[package]]
name = "index"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "async-trait",
 "asynchronous-codec",
@@ -4029,7 +4032,7 @@ dependencies = [
 "prost 0.12.3",
 "rand",
 "regex",
 "regex-automata 0.1.10",
 "regex-automata 0.2.0",
 "snafu",
 "tokio",
 "tokio-util",
@@ -4491,12 +4494,13 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"

[[package]]
name = "log-store"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "async-stream",
 "async-trait",
 "byteorder",
 "bytes",
 "chrono",
 "common-base",
 "common-config",
 "common-error",
@@ -4505,13 +4509,14 @@ dependencies = [
 "common-runtime",
 "common-telemetry",
 "common-test-util",
 "dashmap",
 "futures",
 "futures-util",
 "itertools 0.10.5",
 "protobuf",
 "protobuf-build",
 "raft-engine",
 "rand",
 "rand_distr",
 "rskafka",
 "serde",
 "serde_json",
@@ -4519,6 +4524,7 @@ dependencies = [
 "store-api",
 "tokio",
 "tokio-util",
 "uuid",
]

[[package]]
@@ -4765,7 +4771,7 @@ dependencies = [

[[package]]
name = "meta-client"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "async-trait",
@@ -4795,7 +4801,7 @@ dependencies = [

[[package]]
name = "meta-srv"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "anymap",
 "api",
@@ -4873,7 +4879,7 @@ dependencies = [

[[package]]
name = "metric-engine"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "aquamarine",
@@ -4944,7 +4950,7 @@ dependencies = [

[[package]]
name = "mito2"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "anymap",
 "api",
@@ -4977,6 +4983,7 @@ dependencies = [
 "datatypes",
 "futures",
 "humantime-serde",
 "index",
 "lazy_static",
 "log-store",
 "memcomparable",
@@ -4985,8 +4992,10 @@ dependencies = [
 "object-store",
 "parquet",
 "paste",
 "pin-project",
 "prometheus",
 "prost 0.12.3",
 "puffin",
 "regex",
 "serde",
 "serde_json",
@@ -5442,7 +5451,7 @@ dependencies = [

[[package]]
name = "object-store"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "anyhow",
 "async-trait",
@@ -5498,9 +5507,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"

[[package]]
name = "opendal"
version = "0.40.0"
version = "0.44.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddba7299bab261d3ae2f37617fb7f45b19ed872752bb4e22cf93a69d979366c5"
checksum = "bc0ad72f7b44ca4ae59d27ea151fdc6f37305cf6efe099bdaedbb30ec34579c0"
dependencies = [
 "anyhow",
 "async-compat",
@@ -5511,15 +5520,15 @@ dependencies = [
 "chrono",
 "flagset",
 "futures",
 "getrandom",
 "http",
 "hyper",
 "log",
 "md-5",
 "once_cell",
 "parking_lot 0.12.1",
 "percent-encoding",
 "pin-project",
 "quick-xml 0.29.0",
 "quick-xml 0.30.0",
 "reqsign",
 "reqwest",
 "serde",
@@ -5687,7 +5696,7 @@ dependencies = [

[[package]]
name = "operator"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "async-trait",
@@ -5731,7 +5740,7 @@ dependencies = [
 "sql",
 "sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)",
 "store-api",
 "substrait 0.5.0",
 "substrait 0.5.1",
 "table",
 "tokio",
 "tonic 0.10.2",
@@ -5962,7 +5971,7 @@ dependencies = [

[[package]]
name = "partition"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "async-trait",
@@ -6281,7 +6290,7 @@ dependencies = [

[[package]]
name = "plugins"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "auth",
 "common-base",
@@ -6539,8 +6548,9 @@ dependencies = [

[[package]]
name = "promql"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "ahash 0.8.6",
 "async-recursion",
 "async-trait",
 "bytemuck",
@@ -6748,7 +6758,7 @@ dependencies = [

[[package]]
name = "puffin"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "async-trait",
 "bitflags 2.4.1",
@@ -6859,7 +6869,7 @@ dependencies = [

[[package]]
name = "query"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "ahash 0.8.6",
 "api",
@@ -6917,7 +6927,7 @@ dependencies = [
 "stats-cli",
 "store-api",
 "streaming-stats",
 "substrait 0.5.0",
 "substrait 0.5.1",
 "table",
 "tokio",
 "tokio-stream",
@@ -6934,9 +6944,9 @@ dependencies = [

[[package]]
name = "quick-xml"
version = "0.29.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81b9228215d82c7b61490fec1de287136b5de6f5700f6e58ea9ad61a7964ca51"
checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956"
dependencies = [
 "memchr",
 "serde",
@@ -7133,8 +7143,18 @@ name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
 "regex-syntax 0.6.29",
]

[[package]]
name = "regex-automata"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9368763f5a9b804326f3af749e16f9abf378d227bcdee7634b13d8f17793782"
dependencies = [
 "fst",
 "memchr",
 "regex-syntax 0.6.29",
]

@@ -8177,7 +8197,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"

[[package]]
name = "script"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "arc-swap",
@@ -8437,7 +8457,7 @@ dependencies = [

[[package]]
name = "servers"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "aide",
 "api",
@@ -8533,7 +8553,7 @@ dependencies = [

[[package]]
name = "session"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "arc-swap",
@@ -8794,7 +8814,7 @@ dependencies = [

[[package]]
name = "sql"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "common-base",
@@ -8846,7 +8866,7 @@ dependencies = [

[[package]]
name = "sqlness-runner"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "async-trait",
 "clap 4.4.11",
@@ -8858,6 +8878,7 @@ dependencies = [
 "common-recordbatch",
 "common-time",
 "serde",
 "serde_json",
 "sqlness",
 "tinytemplate",
 "tokio",
@@ -9052,7 +9073,7 @@ dependencies = [

[[package]]
name = "store-api"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "aquamarine",
@@ -9192,7 +9213,7 @@ dependencies = [

[[package]]
name = "substrait"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "async-recursion",
 "async-trait",
@@ -9340,7 +9361,7 @@ dependencies = [

[[package]]
name = "table"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "anymap",
 "async-trait",
@@ -9452,7 +9473,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"

[[package]]
name = "tests-integration"
version = "0.5.0"
version = "0.5.1"
dependencies = [
 "api",
 "async-trait",
@@ -9508,7 +9529,7 @@ dependencies = [
 "sql",
 "sqlx",
 "store-api",
 "substrait 0.5.0",
 "substrait 0.5.1",
 "table",
 "tempfile",
 "time",
@@ -58,7 +58,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.5.0"
version = "0.5.1"
edition = "2021"
license = "Apache-2.0"

@@ -111,7 +111,7 @@ prost = "0.12"
raft-engine = { git = "https://github.com/tikv/raft-engine.git", rev = "22dfb426cd994602b57725ef080287d3e53db479" }
rand = "0.8"
regex = "1.8"
regex-automata = { version = "0.1", features = ["transducer"] }
regex-automata = { version = "0.2", features = ["transducer"] }
reqwest = { version = "0.11", default-features = false, features = [
    "json",
    "rustls-tls-native-roots",
@@ -169,6 +169,7 @@ datanode = { path = "src/datanode" }
datatypes = { path = "src/datatypes" }
file-engine = { path = "src/file-engine" }
frontend = { path = "src/frontend" }
index = { path = "src/index" }
log-store = { path = "src/log-store" }
meta-client = { path = "src/meta-client" }
meta-srv = { path = "src/meta-srv" }
@@ -179,6 +180,7 @@ operator = { path = "src/operator" }
partition = { path = "src/partition" }
plugins = { path = "src/plugins" }
promql = { path = "src/promql" }
puffin = { path = "src/puffin" }
query = { path = "src/query" }
script = { path = "src/script" }
servers = { path = "src/servers" }
34 README.md
@@ -1,8 +1,8 @@
<p align="center">
  <picture>
    <source media="(prefers-color-scheme: light)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png">
    <source media="(prefers-color-scheme: dark)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding-dark.png">
    <img alt="GreptimeDB Logo" src="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png" width="400px">
    <source media="(prefers-color-scheme: light)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding.png">
    <source media="(prefers-color-scheme: dark)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding-dark.png">
    <img alt="GreptimeDB Logo" src="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding.png" width="400px">
  </picture>
</p>

@@ -12,11 +12,11 @@
</h3>

<p align="center">
  <a href="https://codecov.io/gh/GrepTimeTeam/greptimedb"><img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/develop/graph/badge.svg?token=FITFDI3J3C"></img></a>
  <a href="https://codecov.io/gh/GrepTimeTeam/greptimedb"><img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/main/graph/badge.svg?token=FITFDI3J3C"></img></a>

  <a href="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml"><img src="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml/badge.svg" alt="CI"></img></a>

  <a href="https://github.com/greptimeTeam/greptimedb/blob/develop/LICENSE"><img src="https://img.shields.io/github/license/greptimeTeam/greptimedb"></a>
  <a href="https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE"><img src="https://img.shields.io/github/license/greptimeTeam/greptimedb"></a>
</p>

<p align="center">
@@ -29,21 +29,17 @@

## What is GreptimeDB

GreptimeDB is an open-source time-series database with a special focus on
scalability, analytical capabilities and efficiency. It's designed to work on
infrastructure of the cloud era, and users benefit from its elasticity and commodity
storage.
GreptimeDB is an open-source time-series database focusing on efficiency, scalability, and analytical capabilities.
It's designed to work on infrastructure of the cloud era, and users benefit from its elasticity and commodity storage.

Our core developers have been building time-series data platform
for years. Based on their best-practices, GreptimeDB is born to give you:
Our core developers have been building time-series data platforms for years. Based on their best-practices, GreptimeDB is born to give you:

- A standalone binary that scales to highly-available distributed cluster, providing a transparent experience for cluster users
- Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends
- Flexible indexes, tackling high cardinality issues down
- Distributed, parallel query execution, leveraging elastic computing resource
- Native SQL, and Python scripting for advanced analytical scenarios
- Widely adopted database protocols and APIs, native PromQL supports
- Extensible table engine architecture for extensive workloads
- Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends, particularly cloud object storage with 50x cost efficiency.
- Fully open-source distributed cluster architecture that harnesses the power of cloud-native elastic computing resources.
- Seamless scalability from a standalone binary at edge to a robust, highly available distributed cluster in cloud, with a transparent experience for both developers and administrators.
- Native SQL and PromQL for queries, and Python scripting to facilitate complex analytical tasks.
- Flexible indexing capabilities and distributed, parallel-processing query engine, tackling high cardinality issues down.
- Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc.

## Quick Start

@@ -168,7 +164,7 @@ In addition, you may:
GreptimeDB uses the [Apache 2.0 license][1] to strike a balance between
open contributions and allowing you to use the software however you want.

[1]: <https://github.com/greptimeTeam/greptimedb/blob/develop/LICENSE>
[1]: <https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE>

## Contributing
@@ -51,9 +51,10 @@ sync_write = false

# Kafka wal options, see `standalone.example.toml`.
# broker_endpoints = ["127.0.0.1:9092"]
# max_batch_size = "4MB"
# Warning: Kafka has a default limit of 1MB per message in a topic.
# max_batch_size = "1MB"
# linger = "200ms"
# produce_record_timeout = "100ms"
# consumer_wait_timeout = "100ms"
# backoff_init = "500ms"
# backoff_max = "10s"
# backoff_base = 2
@@ -129,11 +130,10 @@ parallel_scan_channel_size = 32
# [export_metrics]
# whether enable export metrics, default is false
# enable = false
# The url of metrics export endpoint, default is `frontend` default HTTP endpoint.
# endpoint = "127.0.0.1:4000"
# The database name of exported metrics stores, user needs to specify a valid database
# db = ""
# The interval of export metrics
# write_interval = "30s"
# [export_metrics.remote_write]
# The url the metrics send to. The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
# url = ""
# HTTP headers of Prometheus remote-write carry
# headers = {}

@@ -87,11 +87,8 @@ tcp_nodelay = true
# [export_metrics]
# whether enable export metrics, default is false
# enable = false
# The url of metrics export endpoint, default is `frontend` default HTTP endpoint.
# endpoint = "127.0.0.1:4000"
# The database name of exported metrics stores, user needs to specify a valid database
# db = ""
# The interval of export metrics
# write_interval = "30s"
# HTTP headers of Prometheus remote-write carry
# headers = {}
# for `frontend`, `self_import` is recommend to collect metrics generated by itself
# [export_metrics.self_import]
# db = "information_schema"

@@ -86,11 +86,10 @@ provider = "raft_engine"
# [export_metrics]
# whether enable export metrics, default is false
# enable = false
# The url of metrics export endpoint, default is `frontend` default HTTP endpoint.
# endpoint = "127.0.0.1:4000"
# The database name of exported metrics stores, user needs to specify a valid database
# db = ""
# The interval of export metrics
# write_interval = "30s"
# [export_metrics.remote_write]
# The url the metrics send to. The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
# url = ""
# HTTP headers of Prometheus remote-write carry
# headers = {}

@@ -100,29 +100,30 @@ provider = "raft_engine"
# Available selector types:
# - "round_robin" (default)
# selector_type = "round_robin"
# A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
# The prefix of topic name.
# topic_name_prefix = "greptimedb_wal_topic"
# Number of partitions per topic.
# num_partitions = 1
# Expected number of replicas of each partition.
# The number of replicas of each partition.
# replication_factor = 1

# The maximum log size a kafka batch producer could buffer.
# max_batch_size = "4MB"
# The linger duration of a kafka batch producer.
# The max size of a single producer batch.
# Warning: Kafka has a default limit of 1MB per message in a topic.
# max_batch_size = "1MB"
# The linger duration.
# linger = "200ms"
# The maximum amount of time (in milliseconds) to wait for Kafka records to be returned.
# produce_record_timeout = "100ms"
# Above which a topic creation operation will be cancelled.
# The consumer wait timeout.
# consumer_wait_timeout = "100ms"
# Create topic timeout.
# create_topic_timeout = "30s"

# The initial backoff for kafka clients.
# The initial backoff delay.
# backoff_init = "500ms"
# The maximum backoff for kafka clients.
# The maximum backoff delay.
# backoff_max = "10s"
# Exponential backoff rate, i.e. next backoff = base * current backoff.
# backoff_base = 2
# Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
# The deadline of retries.
# backoff_deadline = "5mins"

# WAL data directory
@@ -230,11 +231,8 @@ parallel_scan_channel_size = 32
# [export_metrics]
# whether enable export metrics, default is false
# enable = false
# The url of metrics export endpoint, default is `frontend` default HTTP endpoint.
# endpoint = "127.0.0.1:4000"
# The database name of exported metrics stores, user needs to specify a valid database
# db = ""
# The interval of export metrics
# write_interval = "30s"
# HTTP headers of Prometheus remote-write carry
# headers = {}
# for `standalone`, `self_import` is recommend to collect metrics generated by itself
# [export_metrics.self_import]
# db = "information_schema"
@@ -11,6 +11,7 @@ testing = []
api.workspace = true
arc-swap = "1.0"
arrow-schema.workspace = true
arrow.workspace = true
async-stream.workspace = true
async-trait = "0.1"
build-data = "0.1"
@@ -13,7 +13,10 @@
// limitations under the License.

mod columns;
mod key_column_usage;
mod memory_table;
mod predicate;
mod schemata;
mod table_names;
mod tables;

@@ -27,6 +30,7 @@ use datatypes::schema::SchemaRef;
use futures_util::StreamExt;
use lazy_static::lazy_static;
use paste::paste;
pub(crate) use predicate::Predicates;
use snafu::ResultExt;
use store_api::data_source::DataSource;
use store_api::storage::{ScanRequest, TableId};
@@ -40,7 +44,9 @@ pub use table_names::*;

use self::columns::InformationSchemaColumns;
use crate::error::Result;
use crate::information_schema::key_column_usage::InformationSchemaKeyColumnUsage;
use crate::information_schema::memory_table::{get_schema_columns, MemoryTable};
use crate::information_schema::schemata::InformationSchemaSchemata;
use crate::information_schema::tables::InformationSchemaTables;
use crate::CatalogManager;

@@ -51,6 +57,22 @@ lazy_static! {
        COLUMN_PRIVILEGES,
        COLUMN_STATISTICS,
        BUILD_INFO,
        CHARACTER_SETS,
        COLLATIONS,
        COLLATION_CHARACTER_SET_APPLICABILITY,
        CHECK_CONSTRAINTS,
        EVENTS,
        FILES,
        OPTIMIZER_TRACE,
        PARAMETERS,
        PROFILING,
        REFERENTIAL_CONSTRAINTS,
        ROUTINES,
        SCHEMA_PRIVILEGES,
        TABLE_PRIVILEGES,
        TRIGGERS,
        GLOBAL_STATUS,
        SESSION_STATUS,
    ];
}

@@ -121,11 +143,16 @@ impl InformationSchemaProvider {
    fn build_tables(&mut self) {
        let mut tables = HashMap::new();
        tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap());
        tables.insert(SCHEMATA.to_string(), self.build_table(SCHEMATA).unwrap());
        tables.insert(COLUMNS.to_string(), self.build_table(COLUMNS).unwrap());
        tables.insert(
            KEY_COLUMN_USAGE.to_string(),
            self.build_table(KEY_COLUMN_USAGE).unwrap(),
        );

        // Add memory tables
        for name in MEMORY_TABLES.iter() {
            tables.insert((*name).to_string(), self.build_table(name).unwrap());
            tables.insert((*name).to_string(), self.build_table(name).expect(name));
        }

        self.tables = tables;
@@ -134,7 +161,7 @@ impl InformationSchemaProvider {
    fn build_table(&self, name: &str) -> Option<TableRef> {
        self.information_table(name).map(|table| {
            let table_info = Self::table_info(self.catalog_name.clone(), &table);
            let filter_pushdown = FilterPushDownType::Unsupported;
            let filter_pushdown = FilterPushDownType::Inexact;
            let thin_table = ThinTable::new(table_info, filter_pushdown);

            let data_source = Arc::new(InformationTableDataSource::new(table));
@@ -156,6 +183,32 @@ impl InformationSchemaProvider {
            COLUMN_PRIVILEGES => setup_memory_table!(COLUMN_PRIVILEGES),
            COLUMN_STATISTICS => setup_memory_table!(COLUMN_STATISTICS),
            BUILD_INFO => setup_memory_table!(BUILD_INFO),
            CHARACTER_SETS => setup_memory_table!(CHARACTER_SETS),
            COLLATIONS => setup_memory_table!(COLLATIONS),
            COLLATION_CHARACTER_SET_APPLICABILITY => {
                setup_memory_table!(COLLATION_CHARACTER_SET_APPLICABILITY)
            }
            CHECK_CONSTRAINTS => setup_memory_table!(CHECK_CONSTRAINTS),
            EVENTS => setup_memory_table!(EVENTS),
            FILES => setup_memory_table!(FILES),
            OPTIMIZER_TRACE => setup_memory_table!(OPTIMIZER_TRACE),
            PARAMETERS => setup_memory_table!(PARAMETERS),
            PROFILING => setup_memory_table!(PROFILING),
            REFERENTIAL_CONSTRAINTS => setup_memory_table!(REFERENTIAL_CONSTRAINTS),
            ROUTINES => setup_memory_table!(ROUTINES),
            SCHEMA_PRIVILEGES => setup_memory_table!(SCHEMA_PRIVILEGES),
            TABLE_PRIVILEGES => setup_memory_table!(TABLE_PRIVILEGES),
            TRIGGERS => setup_memory_table!(TRIGGERS),
            GLOBAL_STATUS => setup_memory_table!(GLOBAL_STATUS),
            SESSION_STATUS => setup_memory_table!(SESSION_STATUS),
            KEY_COLUMN_USAGE => Some(Arc::new(InformationSchemaKeyColumnUsage::new(
                self.catalog_name.clone(),
                self.catalog_manager.clone(),
            )) as _),
            SCHEMATA => Some(Arc::new(InformationSchemaSchemata::new(
                self.catalog_name.clone(),
                self.catalog_manager.clone(),
            )) as _),
            _ => None,
        }
    }
@@ -187,7 +240,7 @@ trait InformationTable {

    fn schema(&self) -> SchemaRef;

    fn to_stream(&self) -> Result<SendableRecordBatchStream>;
    fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream>;

    fn table_type(&self) -> TableType {
        TableType::Temporary
@@ -221,7 +274,7 @@ impl DataSource for InformationTableDataSource {
        &self,
        request: ScanRequest,
    ) -> std::result::Result<SendableRecordBatchStream, BoxedError> {
        let projection = request.projection;
        let projection = request.projection.clone();
        let projected_schema = match &projection {
            Some(projection) => self.try_project(projection)?,
            None => self.table.schema(),
@@ -229,7 +282,7 @@ impl DataSource for InformationTableDataSource {

        let stream = self
            .table
            .to_stream()
            .to_stream(request)
            .map_err(BoxedError::new)
            .context(TablesRecordBatchSnafu)
            .map_err(BoxedError::new)?
@@ -29,14 +29,16 @@ use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatc
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{StringVectorBuilder, VectorRef};
use snafu::{OptionExt, ResultExt};
use store_api::storage::TableId;
use store_api::storage::{ScanRequest, TableId};

use super::{InformationTable, COLUMNS};
use crate::error::{
    CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::Predicates;
use crate::CatalogManager;

pub(super) struct InformationSchemaColumns {
@@ -51,6 +53,10 @@ const TABLE_NAME: &str = "table_name";
const COLUMN_NAME: &str = "column_name";
const DATA_TYPE: &str = "data_type";
const SEMANTIC_TYPE: &str = "semantic_type";
const COLUMN_DEFAULT: &str = "column_default";
const IS_NULLABLE: &str = "is_nullable";
const COLUMN_TYPE: &str = "column_type";
const COLUMN_COMMENT: &str = "column_comment";

impl InformationSchemaColumns {
    pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
@@ -69,6 +75,10 @@ impl InformationSchemaColumns {
            ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(DATA_TYPE, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(SEMANTIC_TYPE, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(COLUMN_DEFAULT, ConcreteDataType::string_datatype(), true),
            ColumnSchema::new(IS_NULLABLE, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(COLUMN_TYPE, ConcreteDataType::string_datatype(), false),
            ColumnSchema::new(COLUMN_COMMENT, ConcreteDataType::string_datatype(), true),
        ]))
    }

@@ -94,14 +104,14 @@ impl InformationTable for InformationSchemaColumns {
        self.schema.clone()
    }

    fn to_stream(&self) -> Result<SendableRecordBatchStream> {
    fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
        let schema = self.schema.arrow_schema().clone();
        let mut builder = self.builder();
        let stream = Box::pin(DfRecordBatchStreamAdapter::new(
            schema,
            futures::stream::once(async move {
                builder
                    .make_columns()
                    .make_columns(Some(request))
                    .await
                    .map(|x| x.into_df_record_batch())
                    .map_err(Into::into)
@@ -126,6 +136,11 @@ struct InformationSchemaColumnsBuilder {
    column_names: StringVectorBuilder,
    data_types: StringVectorBuilder,
    semantic_types: StringVectorBuilder,

    column_defaults: StringVectorBuilder,
    is_nullables: StringVectorBuilder,
    column_types: StringVectorBuilder,
    column_comments: StringVectorBuilder,
}

impl InformationSchemaColumnsBuilder {
@@ -144,16 +159,21 @@ impl InformationSchemaColumnsBuilder {
            column_names: StringVectorBuilder::with_capacity(42),
            data_types: StringVectorBuilder::with_capacity(42),
            semantic_types: StringVectorBuilder::with_capacity(42),
            column_defaults: StringVectorBuilder::with_capacity(42),
            is_nullables: StringVectorBuilder::with_capacity(42),
            column_types: StringVectorBuilder::with_capacity(42),
            column_comments: StringVectorBuilder::with_capacity(42),
        }
    }

    /// Construct the `information_schema.columns` virtual table
    async fn make_columns(&mut self) -> Result<RecordBatch> {
    async fn make_columns(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
        let catalog_name = self.catalog_name.clone();
        let catalog_manager = self
            .catalog_manager
            .upgrade()
            .context(UpgradeWeakCatalogManagerRefSnafu)?;
        let predicates = Predicates::from_scan_request(&request);

        for schema_name in catalog_manager.schema_names(&catalog_name).await? {
            if !catalog_manager
@@ -184,12 +204,12 @@ impl InformationSchemaColumnsBuilder {
                        };

                        self.add_column(
                            &predicates,
                            &catalog_name,
                            &schema_name,
                            &table_name,
                            &column.name,
                            &column.data_type.name(),
                            semantic_type,
                            column,
                        );
                    }
                } else {
@@ -203,19 +223,48 @@ impl InformationSchemaColumnsBuilder {

    fn add_column(
        &mut self,
        predicates: &Predicates,
        catalog_name: &str,
        schema_name: &str,
        table_name: &str,
        column_name: &str,
        data_type: &str,
        semantic_type: &str,
        column_schema: &ColumnSchema,
    ) {
        let data_type = &column_schema.data_type.name();

        let row = [
            (TABLE_CATALOG, &Value::from(catalog_name)),
            (TABLE_SCHEMA, &Value::from(schema_name)),
            (TABLE_NAME, &Value::from(table_name)),
            (COLUMN_NAME, &Value::from(column_schema.name.as_str())),
            (DATA_TYPE, &Value::from(data_type.as_str())),
            (SEMANTIC_TYPE, &Value::from(semantic_type)),
        ];

        if !predicates.eval(&row) {
            return;
        }

        self.catalog_names.push(Some(catalog_name));
        self.schema_names.push(Some(schema_name));
        self.table_names.push(Some(table_name));
        self.column_names.push(Some(column_name));
        self.column_names.push(Some(&column_schema.name));
        self.data_types.push(Some(data_type));
        self.semantic_types.push(Some(semantic_type));
        self.column_defaults.push(
            column_schema
                .default_constraint()
                .map(|s| format!("{}", s))
                .as_deref(),
        );
        if column_schema.is_nullable() {
            self.is_nullables.push(Some("Yes"));
        } else {
            self.is_nullables.push(Some("No"));
        }
        self.column_types.push(Some(data_type));
        self.column_comments
            .push(column_schema.column_comment().map(|x| x.as_ref()));
    }

    fn finish(&mut self) -> Result<RecordBatch> {
@@ -226,6 +275,10 @@ impl InformationSchemaColumnsBuilder {
            Arc::new(self.column_names.finish()),
            Arc::new(self.data_types.finish()),
            Arc::new(self.semantic_types.finish()),
            Arc::new(self.column_defaults.finish()),
            Arc::new(self.is_nullables.finish()),
            Arc::new(self.column_types.finish()),
            Arc::new(self.column_comments.finish()),
        ];

        RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
@@ -244,7 +297,7 @@ impl DfPartitionStream for InformationSchemaColumns {
            schema,
            futures::stream::once(async move {
                builder
                    .make_columns()
                    .make_columns(None)
                    .await
                    .map(|x| x.into_df_record_batch())
                    .map_err(Into::into)
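The columns.rs hunks above introduce the scan-time filtering path: `to_stream` now receives the `ScanRequest`, the builder turns it into `Predicates` once, and `add_column` evaluates each candidate row before pushing values into the vector builders. The sketch below is a self-contained, simplified stand-in for that row-skipping pattern; this `Predicates` is a toy equality filter written only for illustration, not the crate's real API.

```rust
// Toy stand-in for the predicate-pushdown pattern used above: build the
// filters once from the scan request, then check every row before it is
// appended to the output builders.
struct Predicates {
    // (column name, required value) pairs; the real type is derived from
    // the `ScanRequest` filter expressions.
    equals: Vec<(String, String)>,
}

impl Predicates {
    fn from_filters(filters: &[(&str, &str)]) -> Self {
        Self {
            equals: filters
                .iter()
                .map(|(c, v)| (c.to_string(), v.to_string()))
                .collect(),
        }
    }

    /// Returns false only when a predicate on a column present in `row` is
    /// violated, mirroring the early `return` in `add_column`.
    fn eval(&self, row: &[(&str, &str)]) -> bool {
        self.equals.iter().all(|(col, want)| {
            row.iter()
                .find(|(c, _)| *c == col.as_str())
                .map_or(true, |(_, got)| *got == want.as_str())
        })
    }
}

fn main() {
    // e.g. SELECT ... FROM information_schema.columns WHERE table_schema = 'public'
    let predicates = Predicates::from_filters(&[("table_schema", "public")]);
    let rows = [
        [("table_schema", "public"), ("table_name", "metrics")],
        [("table_schema", "information_schema"), ("table_name", "columns")],
    ];
    for row in rows {
        if predicates.eval(&row) {
            println!("keep: {row:?}"); // only the `public` row is kept
        }
    }
}
```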
347
src/catalog/src/information_schema/key_column_usage.rs
Normal file
347
src/catalog/src/information_schema/key_column_usage.rs
Normal file
@@ -0,0 +1,347 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::KEY_COLUMN_USAGE;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::{InformationTable, Predicates};
|
||||
use crate::CatalogManager;
|
||||
|
||||
const CONSTRAINT_SCHEMA: &str = "constraint_schema";
|
||||
const CONSTRAINT_NAME: &str = "constraint_name";
|
||||
const TABLE_CATALOG: &str = "table_catalog";
|
||||
const TABLE_SCHEMA: &str = "table_schema";
|
||||
const TABLE_NAME: &str = "table_name";
|
||||
const COLUMN_NAME: &str = "column_name";
|
||||
const ORDINAL_POSITION: &str = "ordinal_position";
|
||||
|
||||
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
|
||||
pub(super) struct InformationSchemaKeyColumnUsage {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
}
|
||||
|
||||
impl InformationSchemaKeyColumnUsage {
|
||||
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(
|
||||
"constraint_catalog",
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
CONSTRAINT_SCHEMA,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(CONSTRAINT_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(ORDINAL_POSITION, ConcreteDataType::uint32_datatype(), false),
|
||||
ColumnSchema::new(
|
||||
"position_in_unique_constraint",
|
||||
ConcreteDataType::uint32_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"referenced_table_schema",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"referenced_table_name",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"referenced_column_name",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
]))
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaKeyColumnUsageBuilder {
|
||||
InformationSchemaKeyColumnUsageBuilder::new(
|
||||
self.schema.clone(),
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for InformationSchemaKeyColumnUsage {
|
||||
fn table_id(&self) -> TableId {
|
||||
INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
KEY_COLUMN_USAGE
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_key_column_usage(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the `information_schema.KEY_COLUMN_USAGE` table row by row
|
||||
///
|
||||
/// Columns are based on <https://dev.mysql.com/doc/refman/8.2/en/information-schema-key-column-usage-table.html>
|
||||
struct InformationSchemaKeyColumnUsageBuilder {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
|
||||
constraint_catalog: StringVectorBuilder,
|
||||
constraint_schema: StringVectorBuilder,
|
||||
constraint_name: StringVectorBuilder,
|
||||
table_catalog: StringVectorBuilder,
|
||||
table_schema: StringVectorBuilder,
|
||||
table_name: StringVectorBuilder,
|
||||
column_name: StringVectorBuilder,
|
||||
ordinal_position: UInt32VectorBuilder,
|
||||
position_in_unique_constraint: UInt32VectorBuilder,
|
||||
referenced_table_schema: StringVectorBuilder,
|
||||
referenced_table_name: StringVectorBuilder,
|
||||
referenced_column_name: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaKeyColumnUsageBuilder {
|
||||
fn new(
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
constraint_catalog: StringVectorBuilder::with_capacity(42),
|
||||
constraint_schema: StringVectorBuilder::with_capacity(42),
|
||||
constraint_name: StringVectorBuilder::with_capacity(42),
|
||||
table_catalog: StringVectorBuilder::with_capacity(42),
|
||||
table_schema: StringVectorBuilder::with_capacity(42),
|
||||
table_name: StringVectorBuilder::with_capacity(42),
|
||||
column_name: StringVectorBuilder::with_capacity(42),
|
||||
ordinal_position: UInt32VectorBuilder::with_capacity(42),
|
||||
position_in_unique_constraint: UInt32VectorBuilder::with_capacity(42),
|
||||
referenced_table_schema: StringVectorBuilder::with_capacity(42),
|
||||
referenced_table_name: StringVectorBuilder::with_capacity(42),
|
||||
referenced_column_name: StringVectorBuilder::with_capacity(42),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.KEY_COLUMN_USAGE` virtual table
|
||||
async fn make_key_column_usage(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
|
||||
let catalog_name = self.catalog_name.clone();
|
||||
let catalog_manager = self
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
let mut primary_constraints = vec![];
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
if !catalog_manager
|
||||
.schema_exists(&catalog_name, &schema_name)
|
||||
.await?
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for table_name in catalog_manager
|
||||
.table_names(&catalog_name, &schema_name)
|
||||
.await?
|
||||
{
|
||||
if let Some(table) = catalog_manager
|
||||
.table(&catalog_name, &schema_name, &table_name)
|
||||
.await?
|
||||
{
|
||||
let keys = &table.table_info().meta.primary_key_indices;
|
||||
let schema = table.schema();
|
||||
|
||||
for (idx, column) in schema.column_schemas().iter().enumerate() {
|
||||
if column.is_time_index() {
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
"TIME INDEX",
|
||||
&schema_name,
|
||||
&table_name,
|
||||
&column.name,
|
||||
1, // always 1 for time index
|
||||
);
|
||||
}
|
||||
if keys.contains(&idx) {
|
||||
primary_constraints.push((
|
||||
schema_name.clone(),
|
||||
table_name.clone(),
|
||||
column.name.clone(),
|
||||
));
|
||||
}
|
||||
// TODO(dimbtp): foreign key constraint not supported yet
|
||||
}
|
||||
} else {
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i, (schema_name, table_name, column_name)) in
|
||||
primary_constraints.into_iter().enumerate()
|
||||
{
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
"PRIMARY",
|
||||
&schema_name,
|
||||
&table_name,
|
||||
&column_name,
|
||||
i as u32 + 1,
|
||||
);
|
||||
}
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
// TODO(dimbtp): For foreign key constraints the last 4 fields would not be
// `None`, but foreign keys are not supported yet.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn add_key_column_usage(
|
||||
&mut self,
|
||||
predicates: &Predicates,
|
||||
constraint_schema: &str,
|
||||
constraint_name: &str,
|
||||
table_schema: &str,
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
ordinal_position: u32,
|
||||
) {
|
||||
let row = [
|
||||
(CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
|
||||
(CONSTRAINT_NAME, &Value::from(constraint_name)),
|
||||
(TABLE_SCHEMA, &Value::from(table_schema)),
|
||||
(TABLE_NAME, &Value::from(table_name)),
|
||||
(COLUMN_NAME, &Value::from(column_name)),
|
||||
(ORDINAL_POSITION, &Value::from(ordinal_position)),
|
||||
];
|
||||
|
||||
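// Apply the pushed-down filters against the partially materialized row; rows
// that definitely fail a predicate are skipped before any column is pushed.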
if !predicates.eval(&row) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.constraint_catalog.push(Some("def"));
|
||||
self.constraint_schema.push(Some(constraint_schema));
|
||||
self.constraint_name.push(Some(constraint_name));
|
||||
self.table_catalog.push(Some("def"));
|
||||
self.table_schema.push(Some(table_schema));
|
||||
self.table_name.push(Some(table_name));
|
||||
self.column_name.push(Some(column_name));
|
||||
self.ordinal_position.push(Some(ordinal_position));
|
||||
self.position_in_unique_constraint.push(None);
|
||||
self.referenced_table_schema.push(None);
|
||||
self.referenced_table_name.push(None);
|
||||
self.referenced_column_name.push(None);
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
Arc::new(self.constraint_catalog.finish()),
|
||||
Arc::new(self.constraint_schema.finish()),
|
||||
Arc::new(self.constraint_name.finish()),
|
||||
Arc::new(self.table_catalog.finish()),
|
||||
Arc::new(self.table_schema.finish()),
|
||||
Arc::new(self.table_name.finish()),
|
||||
Arc::new(self.column_name.finish()),
|
||||
Arc::new(self.ordinal_position.finish()),
|
||||
Arc::new(self.position_in_unique_constraint.finish()),
|
||||
Arc::new(self.referenced_table_schema.finish()),
|
||||
Arc::new(self.referenced_table_name.finish()),
|
||||
Arc::new(self.referenced_column_name.finish()),
|
||||
];
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl DfPartitionStream for InformationSchemaKeyColumnUsage {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_key_column_usage(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
@@ -26,7 +26,7 @@ use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatc
|
||||
use datatypes::schema::SchemaRef;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::TableId;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
pub use tables::get_schema_columns;
|
||||
|
||||
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
|
||||
@@ -74,7 +74,7 @@ impl InformationTable for MemoryTable {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self) -> Result<SendableRecordBatchStream> {
|
||||
fn to_stream(&self, _request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
@@ -169,7 +169,7 @@ mod tests {
|
||||
assert_eq!("test", table.table_name());
|
||||
assert_eq!(schema, InformationTable::schema(&table));
|
||||
|
||||
let stream = table.to_stream().unwrap();
|
||||
let stream = table.to_stream(ScanRequest::default()).unwrap();
|
||||
|
||||
let batches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
|
||||
@@ -198,7 +198,7 @@ mod tests {
|
||||
assert_eq!("test", table.table_name());
|
||||
assert_eq!(schema, InformationTable::schema(&table));
|
||||
|
||||
let stream = table.to_stream().unwrap();
|
||||
let stream = table.to_stream(ScanRequest::default()).unwrap();
|
||||
|
||||
let batches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::sync::Arc;
|
||||
use common_catalog::consts::MITO_ENGINE;
|
||||
use datatypes::prelude::{ConcreteDataType, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::vectors::StringVector;
|
||||
use datatypes::vectors::{Int64Vector, StringVector};
|
||||
|
||||
use crate::information_schema::table_names::*;
|
||||
|
||||
@@ -97,6 +97,320 @@ pub fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>) {
|
||||
],
|
||||
),
|
||||
|
||||
CHARACTER_SETS => (
|
||||
vec![
|
||||
string_column("CHARACTER_SET_NAME"),
|
||||
string_column("DEFAULT_COLLATE_NAME"),
|
||||
string_column("DESCRIPTION"),
|
||||
bigint_column("MAXLEN"),
|
||||
],
|
||||
vec![
|
||||
Arc::new(StringVector::from(vec!["utf8"])),
|
||||
Arc::new(StringVector::from(vec!["utf8_bin"])),
|
||||
Arc::new(StringVector::from(vec!["UTF-8 Unicode"])),
|
||||
Arc::new(Int64Vector::from_slice([4])),
|
||||
],
|
||||
),
|
||||
|
||||
COLLATIONS => (
|
||||
vec![
|
||||
string_column("COLLATION_NAME"),
|
||||
string_column("CHARACTER_SET_NAME"),
|
||||
bigint_column("ID"),
|
||||
string_column("IS_DEFAULT"),
|
||||
string_column("IS_COMPILED"),
|
||||
bigint_column("SORTLEN"),
|
||||
],
|
||||
vec![
|
||||
Arc::new(StringVector::from(vec!["utf8_bin"])),
|
||||
Arc::new(StringVector::from(vec!["utf8"])),
|
||||
Arc::new(Int64Vector::from_slice([1])),
|
||||
Arc::new(StringVector::from(vec!["Yes"])),
|
||||
Arc::new(StringVector::from(vec!["Yes"])),
|
||||
Arc::new(Int64Vector::from_slice([1])),
|
||||
],
|
||||
),
|
||||
|
||||
COLLATION_CHARACTER_SET_APPLICABILITY => (
|
||||
vec![
|
||||
string_column("COLLATION_NAME"),
|
||||
string_column("CHARACTER_SET_NAME"),
|
||||
],
|
||||
vec![
|
||||
Arc::new(StringVector::from(vec!["utf8_bin"])),
|
||||
Arc::new(StringVector::from(vec!["utf8"])),
|
||||
],
|
||||
),
|
||||
|
||||
CHECK_CONSTRAINTS => (
|
||||
string_columns(&[
|
||||
"CONSTRAINT_CATALOG",
|
||||
"CONSTRAINT_SCHEMA",
|
||||
"CONSTRAINT_NAME",
|
||||
"CHECK_CLAUSE",
|
||||
]),
|
||||
// Check constraints are not supported yet
|
||||
vec![],
|
||||
),
|
||||
|
||||
EVENTS => (
|
||||
vec![
|
||||
string_column("EVENT_CATALOG"),
|
||||
string_column("EVENT_SCHEMA"),
|
||||
string_column("EVENT_NAME"),
|
||||
string_column("DEFINER"),
|
||||
string_column("TIME_ZONE"),
|
||||
string_column("EVENT_BODY"),
|
||||
string_column("EVENT_DEFINITION"),
|
||||
string_column("EVENT_TYPE"),
|
||||
datetime_column("EXECUTE_AT"),
|
||||
bigint_column("INTERVAL_VALUE"),
|
||||
string_column("INTERVAL_FIELD"),
|
||||
string_column("SQL_MODE"),
|
||||
datetime_column("STARTS"),
|
||||
datetime_column("ENDS"),
|
||||
string_column("STATUS"),
|
||||
string_column("ON_COMPLETION"),
|
||||
datetime_column("CREATED"),
|
||||
datetime_column("LAST_ALTERED"),
|
||||
datetime_column("LAST_EXECUTED"),
|
||||
string_column("EVENT_COMMENT"),
|
||||
bigint_column("ORIGINATOR"),
|
||||
string_column("CHARACTER_SET_CLIENT"),
|
||||
string_column("COLLATION_CONNECTION"),
|
||||
string_column("DATABASE_COLLATION"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
FILES => (
|
||||
vec![
|
||||
bigint_column("FILE_ID"),
|
||||
string_column("FILE_NAME"),
|
||||
string_column("FILE_TYPE"),
|
||||
string_column("TABLESPACE_NAME"),
|
||||
string_column("TABLE_CATALOG"),
|
||||
string_column("TABLE_SCHEMA"),
|
||||
string_column("TABLE_NAME"),
|
||||
string_column("LOGFILE_GROUP_NAME"),
|
||||
bigint_column("LOGFILE_GROUP_NUMBER"),
|
||||
string_column("ENGINE"),
|
||||
string_column("FULLTEXT_KEYS"),
|
||||
bigint_column("DELETED_ROWS"),
|
||||
bigint_column("UPDATE_COUNT"),
|
||||
bigint_column("FREE_EXTENTS"),
|
||||
bigint_column("TOTAL_EXTENTS"),
|
||||
bigint_column("EXTENT_SIZE"),
|
||||
bigint_column("INITIAL_SIZE"),
|
||||
bigint_column("MAXIMUM_SIZE"),
|
||||
bigint_column("AUTOEXTEND_SIZE"),
|
||||
datetime_column("CREATION_TIME"),
|
||||
datetime_column("LAST_UPDATE_TIME"),
|
||||
datetime_column("LAST_ACCESS_TIME"),
|
||||
datetime_column("RECOVER_TIME"),
|
||||
bigint_column("TRANSACTION_COUNTER"),
|
||||
string_column("VERSION"),
|
||||
string_column("ROW_FORMAT"),
|
||||
bigint_column("TABLE_ROWS"),
|
||||
bigint_column("AVG_ROW_LENGTH"),
|
||||
bigint_column("DATA_LENGTH"),
|
||||
bigint_column("MAX_DATA_LENGTH"),
|
||||
bigint_column("INDEX_LENGTH"),
|
||||
bigint_column("DATA_FREE"),
|
||||
datetime_column("CREATE_TIME"),
|
||||
datetime_column("UPDATE_TIME"),
|
||||
datetime_column("CHECK_TIME"),
|
||||
string_column("CHECKSUM"),
|
||||
string_column("STATUS"),
|
||||
string_column("EXTRA"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
OPTIMIZER_TRACE => (
|
||||
vec![
|
||||
string_column("QUERY"),
|
||||
string_column("TRACE"),
|
||||
bigint_column("MISSING_BYTES_BEYOND_MAX_MEM_SIZE"),
|
||||
bigint_column("INSUFFICIENT_PRIVILEGES"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
// The MySQL spec (https://dev.mysql.com/doc/refman/8.2/en/information-schema-parameters-table.html)
// differs from the PostgreSQL one
// (https://www.postgresql.org/docs/current/infoschema-parameters.html).
// We follow the MySQL spec here.
PARAMETERS => (
|
||||
vec![
|
||||
string_column("SPECIFIC_CATALOG"),
|
||||
string_column("SPECIFIC_SCHEMA"),
|
||||
string_column("SPECIFIC_NAME"),
|
||||
bigint_column("ORDINAL_POSITION"),
|
||||
string_column("PARAMETER_MODE"),
|
||||
string_column("PARAMETER_NAME"),
|
||||
string_column("DATA_TYPE"),
|
||||
bigint_column("CHARACTER_MAXIMUM_LENGTH"),
|
||||
bigint_column("CHARACTER_OCTET_LENGTH"),
|
||||
bigint_column("NUMERIC_PRECISION"),
|
||||
bigint_column("NUMERIC_SCALE"),
|
||||
bigint_column("DATETIME_PRECISION"),
|
||||
string_column("CHARACTER_SET_NAME"),
|
||||
string_column("COLLATION_NAME"),
|
||||
string_column("DTD_IDENTIFIER"),
|
||||
string_column("ROUTINE_TYPE"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
PROFILING => (
|
||||
vec![
|
||||
bigint_column("QUERY_ID"),
|
||||
bigint_column("SEQ"),
|
||||
string_column("STATE"),
|
||||
bigint_column("DURATION"),
|
||||
bigint_column("CPU_USER"),
|
||||
bigint_column("CPU_SYSTEM"),
|
||||
bigint_column("CONTEXT_VOLUNTARY"),
|
||||
bigint_column("CONTEXT_INVOLUNTARY"),
|
||||
bigint_column("BLOCK_OPS_IN"),
|
||||
bigint_column("BLOCK_OPS_OUT"),
|
||||
bigint_column("MESSAGES_SENT"),
|
||||
bigint_column("MESSAGES_RECEIVED"),
|
||||
bigint_column("PAGE_FAULTS_MAJOR"),
|
||||
bigint_column("PAGE_FAULTS_MINOR"),
|
||||
bigint_column("SWAPS"),
|
||||
string_column("SOURCE_FUNCTION"),
|
||||
string_column("SOURCE_FILE"),
|
||||
bigint_column("SOURCE_LINE"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
// TODO: _Must_ reimplement this table when foreign key constraints are supported.
|
||||
REFERENTIAL_CONSTRAINTS => (
|
||||
vec![
|
||||
string_column("CONSTRAINT_CATALOG"),
|
||||
string_column("CONSTRAINT_SCHEMA"),
|
||||
string_column("CONSTRAINT_NAME"),
|
||||
string_column("UNIQUE_CONSTRAINT_CATALOG"),
|
||||
string_column("UNIQUE_CONSTRAINT_SCHEMA"),
|
||||
string_column("UNIQUE_CONSTRAINT_NAME"),
|
||||
string_column("MATCH_OPTION"),
|
||||
string_column("UPDATE_RULE"),
|
||||
string_column("DELETE_RULE"),
|
||||
string_column("TABLE_NAME"),
|
||||
string_column("REFERENCED_TABLE_NAME"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
ROUTINES => (
|
||||
vec![
|
||||
string_column("SPECIFIC_NAME"),
|
||||
string_column("ROUTINE_CATALOG"),
|
||||
string_column("ROUTINE_SCHEMA"),
|
||||
string_column("ROUTINE_NAME"),
|
||||
string_column("ROUTINE_TYPE"),
|
||||
string_column("DATA_TYPE"),
|
||||
bigint_column("CHARACTER_MAXIMUM_LENGTH"),
|
||||
bigint_column("CHARACTER_OCTET_LENGTH"),
|
||||
bigint_column("NUMERIC_PRECISION"),
|
||||
bigint_column("NUMERIC_SCALE"),
|
||||
bigint_column("DATETIME_PRECISION"),
|
||||
string_column("CHARACTER_SET_NAME"),
|
||||
string_column("COLLATION_NAME"),
|
||||
string_column("DTD_IDENTIFIER"),
|
||||
string_column("ROUTINE_BODY"),
|
||||
string_column("ROUTINE_DEFINITION"),
|
||||
string_column("EXTERNAL_NAME"),
|
||||
string_column("EXTERNAL_LANGUAGE"),
|
||||
string_column("PARAMETER_STYLE"),
|
||||
string_column("IS_DETERMINISTIC"),
|
||||
string_column("SQL_DATA_ACCESS"),
|
||||
string_column("SQL_PATH"),
|
||||
string_column("SECURITY_TYPE"),
|
||||
datetime_column("CREATED"),
|
||||
datetime_column("LAST_ALTERED"),
|
||||
string_column("SQL_MODE"),
|
||||
string_column("ROUTINE_COMMENT"),
|
||||
string_column("DEFINER"),
|
||||
string_column("CHARACTER_SET_CLIENT"),
|
||||
string_column("COLLATION_CONNECTION"),
|
||||
string_column("DATABASE_COLLATION"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
SCHEMA_PRIVILEGES => (
|
||||
vec![
|
||||
string_column("GRANTEE"),
|
||||
string_column("TABLE_CATALOG"),
|
||||
string_column("TABLE_SCHEMA"),
|
||||
string_column("PRIVILEGE_TYPE"),
|
||||
string_column("IS_GRANTABLE"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
TABLE_PRIVILEGES => (
|
||||
vec![
|
||||
string_column("GRANTEE"),
|
||||
string_column("TABLE_CATALOG"),
|
||||
string_column("TABLE_SCHEMA"),
|
||||
string_column("TABLE_NAME"),
|
||||
string_column("PRIVILEGE_TYPE"),
|
||||
string_column("IS_GRANTABLE"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
TRIGGERS => (
|
||||
vec![
|
||||
string_column("TRIGGER_CATALOG"),
|
||||
string_column("TRIGGER_SCHEMA"),
|
||||
string_column("TRIGGER_NAME"),
|
||||
string_column("EVENT_MANIPULATION"),
|
||||
string_column("EVENT_OBJECT_CATALOG"),
|
||||
string_column("EVENT_OBJECT_SCHEMA"),
|
||||
string_column("EVENT_OBJECT_TABLE"),
|
||||
bigint_column("ACTION_ORDER"),
|
||||
string_column("ACTION_CONDITION"),
|
||||
string_column("ACTION_STATEMENT"),
|
||||
string_column("ACTION_ORIENTATION"),
|
||||
string_column("ACTION_TIMING"),
|
||||
string_column("ACTION_REFERENCE_OLD_TABLE"),
|
||||
string_column("ACTION_REFERENCE_NEW_TABLE"),
|
||||
string_column("ACTION_REFERENCE_OLD_ROW"),
|
||||
string_column("ACTION_REFERENCE_NEW_ROW"),
|
||||
datetime_column("CREATED"),
|
||||
string_column("SQL_MODE"),
|
||||
string_column("DEFINER"),
|
||||
string_column("CHARACTER_SET_CLIENT"),
|
||||
string_column("COLLATION_CONNECTION"),
|
||||
string_column("DATABASE_COLLATION"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
// TODO: Consider storing internal metrics in the `global_status` and
// `session_status` tables.
GLOBAL_STATUS => (
|
||||
vec![
|
||||
string_column("VARIABLE_NAME"),
|
||||
string_column("VARIABLE_VALUE"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
SESSION_STATUS => (
|
||||
vec![
|
||||
string_column("VARIABLE_NAME"),
|
||||
string_column("VARIABLE_VALUE"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
_ => unreachable!("Unknown table in information_schema: {}", table_name),
|
||||
};
|
||||
|
||||
@@ -115,6 +429,22 @@ fn string_column(name: &str) -> ColumnSchema {
|
||||
)
|
||||
}
|
||||
|
||||
fn bigint_column(name: &str) -> ColumnSchema {
|
||||
ColumnSchema::new(
|
||||
str::to_lowercase(name),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
fn datetime_column(name: &str) -> ColumnSchema {
|
||||
ColumnSchema::new(
|
||||
str::to_lowercase(name),
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
src/catalog/src/information_schema/predicate.rs (new file, 609 lines)
@@ -0,0 +1,609 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::array::StringArray;
|
||||
use arrow::compute::kernels::comparison;
|
||||
use common_query::logical_plan::DfExpr;
|
||||
use datafusion::common::ScalarValue;
|
||||
use datafusion::logical_expr::expr::Like;
|
||||
use datafusion::logical_expr::Operator;
|
||||
use datatypes::value::Value;
|
||||
use store_api::storage::ScanRequest;
|
||||
|
||||
type ColumnName = String;
|
||||
/// A predicate used to filter the `information_schema` table streams;
/// only these simple predicate shapes are supported currently.
/// TODO(dennis): support more predicate types.
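///
/// A minimal sketch of the intended mapping (hedged; `col` and `lit` are the
/// usual datafusion expression helpers, used here only for illustration):
///
/// ```ignore
/// use datafusion::prelude::{col, lit};
///
/// let expr = col("table_name").eq(lit("monitor"));
/// let p = Predicate::from_expr(expr).unwrap();
/// assert!(matches!(p, Predicate::Eq(name, _) if name == "table_name"));
/// ```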
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||
enum Predicate {
|
||||
Eq(ColumnName, Value),
|
||||
Like(ColumnName, String, bool),
|
||||
NotEq(ColumnName, Value),
|
||||
InList(ColumnName, Vec<Value>),
|
||||
And(Box<Predicate>, Box<Predicate>),
|
||||
Or(Box<Predicate>, Box<Predicate>),
|
||||
Not(Box<Predicate>),
|
||||
}
|
||||
|
||||
impl Predicate {
|
||||
/// Evaluates the predicate against the row. Returns:
/// - `None` when the predicate can't be evaluated against the row,
/// - `Some(true)` when the predicate is satisfied,
/// - `Some(false)` when the predicate is not satisfied.
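///
/// For illustration (a hedged sketch; it assumes `Value::from` for string
/// scalars, as used in the tests below):
///
/// ```ignore
/// let p = Predicate::Eq("a".to_string(), Value::from("x"));
/// assert_eq!(Some(true), p.eval(&[("a", &Value::from("x"))]));
/// assert_eq!(Some(false), p.eval(&[("a", &Value::from("y"))]));
/// assert_eq!(None, p.eval(&[("b", &Value::from("x"))]));
/// ```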
fn eval(&self, row: &[(&str, &Value)]) -> Option<bool> {
|
||||
match self {
|
||||
Predicate::Eq(c, v) => {
|
||||
for (column, value) in row {
|
||||
if c != column {
|
||||
continue;
|
||||
}
|
||||
return Some(v == *value);
|
||||
}
|
||||
}
|
||||
Predicate::Like(c, pattern, case_insensitive) => {
|
||||
for (column, value) in row {
|
||||
if c != column {
|
||||
continue;
|
||||
}
|
||||
|
||||
let Value::String(bs) = value else {
|
||||
continue;
|
||||
};
|
||||
|
||||
return like_utf8(bs.as_utf8(), pattern, case_insensitive);
|
||||
}
|
||||
}
|
||||
Predicate::NotEq(c, v) => {
|
||||
for (column, value) in row {
|
||||
if c != column {
|
||||
continue;
|
||||
}
|
||||
return Some(v != *value);
|
||||
}
|
||||
}
|
||||
Predicate::InList(c, values) => {
|
||||
for (column, value) in row {
|
||||
if c != column {
|
||||
continue;
|
||||
}
|
||||
return Some(values.iter().any(|v| v == *value));
|
||||
}
|
||||
}
|
||||
Predicate::And(left, right) => {
|
||||
let left = left.eval(row);
|
||||
|
||||
// short-circuit
|
||||
if matches!(left, Some(false)) {
|
||||
return Some(false);
|
||||
}
|
||||
|
||||
return match (left, right.eval(row)) {
|
||||
(Some(left), Some(right)) => Some(left && right),
|
||||
(None, Some(false)) => Some(false),
|
||||
_ => None,
|
||||
};
|
||||
}
|
||||
Predicate::Or(left, right) => {
|
||||
let left = left.eval(row);
|
||||
|
||||
// short-circuit
|
||||
if matches!(left, Some(true)) {
|
||||
return Some(true);
|
||||
}
|
||||
|
||||
return match (left, right.eval(row)) {
|
||||
(Some(left), Some(right)) => Some(left || right),
|
||||
(None, Some(true)) => Some(true),
|
||||
_ => None,
|
||||
};
|
||||
}
|
||||
Predicate::Not(p) => {
|
||||
let Some(b) = p.eval(row) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
return Some(!b);
|
||||
}
|
||||
}
|
||||
|
||||
// Can't evaluate predicate with the row
|
||||
None
|
||||
}
|
||||
|
||||
/// Tries to create a predicate from a datafusion [`Expr`]; returns `None` on failure.
|
||||
fn from_expr(expr: DfExpr) -> Option<Predicate> {
|
||||
match expr {
|
||||
// NOT expr
|
||||
DfExpr::Not(expr) => {
|
||||
let Some(p) = Self::from_expr(*expr) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some(Predicate::Not(Box::new(p)))
|
||||
}
|
||||
// expr LIKE pattern
|
||||
DfExpr::Like(Like {
|
||||
negated,
|
||||
expr,
|
||||
pattern,
|
||||
case_insensitive,
|
||||
..
|
||||
}) if is_column(&expr) && is_string_literal(&pattern) => {
|
||||
// Safety: ensured by the match guard
|
||||
let DfExpr::Column(c) = *expr else {
|
||||
unreachable!();
|
||||
};
|
||||
let DfExpr::Literal(ScalarValue::Utf8(Some(pattern))) = *pattern else {
|
||||
unreachable!();
|
||||
};
|
||||
|
||||
let p = Predicate::Like(c.name, pattern, case_insensitive);
|
||||
|
||||
if negated {
|
||||
Some(Predicate::Not(Box::new(p)))
|
||||
} else {
|
||||
Some(p)
|
||||
}
|
||||
}
|
||||
// left OP right
|
||||
DfExpr::BinaryExpr(bin) => match (*bin.left, bin.op, *bin.right) {
|
||||
// left == right
|
||||
(DfExpr::Literal(scalar), Operator::Eq, DfExpr::Column(c))
|
||||
| (DfExpr::Column(c), Operator::Eq, DfExpr::Literal(scalar)) => {
|
||||
let Ok(v) = Value::try_from(scalar) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some(Predicate::Eq(c.name, v))
|
||||
}
|
||||
// left != right
|
||||
(DfExpr::Literal(scalar), Operator::NotEq, DfExpr::Column(c))
|
||||
| (DfExpr::Column(c), Operator::NotEq, DfExpr::Literal(scalar)) => {
|
||||
let Ok(v) = Value::try_from(scalar) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some(Predicate::NotEq(c.name, v))
|
||||
}
|
||||
// left AND right
|
||||
(left, Operator::And, right) => {
|
||||
let Some(left) = Self::from_expr(left) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let Some(right) = Self::from_expr(right) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some(Predicate::And(Box::new(left), Box::new(right)))
|
||||
}
|
||||
// left OR right
|
||||
(left, Operator::Or, right) => {
|
||||
let Some(left) = Self::from_expr(left) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let Some(right) = Self::from_expr(right) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some(Predicate::Or(Box::new(left), Box::new(right)))
|
||||
}
|
||||
_ => None,
|
||||
},
|
||||
// [NOT] IN (LIST)
|
||||
DfExpr::InList(list) => {
|
||||
match (*list.expr, list.list, list.negated) {
|
||||
// column [NOT] IN (v1, v2, v3, ...)
|
||||
(DfExpr::Column(c), list, negated) if is_all_scalars(&list) => {
|
||||
let mut values = Vec::with_capacity(list.len());
|
||||
for scalar in list {
|
||||
// Safety: checked by `is_all_scalars`
|
||||
let DfExpr::Literal(scalar) = scalar else {
|
||||
unreachable!();
|
||||
};
|
||||
|
||||
let Ok(value) = Value::try_from(scalar) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
values.push(value);
|
||||
}
|
||||
|
||||
let predicate = Predicate::InList(c.name, values);
|
||||
|
||||
if negated {
|
||||
Some(Predicate::Not(Box::new(predicate)))
|
||||
} else {
|
||||
Some(predicate)
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Performs SQL `left LIKE right`; returns `None` if evaluation fails.
/// - `s`: the target string
/// - `pattern`: the pattern, e.g. `'%abc'`
/// - `case_insensitive`: whether to perform a case-insensitive LIKE
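///
/// For example (a hedged sketch, not a doc-test from the original source):
///
/// ```ignore
/// assert_eq!(Some(true), like_utf8("hello AbC", "%abc", &true));
/// assert_eq!(Some(false), like_utf8("hello AbC", "%abc", &false));
/// ```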
fn like_utf8(s: &str, pattern: &str, case_insensitive: &bool) -> Option<bool> {
|
||||
let array = StringArray::from(vec![s]);
|
||||
let patterns = StringArray::new_scalar(pattern);
|
||||
|
||||
let Ok(booleans) = (if *case_insensitive {
|
||||
comparison::ilike(&array, &patterns)
|
||||
} else {
|
||||
comparison::like(&array, &patterns)
|
||||
}) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
// Safety: at least one value in result
|
||||
Some(booleans.value(0))
|
||||
}
|
||||
|
||||
fn is_string_literal(expr: &DfExpr) -> bool {
|
||||
matches!(expr, DfExpr::Literal(ScalarValue::Utf8(Some(_))))
|
||||
}
|
||||
|
||||
fn is_column(expr: &DfExpr) -> bool {
|
||||
matches!(expr, DfExpr::Column(_))
|
||||
}
|
||||
|
||||
/// A list of predicates
|
||||
pub struct Predicates {
|
||||
predicates: Vec<Predicate>,
|
||||
}
|
||||
|
||||
impl Predicates {
|
||||
/// Tries its best to create predicates from the [`ScanRequest`].
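///
/// A hedged sketch that mirrors the unit tests below (`col` and `lit` are the
/// usual datafusion helpers, used only for illustration):
///
/// ```ignore
/// use datafusion::prelude::{col, lit};
///
/// let request = ScanRequest {
///     filters: vec![col("a").eq(lit("a_value")).into()],
///     ..Default::default()
/// };
/// let predicates = Predicates::from_scan_request(&Some(request));
/// assert!(predicates.eval(&[("a", &Value::from("a_value"))]));
/// ```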
pub fn from_scan_request(request: &Option<ScanRequest>) -> Predicates {
|
||||
if let Some(request) = request {
|
||||
let mut predicates = Vec::with_capacity(request.filters.len());
|
||||
|
||||
for filter in &request.filters {
|
||||
if let Some(predicate) = Predicate::from_expr(filter.df_expr().clone()) {
|
||||
predicates.push(predicate);
|
||||
}
|
||||
}
|
||||
|
||||
Self { predicates }
|
||||
} else {
|
||||
Self {
|
||||
predicates: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Evaluates the predicates against the row.
/// Returns `true` when all the predicates are satisfied or can't be evaluated.
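///
/// Note that a row which contains none of the filtered columns still passes,
/// because no predicate can be evaluated against it (see
/// `test_predicates_eval_row` below):
///
/// ```ignore
/// // With a filter on column "a", a row that only has column "c" passes.
/// assert!(predicates.eval(&[("c", &Value::from("c_value"))]));
/// ```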
pub fn eval(&self, row: &[(&str, &Value)]) -> bool {
|
||||
// fast path
|
||||
if self.predicates.is_empty() {
|
||||
return true;
|
||||
}
|
||||
|
||||
self.predicates
|
||||
.iter()
|
||||
.filter_map(|p| p.eval(row))
|
||||
.all(|b| b)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true when the values are all [`DfExpr::Literal`].
|
||||
fn is_all_scalars(list: &[DfExpr]) -> bool {
|
||||
list.iter().all(|v| matches!(v, DfExpr::Literal(_)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use datafusion::common::{Column, ScalarValue};
|
||||
use datafusion::logical_expr::expr::InList;
|
||||
use datafusion::logical_expr::BinaryExpr;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_predicate_eval() {
|
||||
let a_col = "a".to_string();
|
||||
let b_col = "b".to_string();
|
||||
let a_value = Value::from("a_value");
|
||||
let b_value = Value::from("b_value");
|
||||
let wrong_value = Value::from("wrong_value");
|
||||
|
||||
let a_row = [(a_col.as_str(), &a_value)];
|
||||
let b_row = [("b", &wrong_value)];
|
||||
let wrong_row = [(a_col.as_str(), &wrong_value)];
|
||||
|
||||
// Predicate::Eq
|
||||
let p = Predicate::Eq(a_col.clone(), a_value.clone());
|
||||
assert!(p.eval(&a_row).unwrap());
|
||||
assert!(p.eval(&b_row).is_none());
|
||||
assert!(!p.eval(&wrong_row).unwrap());
|
||||
|
||||
// Predicate::NotEq
|
||||
let p = Predicate::NotEq(a_col.clone(), a_value.clone());
|
||||
assert!(!p.eval(&a_row).unwrap());
|
||||
assert!(p.eval(&b_row).is_none());
|
||||
assert!(p.eval(&wrong_row).unwrap());
|
||||
|
||||
// Predicate::InList
|
||||
let p = Predicate::InList(a_col.clone(), vec![a_value.clone(), b_value.clone()]);
|
||||
assert!(p.eval(&a_row).unwrap());
|
||||
assert!(p.eval(&b_row).is_none());
|
||||
assert!(!p.eval(&wrong_row).unwrap());
|
||||
assert!(p.eval(&[(&a_col, &b_value)]).unwrap());
|
||||
|
||||
let p1 = Predicate::Eq(a_col.clone(), a_value.clone());
|
||||
let p2 = Predicate::Eq(b_col.clone(), b_value.clone());
|
||||
let row = [(a_col.as_str(), &a_value), (b_col.as_str(), &b_value)];
|
||||
let wrong_row = [(a_col.as_str(), &a_value), (b_col.as_str(), &wrong_value)];
|
||||
|
||||
// Predicate::And
|
||||
let p = Predicate::And(Box::new(p1.clone()), Box::new(p2.clone()));
|
||||
assert!(p.eval(&row).unwrap());
|
||||
assert!(!p.eval(&wrong_row).unwrap());
|
||||
assert!(p.eval(&[]).is_none());
|
||||
assert!(p.eval(&[("c", &a_value)]).is_none());
|
||||
assert!(!p
|
||||
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &a_value)])
|
||||
.unwrap());
|
||||
assert!(!p
|
||||
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &b_value)])
|
||||
.unwrap());
|
||||
assert!(p
|
||||
.eval(&[(a_col.as_ref(), &a_value), ("c", &a_value)])
|
||||
.is_none());
|
||||
assert!(!p
|
||||
.eval(&[(a_col.as_ref(), &b_value), ("c", &a_value)])
|
||||
.unwrap());
|
||||
|
||||
// Predicate::Or
|
||||
let p = Predicate::Or(Box::new(p1), Box::new(p2));
|
||||
assert!(p.eval(&row).unwrap());
|
||||
assert!(p.eval(&wrong_row).unwrap());
|
||||
assert!(p.eval(&[]).is_none());
|
||||
assert!(p.eval(&[("c", &a_value)]).is_none());
|
||||
assert!(!p
|
||||
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &a_value)])
|
||||
.unwrap());
|
||||
assert!(p
|
||||
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &b_value)])
|
||||
.unwrap());
|
||||
assert!(p
|
||||
.eval(&[(a_col.as_ref(), &a_value), ("c", &a_value)])
|
||||
.unwrap());
|
||||
assert!(p
|
||||
.eval(&[(a_col.as_ref(), &b_value), ("c", &a_value)])
|
||||
.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predicate_like() {
|
||||
// case insensitive
|
||||
let expr = DfExpr::Like(Like {
|
||||
negated: false,
|
||||
expr: Box::new(column("a")),
|
||||
pattern: Box::new(string_literal("%abc")),
|
||||
case_insensitive: true,
|
||||
escape_char: None,
|
||||
});
|
||||
|
||||
let p = Predicate::from_expr(expr).unwrap();
|
||||
assert!(
|
||||
matches!(&p, Predicate::Like(c, pattern, case_insensitive) if
|
||||
c == "a"
|
||||
&& pattern == "%abc"
|
||||
&& *case_insensitive)
|
||||
);
|
||||
|
||||
let match_row = [
|
||||
("a", &Value::from("hello AbC")),
|
||||
("b", &Value::from("b value")),
|
||||
];
|
||||
let unmatch_row = [("a", &Value::from("bca")), ("b", &Value::from("b value"))];
|
||||
|
||||
assert!(p.eval(&match_row).unwrap());
|
||||
assert!(!p.eval(&unmatch_row).unwrap());
|
||||
assert!(p.eval(&[]).is_none());
|
||||
|
||||
// case sensitive
|
||||
let expr = DfExpr::Like(Like {
|
||||
negated: false,
|
||||
expr: Box::new(column("a")),
|
||||
pattern: Box::new(string_literal("%abc")),
|
||||
case_insensitive: false,
|
||||
escape_char: None,
|
||||
});
|
||||
|
||||
let p = Predicate::from_expr(expr).unwrap();
|
||||
assert!(
|
||||
matches!(&p, Predicate::Like(c, pattern, case_insensitive) if
|
||||
c == "a"
|
||||
&& pattern == "%abc"
|
||||
&& !*case_insensitive)
|
||||
);
|
||||
assert!(!p.eval(&match_row).unwrap());
|
||||
assert!(!p.eval(&unmatch_row).unwrap());
|
||||
assert!(p.eval(&[]).is_none());
|
||||
|
||||
// not like
|
||||
let expr = DfExpr::Like(Like {
|
||||
negated: true,
|
||||
expr: Box::new(column("a")),
|
||||
pattern: Box::new(string_literal("%abc")),
|
||||
case_insensitive: true,
|
||||
escape_char: None,
|
||||
});
|
||||
|
||||
let p = Predicate::from_expr(expr).unwrap();
|
||||
assert!(!p.eval(&match_row).unwrap());
|
||||
assert!(p.eval(&unmatch_row).unwrap());
|
||||
assert!(p.eval(&[]).is_none());
|
||||
}
|
||||
|
||||
fn column(name: &str) -> DfExpr {
|
||||
DfExpr::Column(Column {
|
||||
relation: None,
|
||||
name: name.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
fn string_literal(v: &str) -> DfExpr {
|
||||
DfExpr::Literal(ScalarValue::Utf8(Some(v.to_string())))
|
||||
}
|
||||
|
||||
fn match_string_value(v: &Value, expected: &str) -> bool {
|
||||
matches!(v, Value::String(bs) if bs.as_utf8() == expected)
|
||||
}
|
||||
|
||||
fn match_string_values(vs: &[Value], expected: &[&str]) -> bool {
|
||||
assert_eq!(vs.len(), expected.len());
|
||||
|
||||
let mut result = true;
|
||||
for (i, v) in vs.iter().enumerate() {
|
||||
result = result && match_string_value(v, expected[i]);
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
fn mock_exprs() -> (DfExpr, DfExpr) {
|
||||
let expr1 = DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(column("a")),
|
||||
op: Operator::Eq,
|
||||
right: Box::new(string_literal("a_value")),
|
||||
});
|
||||
|
||||
let expr2 = DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(column("b")),
|
||||
op: Operator::NotEq,
|
||||
right: Box::new(string_literal("b_value")),
|
||||
});
|
||||
|
||||
(expr1, expr2)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predicate_from_expr() {
|
||||
let (expr1, expr2) = mock_exprs();
|
||||
|
||||
let p1 = Predicate::from_expr(expr1.clone()).unwrap();
|
||||
assert!(matches!(&p1, Predicate::Eq(column, v) if column == "a"
|
||||
&& match_string_value(v, "a_value")));
|
||||
|
||||
let p2 = Predicate::from_expr(expr2.clone()).unwrap();
|
||||
assert!(matches!(&p2, Predicate::NotEq(column, v) if column == "b"
|
||||
&& match_string_value(v, "b_value")));
|
||||
|
||||
let and_expr = DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(expr1.clone()),
|
||||
op: Operator::And,
|
||||
right: Box::new(expr2.clone()),
|
||||
});
|
||||
let or_expr = DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(expr1.clone()),
|
||||
op: Operator::Or,
|
||||
right: Box::new(expr2.clone()),
|
||||
});
|
||||
let not_expr = DfExpr::Not(Box::new(expr1.clone()));
|
||||
|
||||
let and_p = Predicate::from_expr(and_expr).unwrap();
|
||||
assert!(matches!(and_p, Predicate::And(left, right) if *left == p1 && *right == p2));
|
||||
let or_p = Predicate::from_expr(or_expr).unwrap();
|
||||
assert!(matches!(or_p, Predicate::Or(left, right) if *left == p1 && *right == p2));
|
||||
let not_p = Predicate::from_expr(not_expr).unwrap();
|
||||
assert!(matches!(not_p, Predicate::Not(p) if *p == p1));
|
||||
|
||||
let inlist_expr = DfExpr::InList(InList {
|
||||
expr: Box::new(column("a")),
|
||||
list: vec![string_literal("a1"), string_literal("a2")],
|
||||
negated: false,
|
||||
});
|
||||
|
||||
let inlist_p = Predicate::from_expr(inlist_expr).unwrap();
|
||||
assert!(matches!(&inlist_p, Predicate::InList(c, values) if c == "a"
|
||||
&& match_string_values(values, &["a1", "a2"])));
|
||||
|
||||
let inlist_expr = DfExpr::InList(InList {
|
||||
expr: Box::new(column("a")),
|
||||
list: vec![string_literal("a1"), string_literal("a2")],
|
||||
negated: true,
|
||||
});
|
||||
let inlist_p = Predicate::from_expr(inlist_expr).unwrap();
|
||||
assert!(matches!(inlist_p, Predicate::Not(p) if
|
||||
matches!(&*p,
|
||||
Predicate::InList(c, values) if c == "a"
|
||||
&& match_string_values(values, &["a1", "a2"]))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predicates_from_scan_request() {
|
||||
let predicates = Predicates::from_scan_request(&None);
|
||||
assert!(predicates.predicates.is_empty());
|
||||
|
||||
let (expr1, expr2) = mock_exprs();
|
||||
|
||||
let request = ScanRequest {
|
||||
filters: vec![expr1.into(), expr2.into()],
|
||||
..Default::default()
|
||||
};
|
||||
let predicates = Predicates::from_scan_request(&Some(request));
|
||||
|
||||
assert_eq!(2, predicates.predicates.len());
|
||||
assert!(
|
||||
matches!(&predicates.predicates[0], Predicate::Eq(column, v) if column == "a"
|
||||
&& match_string_value(v, "a_value"))
|
||||
);
|
||||
assert!(
|
||||
matches!(&predicates.predicates[1], Predicate::NotEq(column, v) if column == "b"
|
||||
&& match_string_value(v, "b_value"))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predicates_eval_row() {
|
||||
let wrong_row = [
|
||||
("a", &Value::from("a_value")),
|
||||
("b", &Value::from("b_value")),
|
||||
("c", &Value::from("c_value")),
|
||||
];
|
||||
let row = [
|
||||
("a", &Value::from("a_value")),
|
||||
("b", &Value::from("not_b_value")),
|
||||
("c", &Value::from("c_value")),
|
||||
];
|
||||
let c_row = [("c", &Value::from("c_value"))];
|
||||
|
||||
// test empty predicates, always returns true
|
||||
let predicates = Predicates::from_scan_request(&None);
|
||||
assert!(predicates.eval(&row));
|
||||
assert!(predicates.eval(&wrong_row));
|
||||
assert!(predicates.eval(&c_row));
|
||||
|
||||
let (expr1, expr2) = mock_exprs();
|
||||
let request = ScanRequest {
|
||||
filters: vec![expr1.into(), expr2.into()],
|
||||
..Default::default()
|
||||
};
|
||||
let predicates = Predicates::from_scan_request(&Some(request));
|
||||
assert!(predicates.eval(&row));
|
||||
assert!(!predicates.eval(&wrong_row));
|
||||
assert!(predicates.eval(&c_row));
|
||||
}
|
||||
}
|
||||
src/catalog/src/information_schema/schemata.rs (new file, 228 lines)
@@ -0,0 +1,228 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_SCHEMATA_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::StringVectorBuilder;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::SCHEMATA;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::{InformationTable, Predicates};
|
||||
use crate::CatalogManager;
|
||||
|
||||
const CATALOG_NAME: &str = "catalog_name";
|
||||
const SCHEMA_NAME: &str = "schema_name";
|
||||
const DEFAULT_CHARACTER_SET_NAME: &str = "default_character_set_name";
|
||||
const DEFAULT_COLLATION_NAME: &str = "default_collation_name";
|
||||
|
||||
/// The `information_schema.schemata` table implementation.
|
||||
pub(super) struct InformationSchemaSchemata {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
}
|
||||
|
||||
impl InformationSchemaSchemata {
|
||||
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(CATALOG_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(SCHEMA_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(
|
||||
DEFAULT_CHARACTER_SET_NAME,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
DEFAULT_COLLATION_NAME,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new("sql_path", ConcreteDataType::string_datatype(), true),
|
||||
]))
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaSchemataBuilder {
|
||||
InformationSchemaSchemataBuilder::new(
|
||||
self.schema.clone(),
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for InformationSchemaSchemata {
|
||||
fn table_id(&self) -> TableId {
|
||||
INFORMATION_SCHEMA_SCHEMATA_TABLE_ID
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
SCHEMATA
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_schemata(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the `information_schema.schemata` table row by row
|
||||
///
|
||||
/// Columns are based on <https://docs.pingcap.com/tidb/stable/information-schema-schemata>
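///
/// A hedged sketch of the rows this builder produces (actual values depend on
/// the catalogs and schemas present; `greptime`/`public` are only illustrative
/// names):
///
/// ```ignore
/// // catalog_name | schema_name | default_character_set_name | default_collation_name | sql_path
/// // "greptime"   | "public"    | "utf8"                     | "utf8_bin"             | NULL
/// ```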
struct InformationSchemaSchemataBuilder {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
|
||||
catalog_names: StringVectorBuilder,
|
||||
schema_names: StringVectorBuilder,
|
||||
charset_names: StringVectorBuilder,
|
||||
collation_names: StringVectorBuilder,
|
||||
sql_paths: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaSchemataBuilder {
|
||||
fn new(
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
catalog_names: StringVectorBuilder::with_capacity(42),
|
||||
schema_names: StringVectorBuilder::with_capacity(42),
|
||||
charset_names: StringVectorBuilder::with_capacity(42),
|
||||
collation_names: StringVectorBuilder::with_capacity(42),
|
||||
sql_paths: StringVectorBuilder::with_capacity(42),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.schemata` virtual table
|
||||
async fn make_schemata(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
|
||||
let catalog_name = self.catalog_name.clone();
|
||||
let catalog_manager = self
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
if !catalog_manager
|
||||
.schema_exists(&catalog_name, &schema_name)
|
||||
.await?
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
self.add_schema(&predicates, &catalog_name, &schema_name);
|
||||
}
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
fn add_schema(&mut self, predicates: &Predicates, catalog_name: &str, schema_name: &str) {
|
||||
let row = [
|
||||
(CATALOG_NAME, &Value::from(catalog_name)),
|
||||
(SCHEMA_NAME, &Value::from(schema_name)),
|
||||
(DEFAULT_CHARACTER_SET_NAME, &Value::from("utf8")),
|
||||
(DEFAULT_COLLATION_NAME, &Value::from("utf8_bin")),
|
||||
];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.catalog_names.push(Some(catalog_name));
|
||||
self.schema_names.push(Some(schema_name));
|
||||
self.charset_names.push(Some("utf8"));
|
||||
self.collation_names.push(Some("utf8_bin"));
|
||||
self.sql_paths.push(None);
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
Arc::new(self.catalog_names.finish()),
|
||||
Arc::new(self.schema_names.finish()),
|
||||
Arc::new(self.charset_names.finish()),
|
||||
Arc::new(self.collation_names.finish()),
|
||||
Arc::new(self.sql_paths.finish()),
|
||||
];
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl DfPartitionStream for InformationSchemaSchemata {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_schemata(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
@@ -20,3 +20,21 @@ pub const ENGINES: &str = "engines";
|
||||
pub const COLUMN_PRIVILEGES: &str = "column_privileges";
|
||||
pub const COLUMN_STATISTICS: &str = "column_statistics";
|
||||
pub const BUILD_INFO: &str = "build_info";
|
||||
pub const CHARACTER_SETS: &str = "character_sets";
|
||||
pub const COLLATIONS: &str = "collations";
|
||||
pub const COLLATION_CHARACTER_SET_APPLICABILITY: &str = "collation_character_set_applicability";
|
||||
pub const CHECK_CONSTRAINTS: &str = "check_constraints";
|
||||
pub const EVENTS: &str = "events";
|
||||
pub const FILES: &str = "files";
|
||||
pub const SCHEMATA: &str = "schemata";
|
||||
pub const KEY_COLUMN_USAGE: &str = "key_column_usage";
|
||||
pub const OPTIMIZER_TRACE: &str = "optimizer_trace";
|
||||
pub const PARAMETERS: &str = "parameters";
|
||||
pub const PROFILING: &str = "profiling";
|
||||
pub const REFERENTIAL_CONSTRAINTS: &str = "referential_constraints";
|
||||
pub const ROUTINES: &str = "routines";
|
||||
pub const SCHEMA_PRIVILEGES: &str = "schema_privileges";
|
||||
pub const TABLE_PRIVILEGES: &str = "table_privileges";
|
||||
pub const TRIGGERS: &str = "triggers";
|
||||
pub const GLOBAL_STATUS: &str = "global_status";
|
||||
pub const SESSION_STATUS: &str = "session_status";
|
||||
|
||||
@@ -25,18 +25,26 @@ use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::TableId;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
use table::metadata::TableType;
|
||||
|
||||
use super::TABLES;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::InformationTable;
|
||||
use crate::information_schema::{InformationTable, Predicates};
|
||||
use crate::CatalogManager;
|
||||
|
||||
const TABLE_CATALOG: &str = "table_catalog";
|
||||
const TABLE_SCHEMA: &str = "table_schema";
|
||||
const TABLE_NAME: &str = "table_name";
|
||||
const TABLE_TYPE: &str = "table_type";
|
||||
const TABLE_ID: &str = "table_id";
|
||||
const ENGINE: &str = "engine";
|
||||
|
||||
pub(super) struct InformationSchemaTables {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
@@ -54,12 +62,12 @@ impl InformationSchemaTables {
|
||||
|
||||
pub(crate) fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new("table_catalog", ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new("table_schema", ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new("table_name", ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new("table_type", ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new("table_id", ConcreteDataType::uint32_datatype(), true),
|
||||
ColumnSchema::new("engine", ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_ID, ConcreteDataType::uint32_datatype(), true),
|
||||
ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
|
||||
]))
|
||||
}
|
||||
|
||||
@@ -85,14 +93,14 @@ impl InformationTable for InformationSchemaTables {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self) -> Result<SendableRecordBatchStream> {
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_tables()
|
||||
.make_tables(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
@@ -142,12 +150,13 @@ impl InformationSchemaTablesBuilder {
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.tables` virtual table
|
||||
async fn make_tables(&mut self) -> Result<RecordBatch> {
|
||||
async fn make_tables(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
|
||||
let catalog_name = self.catalog_name.clone();
|
||||
let catalog_manager = self
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
if !catalog_manager
|
||||
@@ -167,6 +176,7 @@ impl InformationSchemaTablesBuilder {
|
||||
{
|
||||
let table_info = table.table_info();
|
||||
self.add_table(
|
||||
&predicates,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
@@ -183,8 +193,10 @@ impl InformationSchemaTablesBuilder {
|
||||
self.finish()
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn add_table(
|
||||
&mut self,
|
||||
predicates: &Predicates,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
table_name: &str,
|
||||
@@ -192,14 +204,27 @@ impl InformationSchemaTablesBuilder {
|
||||
table_id: Option<u32>,
|
||||
engine: Option<&str>,
|
||||
) {
|
||||
self.catalog_names.push(Some(catalog_name));
|
||||
self.schema_names.push(Some(schema_name));
|
||||
self.table_names.push(Some(table_name));
|
||||
self.table_types.push(Some(match table_type {
|
||||
let table_type = match table_type {
|
||||
TableType::Base => "BASE TABLE",
|
||||
TableType::View => "VIEW",
|
||||
TableType::Temporary => "LOCAL TEMPORARY",
|
||||
}));
|
||||
};
|
||||
|
||||
let row = [
|
||||
(TABLE_CATALOG, &Value::from(catalog_name)),
|
||||
(TABLE_SCHEMA, &Value::from(schema_name)),
|
||||
(TABLE_NAME, &Value::from(table_name)),
|
||||
(TABLE_TYPE, &Value::from(table_type)),
|
||||
];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.catalog_names.push(Some(catalog_name));
|
||||
self.schema_names.push(Some(schema_name));
|
||||
self.table_names.push(Some(table_name));
|
||||
self.table_types.push(Some(table_type));
|
||||
self.table_ids.push(table_id);
|
||||
self.engines.push(engine);
|
||||
}
|
||||
@@ -229,7 +254,7 @@ impl DfPartitionStream for InformationSchemaTables {
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_tables()
|
||||
.make_tables(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
|
||||
@@ -19,17 +19,17 @@ use prometheus::*;
|
||||
|
||||
lazy_static! {
|
||||
pub static ref METRIC_CATALOG_MANAGER_CATALOG_COUNT: IntGauge =
|
||||
register_int_gauge!("catalog_catalog_count", "catalog catalog count").unwrap();
|
||||
register_int_gauge!("greptime_catalog_catalog_count", "catalog catalog count").unwrap();
|
||||
pub static ref METRIC_CATALOG_MANAGER_SCHEMA_COUNT: IntGauge =
|
||||
register_int_gauge!("catalog_schema_count", "catalog schema count").unwrap();
|
||||
register_int_gauge!("greptime_catalog_schema_count", "catalog schema count").unwrap();
|
||||
pub static ref METRIC_CATALOG_MANAGER_TABLE_COUNT: IntGaugeVec = register_int_gauge_vec!(
|
||||
"catalog_table_count",
|
||||
"greptime_catalog_table_count",
|
||||
"catalog table count",
|
||||
&[METRIC_DB_LABEL]
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_CATALOG_KV_REMOTE_GET: Histogram =
|
||||
register_histogram!("catalog_kv_get_remote", "catalog kv get remote").unwrap();
|
||||
register_histogram!("greptime_catalog_kv_get_remote", "catalog kv get remote").unwrap();
|
||||
pub static ref METRIC_CATALOG_KV_GET: Histogram =
|
||||
register_histogram!("catalog_kv_get", "catalog kv get").unwrap();
|
||||
register_histogram!("greptime_catalog_kv_get", "catalog kv get").unwrap();
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ use std::any::Any;
|
||||
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_error::{GREPTIME_ERROR_CODE, GREPTIME_ERROR_MSG};
|
||||
use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG};
|
||||
use common_macro::stack_trace_debug;
|
||||
use snafu::{Location, Snafu};
|
||||
use tonic::{Code, Status};
|
||||
@@ -115,7 +115,7 @@ impl From<Status> for Error {
|
||||
.and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
|
||||
}
|
||||
|
||||
let code = get_metadata_value(&e, GREPTIME_ERROR_CODE)
|
||||
let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE)
|
||||
.and_then(|s| {
|
||||
if let Ok(code) = s.parse::<u32>() {
|
||||
StatusCode::from_u32(code)
|
||||
@@ -125,8 +125,8 @@ impl From<Status> for Error {
|
||||
})
|
||||
.unwrap_or(StatusCode::Unknown);
|
||||
|
||||
let msg =
|
||||
get_metadata_value(&e, GREPTIME_ERROR_MSG).unwrap_or_else(|| e.message().to_string());
|
||||
let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG)
|
||||
.unwrap_or_else(|| e.message().to_string());
|
||||
|
||||
Self::Server { code, msg }
|
||||
}
|
||||
|
||||
@@ -17,27 +17,30 @@ use prometheus::*;

 lazy_static! {
     pub static ref METRIC_GRPC_CREATE_TABLE: Histogram =
-        register_histogram!("grpc_create_table", "grpc create table").unwrap();
-    pub static ref METRIC_GRPC_PROMQL_RANGE_QUERY: Histogram =
-        register_histogram!("grpc_promql_range_query", "grpc promql range query").unwrap();
+        register_histogram!("greptime_grpc_create_table", "grpc create table").unwrap();
+    pub static ref METRIC_GRPC_PROMQL_RANGE_QUERY: Histogram = register_histogram!(
+        "greptime_grpc_promql_range_query",
+        "grpc promql range query"
+    )
+    .unwrap();
     pub static ref METRIC_GRPC_INSERT: Histogram =
-        register_histogram!("grpc_insert", "grpc insert").unwrap();
+        register_histogram!("greptime_grpc_insert", "grpc insert").unwrap();
     pub static ref METRIC_GRPC_DELETE: Histogram =
-        register_histogram!("grpc_delete", "grpc delete").unwrap();
+        register_histogram!("greptime_grpc_delete", "grpc delete").unwrap();
     pub static ref METRIC_GRPC_SQL: Histogram =
-        register_histogram!("grpc_sql", "grpc sql").unwrap();
+        register_histogram!("greptime_grpc_sql", "grpc sql").unwrap();
     pub static ref METRIC_GRPC_LOGICAL_PLAN: Histogram =
-        register_histogram!("grpc_logical_plan", "grpc logical plan").unwrap();
+        register_histogram!("greptime_grpc_logical_plan", "grpc logical plan").unwrap();
     pub static ref METRIC_GRPC_ALTER: Histogram =
-        register_histogram!("grpc_alter", "grpc alter").unwrap();
+        register_histogram!("greptime_grpc_alter", "grpc alter").unwrap();
     pub static ref METRIC_GRPC_DROP_TABLE: Histogram =
-        register_histogram!("grpc_drop_table", "grpc drop table").unwrap();
+        register_histogram!("greptime_grpc_drop_table", "grpc drop table").unwrap();
     pub static ref METRIC_GRPC_TRUNCATE_TABLE: Histogram =
-        register_histogram!("grpc_truncate_table", "grpc truncate table").unwrap();
+        register_histogram!("greptime_grpc_truncate_table", "grpc truncate table").unwrap();
     pub static ref METRIC_GRPC_DO_GET: Histogram =
-        register_histogram!("grpc_do_get", "grpc do get").unwrap();
+        register_histogram!("greptime_grpc_do_get", "grpc do get").unwrap();
     pub static ref METRIC_REGION_REQUEST_GRPC: HistogramVec = register_histogram_vec!(
-        "grpc_region_request",
+        "greptime_grpc_region_request",
         "grpc region request",
         &["request_type"]
     )

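Only the exported metric names gain the greptime_ prefix; call sites keep using the same statics. A minimal usage sketch, assuming the prometheus crate's HistogramTimer API:

    // Timing a gRPC insert exactly as before; the series is now exposed under
    // the greptime_grpc_insert family on the metrics endpoint, so dashboards
    // and alerts that referenced grpc_insert need to switch to the new name.
    let _timer = METRIC_GRPC_INSERT.start_timer();
    // ... handle the insert request; the timer records on drop ...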
@@ -39,7 +39,7 @@ use crate::from_grpc_response;
 /// ```
 ///
 /// If you want to see a concrete usage example, please see
-/// [stream_inserter.rs](https://github.com/GreptimeTeam/greptimedb/blob/develop/src/client/examples/stream_ingest.rs).
+/// [stream_inserter.rs](https://github.com/GreptimeTeam/greptimedb/blob/main/src/client/examples/stream_ingest.rs).
 pub struct StreamInserter {
     sender: mpsc::Sender<GreptimeRequest>,

@@ -252,10 +252,6 @@ impl StartCommand {
             .await
             .context(StartFrontendSnafu)?;

-        instance
-            .build_export_metrics_task(&opts.export_metrics)
-            .context(StartFrontendSnafu)?;
-
         instance
             .build_servers(opts)
             .await

@@ -28,7 +28,7 @@ pub mod standalone;

 lazy_static::lazy_static! {
     static ref APP_VERSION: prometheus::IntGaugeVec =
-        prometheus::register_int_gauge_vec!("app_version", "app version", &["short_version", "version"]).unwrap();
+        prometheus::register_int_gauge_vec!("greptime_app_version", "app version", &["short_version", "version"]).unwrap();
 }

 #[async_trait]

@@ -22,7 +22,8 @@ use common_config::wal::StandaloneWalConfig;
|
||||
use common_config::{metadata_store_dir, KvBackendConfig};
|
||||
use common_meta::cache_invalidator::DummyCacheInvalidator;
|
||||
use common_meta::datanode_manager::DatanodeManagerRef;
|
||||
use common_meta::ddl::{DdlTaskExecutorRef, TableMetadataAllocatorRef};
|
||||
use common_meta::ddl::table_meta::TableMetadataAllocator;
|
||||
use common_meta::ddl::DdlTaskExecutorRef;
|
||||
use common_meta::ddl_manager::DdlManager;
|
||||
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
@@ -38,7 +39,6 @@ use datanode::datanode::{Datanode, DatanodeBuilder};
|
||||
use file_engine::config::EngineConfig as FileEngineConfig;
|
||||
use frontend::frontend::FrontendOptions;
|
||||
use frontend::instance::builder::FrontendBuilder;
|
||||
use frontend::instance::standalone::StandaloneTableMetadataAllocator;
|
||||
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
|
||||
use frontend::service_config::{
|
||||
GrpcOptions, InfluxdbOptions, MysqlOptions, OpentsdbOptions, PostgresOptions, PromStoreOptions,
|
||||
@@ -406,13 +406,18 @@ impl StartCommand {
|
||||
opts.wal_meta.clone(),
|
||||
kv_backend.clone(),
|
||||
));
|
||||
let table_meta_allocator = Arc::new(StandaloneTableMetadataAllocator::new(
|
||||
|
||||
let table_metadata_manager =
|
||||
Self::create_table_metadata_manager(kv_backend.clone()).await?;
|
||||
|
||||
let table_meta_allocator = TableMetadataAllocator::new(
|
||||
table_id_sequence,
|
||||
wal_options_allocator.clone(),
|
||||
));
|
||||
table_metadata_manager.clone(),
|
||||
);
|
||||
|
||||
let ddl_task_executor = Self::create_ddl_task_executor(
|
||||
kv_backend.clone(),
|
||||
table_metadata_manager,
|
||||
procedure_manager.clone(),
|
||||
datanode_manager.clone(),
|
||||
table_meta_allocator,
|
||||
@@ -425,10 +430,6 @@ impl StartCommand {
|
||||
.await
|
||||
.context(StartFrontendSnafu)?;
|
||||
|
||||
frontend
|
||||
.build_export_metrics_task(&opts.frontend.export_metrics)
|
||||
.context(StartFrontendSnafu)?;
|
||||
|
||||
frontend
|
||||
.build_servers(opts)
|
||||
.await
|
||||
@@ -443,14 +444,11 @@ impl StartCommand {
|
||||
}
|
||||
|
||||
pub async fn create_ddl_task_executor(
|
||||
kv_backend: KvBackendRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
datanode_manager: DatanodeManagerRef,
|
||||
table_meta_allocator: TableMetadataAllocatorRef,
|
||||
table_meta_allocator: TableMetadataAllocator,
|
||||
) -> Result<DdlTaskExecutorRef> {
|
||||
let table_metadata_manager =
|
||||
Self::create_table_metadata_manager(kv_backend.clone()).await?;
|
||||
|
||||
let ddl_task_executor: DdlTaskExecutorRef = Arc::new(
|
||||
DdlManager::try_new(
|
||||
procedure_manager,
|
||||
@@ -466,7 +464,7 @@ impl StartCommand {
|
||||
Ok(ddl_task_executor)
|
||||
}
|
||||
|
||||
async fn create_table_metadata_manager(
|
||||
pub async fn create_table_metadata_manager(
|
||||
kv_backend: KvBackendRef,
|
||||
) -> Result<TableMetadataManagerRef> {
|
||||
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend));
|
||||
|
||||
@@ -44,6 +44,42 @@ pub const INFORMATION_SCHEMA_COLUMN_PRIVILEGES_TABLE_ID: u32 = 6;
|
||||
pub const INFORMATION_SCHEMA_COLUMN_STATISTICS_TABLE_ID: u32 = 7;
|
||||
/// id for information_schema.build_info
|
||||
pub const INFORMATION_SCHEMA_BUILD_INFO_TABLE_ID: u32 = 8;
|
||||
/// id for information_schema.CHARACTER_SETS
|
||||
pub const INFORMATION_SCHEMA_CHARACTER_SETS_TABLE_ID: u32 = 9;
|
||||
/// id for information_schema.COLLATIONS
|
||||
pub const INFORMATION_SCHEMA_COLLATIONS_TABLE_ID: u32 = 10;
|
||||
/// id for information_schema.COLLATIONS
|
||||
pub const INFORMATION_SCHEMA_COLLATION_CHARACTER_SET_APPLICABILITY_TABLE_ID: u32 = 11;
|
||||
/// id for information_schema.CHECK_CONSTRAINTS
|
||||
pub const INFORMATION_SCHEMA_CHECK_CONSTRAINTS_TABLE_ID: u32 = 12;
|
||||
/// id for information_schema.EVENTS
|
||||
pub const INFORMATION_SCHEMA_EVENTS_TABLE_ID: u32 = 13;
|
||||
/// id for information_schema.FILES
|
||||
pub const INFORMATION_SCHEMA_FILES_TABLE_ID: u32 = 14;
|
||||
/// id for information_schema.SCHEMATA
|
||||
pub const INFORMATION_SCHEMA_SCHEMATA_TABLE_ID: u32 = 15;
|
||||
/// id for information_schema.KEY_COLUMN_USAGE
|
||||
pub const INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID: u32 = 16;
|
||||
/// id for information_schema.OPTIMIZER_TRACE
|
||||
pub const INFORMATION_SCHEMA_OPTIMIZER_TRACE_TABLE_ID: u32 = 17;
|
||||
/// id for information_schema.PARAMETERS
|
||||
pub const INFORMATION_SCHEMA_PARAMETERS_TABLE_ID: u32 = 18;
|
||||
/// id for information_schema.PROFILING
|
||||
pub const INFORMATION_SCHEMA_PROFILING_TABLE_ID: u32 = 19;
|
||||
/// id for information_schema.REFERENTIAL_CONSTRAINTS
|
||||
pub const INFORMATION_SCHEMA_REFERENTIAL_CONSTRAINTS_TABLE_ID: u32 = 20;
|
||||
/// id for information_schema.ROUTINES
|
||||
pub const INFORMATION_SCHEMA_ROUTINES_TABLE_ID: u32 = 21;
|
||||
/// id for information_schema.SCHEMA_PRIVILEGES
|
||||
pub const INFORMATION_SCHEMA_SCHEMA_PRIVILEGES_TABLE_ID: u32 = 22;
|
||||
/// id for information_schema.TABLE_PRIVILEGES
|
||||
pub const INFORMATION_SCHEMA_TABLE_PRIVILEGES_TABLE_ID: u32 = 23;
|
||||
/// id for information_schema.TRIGGERS
|
||||
pub const INFORMATION_SCHEMA_TRIGGERS_TABLE_ID: u32 = 24;
|
||||
/// id for information_schema.GLOBAL_STATUS
|
||||
pub const INFORMATION_SCHEMA_GLOBAL_STATUS_TABLE_ID: u32 = 25;
|
||||
/// id for information_schema.SESSION_STATUS
|
||||
pub const INFORMATION_SCHEMA_SESSION_STATUS_TABLE_ID: u32 = 26;
|
||||
/// ----- End of information_schema tables -----
|
||||
|
||||
pub const MITO_ENGINE: &str = "mito";
|
||||
|
||||
@@ -90,11 +90,12 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_serde_kafka_config() {
|
||||
// With all fields.
|
||||
let toml_str = r#"
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
max_batch_size = "4MB"
|
||||
max_batch_size = "1MB"
|
||||
linger = "200ms"
|
||||
produce_record_timeout = "100ms"
|
||||
consumer_wait_timeout = "100ms"
|
||||
backoff_init = "500ms"
|
||||
backoff_max = "10s"
|
||||
backoff_base = 2
|
||||
@@ -104,9 +105,9 @@ mod tests {
|
||||
let expected = KafkaConfig {
|
||||
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
|
||||
compression: RsKafkaCompression::default(),
|
||||
max_batch_size: ReadableSize::mb(4),
|
||||
max_batch_size: ReadableSize::mb(1),
|
||||
linger: Duration::from_millis(200),
|
||||
produce_record_timeout: Duration::from_millis(100),
|
||||
consumer_wait_timeout: Duration::from_millis(100),
|
||||
backoff: KafkaBackoffConfig {
|
||||
init: Duration::from_millis(500),
|
||||
max: Duration::from_secs(10),
|
||||
@@ -115,6 +116,19 @@ mod tests {
|
||||
},
|
||||
};
|
||||
assert_eq!(decoded, expected);
|
||||
|
||||
// With some fields missing.
|
||||
let toml_str = r#"
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
linger = "200ms"
|
||||
"#;
|
||||
let decoded: KafkaConfig = toml::from_str(toml_str).unwrap();
|
||||
let expected = KafkaConfig {
|
||||
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
|
||||
linger: Duration::from_millis(200),
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(decoded, expected);
|
||||
}
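To make the defaulting behaviour explicit: the partial-TOML case above works because every omitted field falls back to the Default impl shown further down in this diff. A small sketch along the same lines (it assumes only the defaults already visible in this change):

    // Sketch: only broker_endpoints is given; the remaining fields fall back
    // to their defaults, including the new 1MB max_batch_size. Since the
    // default broker endpoint is also 127.0.0.1:9092, the result equals
    // KafkaConfig::default().
    let decoded: KafkaConfig =
        toml::from_str(r#"broker_endpoints = ["127.0.0.1:9092"]"#).unwrap();
    assert_eq!(decoded, KafkaConfig::default());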
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -40,16 +40,15 @@ pub struct KafkaConfig {
|
||||
pub broker_endpoints: Vec<String>,
|
||||
/// The compression algorithm used to compress log entries.
|
||||
#[serde(skip)]
|
||||
#[serde(default)]
|
||||
pub compression: RsKafkaCompression,
|
||||
/// The maximum log size a kakfa batch producer could buffer.
|
||||
/// The max size of a single producer batch.
|
||||
pub max_batch_size: ReadableSize,
|
||||
/// The linger duration of a kafka batch producer.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub linger: Duration,
|
||||
/// The maximum amount of time (in milliseconds) to wait for Kafka records to be returned.
|
||||
/// The consumer wait timeout.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub produce_record_timeout: Duration,
|
||||
pub consumer_wait_timeout: Duration,
|
||||
/// The backoff config.
|
||||
#[serde(flatten, with = "kafka_backoff")]
|
||||
pub backoff: KafkaBackoffConfig,
|
||||
@@ -60,9 +59,10 @@ impl Default for KafkaConfig {
|
||||
Self {
|
||||
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
|
||||
compression: RsKafkaCompression::NoCompression,
|
||||
max_batch_size: ReadableSize::mb(4),
|
||||
// Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
max_batch_size: ReadableSize::mb(1),
|
||||
linger: Duration::from_millis(200),
|
||||
produce_record_timeout: Duration::from_millis(100),
|
||||
consumer_wait_timeout: Duration::from_millis(100),
|
||||
backoff: KafkaBackoffConfig::default(),
|
||||
}
|
||||
}
|
||||
@@ -73,17 +73,15 @@ with_prefix!(pub kafka_backoff "backoff_");
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(default)]
|
||||
pub struct KafkaBackoffConfig {
|
||||
/// The initial backoff for kafka clients.
|
||||
/// The initial backoff delay.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub init: Duration,
|
||||
/// The maximum backoff for kafka clients.
|
||||
/// The maximum backoff delay.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub max: Duration,
|
||||
/// Exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
// Sets to u32 type since some structs containing the KafkaConfig need to derive the Eq trait.
|
||||
pub base: u32,
|
||||
/// Stop reconnecting if the total wait time reaches the deadline.
|
||||
/// If it's None, the reconnecting won't terminate.
|
||||
/// The deadline of retries. `None` stands for no deadline.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub deadline: Option<Duration>,
|
||||
}
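The three backoff knobs compose into a capped exponential schedule: with the defaults above (init 500ms, base 2, max 10s) the waits are 0.5s, 1s, 2s, 4s, 8s, 10s, 10s, ... until the optional deadline is exhausted. A minimal sketch of that arithmetic, independent of the actual rskafka-backed client:

    use std::time::Duration;

    /// Capped exponential backoff for the n-th retry (0-based):
    /// min(init * base^n, max), i.e. "next backoff = base * current backoff".
    fn backoff_delay(cfg: &KafkaBackoffConfig, attempt: u32) -> Duration {
        let factor = cfg.base.saturating_pow(attempt);
        cfg.init.saturating_mul(factor).min(cfg.max)
    }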
|
||||
@@ -114,7 +112,7 @@ pub struct StandaloneKafkaConfig {
|
||||
pub num_partitions: i32,
|
||||
/// The replication factor of each topic.
|
||||
pub replication_factor: i16,
|
||||
/// Above which a topic creation operation will be cancelled.
|
||||
/// The timeout of topic creation.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub create_topic_timeout: Duration,
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ pub mod format;
 pub mod mock;
 pub mod status_code;

-pub const GREPTIME_ERROR_CODE: &str = "x-greptime-err-code";
-pub const GREPTIME_ERROR_MSG: &str = "x-greptime-err-msg";
+pub const GREPTIME_DB_HEADER_ERROR_CODE: &str = "x-greptime-err-code";
+pub const GREPTIME_DB_HEADER_ERROR_MSG: &str = "x-greptime-err-msg";

 pub use snafu;

@@ -14,6 +14,7 @@ async-stream.workspace = true
|
||||
async-trait.workspace = true
|
||||
base64.workspace = true
|
||||
bytes.workspace = true
|
||||
chrono.workspace = true
|
||||
common-catalog.workspace = true
|
||||
common-config.workspace = true
|
||||
common-error.workspace = true
|
||||
@@ -27,6 +28,7 @@ common-time.workspace = true
|
||||
datatypes.workspace = true
|
||||
derive_builder.workspace = true
|
||||
etcd-client.workspace = true
|
||||
futures-util.workspace = true
|
||||
futures.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
lazy_static.workspace = true
|
||||
@@ -51,3 +53,4 @@ chrono.workspace = true
|
||||
common-procedure = { workspace = true, features = ["testing"] }
|
||||
datatypes.workspace = true
|
||||
hyper = { version = "0.14", features = ["full"] }
|
||||
uuid.workspace = true
|
||||
|
||||
@@ -24,11 +24,12 @@ use crate::error::Result;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::key::TableMetadataManagerRef;
|
||||
use crate::region_keeper::MemoryRegionKeeperRef;
|
||||
use crate::rpc::ddl::{CreateTableTask, SubmitDdlTaskRequest, SubmitDdlTaskResponse};
|
||||
use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
|
||||
|
||||
pub mod alter_table;
|
||||
pub mod create_table;
|
||||
pub mod drop_table;
|
||||
pub mod table_meta;
|
||||
pub mod truncate_table;
|
||||
pub mod utils;
|
||||
|
||||
@@ -64,17 +65,6 @@ pub struct TableMetadata {
|
||||
pub region_wal_options: HashMap<RegionNumber, String>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
pub trait TableMetadataAllocator: Send + Sync {
|
||||
async fn create(
|
||||
&self,
|
||||
ctx: &TableMetadataAllocatorContext,
|
||||
task: &CreateTableTask,
|
||||
) -> Result<TableMetadata>;
|
||||
}
|
||||
|
||||
pub type TableMetadataAllocatorRef = Arc<dyn TableMetadataAllocator>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DdlContext {
|
||||
pub datanode_manager: DatanodeManagerRef,
|
||||
|
||||
@@ -40,9 +40,7 @@ use table::requests::AlterKind;
|
||||
use crate::cache_invalidator::Context;
|
||||
use crate::ddl::utils::handle_operate_region_error;
|
||||
use crate::ddl::DdlContext;
|
||||
use crate::error::{
|
||||
self, ConvertAlterTableRequestSnafu, InvalidProtoMsgSnafu, Result, TableRouteNotFoundSnafu,
|
||||
};
|
||||
use crate::error::{self, ConvertAlterTableRequestSnafu, InvalidProtoMsgSnafu, Result};
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::DeserializedValueWithBytes;
|
||||
@@ -65,6 +63,7 @@ impl AlterTableProcedure {
|
||||
cluster_id: u64,
|
||||
task: AlterTableTask,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
physical_table_name: Option<TableName>,
|
||||
context: DdlContext,
|
||||
) -> Result<Self> {
|
||||
let alter_kind = task
|
||||
@@ -84,7 +83,13 @@ impl AlterTableProcedure {
|
||||
|
||||
Ok(Self {
|
||||
context,
|
||||
data: AlterTableData::new(task, table_info_value, cluster_id, next_column_id),
|
||||
data: AlterTableData::new(
|
||||
task,
|
||||
table_info_value,
|
||||
physical_table_name,
|
||||
cluster_id,
|
||||
next_column_id,
|
||||
),
|
||||
kind,
|
||||
})
|
||||
}
|
||||
@@ -182,23 +187,19 @@ impl AlterTableProcedure {
|
||||
|
||||
pub async fn submit_alter_region_requests(&mut self) -> Result<Status> {
|
||||
let table_id = self.data.table_id();
|
||||
|
||||
let table_route = self
|
||||
let (_, physical_table_route) = self
|
||||
.context
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
.get(table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu { table_id })?
|
||||
.into_inner();
|
||||
let region_routes = table_route.region_routes();
|
||||
.get_physical_table_route(table_id)
|
||||
.await?;
|
||||
|
||||
let leaders = find_leaders(region_routes);
|
||||
let leaders = find_leaders(&physical_table_route.region_routes);
|
||||
let mut alter_region_tasks = Vec::with_capacity(leaders.len());
|
||||
|
||||
for datanode in leaders {
|
||||
let requester = self.context.datanode_manager.datanode(&datanode).await;
|
||||
let regions = find_leader_regions(region_routes, &datanode);
|
||||
let regions = find_leader_regions(&physical_table_route.region_routes, &datanode);
|
||||
|
||||
for region in regions {
|
||||
let region_id = RegionId::new(table_id, region);
|
||||
@@ -335,13 +336,24 @@ impl AlterTableProcedure {
|
||||
}
|
||||
|
||||
fn lock_key_inner(&self) -> Vec<String> {
|
||||
let mut lock_key = vec![];
|
||||
|
||||
if let Some(physical_table_name) = self.data.physical_table_name() {
|
||||
let physical_table_key = common_catalog::format_full_table_name(
|
||||
&physical_table_name.catalog_name,
|
||||
&physical_table_name.schema_name,
|
||||
&physical_table_name.table_name,
|
||||
);
|
||||
lock_key.push(physical_table_key);
|
||||
}
|
||||
|
||||
let table_ref = self.data.table_ref();
|
||||
let table_key = common_catalog::format_full_table_name(
|
||||
table_ref.catalog,
|
||||
table_ref.schema,
|
||||
table_ref.table,
|
||||
);
|
||||
let mut lock_key = vec![table_key];
|
||||
lock_key.push(table_key);
|
||||
|
||||
if let Ok(Kind::RenameTable(RenameTable { new_table_name })) = self.alter_kind() {
|
||||
lock_key.push(common_catalog::format_full_table_name(
|
||||
@@ -394,7 +406,7 @@ impl Procedure for AlterTableProcedure {
|
||||
fn lock_key(&self) -> LockKey {
|
||||
let key = self.lock_key_inner();
|
||||
|
||||
LockKey::new(key)
|
||||
LockKey::new_exclusive(key)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -415,6 +427,8 @@ pub struct AlterTableData {
|
||||
task: AlterTableTask,
|
||||
/// Table info value before alteration.
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
/// Physical table name, if the table to alter is a logical table.
|
||||
physical_table_name: Option<TableName>,
|
||||
cluster_id: u64,
|
||||
/// Next column id of the table if the task adds columns to the table.
|
||||
next_column_id: Option<ColumnId>,
|
||||
@@ -424,6 +438,7 @@ impl AlterTableData {
|
||||
pub fn new(
|
||||
task: AlterTableTask,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
physical_table_name: Option<TableName>,
|
||||
cluster_id: u64,
|
||||
next_column_id: Option<ColumnId>,
|
||||
) -> Self {
|
||||
@@ -431,6 +446,7 @@ impl AlterTableData {
|
||||
state: AlterTableState::Prepare,
|
||||
task,
|
||||
table_info_value,
|
||||
physical_table_name,
|
||||
cluster_id,
|
||||
next_column_id,
|
||||
}
|
||||
@@ -447,6 +463,10 @@ impl AlterTableData {
|
||||
fn table_info(&self) -> &RawTableInfo {
|
||||
&self.table_info_value.table_info
|
||||
}
|
||||
|
||||
fn physical_table_name(&self) -> Option<&TableName> {
|
||||
self.physical_table_name.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates region proto alter kind from `table_info` and `alter_kind`.
|
||||
|
||||
@@ -20,7 +20,6 @@ use api::v1::region::{
|
||||
};
|
||||
use api::v1::{ColumnDef, SemanticType};
|
||||
use async_trait::async_trait;
|
||||
use common_config::WAL_OPTIONS_KEY;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_procedure::error::{
|
||||
ExternalSnafu, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
|
||||
@@ -48,6 +47,7 @@ use crate::rpc::ddl::CreateTableTask;
|
||||
use crate::rpc::router::{
|
||||
find_leader_regions, find_leaders, operating_leader_regions, RegionRoute,
|
||||
};
|
||||
use crate::wal::prepare_wal_option;
|
||||
|
||||
pub struct CreateTableProcedure {
|
||||
pub context: DdlContext,
|
||||
@@ -217,7 +217,7 @@ impl CreateTableProcedure {
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: physical_table_id,
|
||||
})?;
|
||||
let region_routes = physical_table_route.region_routes();
|
||||
let region_routes = physical_table_route.region_routes()?;
|
||||
|
||||
let request_builder = self.new_region_request_builder(Some(physical_table_id))?;
|
||||
|
||||
@@ -349,7 +349,7 @@ impl Procedure for CreateTableProcedure {
|
||||
table_ref.table,
|
||||
);
|
||||
|
||||
LockKey::single(key)
|
||||
LockKey::single_exclusive(key)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -455,13 +455,7 @@ impl CreateRequestBuilder {
|
||||
request.region_id = region_id.as_u64();
|
||||
request.path = storage_path;
|
||||
// Stores the encoded wal options into the request options.
|
||||
region_wal_options
|
||||
.get(®ion_id.region_number())
|
||||
.and_then(|wal_options| {
|
||||
request
|
||||
.options
|
||||
.insert(WAL_OPTIONS_KEY.to_string(), wal_options.clone())
|
||||
});
|
||||
prepare_wal_option(&mut request.options, region_id, region_wal_options);
|
||||
|
||||
if let Some(physical_table_id) = self.physical_table_id {
|
||||
// Logical table has the same region numbers with physical table, and they have a one-to-one mapping.
|
||||
|
||||
@@ -116,7 +116,7 @@ impl DropTableProcedure {
|
||||
|
||||
/// Register dropping regions if doesn't exist.
|
||||
fn register_dropping_regions(&mut self) -> Result<()> {
|
||||
let region_routes = self.data.region_routes();
|
||||
let region_routes = self.data.region_routes()?;
|
||||
|
||||
let dropping_regions = operating_leader_regions(region_routes);
|
||||
|
||||
@@ -190,7 +190,7 @@ impl DropTableProcedure {
|
||||
pub async fn on_datanode_drop_regions(&self) -> Result<Status> {
|
||||
let table_id = self.data.table_id();
|
||||
|
||||
let region_routes = &self.data.region_routes();
|
||||
let region_routes = &self.data.region_routes()?;
|
||||
let leaders = find_leaders(region_routes);
|
||||
let mut drop_region_tasks = Vec::with_capacity(leaders.len());
|
||||
|
||||
@@ -273,7 +273,7 @@ impl Procedure for DropTableProcedure {
|
||||
table_ref.table,
|
||||
);
|
||||
|
||||
LockKey::single(key)
|
||||
LockKey::single_exclusive(key)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -306,7 +306,7 @@ impl DropTableData {
|
||||
self.task.table_ref()
|
||||
}
|
||||
|
||||
fn region_routes(&self) -> &Vec<RegionRoute> {
|
||||
fn region_routes(&self) -> Result<&Vec<RegionRoute>> {
|
||||
self.table_route_value.region_routes()
|
||||
}
|
||||
|
||||
|
||||
src/common/meta/src/ddl/table_meta.rs (new file, 223 lines)
@@ -0,0 +1,223 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_catalog::consts::METRIC_ENGINE;
|
||||
use common_telemetry::{debug, info};
|
||||
use snafu::{ensure, OptionExt};
|
||||
use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
|
||||
use store_api::storage::{RegionId, RegionNumber, TableId};
|
||||
|
||||
use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
|
||||
use crate::error::{Result, TableNotFoundSnafu, UnsupportedSnafu};
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::{LogicalTableRouteValue, PhysicalTableRouteValue, TableRouteValue};
|
||||
use crate::key::TableMetadataManagerRef;
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::ddl::CreateTableTask;
|
||||
use crate::rpc::router::{Region, RegionRoute};
|
||||
use crate::sequence::SequenceRef;
|
||||
use crate::wal::{allocate_region_wal_options, WalOptionsAllocatorRef};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct TableMetadataAllocator {
|
||||
table_id_sequence: SequenceRef,
|
||||
wal_options_allocator: WalOptionsAllocatorRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
peer_allocator: PeerAllocatorRef,
|
||||
}
|
||||
|
||||
impl TableMetadataAllocator {
|
||||
pub fn new(
|
||||
table_id_sequence: SequenceRef,
|
||||
wal_options_allocator: WalOptionsAllocatorRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
) -> Self {
|
||||
Self::with_peer_allocator(
|
||||
table_id_sequence,
|
||||
wal_options_allocator,
|
||||
table_metadata_manager,
|
||||
Arc::new(NoopPeerAllocator),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn with_peer_allocator(
|
||||
table_id_sequence: SequenceRef,
|
||||
wal_options_allocator: WalOptionsAllocatorRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
peer_allocator: PeerAllocatorRef,
|
||||
) -> Self {
|
||||
Self {
|
||||
table_id_sequence,
|
||||
wal_options_allocator,
|
||||
table_metadata_manager,
|
||||
peer_allocator,
|
||||
}
|
||||
}
|
||||
|
||||
async fn allocate_table_id(&self, task: &CreateTableTask) -> Result<TableId> {
|
||||
let table_id = if let Some(table_id) = &task.create_table.table_id {
|
||||
let table_id = table_id.id;
|
||||
|
||||
ensure!(
|
||||
!self
|
||||
.table_id_sequence
|
||||
.min_max()
|
||||
.await
|
||||
.contains(&(table_id as u64)),
|
||||
UnsupportedSnafu {
|
||||
operation: format!(
|
||||
"create table by id {} that is reserved in this node",
|
||||
table_id
|
||||
)
|
||||
}
|
||||
);
|
||||
|
||||
info!(
|
||||
"Received explicitly allocated table id {}, will use it directly.",
|
||||
table_id
|
||||
);
|
||||
|
||||
table_id
|
||||
} else {
|
||||
self.table_id_sequence.next().await? as TableId
|
||||
};
|
||||
Ok(table_id)
|
||||
}
|
||||
|
||||
fn create_wal_options(
|
||||
&self,
|
||||
table_route: &TableRouteValue,
|
||||
) -> Result<HashMap<RegionNumber, String>> {
|
||||
match table_route {
|
||||
TableRouteValue::Physical(x) => {
|
||||
let region_numbers = x
|
||||
.region_routes
|
||||
.iter()
|
||||
.map(|route| route.region.id.region_number())
|
||||
.collect();
|
||||
allocate_region_wal_options(region_numbers, &self.wal_options_allocator)
|
||||
}
|
||||
TableRouteValue::Logical(_) => Ok(HashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn create_table_route(
|
||||
&self,
|
||||
ctx: &TableMetadataAllocatorContext,
|
||||
table_id: TableId,
|
||||
task: &CreateTableTask,
|
||||
) -> Result<TableRouteValue> {
|
||||
let regions = task.partitions.len();
|
||||
|
||||
let table_route = if task.create_table.engine == METRIC_ENGINE
|
||||
&& let Some(physical_table_name) = task
|
||||
.create_table
|
||||
.table_options
|
||||
.get(LOGICAL_TABLE_METADATA_KEY)
|
||||
{
|
||||
let physical_table_id = self
|
||||
.table_metadata_manager
|
||||
.table_name_manager()
|
||||
.get(TableNameKey::new(
|
||||
&task.create_table.catalog_name,
|
||||
&task.create_table.schema_name,
|
||||
physical_table_name,
|
||||
))
|
||||
.await?
|
||||
.context(TableNotFoundSnafu {
|
||||
table_name: physical_table_name,
|
||||
})?
|
||||
.table_id();
|
||||
|
||||
let region_ids = (0..regions)
|
||||
.map(|i| RegionId::new(table_id, i as RegionNumber))
|
||||
.collect();
|
||||
|
||||
TableRouteValue::Logical(LogicalTableRouteValue::new(physical_table_id, region_ids))
|
||||
} else {
|
||||
let peers = self.peer_allocator.alloc(ctx, regions).await?;
|
||||
|
||||
let region_routes = task
|
||||
.partitions
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, partition)| {
|
||||
let region = Region {
|
||||
id: RegionId::new(table_id, i as u32),
|
||||
partition: Some(partition.clone().into()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let peer = peers[i % peers.len()].clone();
|
||||
|
||||
RegionRoute {
|
||||
region,
|
||||
leader_peer: Some(peer),
|
||||
..Default::default()
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
TableRouteValue::Physical(PhysicalTableRouteValue::new(region_routes))
|
||||
};
|
||||
Ok(table_route)
|
||||
}
|
||||
|
||||
pub async fn create(
|
||||
&self,
|
||||
ctx: &TableMetadataAllocatorContext,
|
||||
task: &CreateTableTask,
|
||||
) -> Result<TableMetadata> {
|
||||
let table_id = self.allocate_table_id(task).await?;
|
||||
let table_route = self.create_table_route(ctx, table_id, task).await?;
|
||||
let region_wal_options = self.create_wal_options(&table_route)?;
|
||||
|
||||
debug!(
|
||||
"Allocated region wal options {:?} for table {}",
|
||||
region_wal_options, table_id
|
||||
);
|
||||
|
||||
Ok(TableMetadata {
|
||||
table_id,
|
||||
table_route,
|
||||
region_wal_options,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub type PeerAllocatorRef = Arc<dyn PeerAllocator>;
|
||||
|
||||
/// [PeerAllocator] allocates [Peer]s for creating regions.
|
||||
#[async_trait]
|
||||
pub trait PeerAllocator: Send + Sync {
|
||||
/// Allocates `regions` size [Peer]s.
|
||||
async fn alloc(&self, ctx: &TableMetadataAllocatorContext, regions: usize)
|
||||
-> Result<Vec<Peer>>;
|
||||
}
|
||||
|
||||
struct NoopPeerAllocator;
|
||||
|
||||
#[async_trait]
|
||||
impl PeerAllocator for NoopPeerAllocator {
|
||||
async fn alloc(
|
||||
&self,
|
||||
_ctx: &TableMetadataAllocatorContext,
|
||||
regions: usize,
|
||||
) -> Result<Vec<Peer>> {
|
||||
Ok(vec![Peer::default(); regions])
|
||||
}
|
||||
}
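Putting the new allocator together: the standalone start-up path and the DdlManager test elsewhere in this change construct it from a table-id sequence, a WAL options allocator and the table metadata manager, then hand it to DdlManager. A condensed sketch using only the names those call sites already use:

    let table_meta_allocator = TableMetadataAllocator::new(
        table_id_sequence,                // SequenceRef for table ids
        wal_options_allocator.clone(),    // WalOptionsAllocatorRef
        table_metadata_manager.clone(),   // TableMetadataManagerRef; used to resolve
                                          // the physical table of a metric-engine table
    );

    // During CREATE TABLE handling the DDL layer then calls (sketch):
    // let TableMetadata { table_id, table_route, region_wal_options } =
    //     table_meta_allocator.create(&ctx, &create_table_task).await?;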
|
||||
@@ -81,7 +81,7 @@ impl Procedure for TruncateTableProcedure {
|
||||
table_ref.table,
|
||||
);
|
||||
|
||||
LockKey::single(key)
|
||||
LockKey::single_exclusive(key)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -26,10 +26,10 @@ use crate::datanode_manager::DatanodeManagerRef;
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::ddl::create_table::CreateTableProcedure;
|
||||
use crate::ddl::drop_table::DropTableProcedure;
|
||||
use crate::ddl::table_meta::TableMetadataAllocator;
|
||||
use crate::ddl::truncate_table::TruncateTableProcedure;
|
||||
use crate::ddl::{
|
||||
DdlContext, DdlTaskExecutor, ExecutorContext, TableMetadata, TableMetadataAllocatorContext,
|
||||
TableMetadataAllocatorRef,
|
||||
};
|
||||
use crate::error::{
|
||||
self, RegisterProcedureLoaderSnafu, Result, SubmitProcedureSnafu, TableNotFoundSnafu,
|
||||
@@ -46,6 +46,8 @@ use crate::rpc::ddl::{
|
||||
TruncateTableTask,
|
||||
};
|
||||
use crate::rpc::router::RegionRoute;
|
||||
use crate::table_name::TableName;
|
||||
|
||||
pub type DdlManagerRef = Arc<DdlManager>;
|
||||
|
||||
/// The [DdlManager] provides the ability to execute Ddl.
|
||||
@@ -54,7 +56,7 @@ pub struct DdlManager {
|
||||
datanode_manager: DatanodeManagerRef,
|
||||
cache_invalidator: CacheInvalidatorRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
table_metadata_allocator: TableMetadataAllocatorRef,
|
||||
table_metadata_allocator: TableMetadataAllocator,
|
||||
memory_region_keeper: MemoryRegionKeeperRef,
|
||||
}
|
||||
|
||||
@@ -65,7 +67,7 @@ impl DdlManager {
|
||||
datanode_clients: DatanodeManagerRef,
|
||||
cache_invalidator: CacheInvalidatorRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
table_metadata_allocator: TableMetadataAllocatorRef,
|
||||
table_metadata_allocator: TableMetadataAllocator,
|
||||
memory_region_keeper: MemoryRegionKeeperRef,
|
||||
) -> Result<Self> {
|
||||
let manager = Self {
|
||||
@@ -160,11 +162,17 @@ impl DdlManager {
|
||||
cluster_id: u64,
|
||||
alter_table_task: AlterTableTask,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
physical_table_name: Option<TableName>,
|
||||
) -> Result<ProcedureId> {
|
||||
let context = self.create_context();
|
||||
|
||||
let procedure =
|
||||
AlterTableProcedure::new(cluster_id, alter_table_task, table_info_value, context)?;
|
||||
let procedure = AlterTableProcedure::new(
|
||||
cluster_id,
|
||||
alter_table_task,
|
||||
table_info_value,
|
||||
physical_table_name,
|
||||
context,
|
||||
)?;
|
||||
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
@@ -278,7 +286,7 @@ async fn handle_truncate_table_task(
|
||||
let table_route_value =
|
||||
table_route_value.context(error::TableRouteNotFoundSnafu { table_id })?;
|
||||
|
||||
let table_route = table_route_value.into_inner().region_routes().clone();
|
||||
let table_route = table_route_value.into_inner().region_routes()?.clone();
|
||||
|
||||
let id = ddl_manager
|
||||
.submit_truncate_table_task(
|
||||
@@ -327,8 +335,38 @@ async fn handle_alter_table_task(
|
||||
table_name: table_ref.to_string(),
|
||||
})?;
|
||||
|
||||
let physical_table_id = ddl_manager
|
||||
.table_metadata_manager()
|
||||
.table_route_manager()
|
||||
.get_physical_table_id(table_id)
|
||||
.await?;
|
||||
|
||||
let physical_table_name = if physical_table_id == table_id {
|
||||
None
|
||||
} else {
|
||||
let physical_table_info = &ddl_manager
|
||||
.table_metadata_manager()
|
||||
.table_info_manager()
|
||||
.get(physical_table_id)
|
||||
.await?
|
||||
.with_context(|| error::TableInfoNotFoundSnafu {
|
||||
table_name: table_ref.to_string(),
|
||||
})?
|
||||
.table_info;
|
||||
Some(TableName {
|
||||
catalog_name: physical_table_info.catalog_name.clone(),
|
||||
schema_name: physical_table_info.schema_name.clone(),
|
||||
table_name: physical_table_info.name.clone(),
|
||||
})
|
||||
};
|
||||
|
||||
let id = ddl_manager
|
||||
.submit_alter_table_task(cluster_id, alter_table_task, table_info_value)
|
||||
.submit_alter_table_task(
|
||||
cluster_id,
|
||||
alter_table_task,
|
||||
table_info_value,
|
||||
physical_table_name,
|
||||
)
|
||||
.await?;
|
||||
|
||||
info!("Table: {table_id} is altered via procedure_id {id:?}");
|
||||
@@ -461,15 +499,15 @@ mod tests {
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::ddl::create_table::CreateTableProcedure;
|
||||
use crate::ddl::drop_table::DropTableProcedure;
|
||||
use crate::ddl::table_meta::TableMetadataAllocator;
|
||||
use crate::ddl::truncate_table::TruncateTableProcedure;
|
||||
use crate::ddl::{TableMetadata, TableMetadataAllocator, TableMetadataAllocatorContext};
|
||||
use crate::error::Result;
|
||||
use crate::key::TableMetadataManager;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::peer::Peer;
|
||||
use crate::region_keeper::MemoryRegionKeeper;
|
||||
use crate::rpc::ddl::CreateTableTask;
|
||||
use crate::sequence::SequenceBuilder;
|
||||
use crate::state_store::KvStateStore;
|
||||
use crate::wal::WalOptionsAllocator;
|
||||
|
||||
/// A dummy implemented [DatanodeManager].
|
||||
pub struct DummyDatanodeManager;
|
||||
@@ -481,26 +519,12 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
/// A dummy implemented [TableMetadataAllocator].
|
||||
pub struct DummyTableMetadataAllocator;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl TableMetadataAllocator for DummyTableMetadataAllocator {
|
||||
async fn create(
|
||||
&self,
|
||||
_ctx: &TableMetadataAllocatorContext,
|
||||
_task: &CreateTableTask,
|
||||
) -> Result<TableMetadata> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_new() {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::new());
|
||||
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend.clone()));
|
||||
|
||||
let state_store = Arc::new(KvStateStore::new(kv_backend));
|
||||
let state_store = Arc::new(KvStateStore::new(kv_backend.clone()));
|
||||
let procedure_manager = Arc::new(LocalManager::new(Default::default(), state_store));
|
||||
|
||||
let _ = DdlManager::try_new(
|
||||
@@ -508,7 +532,11 @@ mod tests {
|
||||
Arc::new(DummyDatanodeManager),
|
||||
Arc::new(DummyCacheInvalidator),
|
||||
table_metadata_manager,
|
||||
Arc::new(DummyTableMetadataAllocator),
|
||||
TableMetadataAllocator::new(
|
||||
Arc::new(SequenceBuilder::new("test", kv_backend.clone()).build()),
|
||||
Arc::new(WalOptionsAllocator::default()),
|
||||
Arc::new(TableMetadataManager::new(kv_backend)),
|
||||
),
|
||||
Arc::new(MemoryRegionKeeper::default()),
|
||||
);
|
||||
|
||||
|
||||
@@ -321,6 +321,27 @@ pub enum Error {
|
||||
error: rskafka::client::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Failed to build a Kafka partition client, topic: {}, partition: {}",
|
||||
topic,
|
||||
partition
|
||||
))]
|
||||
BuildKafkaPartitionClient {
|
||||
topic: String,
|
||||
partition: i32,
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: rskafka::client::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to produce records to Kafka, topic: {}", topic))]
|
||||
ProduceRecord {
|
||||
topic: String,
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: rskafka::client::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to create a Kafka wal topic"))]
|
||||
CreateKafkaWalTopic {
|
||||
location: Location,
|
||||
@@ -330,6 +351,9 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("The topic pool is empty"))]
|
||||
EmptyTopicPool { location: Location },
|
||||
|
||||
#[snafu(display("Unexpected table route type: {}", err_msg))]
|
||||
UnexpectedLogicalRouteTable { location: Location, err_msg: String },
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -368,8 +392,11 @@ impl ErrorExt for Error {
|
||||
| EncodeWalOptions { .. }
|
||||
| BuildKafkaClient { .. }
|
||||
| BuildKafkaCtrlClient { .. }
|
||||
| BuildKafkaPartitionClient { .. }
|
||||
| ProduceRecord { .. }
|
||||
| CreateKafkaWalTopic { .. }
|
||||
| EmptyTopicPool { .. } => StatusCode::Unexpected,
|
||||
| EmptyTopicPool { .. }
|
||||
| UnexpectedLogicalRouteTable { .. } => StatusCode::Unexpected,
|
||||
|
||||
SendMessage { .. }
|
||||
| GetKvCache { .. }
|
||||
|
||||
@@ -92,13 +92,15 @@ impl Display for OpenRegion {
|
||||
}
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct OpenRegion {
|
||||
pub region_ident: RegionIdent,
|
||||
pub region_storage_path: String,
|
||||
pub region_options: HashMap<String, String>,
|
||||
#[serde(default)]
|
||||
pub region_wal_options: HashMap<String, String>,
|
||||
#[serde_as(as = "HashMap<serde_with::DisplayFromStr, _>")]
|
||||
pub region_wal_options: HashMap<RegionNumber, String>,
|
||||
#[serde(default)]
|
||||
pub skip_wal_replay: bool,
|
||||
}
|
||||
@@ -108,7 +110,7 @@ impl OpenRegion {
|
||||
region_ident: RegionIdent,
|
||||
path: &str,
|
||||
region_options: HashMap<String, String>,
|
||||
region_wal_options: HashMap<String, String>,
|
||||
region_wal_options: HashMap<RegionNumber, String>,
|
||||
skip_wal_replay: bool,
|
||||
) -> Self {
|
||||
Self {
|
||||
|
||||
@@ -427,7 +427,7 @@ impl TableMetadataManager {
|
||||
®ion_storage_path,
|
||||
region_options,
|
||||
region_wal_options,
|
||||
region_distribution(&x.region_routes)?,
|
||||
region_distribution(&x.region_routes),
|
||||
)?;
|
||||
txn = txn.merge(create_datanode_table_txn);
|
||||
}
|
||||
@@ -483,7 +483,7 @@ impl TableMetadataManager {
|
||||
.build_delete_txn(table_id, table_info_value)?;
|
||||
|
||||
// Deletes datanode table key value pairs.
|
||||
let distribution = region_distribution(table_route_value.region_routes())?;
|
||||
let distribution = region_distribution(table_route_value.region_routes()?);
|
||||
let delete_datanode_txn = self
|
||||
.datanode_table_manager()
|
||||
.build_delete_txn(table_id, distribution)?;
|
||||
@@ -604,12 +604,12 @@ impl TableMetadataManager {
|
||||
current_table_route_value: &DeserializedValueWithBytes<TableRouteValue>,
|
||||
new_region_routes: Vec<RegionRoute>,
|
||||
new_region_options: &HashMap<String, String>,
|
||||
new_region_wal_options: &HashMap<String, String>,
|
||||
new_region_wal_options: &HashMap<RegionNumber, String>,
|
||||
) -> Result<()> {
|
||||
// Updates the datanode table key value pairs.
|
||||
let current_region_distribution =
|
||||
region_distribution(current_table_route_value.region_routes())?;
|
||||
let new_region_distribution = region_distribution(&new_region_routes)?;
|
||||
region_distribution(current_table_route_value.region_routes()?);
|
||||
let new_region_distribution = region_distribution(&new_region_routes);
|
||||
|
||||
let update_datanode_table_txn = self.datanode_table_manager().build_update_txn(
|
||||
table_id,
|
||||
@@ -621,7 +621,7 @@ impl TableMetadataManager {
|
||||
)?;
|
||||
|
||||
// Updates the table_route.
|
||||
let new_table_route_value = current_table_route_value.update(new_region_routes);
|
||||
let new_table_route_value = current_table_route_value.update(new_region_routes)?;
|
||||
|
||||
let (update_table_route_txn, on_update_table_route_failure) = self
|
||||
.table_route_manager()
|
||||
@@ -656,7 +656,7 @@ impl TableMetadataManager {
|
||||
where
|
||||
F: Fn(&RegionRoute) -> Option<Option<RegionStatus>>,
|
||||
{
|
||||
let mut new_region_routes = current_table_route_value.region_routes().clone();
|
||||
let mut new_region_routes = current_table_route_value.region_routes()?.clone();
|
||||
|
||||
let mut updated = 0;
|
||||
for route in &mut new_region_routes {
|
||||
@@ -673,7 +673,7 @@ impl TableMetadataManager {
|
||||
}
|
||||
|
||||
// Updates the table_route.
|
||||
let new_table_route_value = current_table_route_value.update(new_region_routes);
|
||||
let new_table_route_value = current_table_route_value.update(new_region_routes)?;
|
||||
|
||||
let (update_table_route_txn, on_update_table_route_failure) = self
|
||||
.table_route_manager()
|
||||
@@ -897,7 +897,11 @@ mod tests {
|
||||
table_info
|
||||
);
|
||||
assert_eq!(
|
||||
remote_table_route.unwrap().into_inner().region_routes(),
|
||||
remote_table_route
|
||||
.unwrap()
|
||||
.into_inner()
|
||||
.region_routes()
|
||||
.unwrap(),
|
||||
region_routes
|
||||
);
|
||||
}
|
||||
@@ -978,7 +982,7 @@ mod tests {
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.into_inner();
|
||||
assert_eq!(removed_table_route.region_routes(), region_routes);
|
||||
assert_eq!(removed_table_route.region_routes().unwrap(), region_routes);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -1173,11 +1177,11 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
updated_route_value.region_routes()[0].leader_status,
|
||||
updated_route_value.region_routes().unwrap()[0].leader_status,
|
||||
Some(RegionStatus::Downgraded)
|
||||
);
|
||||
assert_eq!(
|
||||
updated_route_value.region_routes()[1].leader_status,
|
||||
updated_route_value.region_routes().unwrap()[1].leader_status,
|
||||
Some(RegionStatus::Downgraded)
|
||||
);
|
||||
}
|
||||
@@ -1187,7 +1191,7 @@ mod tests {
|
||||
table_id: u32,
|
||||
region_routes: &[RegionRoute],
|
||||
) {
|
||||
let region_distribution = region_distribution(region_routes).unwrap();
|
||||
let region_distribution = region_distribution(region_routes);
|
||||
for (datanode, regions) in region_distribution {
|
||||
let got = table_metadata_manager
|
||||
.datanode_table_manager()
|
||||
@@ -1271,7 +1275,8 @@ mod tests {
|
||||
let current_table_route_value = DeserializedValueWithBytes::from_inner(
|
||||
current_table_route_value
|
||||
.inner
|
||||
.update(new_region_routes.clone()),
|
||||
.update(new_region_routes.clone())
|
||||
.unwrap(),
|
||||
);
|
||||
let new_region_routes = vec![new_region_route(2, 4), new_region_route(5, 5)];
|
||||
// it should be ok.
|
||||
@@ -1295,13 +1300,16 @@ mod tests {
|
||||
|
||||
// if the current_table_route_value is wrong, it should return an error.
|
||||
// The ABA problem.
|
||||
let wrong_table_route_value =
|
||||
DeserializedValueWithBytes::from_inner(current_table_route_value.update(vec![
|
||||
new_region_route(1, 1),
|
||||
new_region_route(2, 2),
|
||||
new_region_route(3, 3),
|
||||
new_region_route(4, 4),
|
||||
]));
|
||||
let wrong_table_route_value = DeserializedValueWithBytes::from_inner(
|
||||
current_table_route_value
|
||||
.update(vec![
|
||||
new_region_route(1, 1),
|
||||
new_region_route(2, 2),
|
||||
new_region_route(3, 3),
|
||||
new_region_route(4, 4),
|
||||
])
|
||||
.unwrap(),
|
||||
);
|
||||
assert!(table_metadata_manager
|
||||
.update_table_route(
|
||||
table_id,
|
||||
|
||||
@@ -34,6 +34,7 @@ use crate::rpc::store::RangeRequest;
|
||||
use crate::rpc::KeyValue;
|
||||
use crate::DatanodeId;
|
||||
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
|
||||
/// RegionInfo
|
||||
/// For compatible reason, DON'T modify the field name.
|
||||
@@ -48,14 +49,15 @@ pub struct RegionInfo {
|
||||
#[serde(default)]
|
||||
pub region_options: HashMap<String, String>,
|
||||
/// The per-region wal options.
|
||||
/// Key: region number (in string representation). Value: the encoded wal options of the region.
|
||||
/// Key: region number. Value: the encoded wal options of the region.
|
||||
#[serde(default)]
|
||||
pub region_wal_options: HashMap<String, String>,
|
||||
#[serde_as(as = "HashMap<serde_with::DisplayFromStr, _>")]
|
||||
pub region_wal_options: HashMap<RegionNumber, String>,
|
||||
}
|
||||
|
||||
pub struct DatanodeTableKey {
|
||||
datanode_id: DatanodeId,
|
||||
table_id: TableId,
|
||||
pub datanode_id: DatanodeId,
|
||||
pub table_id: TableId,
|
||||
}
|
||||
|
||||
impl DatanodeTableKey {
|
||||
@@ -181,7 +183,7 @@ impl DatanodeTableManager {
|
||||
.filter_map(|region_number| {
|
||||
region_wal_options
|
||||
.get(region_number)
|
||||
.map(|wal_options| (region_number.to_string(), wal_options.clone()))
|
||||
.map(|wal_options| (*region_number, wal_options.clone()))
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -214,7 +216,7 @@ impl DatanodeTableManager {
|
||||
current_region_distribution: RegionDistribution,
|
||||
new_region_distribution: RegionDistribution,
|
||||
new_region_options: &HashMap<String, String>,
|
||||
new_region_wal_options: &HashMap<String, String>,
|
||||
new_region_wal_options: &HashMap<RegionNumber, String>,
|
||||
) -> Result<Txn> {
|
||||
let mut opts = Vec::new();
|
||||
|
||||
@@ -306,6 +308,61 @@ mod tests {
|
||||
assert!(parsed.is_ok());
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq)]
|
||||
struct StringHashMap {
|
||||
inner: HashMap<String, String>,
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq)]
|
||||
struct IntegerHashMap {
|
||||
#[serde_as(as = "HashMap<serde_with::DisplayFromStr, _>")]
|
||||
inner: HashMap<u32, String>,
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serde_with_integer_hash_map() {
|
||||
let map = StringHashMap {
|
||||
inner: HashMap::from([
|
||||
("1".to_string(), "aaa".to_string()),
|
||||
("2".to_string(), "bbb".to_string()),
|
||||
("3".to_string(), "ccc".to_string()),
|
||||
]),
|
||||
};
|
||||
let encoded = serde_json::to_string(&map).unwrap();
|
||||
let decoded: IntegerHashMap = serde_json::from_str(&encoded).unwrap();
|
||||
assert_eq!(
|
||||
IntegerHashMap {
|
||||
inner: HashMap::from([
|
||||
(1, "aaa".to_string()),
|
||||
(2, "bbb".to_string()),
|
||||
(3, "ccc".to_string()),
|
||||
]),
|
||||
},
|
||||
decoded
|
||||
);
|
||||
|
||||
let map = IntegerHashMap {
|
||||
inner: HashMap::from([
|
||||
(1, "aaa".to_string()),
|
||||
(2, "bbb".to_string()),
|
||||
(3, "ccc".to_string()),
|
||||
]),
|
||||
};
|
||||
let encoded = serde_json::to_string(&map).unwrap();
|
||||
let decoded: StringHashMap = serde_json::from_str(&encoded).unwrap();
|
||||
assert_eq!(
|
||||
StringHashMap {
|
||||
inner: HashMap::from([
|
||||
("1".to_string(), "aaa".to_string()),
|
||||
("2".to_string(), "bbb".to_string()),
|
||||
("3".to_string(), "ccc".to_string()),
|
||||
]),
|
||||
},
|
||||
decoded
|
||||
);
|
||||
}
|
||||
|
||||
// This test intends to ensure both the `serde_json::to_string` + `serde_json::from_str`
|
||||
// and `serde_json::to_vec` + `serde_json::from_slice` work for `DatanodeTableValue`.
|
||||
// Warning: if the key of `region_wal_options` is of type non-String, this test would fail.
|
||||
@@ -320,9 +377,9 @@ mod tests {
|
||||
("c".to_string(), "cc".to_string()),
|
||||
]),
|
||||
region_wal_options: HashMap::from([
|
||||
("1".to_string(), "aaa".to_string()),
|
||||
("2".to_string(), "bbb".to_string()),
|
||||
("3".to_string(), "ccc".to_string()),
|
||||
(1, "aaa".to_string()),
|
||||
(2, "bbb".to_string()),
|
||||
(3, "ccc".to_string()),
|
||||
]),
|
||||
};
|
||||
let table_value = DatanodeTableValue {
|
||||
|
||||
@@ -16,12 +16,14 @@ use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
use table::metadata::TableId;
|
||||
|
||||
use super::{DeserializedValueWithBytes, TableMetaValue};
|
||||
use crate::error::{Result, SerdeJsonSnafu};
|
||||
use crate::error::{
|
||||
Result, SerdeJsonSnafu, TableRouteNotFoundSnafu, UnexpectedLogicalRouteTableSnafu,
|
||||
};
|
||||
use crate::key::{to_removed_key, RegionDistribution, TableMetaKey, TABLE_ROUTE_PREFIX};
|
||||
use crate::kv_backend::txn::{Compare, CompareOp, Txn, TxnOp, TxnOpResponse};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
@@ -53,7 +55,8 @@ pub struct PhysicalTableRouteValue {
|
||||
|
||||
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
|
||||
pub struct LogicalTableRouteValue {
|
||||
// TODO(LFC): Add table route for MetricsEngine table.
|
||||
physical_table_id: TableId,
|
||||
region_ids: Vec<RegionId>,
|
||||
}
|
||||
|
||||
impl TableRouteValue {
|
||||
@@ -62,29 +65,50 @@ impl TableRouteValue {
|
||||
}
|
||||
|
||||
/// Returns a new version [TableRouteValue] with `region_routes`.
|
||||
pub fn update(&self, region_routes: Vec<RegionRoute>) -> Self {
|
||||
pub fn update(&self, region_routes: Vec<RegionRoute>) -> Result<Self> {
|
||||
ensure!(
|
||||
self.is_physical(),
|
||||
UnexpectedLogicalRouteTableSnafu {
|
||||
err_msg: format!("{self:?} is a non-physical TableRouteValue."),
|
||||
}
|
||||
);
|
||||
let version = self.physical_table_route().version;
|
||||
Self::Physical(PhysicalTableRouteValue {
|
||||
Ok(Self::Physical(PhysicalTableRouteValue {
|
||||
region_routes,
|
||||
version: version + 1,
|
||||
})
|
||||
}))
|
||||
}
|
||||
|
||||
/// Returns the version.
|
||||
///
|
||||
/// For test purpose.
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub fn version(&self) -> u64 {
|
||||
self.physical_table_route().version
|
||||
pub fn version(&self) -> Result<u64> {
|
||||
ensure!(
|
||||
self.is_physical(),
|
||||
UnexpectedLogicalRouteTableSnafu {
|
||||
err_msg: format!("{self:?} is a non-physical TableRouteValue."),
|
||||
}
|
||||
);
|
||||
Ok(self.physical_table_route().version)
|
||||
}
|
||||
|
||||
/// Returns the corresponding [RegionRoute].
|
||||
pub fn region_route(&self, region_id: RegionId) -> Option<RegionRoute> {
|
||||
self.physical_table_route()
|
||||
/// Returns the corresponding [RegionRoute], returns `None` if it's the specific region is not found.
|
||||
///
|
||||
/// Note: It throws an error if it's a logical table
|
||||
pub fn region_route(&self, region_id: RegionId) -> Result<Option<RegionRoute>> {
|
||||
ensure!(
|
||||
self.is_physical(),
|
||||
UnexpectedLogicalRouteTableSnafu {
|
||||
err_msg: format!("{self:?} is a non-physical TableRouteValue."),
|
||||
}
|
||||
);
|
||||
Ok(self
|
||||
.physical_table_route()
|
||||
.region_routes
|
||||
.iter()
|
||||
.find(|route| route.region.id == region_id)
|
||||
.cloned()
|
||||
.cloned())
|
||||
}
|
||||
|
||||
/// Returns true if it's [TableRouteValue::Physical].
|
||||
@@ -93,11 +117,14 @@ impl TableRouteValue {
|
||||
}
|
||||
|
||||
/// Gets the [RegionRoute]s of this [TableRouteValue::Physical].
|
||||
///
|
||||
/// # Panics
|
||||
/// The route type is not the [TableRouteValue::Physical].
|
||||
pub fn region_routes(&self) -> &Vec<RegionRoute> {
|
||||
&self.physical_table_route().region_routes
|
||||
pub fn region_routes(&self) -> Result<&Vec<RegionRoute>> {
|
||||
ensure!(
|
||||
self.is_physical(),
|
||||
UnexpectedLogicalRouteTableSnafu {
|
||||
err_msg: format!("{self:?} is a non-physical TableRouteValue."),
|
||||
}
|
||||
);
|
||||
Ok(&self.physical_table_route().region_routes)
|
||||
}
|
||||
|
||||
fn physical_table_route(&self) -> &PhysicalTableRouteValue {
|
||||
@@ -152,12 +179,19 @@ impl PhysicalTableRouteValue {
|
||||
}
|
||||
|
||||
impl LogicalTableRouteValue {
|
||||
pub fn physical_table_id(&self) -> TableId {
|
||||
todo!()
|
||||
pub fn new(physical_table_id: TableId, region_ids: Vec<RegionId>) -> Self {
|
||||
Self {
|
||||
physical_table_id,
|
||||
region_ids,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn region_ids(&self) -> Vec<RegionId> {
|
||||
todo!()
|
||||
pub fn physical_table_id(&self) -> TableId {
|
||||
self.physical_table_id
|
||||
}
|
||||
|
||||
pub fn region_ids(&self) -> &Vec<RegionId> {
|
||||
&self.region_ids
|
||||
}
|
||||
}
|
||||
|
||||
@@ -302,6 +336,54 @@ impl TableRouteManager {
|
||||
.transpose()
|
||||
}
|
||||
|
||||
pub async fn get_physical_table_id(
|
||||
&self,
|
||||
logical_or_physical_table_id: TableId,
|
||||
) -> Result<TableId> {
|
||||
let table_route = self
|
||||
.get(logical_or_physical_table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: logical_or_physical_table_id,
|
||||
})?
|
||||
.into_inner();
|
||||
|
||||
match table_route {
|
||||
TableRouteValue::Physical(_) => Ok(logical_or_physical_table_id),
|
||||
TableRouteValue::Logical(x) => Ok(x.physical_table_id()),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_physical_table_route(
|
||||
&self,
|
||||
logical_or_physical_table_id: TableId,
|
||||
) -> Result<(TableId, PhysicalTableRouteValue)> {
|
||||
let table_route = self
|
||||
.get(logical_or_physical_table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: logical_or_physical_table_id,
|
||||
})?
|
||||
.into_inner();
|
||||
|
||||
match table_route {
|
||||
TableRouteValue::Physical(x) => Ok((logical_or_physical_table_id, x)),
|
||||
TableRouteValue::Logical(x) => {
|
||||
let physical_table_id = x.physical_table_id();
|
||||
let physical_table_route =
|
||||
self.get(physical_table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: physical_table_id,
|
||||
})?;
|
||||
Ok((
|
||||
physical_table_id,
|
||||
physical_table_route.physical_table_route().clone(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
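A short usage sketch of the two new lookups, mirroring how AlterTableProcedure::submit_alter_region_requests consumes them earlier in this change (no API is assumed beyond what the hunks show):

    // Resolve a possibly-logical table to the physical route that actually
    // owns regions, then fan requests out to the leader datanodes.
    let (physical_table_id, physical_table_route) = table_route_manager
        .get_physical_table_route(table_id)
        .await?;
    let leaders = find_leaders(&physical_table_route.region_routes);
    for datanode in leaders {
        let regions = find_leader_regions(&physical_table_route.region_routes, &datanode);
        // ... build and send per-region requests for `regions` ...
    }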
|
||||
|
||||
/// It may return a subset of the `table_ids`.
|
||||
pub async fn batch_get(
|
||||
&self,
|
||||
@@ -354,7 +436,7 @@ impl TableRouteManager {
|
||||
) -> Result<Option<RegionDistribution>> {
|
||||
self.get(table_id)
|
||||
.await?
|
||||
.map(|table_route| region_distribution(table_route.region_routes()))
|
||||
.map(|table_route| Ok(region_distribution(table_route.region_routes()?)))
|
||||
.transpose()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#![feature(assert_matches)]
|
||||
#![feature(btree_extract_if)]
|
||||
#![feature(async_closure)]
|
||||
#![feature(let_chains)]
|
||||
|
||||
pub mod cache_invalidator;
|
||||
pub mod datanode_manager;
|
||||
@@ -35,7 +36,6 @@ pub mod sequence;
|
||||
pub mod state_store;
|
||||
pub mod table_name;
|
||||
pub mod util;
|
||||
#[allow(unused)]
|
||||
pub mod wal;
|
||||
|
||||
pub type ClusterId = u64;
|
||||
|
||||
@@ -16,36 +16,43 @@ use lazy_static::lazy_static;
|
||||
use prometheus::*;
|
||||
|
||||
lazy_static! {
|
||||
pub static ref METRIC_META_TXN_REQUEST: HistogramVec =
|
||||
register_histogram_vec!("meta_txn_request", "meta txn request", &["target", "op"]).unwrap();
|
||||
pub static ref METRIC_META_TXN_REQUEST: HistogramVec = register_histogram_vec!(
|
||||
"greptime_meta_txn_request",
|
||||
"meta txn request",
|
||||
&["target", "op"]
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_META_CREATE_CATALOG: Histogram =
|
||||
register_histogram!("meta_create_catalog", "meta create catalog").unwrap();
|
||||
pub static ref METRIC_META_CREATE_CATALOG_COUNTER: IntCounter =
|
||||
register_int_counter!("meta_create_catalog_counter", "meta create catalog").unwrap();
|
||||
register_histogram!("greptime_meta_create_catalog", "meta create catalog").unwrap();
|
||||
pub static ref METRIC_META_CREATE_CATALOG_COUNTER: IntCounter = register_int_counter!(
|
||||
"greptime_meta_create_catalog_counter",
|
||||
"meta create catalog"
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_META_CREATE_SCHEMA: Histogram =
|
||||
register_histogram!("meta_create_schema", "meta create schema").unwrap();
|
||||
register_histogram!("greptime_meta_create_schema", "meta create schema").unwrap();
|
||||
pub static ref METRIC_META_CREATE_SCHEMA_COUNTER: IntCounter =
|
||||
register_int_counter!("meta_create_schema_counter", "meta create schema").unwrap();
|
||||
register_int_counter!("greptime_meta_create_schema_counter", "meta create schema").unwrap();
|
||||
pub static ref METRIC_META_PROCEDURE_CREATE_TABLE: HistogramVec = register_histogram_vec!(
|
||||
"meta_procedure_create_table",
|
||||
"greptime_meta_procedure_create_table",
|
||||
"meta procedure create table",
|
||||
&["step"]
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_META_PROCEDURE_DROP_TABLE: HistogramVec = register_histogram_vec!(
|
||||
"meta_procedure_drop_table",
|
||||
"greptime_meta_procedure_drop_table",
|
||||
"meta procedure drop table",
|
||||
&["step"]
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_META_PROCEDURE_ALTER_TABLE: HistogramVec = register_histogram_vec!(
|
||||
"meta_procedure_alter_table",
|
||||
"greptime_meta_procedure_alter_table",
|
||||
"meta procedure alter table",
|
||||
&["step"]
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_META_PROCEDURE_TRUNCATE_TABLE: HistogramVec = register_histogram_vec!(
|
||||
"meta_procedure_truncate_table",
|
||||
"greptime_meta_procedure_truncate_table",
|
||||
"meta procedure truncate table",
|
||||
&["step"]
|
||||
)
|
||||
|
||||
@@ -30,7 +30,7 @@ use crate::peer::Peer;
|
||||
use crate::table_name::TableName;
|
||||
use crate::DatanodeId;
|
||||
|
||||
pub fn region_distribution(region_routes: &[RegionRoute]) -> Result<RegionDistribution> {
|
||||
pub fn region_distribution(region_routes: &[RegionRoute]) -> RegionDistribution {
|
||||
let mut regions_id_map = RegionDistribution::new();
|
||||
for route in region_routes.iter() {
|
||||
if let Some(peer) = route.leader_peer.as_ref() {
|
||||
@@ -42,7 +42,7 @@ pub fn region_distribution(region_routes: &[RegionRoute]) -> Result<RegionDistri
|
||||
// id asc
|
||||
regions.sort()
|
||||
}
|
||||
Ok(regions_id_map)
|
||||
regions_id_map
|
||||
}
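A hedged sketch of the adjusted call pattern (variable names are illustrative): the helper no longer returns a `Result`, so the `?` moves to `region_routes()`, which became fallible in this change.

    // The error now comes from region_routes(), not from region_distribution().
    let distribution = region_distribution(table_route.region_routes()?);
    for (datanode_id, region_numbers) in &distribution {
        // e.g. group per-datanode work by the regions it hosts
    }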
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
|
||||
@@ -123,11 +123,12 @@ pub fn convert_to_region_leader_status_map(
|
||||
pub fn find_region_leader(
|
||||
region_routes: &[RegionRoute],
|
||||
region_number: RegionNumber,
|
||||
) -> Option<&Peer> {
|
||||
) -> Option<Peer> {
|
||||
region_routes
|
||||
.iter()
|
||||
.find(|x| x.region.id.region_number() == region_number)
|
||||
.and_then(|r| r.leader_peer.as_ref())
|
||||
.cloned()
|
||||
}
|
||||
|
||||
pub fn find_leader_regions(region_routes: &[RegionRoute], datanode: &Peer) -> Vec<RegionNumber> {
|
||||
|
||||
@@ -18,10 +18,10 @@ pub mod options_allocator;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use common_config::wal::StandaloneWalConfig;
|
||||
use common_config::WAL_OPTIONS_KEY;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::with_prefix;
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::wal::kafka::KafkaConfig;
|
||||
pub use crate::wal::kafka::Topic as KafkaWalTopic;
|
||||
pub use crate::wal::options_allocator::{
|
||||
@@ -40,7 +40,7 @@ pub enum WalConfig {
|
||||
impl From<StandaloneWalConfig> for WalConfig {
|
||||
fn from(value: StandaloneWalConfig) -> Self {
|
||||
match value {
|
||||
StandaloneWalConfig::RaftEngine(config) => WalConfig::RaftEngine,
|
||||
StandaloneWalConfig::RaftEngine(_) => WalConfig::RaftEngine,
|
||||
StandaloneWalConfig::Kafka(config) => WalConfig::Kafka(KafkaConfig {
|
||||
broker_endpoints: config.base.broker_endpoints,
|
||||
num_topics: config.num_topics,
|
||||
@@ -55,6 +55,16 @@ impl From<StandaloneWalConfig> for WalConfig {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn prepare_wal_option(
|
||||
options: &mut HashMap<String, String>,
|
||||
region_id: RegionId,
|
||||
region_wal_options: &HashMap<RegionNumber, String>,
|
||||
) {
|
||||
if let Some(wal_options) = region_wal_options.get(®ion_id.region_number()) {
|
||||
options.insert(WAL_OPTIONS_KEY.to_string(), wal_options.clone());
|
||||
}
|
||||
}
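A brief usage sketch (the surrounding variables are assumptions; the same pattern appears later in this diff in `open_all_regions` and in the open-region handler): the helper copies the serialized WAL options for the region, if any, into the option map under `WAL_OPTIONS_KEY`.

    // Sketch: augment region options with the region's wal options before opening it.
    let mut region_options: HashMap<String, String> = base_region_options.clone();
    let region_id = RegionId::new(table_id, region_number);
    prepare_wal_option(&mut region_options, region_id, &region_wal_options);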
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub mod test_util;
|
||||
pub mod topic;
|
||||
pub mod topic_manager;
|
||||
pub mod topic_selector;
|
||||
@@ -19,7 +21,6 @@ pub mod topic_selector;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_config::wal::kafka::{kafka_backoff, KafkaBackoffConfig, TopicSelectorType};
|
||||
use common_config::wal::StandaloneWalConfig;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub use crate::wal::kafka::topic::Topic;
|
||||
@@ -27,6 +28,7 @@ pub use crate::wal::kafka::topic_manager::TopicManager;
|
||||
|
||||
/// Configurations for kafka wal.
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct KafkaConfig {
|
||||
/// The broker endpoints of the Kafka cluster.
|
||||
pub broker_endpoints: Vec<String>,
|
||||
@@ -40,7 +42,7 @@ pub struct KafkaConfig {
|
||||
pub num_partitions: i32,
|
||||
/// The replication factor of each topic.
|
||||
pub replication_factor: i16,
|
||||
/// Above which a topic creation operation will be cancelled.
|
||||
/// The timeout of topic creation.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub create_topic_timeout: Duration,
|
||||
/// The backoff config.
|
||||
|
||||
src/common/meta/src/wal/kafka/test_util.rs (new file, 33 lines)
@@ -0,0 +1,33 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_telemetry::warn;
|
||||
use futures_util::future::BoxFuture;
|
||||
|
||||
pub async fn run_test_with_kafka_wal<F>(test: F)
|
||||
where
|
||||
F: FnOnce(Vec<String>) -> BoxFuture<'static, ()>,
|
||||
{
|
||||
let Ok(endpoints) = std::env::var("GT_KAFKA_ENDPOINTS") else {
|
||||
warn!("The endpoints is empty, skipping the test");
|
||||
return;
|
||||
};
|
||||
|
||||
let endpoints = endpoints
|
||||
.split(',')
|
||||
.map(|s| s.trim().to_string())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
test(endpoints).await
|
||||
}
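A usage sketch (the test body is illustrative): callers wrap their async test in a pinned boxed future and receive the parsed broker endpoints, so the test is silently skipped when `GT_KAFKA_ENDPOINTS` is not set.

    #[tokio::test]
    async fn test_something_with_kafka() {
        run_test_with_kafka_wal(|broker_endpoints| {
            Box::pin(async move {
                // `broker_endpoints` holds the comma-separated GT_KAFKA_ENDPOINTS
                // value, already split and trimmed.
                assert!(!broker_endpoints.is_empty());
                // ... build a KafkaConfig from the endpoints and run assertions ...
            })
        })
        .await;
    }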
|
||||
@@ -15,4 +15,5 @@
|
||||
/// Kafka wal topic.
|
||||
/// Publishers publish log entries to the topic while subscribers pull log entries from the topic.
|
||||
/// A topic is simply a string right now. But it may be more complex in the future.
|
||||
// TODO(niebayes): remove the Topic alias.
|
||||
pub type Topic = String;
|
||||
|
||||
@@ -14,20 +14,22 @@
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_config::wal::kafka::TopicSelectorType;
|
||||
use common_telemetry::{debug, error, info};
|
||||
use common_telemetry::{error, info};
|
||||
use rskafka::client::controller::ControllerClient;
|
||||
use rskafka::client::error::Error as RsKafkaError;
|
||||
use rskafka::client::error::ProtocolError::TopicAlreadyExists;
|
||||
use rskafka::client::ClientBuilder;
|
||||
use rskafka::client::partition::{Compression, UnknownTopicHandling};
|
||||
use rskafka::client::{Client, ClientBuilder};
|
||||
use rskafka::record::Record;
|
||||
use rskafka::BackoffConfig;
|
||||
use snafu::{ensure, AsErrorSource, ResultExt};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::error::{
|
||||
BuildKafkaClientSnafu, BuildKafkaCtrlClientSnafu, CreateKafkaWalTopicSnafu, DecodeJsonSnafu,
|
||||
EncodeJsonSnafu, InvalidNumTopicsSnafu, Result,
|
||||
BuildKafkaClientSnafu, BuildKafkaCtrlClientSnafu, BuildKafkaPartitionClientSnafu,
|
||||
CreateKafkaWalTopicSnafu, DecodeJsonSnafu, EncodeJsonSnafu, InvalidNumTopicsSnafu,
|
||||
ProduceRecordSnafu, Result,
|
||||
};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::rpc::store::PutRequest;
|
||||
@@ -37,12 +39,15 @@ use crate::wal::kafka::KafkaConfig;
|
||||
|
||||
const CREATED_TOPICS_KEY: &str = "__created_wal_topics/kafka/";
|
||||
|
||||
// Each topic only has one partition for now.
|
||||
// The `DEFAULT_PARTITION` refers to the index of the partition.
|
||||
const DEFAULT_PARTITION: i32 = 0;
|
||||
|
||||
/// Manages topic initialization and selection.
|
||||
pub struct TopicManager {
|
||||
config: KafkaConfig,
|
||||
// TODO(niebayes): maybe add a guard to ensure all topics in the topic pool are created.
|
||||
topic_pool: Vec<Topic>,
|
||||
topic_selector: TopicSelectorRef,
|
||||
pub(crate) topic_pool: Vec<Topic>,
|
||||
pub(crate) topic_selector: TopicSelectorRef,
|
||||
kv_backend: KvBackendRef,
|
||||
}
|
||||
|
||||
@@ -117,14 +122,20 @@ impl TopicManager {
|
||||
.await
|
||||
.with_context(|_| BuildKafkaClientSnafu {
|
||||
broker_endpoints: self.config.broker_endpoints.clone(),
|
||||
})?
|
||||
})?;
|
||||
|
||||
let control_client = client
|
||||
.controller_client()
|
||||
.context(BuildKafkaCtrlClientSnafu)?;
|
||||
|
||||
// Try to create missing topics.
|
||||
let tasks = to_be_created
|
||||
.iter()
|
||||
.map(|i| self.try_create_topic(&topics[*i], &client))
|
||||
.map(|i| async {
|
||||
self.try_create_topic(&topics[*i], &control_client).await?;
|
||||
self.try_append_noop_record(&topics[*i], &client).await?;
|
||||
Ok(())
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
futures::future::try_join_all(tasks).await.map(|_| ())
|
||||
}
|
||||
@@ -141,6 +152,31 @@ impl TopicManager {
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn try_append_noop_record(&self, topic: &Topic, client: &Client) -> Result<()> {
|
||||
let partition_client = client
|
||||
.partition_client(topic, DEFAULT_PARTITION, UnknownTopicHandling::Retry)
|
||||
.await
|
||||
.context(BuildKafkaPartitionClientSnafu {
|
||||
topic,
|
||||
partition: DEFAULT_PARTITION,
|
||||
})?;
|
||||
|
||||
partition_client
|
||||
.produce(
|
||||
vec![Record {
|
||||
key: None,
|
||||
value: None,
|
||||
timestamp: chrono::Utc::now(),
|
||||
headers: Default::default(),
|
||||
}],
|
||||
Compression::NoCompression,
|
||||
)
|
||||
.await
|
||||
.context(ProduceRecordSnafu { topic })?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn try_create_topic(&self, topic: &Topic, client: &ControllerClient) -> Result<()> {
|
||||
match client
|
||||
.create_topic(
|
||||
@@ -202,13 +238,9 @@ impl TopicManager {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::env;
|
||||
|
||||
use common_telemetry::info;
|
||||
|
||||
use super::*;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::kv_backend::{self};
|
||||
use crate::wal::kafka::test_util::run_test_with_kafka_wal;
|
||||
|
||||
// Tests that topics can be successfully persisted into the kv backend and can be successfully restored from the kv backend.
|
||||
#[tokio::test]
|
||||
@@ -235,26 +267,60 @@ mod tests {
|
||||
assert_eq!(topics, restored_topics);
|
||||
}
|
||||
|
||||
/// Tests that the topic manager could allocate topics correctly.
|
||||
#[tokio::test]
|
||||
async fn test_topic_manager() {
|
||||
let endpoints = env::var("GT_KAFKA_ENDPOINTS").unwrap_or_default();
|
||||
common_telemetry::init_default_ut_logging();
|
||||
async fn test_alloc_topics() {
|
||||
run_test_with_kafka_wal(|broker_endpoints| {
|
||||
Box::pin(async {
|
||||
// Constructs topics that should be created.
|
||||
let topics = (0..256)
|
||||
.map(|i| format!("test_alloc_topics_{}_{}", i, uuid::Uuid::new_v4()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if endpoints.is_empty() {
|
||||
info!("The endpoints is empty, skipping the test.");
|
||||
return;
|
||||
}
|
||||
// TODO: supports topic prefix
|
||||
let kv_backend = Arc::new(MemoryKvBackend::new());
|
||||
let config = KafkaConfig {
|
||||
replication_factor: 1,
|
||||
broker_endpoints: endpoints
|
||||
.split(',')
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<_>>(),
|
||||
..Default::default()
|
||||
};
|
||||
let manager = TopicManager::new(config, kv_backend);
|
||||
manager.start().await.unwrap();
|
||||
// Creates a topic manager.
|
||||
let config = KafkaConfig {
|
||||
replication_factor: broker_endpoints.len() as i16,
|
||||
broker_endpoints,
|
||||
..Default::default()
|
||||
};
|
||||
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
|
||||
let mut manager = TopicManager::new(config.clone(), kv_backend);
|
||||
// Replaces the default topic pool with the constructed topics.
|
||||
manager.topic_pool = topics.clone();
|
||||
// Replaces the default selector with a round-robin selector without shuffled.
|
||||
manager.topic_selector = Arc::new(RoundRobinTopicSelector::default());
|
||||
manager.start().await.unwrap();
|
||||
|
||||
// Selects exactly the number of `num_topics` topics one by one.
|
||||
let got = (0..topics.len())
|
||||
.map(|_| manager.select().unwrap())
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(got, topics);
|
||||
|
||||
// Selects exactly the number of `num_topics` topics in a batching manner.
|
||||
let got = manager
|
||||
.select_batch(topics.len())
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.map(ToString::to_string)
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(got, topics);
|
||||
|
||||
// Selects more than the number of `num_topics` topics.
|
||||
let got = manager
|
||||
.select_batch(2 * topics.len())
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.map(ToString::to_string)
|
||||
.collect::<Vec<_>>();
|
||||
let expected = vec![topics.clone(); 2]
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(got, expected);
|
||||
})
|
||||
})
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@ use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use rand::Rng;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::error::{EmptyTopicPoolSnafu, Result};
|
||||
@@ -60,6 +59,14 @@ impl TopicSelector for RoundRobinTopicSelector {
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
/// Tests that a selector behaves as expected when the given topic pool is empty.
|
||||
#[test]
|
||||
fn test_empty_topic_pool() {
|
||||
let topic_pool = vec![];
|
||||
let selector = RoundRobinTopicSelector::default();
|
||||
assert!(selector.select(&topic_pool).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_round_robin_topic_selector() {
|
||||
let topic_pool: Vec<_> = [0, 1, 2].into_iter().map(|v| v.to_string()).collect();
|
||||
|
||||
@@ -107,14 +107,16 @@ pub fn allocate_region_wal_options(
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::wal::kafka::test_util::run_test_with_kafka_wal;
|
||||
use crate::wal::kafka::topic_selector::RoundRobinTopicSelector;
|
||||
use crate::wal::kafka::KafkaConfig;
|
||||
|
||||
// Tests the wal options allocator could successfully allocate raft-engine wal options.
|
||||
// Note: tests for allocator with kafka are integration tests.
|
||||
#[tokio::test]
|
||||
async fn test_allocator_with_raft_engine() {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
|
||||
let wal_config = WalConfig::RaftEngine;
|
||||
let mut allocator = WalOptionsAllocator::new(wal_config, kv_backend);
|
||||
let allocator = WalOptionsAllocator::new(wal_config, kv_backend);
|
||||
allocator.start().await.unwrap();
|
||||
|
||||
let num_regions = 32;
|
||||
@@ -128,4 +130,49 @@ mod tests {
|
||||
.collect();
|
||||
assert_eq!(got, expected);
|
||||
}
|
||||
|
||||
// Tests that the wal options allocator could successfully allocate Kafka wal options.
|
||||
#[tokio::test]
|
||||
async fn test_allocator_with_kafka() {
|
||||
run_test_with_kafka_wal(|broker_endpoints| {
|
||||
Box::pin(async {
|
||||
let topics = (0..256)
|
||||
.map(|i| format!("test_allocator_with_kafka_{}_{}", i, uuid::Uuid::new_v4()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Creates a topic manager.
|
||||
let config = KafkaConfig {
|
||||
replication_factor: broker_endpoints.len() as i16,
|
||||
broker_endpoints,
|
||||
..Default::default()
|
||||
};
|
||||
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
|
||||
let mut topic_manager = KafkaTopicManager::new(config.clone(), kv_backend);
|
||||
// Replaces the default topic pool with the constructed topics.
|
||||
topic_manager.topic_pool = topics.clone();
|
||||
// Replaces the default selector with a round-robin selector without shuffled.
|
||||
topic_manager.topic_selector = Arc::new(RoundRobinTopicSelector::default());
|
||||
|
||||
// Creates an options allocator.
|
||||
let allocator = WalOptionsAllocator::Kafka(topic_manager);
|
||||
allocator.start().await.unwrap();
|
||||
|
||||
let num_regions = 32;
|
||||
let regions = (0..num_regions).collect::<Vec<_>>();
|
||||
let got = allocate_region_wal_options(regions.clone(), &allocator).unwrap();
|
||||
|
||||
// Check the allocated wal options contain the expected topics.
|
||||
let expected = (0..num_regions)
|
||||
.map(|i| {
|
||||
let options = WalOptions::Kafka(KafkaWalOptions {
|
||||
topic: topics[i as usize].clone(),
|
||||
});
|
||||
(i, serde_json::to_string(&options).unwrap())
|
||||
})
|
||||
.collect::<HashMap<_, _>>();
|
||||
assert_eq!(got, expected);
|
||||
})
|
||||
})
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod lock;
|
||||
mod runner;
|
||||
mod rwlock;
|
||||
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
@@ -29,11 +29,11 @@ use snafu::{ensure, ResultExt};
|
||||
use tokio::sync::watch::{self, Receiver, Sender};
|
||||
use tokio::sync::{Mutex as TokioMutex, Notify};
|
||||
|
||||
use self::rwlock::KeyRwLock;
|
||||
use crate::error::{
|
||||
DuplicateProcedureSnafu, Error, LoaderConflictSnafu, ManagerNotStartSnafu, Result,
|
||||
StartRemoveOutdatedMetaTaskSnafu, StopRemoveOutdatedMetaTaskSnafu,
|
||||
};
|
||||
use crate::local::lock::LockMap;
|
||||
use crate::local::runner::Runner;
|
||||
use crate::procedure::BoxedProcedureLoader;
|
||||
use crate::store::{ProcedureMessage, ProcedureStore, StateStoreRef};
|
||||
@@ -57,8 +57,6 @@ const META_TTL: Duration = Duration::from_secs(60 * 10);
|
||||
pub(crate) struct ProcedureMeta {
|
||||
/// Id of this procedure.
|
||||
id: ProcedureId,
|
||||
/// Notify to wait for a lock.
|
||||
lock_notify: Notify,
|
||||
/// Parent procedure id.
|
||||
parent_id: Option<ProcedureId>,
|
||||
/// Notify to wait for subprocedures.
|
||||
@@ -78,7 +76,6 @@ impl ProcedureMeta {
|
||||
let (state_sender, state_receiver) = watch::channel(ProcedureState::Running);
|
||||
ProcedureMeta {
|
||||
id,
|
||||
lock_notify: Notify::new(),
|
||||
parent_id,
|
||||
child_notify: Notify::new(),
|
||||
lock_key,
|
||||
@@ -131,7 +128,7 @@ struct LoadedProcedure {
|
||||
pub(crate) struct ManagerContext {
|
||||
/// Procedure loaders. The key is the type name of the procedure which the loader returns.
|
||||
loaders: Mutex<HashMap<String, BoxedProcedureLoader>>,
|
||||
lock_map: LockMap,
|
||||
key_lock: KeyRwLock<String>,
|
||||
procedures: RwLock<HashMap<ProcedureId, ProcedureMetaRef>>,
|
||||
/// Messages loaded from the procedure store.
|
||||
messages: Mutex<HashMap<ProcedureId, ProcedureMessage>>,
|
||||
@@ -152,8 +149,8 @@ impl ManagerContext {
|
||||
/// Returns a new [ManagerContext].
|
||||
fn new() -> ManagerContext {
|
||||
ManagerContext {
|
||||
key_lock: KeyRwLock::new(),
|
||||
loaders: Mutex::new(HashMap::new()),
|
||||
lock_map: LockMap::new(),
|
||||
procedures: RwLock::new(HashMap::new()),
|
||||
messages: Mutex::new(HashMap::new()),
|
||||
finished_procedures: Mutex::new(VecDeque::new()),
|
||||
@@ -850,7 +847,7 @@ mod tests {
|
||||
assert!(manager.procedure_watcher(procedure_id).is_none());
|
||||
|
||||
let mut procedure = ProcedureToLoad::new("submit");
|
||||
procedure.lock_key = LockKey::single("test.submit");
|
||||
procedure.lock_key = LockKey::single_exclusive("test.submit");
|
||||
assert!(manager
|
||||
.submit(ProcedureWithId {
|
||||
id: procedure_id,
|
||||
@@ -918,7 +915,7 @@ mod tests {
|
||||
}
|
||||
|
||||
fn lock_key(&self) -> LockKey {
|
||||
LockKey::single("test.submit")
|
||||
LockKey::single_exclusive("test.submit")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -955,7 +952,7 @@ mod tests {
|
||||
let manager = LocalManager::new(config, state_store);
|
||||
|
||||
let mut procedure = ProcedureToLoad::new("submit");
|
||||
procedure.lock_key = LockKey::single("test.submit");
|
||||
procedure.lock_key = LockKey::single_exclusive("test.submit");
|
||||
let procedure_id = ProcedureId::random();
|
||||
assert_matches!(
|
||||
manager
|
||||
@@ -986,7 +983,7 @@ mod tests {
|
||||
manager.start().await.unwrap();
|
||||
|
||||
let mut procedure = ProcedureToLoad::new("submit");
|
||||
procedure.lock_key = LockKey::single("test.submit");
|
||||
procedure.lock_key = LockKey::single_exclusive("test.submit");
|
||||
let procedure_id = ProcedureId::random();
|
||||
assert!(manager
|
||||
.submit(ProcedureWithId {
|
||||
@@ -1018,7 +1015,7 @@ mod tests {
|
||||
manager.manager_ctx.set_running();
|
||||
|
||||
let mut procedure = ProcedureToLoad::new("submit");
|
||||
procedure.lock_key = LockKey::single("test.submit");
|
||||
procedure.lock_key = LockKey::single_exclusive("test.submit");
|
||||
let procedure_id = ProcedureId::random();
|
||||
assert!(manager
|
||||
.submit(ProcedureWithId {
|
||||
@@ -1041,7 +1038,7 @@ mod tests {
|
||||
// The remove_outdated_meta method has been stopped, so any procedure meta-data will not be automatically removed.
|
||||
manager.stop().await.unwrap();
|
||||
let mut procedure = ProcedureToLoad::new("submit");
|
||||
procedure.lock_key = LockKey::single("test.submit");
|
||||
procedure.lock_key = LockKey::single_exclusive("test.submit");
|
||||
let procedure_id = ProcedureId::random();
|
||||
|
||||
manager.manager_ctx.set_running();
|
||||
@@ -1063,7 +1060,7 @@ mod tests {
|
||||
|
||||
// After restart
|
||||
let mut procedure = ProcedureToLoad::new("submit");
|
||||
procedure.lock_key = LockKey::single("test.submit");
|
||||
procedure.lock_key = LockKey::single_exclusive("test.submit");
|
||||
let procedure_id = ProcedureId::random();
|
||||
assert!(manager
|
||||
.submit(ProcedureWithId {
|
||||
|
||||
@@ -1,214 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::sync::RwLock;
|
||||
|
||||
use crate::local::ProcedureMetaRef;
|
||||
use crate::ProcedureId;
|
||||
|
||||
/// A lock entry.
|
||||
#[derive(Debug)]
|
||||
struct Lock {
|
||||
/// Current lock owner.
|
||||
owner: ProcedureMetaRef,
|
||||
/// Waiter procedures.
|
||||
waiters: VecDeque<ProcedureMetaRef>,
|
||||
}
|
||||
|
||||
impl Lock {
|
||||
/// Returns a [Lock] with specific `owner` procedure.
|
||||
fn from_owner(owner: ProcedureMetaRef) -> Lock {
|
||||
Lock {
|
||||
owner,
|
||||
waiters: VecDeque::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to pop a waiter from the waiter list, set it as owner
|
||||
/// and wake up the new owner.
|
||||
///
|
||||
/// Returns false if there is no waiter in the waiter list.
|
||||
fn switch_owner(&mut self) -> bool {
|
||||
if let Some(waiter) = self.waiters.pop_front() {
|
||||
// Update owner.
|
||||
self.owner = waiter.clone();
|
||||
// We need to use notify_one() since the waiter may have not called `notified()` yet.
|
||||
waiter.lock_notify.notify_one();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages lock entries for procedures.
|
||||
pub(crate) struct LockMap {
|
||||
locks: RwLock<HashMap<String, Lock>>,
|
||||
}
|
||||
|
||||
impl LockMap {
|
||||
/// Returns a new [LockMap].
|
||||
pub(crate) fn new() -> LockMap {
|
||||
LockMap {
|
||||
locks: RwLock::new(HashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Acquire lock by `key` for procedure with specific `meta`.
|
||||
///
|
||||
/// Though `meta` is cloneable, callers must ensure that only one `meta`
|
||||
/// is acquiring and holding the lock at the same time.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the procedure acquires the lock recursively.
|
||||
pub(crate) async fn acquire_lock(&self, key: &str, meta: ProcedureMetaRef) {
|
||||
assert!(!self.hold_lock(key, meta.id));
|
||||
|
||||
{
|
||||
let mut locks = self.locks.write().unwrap();
|
||||
if let Some(lock) = locks.get_mut(key) {
|
||||
// Lock already exists, but we don't expect that a procedure acquires
|
||||
// the same lock again.
|
||||
assert_ne!(lock.owner.id, meta.id);
|
||||
|
||||
// Add this procedure to the waiter list. Here we don't check
|
||||
// whether the procedure is already in the waiter list as we
|
||||
// expect that a procedure should not wait for two lock simultaneously.
|
||||
lock.waiters.push_back(meta.clone());
|
||||
} else {
|
||||
let _ = locks.insert(key.to_string(), Lock::from_owner(meta));
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for notify.
|
||||
meta.lock_notify.notified().await;
|
||||
|
||||
assert!(self.hold_lock(key, meta.id));
|
||||
}
|
||||
|
||||
/// Release lock by `key`.
|
||||
pub(crate) fn release_lock(&self, key: &str, procedure_id: ProcedureId) {
|
||||
let mut locks = self.locks.write().unwrap();
|
||||
if let Some(lock) = locks.get_mut(key) {
|
||||
if lock.owner.id != procedure_id {
|
||||
// This is not the lock owner.
|
||||
return;
|
||||
}
|
||||
|
||||
if !lock.switch_owner() {
|
||||
// No body waits for this lock, we can remove the lock entry.
|
||||
let _ = locks.remove(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the procedure with specific `procedure_id` holds the
|
||||
/// lock of `key`.
|
||||
fn hold_lock(&self, key: &str, procedure_id: ProcedureId) -> bool {
|
||||
let locks = self.locks.read().unwrap();
|
||||
locks
|
||||
.get(key)
|
||||
.map(|lock| lock.owner.id == procedure_id)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Returns true if the procedure is waiting for the lock `key`.
|
||||
#[cfg(test)]
|
||||
fn waiting_lock(&self, key: &str, procedure_id: ProcedureId) -> bool {
|
||||
let locks = self.locks.read().unwrap();
|
||||
locks
|
||||
.get(key)
|
||||
.map(|lock| lock.waiters.iter().any(|meta| meta.id == procedure_id))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::*;
|
||||
use crate::local::test_util;
|
||||
|
||||
#[test]
|
||||
fn test_lock_no_waiter() {
|
||||
let meta = Arc::new(test_util::procedure_meta_for_test());
|
||||
let mut lock = Lock::from_owner(meta);
|
||||
|
||||
assert!(!lock.switch_owner());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_lock_with_waiter() {
|
||||
let owner = Arc::new(test_util::procedure_meta_for_test());
|
||||
let mut lock = Lock::from_owner(owner);
|
||||
|
||||
let waiter = Arc::new(test_util::procedure_meta_for_test());
|
||||
lock.waiters.push_back(waiter.clone());
|
||||
|
||||
assert!(lock.switch_owner());
|
||||
assert!(lock.waiters.is_empty());
|
||||
|
||||
waiter.lock_notify.notified().await;
|
||||
assert_eq!(lock.owner.id, waiter.id);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_lock_map() {
|
||||
let key = "hello";
|
||||
|
||||
let owner = Arc::new(test_util::procedure_meta_for_test());
|
||||
let lock_map = Arc::new(LockMap::new());
|
||||
lock_map.acquire_lock(key, owner.clone()).await;
|
||||
|
||||
let waiter = Arc::new(test_util::procedure_meta_for_test());
|
||||
let waiter_id = waiter.id;
|
||||
|
||||
// Waiter release the lock, this should not take effect.
|
||||
lock_map.release_lock(key, waiter_id);
|
||||
|
||||
let lock_map2 = lock_map.clone();
|
||||
let owner_id = owner.id;
|
||||
let handle = tokio::spawn(async move {
|
||||
assert!(lock_map2.hold_lock(key, owner_id));
|
||||
assert!(!lock_map2.hold_lock(key, waiter_id));
|
||||
|
||||
// Waiter wait for lock.
|
||||
lock_map2.acquire_lock(key, waiter.clone()).await;
|
||||
|
||||
assert!(lock_map2.hold_lock(key, waiter_id));
|
||||
});
|
||||
|
||||
// Owner still holds the lock.
|
||||
assert!(lock_map.hold_lock(key, owner_id));
|
||||
|
||||
// Wait until the waiter acquired the lock
|
||||
while !lock_map.waiting_lock(key, waiter_id) {
|
||||
tokio::time::sleep(std::time::Duration::from_millis(5)).await;
|
||||
}
|
||||
// Release lock
|
||||
lock_map.release_lock(key, owner_id);
|
||||
assert!(!lock_map.hold_lock(key, owner_id));
|
||||
|
||||
// Wait for task.
|
||||
handle.await.unwrap();
|
||||
// The waiter should hold the lock now.
|
||||
assert!(lock_map.hold_lock(key, waiter_id));
|
||||
|
||||
lock_map.release_lock(key, waiter_id);
|
||||
}
|
||||
}
|
||||
@@ -19,8 +19,10 @@ use backon::{BackoffBuilder, ExponentialBuilder};
|
||||
use common_telemetry::logging;
|
||||
use tokio::time;
|
||||
|
||||
use super::rwlock::OwnedKeyRwLockGuard;
|
||||
use crate::error::{self, ProcedurePanicSnafu, Result};
|
||||
use crate::local::{ManagerContext, ProcedureMeta, ProcedureMetaRef};
|
||||
use crate::procedure::StringKey;
|
||||
use crate::store::ProcedureStore;
|
||||
use crate::ProcedureState::Retrying;
|
||||
use crate::{BoxedProcedure, Context, Error, ProcedureId, ProcedureState, ProcedureWithId, Status};
|
||||
@@ -56,6 +58,7 @@ impl ExecResult {
|
||||
struct ProcedureGuard {
|
||||
meta: ProcedureMetaRef,
|
||||
manager_ctx: Arc<ManagerContext>,
|
||||
key_guards: Vec<OwnedKeyRwLockGuard>,
|
||||
finish: bool,
|
||||
}
|
||||
|
||||
@@ -65,6 +68,7 @@ impl ProcedureGuard {
|
||||
ProcedureGuard {
|
||||
meta,
|
||||
manager_ctx,
|
||||
key_guards: vec![],
|
||||
finish: false,
|
||||
}
|
||||
}
|
||||
@@ -95,10 +99,15 @@ impl Drop for ProcedureGuard {
|
||||
self.manager_ctx.notify_by_subprocedure(parent_id);
|
||||
}
|
||||
|
||||
// Release lock in reverse order.
|
||||
for key in self.meta.lock_key.keys_to_unlock() {
|
||||
self.manager_ctx.lock_map.release_lock(key, self.meta.id);
|
||||
// Drops the key guards in the reverse order.
|
||||
while !self.key_guards.is_empty() {
|
||||
self.key_guards.pop();
|
||||
}
|
||||
|
||||
// Clean up the stale locks.
|
||||
self.manager_ctx
|
||||
.key_lock
|
||||
.clean_keys(self.meta.lock_key.keys_to_lock().map(|k| k.as_string()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -121,7 +130,7 @@ impl Runner {
|
||||
/// Run the procedure.
|
||||
pub(crate) async fn run(mut self) {
|
||||
// Ensure we can update the procedure state.
|
||||
let guard = ProcedureGuard::new(self.meta.clone(), self.manager_ctx.clone());
|
||||
let mut guard = ProcedureGuard::new(self.meta.clone(), self.manager_ctx.clone());
|
||||
|
||||
logging::info!(
|
||||
"Runner {}-{} starts",
|
||||
@@ -133,10 +142,14 @@ impl Runner {
|
||||
// recursive locking by adding a root procedure id to the meta.
|
||||
for key in self.meta.lock_key.keys_to_lock() {
|
||||
// Acquire lock for each key.
|
||||
self.manager_ctx
|
||||
.lock_map
|
||||
.acquire_lock(key, self.meta.clone())
|
||||
.await;
|
||||
let key_guard = match key {
|
||||
StringKey::Share(key) => self.manager_ctx.key_lock.read(key.clone()).await.into(),
|
||||
StringKey::Exclusive(key) => {
|
||||
self.manager_ctx.key_lock.write(key.clone()).await.into()
|
||||
}
|
||||
};
|
||||
|
||||
guard.key_guards.push(key_guard);
|
||||
}
|
||||
|
||||
// Execute the procedure. We need to release the lock whenever the execution
|
||||
@@ -604,7 +617,7 @@ mod tests {
|
||||
};
|
||||
let normal = ProcedureAdapter {
|
||||
data: "normal".to_string(),
|
||||
lock_key: LockKey::single("catalog.schema.table"),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
@@ -665,7 +678,7 @@ mod tests {
|
||||
};
|
||||
let suspend = ProcedureAdapter {
|
||||
data: "suspend".to_string(),
|
||||
lock_key: LockKey::single("catalog.schema.table"),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
@@ -697,7 +710,7 @@ mod tests {
|
||||
};
|
||||
let child = ProcedureAdapter {
|
||||
data: "child".to_string(),
|
||||
lock_key: LockKey::new(keys.iter().map(|k| k.to_string())),
|
||||
lock_key: LockKey::new_exclusive(keys.iter().map(|k| k.to_string())),
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
@@ -765,7 +778,7 @@ mod tests {
|
||||
};
|
||||
let parent = ProcedureAdapter {
|
||||
data: "parent".to_string(),
|
||||
lock_key: LockKey::single("catalog.schema.table"),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
@@ -784,6 +797,7 @@ mod tests {
|
||||
runner.manager_ctx = manager_ctx.clone();
|
||||
|
||||
runner.run().await;
|
||||
assert!(manager_ctx.key_lock.is_empty());
|
||||
|
||||
// Check child procedures.
|
||||
for child_id in children_ids {
|
||||
@@ -810,7 +824,7 @@ mod tests {
|
||||
let exec_fn = move |_| async move { Ok(Status::Executing { persist: true }) }.boxed();
|
||||
let normal = ProcedureAdapter {
|
||||
data: "normal".to_string(),
|
||||
lock_key: LockKey::single("catalog.schema.table"),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
@@ -851,7 +865,7 @@ mod tests {
|
||||
|_| async { Err(Error::external(MockError::new(StatusCode::Unexpected))) }.boxed();
|
||||
let normal = ProcedureAdapter {
|
||||
data: "fail".to_string(),
|
||||
lock_key: LockKey::single("catalog.schema.table"),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
@@ -875,7 +889,7 @@ mod tests {
|
||||
|_| async { Err(Error::external(MockError::new(StatusCode::Unexpected))) }.boxed();
|
||||
let fail = ProcedureAdapter {
|
||||
data: "fail".to_string(),
|
||||
lock_key: LockKey::single("catalog.schema.table"),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
@@ -917,7 +931,7 @@ mod tests {
|
||||
|
||||
let retry_later = ProcedureAdapter {
|
||||
data: "retry_later".to_string(),
|
||||
lock_key: LockKey::single("catalog.schema.table"),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
@@ -952,7 +966,7 @@ mod tests {
|
||||
|
||||
let exceed_max_retry_later = ProcedureAdapter {
|
||||
data: "exceed_max_retry_later".to_string(),
|
||||
lock_key: LockKey::single("catalog.schema.table"),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
@@ -993,7 +1007,7 @@ mod tests {
|
||||
};
|
||||
let fail = ProcedureAdapter {
|
||||
data: "fail".to_string(),
|
||||
lock_key: LockKey::single("catalog.schema.table.region-0"),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table.region-0"),
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
@@ -1027,7 +1041,7 @@ mod tests {
|
||||
};
|
||||
let parent = ProcedureAdapter {
|
||||
data: "parent".to_string(),
|
||||
lock_key: LockKey::single("catalog.schema.table"),
|
||||
lock_key: LockKey::single_exclusive("catalog.schema.table"),
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
@@ -1042,10 +1056,11 @@ mod tests {
|
||||
// Manually add this procedure to the manager ctx.
|
||||
assert!(manager_ctx.try_insert_procedure(meta.clone()));
|
||||
// Replace the manager ctx.
|
||||
runner.manager_ctx = manager_ctx;
|
||||
runner.manager_ctx = manager_ctx.clone();
|
||||
|
||||
// Run the runner and execute the procedure.
|
||||
runner.run().await;
|
||||
assert!(manager_ctx.key_lock.is_empty());
|
||||
let err = meta.state().error().unwrap().output_msg();
|
||||
assert!(err.contains("subprocedure failed"), "{err}");
|
||||
}
|
||||
|
||||
src/common/procedure/src/local/rwlock.rs (new file, 247 lines)
@@ -0,0 +1,247 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::hash::Hash;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use tokio::sync::{OwnedRwLockReadGuard, OwnedRwLockWriteGuard, RwLock};
|
||||
|
||||
pub enum OwnedKeyRwLockGuard {
|
||||
Read(OwnedRwLockReadGuard<()>),
|
||||
Write(OwnedRwLockWriteGuard<()>),
|
||||
}
|
||||
|
||||
impl From<OwnedRwLockReadGuard<()>> for OwnedKeyRwLockGuard {
|
||||
fn from(guard: OwnedRwLockReadGuard<()>) -> Self {
|
||||
OwnedKeyRwLockGuard::Read(guard)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<OwnedRwLockWriteGuard<()>> for OwnedKeyRwLockGuard {
|
||||
fn from(guard: OwnedRwLockWriteGuard<()>) -> Self {
|
||||
OwnedKeyRwLockGuard::Write(guard)
|
||||
}
|
||||
}
|
||||
|
||||
/// Locks based on a key, allowing other keys to lock independently.
|
||||
#[derive(Debug)]
|
||||
pub struct KeyRwLock<K> {
|
||||
/// The inner map of locks for specific keys.
|
||||
inner: Mutex<HashMap<K, Arc<RwLock<()>>>>,
|
||||
}
|
||||
|
||||
impl<K> KeyRwLock<K>
|
||||
where
|
||||
K: Eq + Hash + Clone,
|
||||
{
|
||||
pub fn new() -> Self {
|
||||
KeyRwLock {
|
||||
inner: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Locks the key with shared read access, returning a guard.
|
||||
pub async fn read(&self, key: K) -> OwnedRwLockReadGuard<()> {
|
||||
let lock = {
|
||||
let mut locks = self.inner.lock().unwrap();
|
||||
locks.entry(key).or_default().clone()
|
||||
};
|
||||
|
||||
lock.read_owned().await
|
||||
}
|
||||
|
||||
/// Locks the key with exclusive write access, returning a guard.
|
||||
pub async fn write(&self, key: K) -> OwnedRwLockWriteGuard<()> {
|
||||
let lock = {
|
||||
let mut locks = self.inner.lock().unwrap();
|
||||
locks.entry(key).or_default().clone()
|
||||
};
|
||||
|
||||
lock.write_owned().await
|
||||
}
|
||||
|
||||
/// Clean up stale locks.
|
||||
///
|
||||
/// Note: It only cleans a lock if
|
||||
/// - Its strong ref count equals one.
|
||||
/// - Able to acquire the write lock.
|
||||
pub fn clean_keys<'a>(&'a self, iter: impl IntoIterator<Item = &'a K>) {
|
||||
let mut locks = self.inner.lock().unwrap();
|
||||
let mut keys = Vec::new();
|
||||
for key in iter {
|
||||
if let Some(lock) = locks.get(key) {
|
||||
if lock.try_write().is_ok() {
|
||||
debug_assert_eq!(Arc::weak_count(lock), 0);
|
||||
// Ensures nobody keeps this ref.
|
||||
if Arc::strong_count(lock) == 1 {
|
||||
keys.push(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for key in keys {
|
||||
locks.remove(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl<K> KeyRwLock<K>
|
||||
where
|
||||
K: Eq + Hash + Clone,
|
||||
{
|
||||
/// Tries to lock the key with shared read access, returning immediately.
|
||||
pub fn try_read(&self, key: K) -> Result<OwnedRwLockReadGuard<()>, tokio::sync::TryLockError> {
|
||||
let lock = {
|
||||
let mut locks = self.inner.lock().unwrap();
|
||||
locks.entry(key).or_default().clone()
|
||||
};
|
||||
|
||||
lock.try_read_owned()
|
||||
}
|
||||
|
||||
/// Tries to lock this key with exclusive write access, returning immediately.
|
||||
pub fn try_write(
|
||||
&self,
|
||||
key: K,
|
||||
) -> Result<OwnedRwLockWriteGuard<()>, tokio::sync::TryLockError> {
|
||||
let lock = {
|
||||
let mut locks = self.inner.lock().unwrap();
|
||||
locks.entry(key).or_default().clone()
|
||||
};
|
||||
|
||||
lock.try_write_owned()
|
||||
}
|
||||
|
||||
/// Returns number of keys.
|
||||
pub fn len(&self) -> usize {
|
||||
self.inner.lock().unwrap().len()
|
||||
}
|
||||
|
||||
/// Returns true if the inner map is empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_naive() {
|
||||
let lock_key = KeyRwLock::new();
|
||||
|
||||
{
|
||||
let _guard = lock_key.read("test1").await;
|
||||
assert_eq!(lock_key.len(), 1);
|
||||
assert!(lock_key.try_read("test1").is_ok());
|
||||
assert!(lock_key.try_write("test1").is_err());
|
||||
}
|
||||
|
||||
{
|
||||
let _guard0 = lock_key.write("test2").await;
|
||||
let _guard = lock_key.write("test1").await;
|
||||
assert_eq!(lock_key.len(), 2);
|
||||
assert!(lock_key.try_read("test1").is_err());
|
||||
assert!(lock_key.try_write("test1").is_err());
|
||||
}
|
||||
|
||||
assert_eq!(lock_key.len(), 2);
|
||||
|
||||
lock_key.clean_keys(&vec!["test1", "test2"]);
|
||||
assert!(lock_key.is_empty());
|
||||
|
||||
let mut guards = Vec::new();
|
||||
for key in ["test1", "test2"] {
|
||||
guards.push(lock_key.read(key).await);
|
||||
}
|
||||
while !guards.is_empty() {
|
||||
guards.pop();
|
||||
}
|
||||
lock_key.clean_keys(vec![&"test1", &"test2"]);
|
||||
assert_eq!(lock_key.len(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_clean_keys() {
|
||||
let lock_key = KeyRwLock::<&str>::new();
|
||||
{
|
||||
let rwlock = {
|
||||
lock_key
|
||||
.inner
|
||||
.lock()
|
||||
.unwrap()
|
||||
.entry("test")
|
||||
.or_default()
|
||||
.clone()
|
||||
};
|
||||
assert_eq!(Arc::strong_count(&rwlock), 2);
|
||||
let _guard = rwlock.read_owned().await;
|
||||
|
||||
{
|
||||
let inner = lock_key.inner.lock().unwrap();
|
||||
let rwlock = inner.get("test").unwrap();
|
||||
assert_eq!(Arc::strong_count(rwlock), 2);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
let rwlock = {
|
||||
lock_key
|
||||
.inner
|
||||
.lock()
|
||||
.unwrap()
|
||||
.entry("test")
|
||||
.or_default()
|
||||
.clone()
|
||||
};
|
||||
assert_eq!(Arc::strong_count(&rwlock), 2);
|
||||
let _guard = rwlock.write_owned().await;
|
||||
|
||||
{
|
||||
let inner = lock_key.inner.lock().unwrap();
|
||||
let rwlock = inner.get("test").unwrap();
|
||||
assert_eq!(Arc::strong_count(rwlock), 2);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
let inner = lock_key.inner.lock().unwrap();
|
||||
let rwlock = inner.get("test").unwrap();
|
||||
assert_eq!(Arc::strong_count(rwlock), 1);
|
||||
}
|
||||
|
||||
// Someone has the ref of the rwlock, but it waits to be granted the lock.
|
||||
let rwlock = {
|
||||
lock_key
|
||||
.inner
|
||||
.lock()
|
||||
.unwrap()
|
||||
.entry("test")
|
||||
.or_default()
|
||||
.clone()
|
||||
};
|
||||
assert_eq!(Arc::strong_count(&rwlock), 2);
|
||||
// However, one thread trying to remove the "test" key should have no effect.
|
||||
lock_key.clean_keys(vec![&"test"]);
|
||||
// Should get the rwlock.
|
||||
{
|
||||
let inner = lock_key.inner.lock().unwrap();
|
||||
inner.get("test").unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -116,22 +116,49 @@ impl<T: Procedure + ?Sized> Procedure for Box<T> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub enum StringKey {
|
||||
Share(String),
|
||||
Exclusive(String),
|
||||
}
|
||||
|
||||
/// Keys to identify required locks.
|
||||
///
|
||||
/// [LockKey] always sorts keys lexicographically so that they can be acquired
|
||||
/// in the same order.
|
||||
// Most procedures should only acquire 1 ~ 2 locks so we use smallvec to hold keys.
|
||||
/// Most procedures should only acquire 1 ~ 2 locks so we use smallvec to hold keys.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub struct LockKey(SmallVec<[String; 2]>);
|
||||
pub struct LockKey(SmallVec<[StringKey; 2]>);
|
||||
|
||||
impl StringKey {
|
||||
pub fn into_string(self) -> String {
|
||||
match self {
|
||||
StringKey::Share(s) => s,
|
||||
StringKey::Exclusive(s) => s,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_string(&self) -> &String {
|
||||
match self {
|
||||
StringKey::Share(s) => s,
|
||||
StringKey::Exclusive(s) => s,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LockKey {
|
||||
/// Returns a new [LockKey] with only one key.
|
||||
pub fn single(key: impl Into<String>) -> LockKey {
|
||||
pub fn single(key: impl Into<StringKey>) -> LockKey {
|
||||
LockKey(smallvec![key.into()])
|
||||
}
|
||||
|
||||
/// Returns a new [LockKey] with only one key.
|
||||
pub fn single_exclusive(key: impl Into<String>) -> LockKey {
|
||||
LockKey(smallvec![StringKey::Exclusive(key.into())])
|
||||
}
|
||||
|
||||
/// Returns a new [LockKey] with keys from specific `iter`.
|
||||
pub fn new(iter: impl IntoIterator<Item = String>) -> LockKey {
|
||||
pub fn new(iter: impl IntoIterator<Item = StringKey>) -> LockKey {
|
||||
let mut vec: SmallVec<_> = iter.into_iter().collect();
|
||||
vec.sort();
|
||||
// Dedup keys to avoid acquiring the same key multiple times.
|
||||
@@ -139,14 +166,14 @@ impl LockKey {
|
||||
LockKey(vec)
|
||||
}
|
||||
|
||||
/// Returns the keys to lock.
|
||||
pub fn keys_to_lock(&self) -> impl Iterator<Item = &String> {
|
||||
self.0.iter()
|
||||
/// Returns a new [LockKey] with keys from specific `iter`.
|
||||
pub fn new_exclusive(iter: impl IntoIterator<Item = String>) -> LockKey {
|
||||
Self::new(iter.into_iter().map(StringKey::Exclusive))
|
||||
}
|
||||
|
||||
/// Returns the keys to unlock.
|
||||
pub fn keys_to_unlock(&self) -> impl Iterator<Item = &String> {
|
||||
self.0.iter().rev()
|
||||
/// Returns the keys to lock.
|
||||
pub fn keys_to_lock(&self) -> impl Iterator<Item = &StringKey> {
|
||||
self.0.iter()
|
||||
}
|
||||
}
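To make the new API concrete, a hedged sketch of mixing shared and exclusive keys (the key strings are illustrative): catalog- and schema-level keys can be taken as shared while the table key stays exclusive, and the old string-based call sites keep working through `single_exclusive`/`new_exclusive`.

    // Sketch only: keys are sorted and deduplicated by LockKey::new.
    let lock_key = LockKey::new(vec![
        StringKey::Share("catalog.schema".to_string()),
        StringKey::Exclusive("catalog.schema.my_table".to_string()),
    ]);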
|
||||
|
||||
@@ -340,20 +367,25 @@ mod tests {
|
||||
#[test]
|
||||
fn test_lock_key() {
|
||||
let entity = "catalog.schema.my_table";
|
||||
let key = LockKey::single(entity);
|
||||
assert_eq!(vec![entity], key.keys_to_lock().collect::<Vec<_>>());
|
||||
assert_eq!(vec![entity], key.keys_to_unlock().collect::<Vec<_>>());
|
||||
let key = LockKey::single_exclusive(entity);
|
||||
assert_eq!(
|
||||
vec![&StringKey::Exclusive(entity.to_string())],
|
||||
key.keys_to_lock().collect::<Vec<_>>()
|
||||
);
|
||||
|
||||
let key = LockKey::new([
|
||||
let key = LockKey::new_exclusive([
|
||||
"b".to_string(),
|
||||
"c".to_string(),
|
||||
"a".to_string(),
|
||||
"c".to_string(),
|
||||
]);
|
||||
assert_eq!(vec!["a", "b", "c"], key.keys_to_lock().collect::<Vec<_>>());
|
||||
assert_eq!(
|
||||
vec!["c", "b", "a"],
|
||||
key.keys_to_unlock().collect::<Vec<_>>()
|
||||
vec![
|
||||
&StringKey::Exclusive("a".to_string()),
|
||||
&StringKey::Exclusive("b".to_string()),
|
||||
&StringKey::Exclusive("c".to_string())
|
||||
],
|
||||
key.keys_to_lock().collect::<Vec<_>>()
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@ impl StateStore for ObjectStateStore {
|
||||
let mut lister = self
|
||||
.store
|
||||
.lister_with(path)
|
||||
.delimiter("")
|
||||
.recursive(true)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
BoxedError::new(PlainError::new(
|
||||
|
||||
@@ -98,7 +98,7 @@ mod tests {
|
||||
}
|
||||
|
||||
fn lock_key(&self) -> LockKey {
|
||||
LockKey::single("test.submit")
|
||||
LockKey::single_exclusive("test.submit")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,13 +20,13 @@ pub const THREAD_NAME_LABEL: &str = "thread_name";
|
||||
|
||||
lazy_static! {
|
||||
pub static ref METRIC_RUNTIME_THREADS_ALIVE: IntGaugeVec = register_int_gauge_vec!(
|
||||
"runtime_threads_alive",
|
||||
"greptime_runtime_threads_alive",
|
||||
"runtime threads alive",
|
||||
&[THREAD_NAME_LABEL]
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_RUNTIME_THREADS_IDLE: IntGaugeVec = register_int_gauge_vec!(
|
||||
"runtime_threads_idle",
|
||||
"greptime_runtime_threads_idle",
|
||||
"runtime threads idle",
|
||||
&[THREAD_NAME_LABEL]
|
||||
)
|
||||
|
||||
@@ -22,7 +22,7 @@ use prometheus::*;
|
||||
|
||||
lazy_static! {
|
||||
pub static ref PANIC_COUNTER: IntCounter =
|
||||
register_int_counter!("panic_counter", "panic_counter").unwrap();
|
||||
register_int_counter!("greptime_panic_counter", "panic_counter").unwrap();
|
||||
}
|
||||
|
||||
pub fn set_panic_hook() {
|
||||
|
||||
@@ -4,6 +4,9 @@ version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[features]
|
||||
testing = []
|
||||
|
||||
[dependencies]
|
||||
api.workspace = true
|
||||
arrow-flight.workspace = true
|
||||
|
||||
@@ -22,11 +22,12 @@ use std::sync::Arc;
|
||||
use catalog::memory::MemoryCatalogManager;
|
||||
use common_base::Plugins;
|
||||
use common_config::wal::{KafkaConfig, RaftEngineConfig};
|
||||
use common_config::{WalConfig, WAL_OPTIONS_KEY};
|
||||
use common_config::WalConfig;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
|
||||
use common_meta::key::datanode_table::{DatanodeTableManager, DatanodeTableValue};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::wal::prepare_wal_option;
|
||||
pub use common_procedure::options::ProcedureConfig;
|
||||
use common_runtime::Runtime;
|
||||
use common_telemetry::{error, info, warn};
|
||||
@@ -98,7 +99,7 @@ impl Datanode {
|
||||
self.start_telemetry();
|
||||
|
||||
if let Some(t) = self.export_metrics_task.as_ref() {
|
||||
t.start()
|
||||
t.start(None).context(StartServerSnafu)?
|
||||
}
|
||||
|
||||
self.start_services().await
|
||||
@@ -538,13 +539,11 @@ async fn open_all_regions(
|
||||
for region_number in table_value.regions {
|
||||
// Augments region options with wal options if a wal options is provided.
|
||||
let mut region_options = table_value.region_info.region_options.clone();
|
||||
table_value
|
||||
.region_info
|
||||
.region_wal_options
|
||||
.get(®ion_number.to_string())
|
||||
.and_then(|wal_options| {
|
||||
region_options.insert(WAL_OPTIONS_KEY.to_string(), wal_options.clone())
|
||||
});
|
||||
prepare_wal_option(
|
||||
&mut region_options,
|
||||
RegionId::new(table_value.table_id, region_number),
|
||||
&table_value.region_info.region_wal_options,
|
||||
);
|
||||
|
||||
regions.push((
|
||||
RegionId::new(table_value.table_id, region_number),
|
||||
|
||||
@@ -272,6 +272,16 @@ pub enum Error {
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Failed to find logical regions in physical region {}",
|
||||
physical_region_id
|
||||
))]
|
||||
FindLogicalRegions {
|
||||
physical_region_id: RegionId,
|
||||
source: metric_engine::error::Error,
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -340,6 +350,8 @@ impl ErrorExt for Error {
|
||||
}
|
||||
HandleRegionRequest { source, .. } => source.status_code(),
|
||||
StopRegionEngine { source, .. } => source.status_code(),
|
||||
|
||||
FindLogicalRegions { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -305,7 +305,7 @@ impl HeartbeatTask {
|
||||
}
|
||||
|
||||
async fn load_region_stats(region_server: &RegionServer) -> Vec<RegionStat> {
|
||||
let regions = region_server.opened_regions();
|
||||
let regions = region_server.reportable_regions();
|
||||
|
||||
let mut region_stats = Vec::new();
|
||||
for stat in regions {
|
||||
|
||||
@@ -96,6 +96,7 @@ impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler {
|
||||
Some((_, Instruction::OpenRegion { .. }))
|
||||
| Some((_, Instruction::CloseRegion { .. }))
|
||||
| Some((_, Instruction::DowngradeRegion { .. }))
|
||||
| Some((_, Instruction::UpgradeRegion { .. }))
|
||||
)
|
||||
}
|
||||
|
||||
@@ -134,7 +135,7 @@ mod tests {
|
||||
use common_meta::heartbeat::mailbox::{
|
||||
HeartbeatMailbox, IncomingMessage, MailboxRef, MessageMeta,
|
||||
};
|
||||
use common_meta::instruction::{DowngradeRegion, OpenRegion};
|
||||
use common_meta::instruction::{DowngradeRegion, OpenRegion, UpgradeRegion};
|
||||
use mito2::config::MitoConfig;
|
||||
use mito2::engine::MITO_ENGINE_NAME;
|
||||
use mito2::test_util::{CreateRequestBuilder, TestEnv};
|
||||
@@ -175,6 +176,44 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_acceptable() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let region_server = mock_region_server();
|
||||
let heartbeat_handler = RegionHeartbeatResponseHandler::new(region_server.clone());
|
||||
let heartbeat_env = HeartbeatResponseTestEnv::new();
|
||||
let meta = MessageMeta::new_test(1, "test", "dn-1", "me-0");

// Open region
let region_id = RegionId::new(1024, 1);
let storage_path = "test";
let instruction = open_region_instruction(region_id, storage_path);
assert!(heartbeat_handler
.is_acceptable(&heartbeat_env.create_handler_ctx((meta.clone(), instruction))));

// Close region
let instruction = close_region_instruction(region_id);
assert!(heartbeat_handler
.is_acceptable(&heartbeat_env.create_handler_ctx((meta.clone(), instruction))));

// Downgrade region
let instruction = Instruction::DowngradeRegion(DowngradeRegion {
region_id: RegionId::new(2048, 1),
});
assert!(heartbeat_handler
.is_acceptable(&heartbeat_env.create_handler_ctx((meta.clone(), instruction))));

// Upgrade region
let instruction = Instruction::UpgradeRegion(UpgradeRegion {
region_id,
last_entry_id: None,
wait_for_replay_timeout: None,
});
assert!(
heartbeat_handler.is_acceptable(&heartbeat_env.create_handler_ctx((meta, instruction)))
);
}

fn close_region_instruction(region_id: RegionId) -> Instruction {
Instruction::CloseRegion(RegionIdent {
table_id: region_id.table_id(),

@@ -14,6 +14,7 @@

use common_error::ext::ErrorExt;
use common_meta::instruction::{InstructionReply, OpenRegion, SimpleReply};
use common_meta::wal::prepare_wal_option;
use futures_util::future::BoxFuture;
use store_api::path_utils::region_dir;
use store_api::region_request::{RegionOpenRequest, RegionRequest};
@@ -26,15 +27,14 @@ impl HandlerContext {
OpenRegion {
region_ident,
region_storage_path,
region_options,
mut region_options,
region_wal_options,
skip_wal_replay,
}: OpenRegion,
) -> BoxFuture<'static, InstructionReply> {
Box::pin(async move {
let region_id = Self::region_ident_to_region_id(&region_ident);
// TODO(niebayes): extends region options with region_wal_options.
let _ = region_wal_options;
prepare_wal_option(&mut region_options, region_id, &region_wal_options);
let request = RegionRequest::Open(RegionOpenRequest {
engine: region_ident.engine,
region_dir: region_dir(&region_storage_path, region_id),
@@ -42,10 +42,8 @@ impl HandlerContext {
skip_wal_replay,
});
let result = self.region_server.handle_request(region_id, request).await;

let success = result.is_ok();
let error = result.as_ref().map_err(|e| e.output_msg()).err();

InstructionReply::OpenRegion(SimpleReply {
result: success,
error,

@@ -14,7 +14,7 @@

use common_error::ext::ErrorExt;
use common_meta::instruction::{InstructionReply, UpgradeRegion, UpgradeRegionReply};
use common_telemetry::warn;
use common_telemetry::{info, warn};
use futures_util::future::BoxFuture;
use store_api::region_request::{RegionCatchupRequest, RegionRequest};

@@ -56,6 +56,7 @@ impl HandlerContext {
.try_register(
region_id,
Box::pin(async move {
info!("Executing region: {region_id} catchup to: last entry id {last_entry_id:?}");
region_server_moved
.handle_request(
region_id,

@@ -24,5 +24,5 @@ pub mod heartbeat;
pub mod metrics;
pub mod region_server;
mod store;
#[cfg(test)]
mod tests;
#[cfg(any(test, feature = "testing"))]
pub mod tests;

@@ -24,26 +24,26 @@ pub const REGION_ID: &str = "region_id";
lazy_static! {
/// The elapsed time of handling a request in the region_server.
pub static ref HANDLE_REGION_REQUEST_ELAPSED: HistogramVec = register_histogram_vec!(
"datanode_handle_region_request_elapsed",
"greptime_datanode_handle_region_request_elapsed",
"datanode handle region request elapsed",
&[REGION_REQUEST_TYPE]
)
.unwrap();
/// The elapsed time since the last received heartbeat.
pub static ref LAST_RECEIVED_HEARTBEAT_ELAPSED: IntGauge = register_int_gauge!(
"last_received_heartbeat_lease_elapsed",
"greptime_last_received_heartbeat_lease_elapsed",
"last received heartbeat lease elapsed",
)
.unwrap();
pub static ref LEASE_EXPIRED_REGION: IntGaugeVec = register_int_gauge_vec!(
"lease_expired_region",
"greptime_lease_expired_region",
"lease expired region",
&[REGION_ID]
)
.unwrap();
/// The received region leases via heartbeat.
pub static ref HEARTBEAT_REGION_LEASES: IntGaugeVec = register_int_gauge_vec!(
"heartbeat_region_leases",
"greptime_heartbeat_region_leases",
"received region leases via heartbeat",
&[REGION_ROLE]
)

@@ -43,6 +43,7 @@ use datafusion_common::DataFusionError;
use datafusion_expr::{Expr as DfExpr, TableProviderFilterPushDown, TableType};
use datatypes::arrow::datatypes::SchemaRef;
use futures_util::future::try_join_all;
use metric_engine::engine::MetricEngine;
use prost::Message;
use query::QueryEngineRef;
use servers::error::{self as servers_error, ExecuteGrpcRequestSnafu, Result as ServerResult};
@@ -51,6 +52,7 @@ use servers::grpc::region_server::RegionServerHandler;
use session::context::{QueryContextBuilder, QueryContextRef};
use snafu::{OptionExt, ResultExt};
use store_api::metadata::RegionMetadataRef;
use store_api::metric_engine_consts::{METRIC_ENGINE_NAME, PHYSICAL_TABLE_METADATA_KEY};
use store_api::region_engine::{RegionEngineRef, RegionRole, SetReadonlyResponse};
use store_api::region_request::{AffectedRows, RegionCloseRequest, RegionRequest};
use store_api::storage::{RegionId, ScanRequest};
@@ -60,8 +62,9 @@ use tonic::{Request, Response, Result as TonicResult};

use crate::error::{
self, BuildRegionRequestsSnafu, DecodeLogicalPlanSnafu, ExecuteLogicalPlanSnafu,
GetRegionMetadataSnafu, HandleRegionRequestSnafu, RegionEngineNotFoundSnafu,
RegionNotFoundSnafu, Result, StopRegionEngineSnafu, UnsupportedOutputSnafu,
FindLogicalRegionsSnafu, GetRegionMetadataSnafu, HandleRegionRequestSnafu,
RegionEngineNotFoundSnafu, RegionNotFoundSnafu, Result, StopRegionEngineSnafu, UnexpectedSnafu,
UnsupportedOutputSnafu,
};
use crate::event_listener::RegionServerEventListenerRef;

@@ -123,7 +126,10 @@ impl RegionServer {
self.inner.handle_read(request).await
}

pub fn opened_regions(&self) -> Vec<RegionStat> {
/// Returns all opened and reportable regions.
///
/// Notes: except all metrics regions.
pub fn reportable_regions(&self) -> Vec<RegionStat> {
self.inner
.region_map
.iter()
@@ -369,7 +375,7 @@ impl RegionServerInner {
let current_region_status = self.region_map.get(&region_id);

let engine = match region_change {
RegionChange::Register(ref engine_type) => match current_region_status {
RegionChange::Register(ref engine_type, _) => match current_region_status {
Some(status) => match status.clone() {
RegionEngineWithStatus::Registering(_) => {
return Ok(CurrentEngine::EarlyReturn(0))
@@ -427,8 +433,12 @@ impl RegionServerInner {
.start_timer();

let region_change = match &request {
RegionRequest::Create(create) => RegionChange::Register(create.engine.clone()),
RegionRequest::Open(open) => RegionChange::Register(open.engine.clone()),
RegionRequest::Create(create) => RegionChange::Register(create.engine.clone(), false),
RegionRequest::Open(open) => {
let is_opening_physical_region =
open.options.contains_key(PHYSICAL_TABLE_METADATA_KEY);
RegionChange::Register(open.engine.clone(), is_opening_physical_region)
}
RegionRequest::Close(_) | RegionRequest::Drop(_) => RegionChange::Deregisters,
RegionRequest::Put(_)
| RegionRequest::Delete(_)
@@ -460,7 +470,8 @@ impl RegionServerInner {
{
Ok(result) => {
// Sets corresponding region status to ready.
self.set_region_status_ready(region_id, engine, region_change);
self.set_region_status_ready(region_id, engine, region_change)
.await?;
Ok(result)
}
Err(err) => {
@@ -478,7 +489,7 @@ impl RegionServerInner {
region_change: &RegionChange,
) {
match region_change {
RegionChange::Register(_) => {
RegionChange::Register(_, _) => {
self.region_map.insert(
region_id,
RegionEngineWithStatus::Registering(engine.clone()),
@@ -497,7 +508,7 @@ impl RegionServerInner {
fn unset_region_status(&self, region_id: RegionId, region_change: RegionChange) {
match region_change {
RegionChange::None => {}
RegionChange::Register(_) | RegionChange::Deregisters => {
RegionChange::Register(_, _) | RegionChange::Deregisters => {
self.region_map
.remove(&region_id)
.map(|(id, engine)| engine.set_writable(id, false));
@@ -505,16 +516,20 @@ impl RegionServerInner {
}
}

fn set_region_status_ready(
async fn set_region_status_ready(
&self,
region_id: RegionId,
engine: RegionEngineRef,
region_change: RegionChange,
) {
) -> Result<()> {
let engine_type = engine.name();
match region_change {
RegionChange::None => {}
RegionChange::Register(_) => {
RegionChange::Register(_, is_opening_physical_region) => {
if is_opening_physical_region {
self.register_logical_regions(&engine, region_id).await?;
}

info!("Region {region_id} is registered to engine {engine_type}");
self.region_map
.insert(region_id, RegionEngineWithStatus::Ready(engine));
@@ -528,6 +543,37 @@ impl RegionServerInner {
self.event_listener.on_region_deregistered(region_id);
}
}
Ok(())
}

async fn register_logical_regions(
&self,
engine: &RegionEngineRef,
physical_region_id: RegionId,
) -> Result<()> {
let metric_engine =
engine
.as_any()
.downcast_ref::<MetricEngine>()
.context(UnexpectedSnafu {
violated: format!(
"expecting engine type '{}', actual '{}'",
METRIC_ENGINE_NAME,
engine.name(),
),
})?;

let logical_regions = metric_engine
.logical_regions(physical_region_id)
.await
.context(FindLogicalRegionsSnafu { physical_region_id })?;

for region in logical_regions {
self.region_map
.insert(region, RegionEngineWithStatus::Ready(engine.clone()));
info!("Logical region {} is registered!", region);
}
Ok(())
}

pub async fn handle_read(&self, request: QueryRequest) -> Result<SendableRecordBatchStream> {
@@ -622,7 +668,7 @@ impl RegionServerInner {

enum RegionChange {
None,
Register(String),
Register(String, bool),
Deregisters,
}

@@ -1051,7 +1097,7 @@ mod tests {
CurrentEngineTest {
region_id,
current_region_status: None,
region_change: RegionChange::Register(engine.name().to_string()),
region_change: RegionChange::Register(engine.name().to_string(), false),
assert: Box::new(|result| {
let current_engine = result.unwrap();
assert_matches!(current_engine, CurrentEngine::Engine(_));
@@ -1060,7 +1106,7 @@ mod tests {
CurrentEngineTest {
region_id,
current_region_status: Some(RegionEngineWithStatus::Registering(engine.clone())),
region_change: RegionChange::Register(engine.name().to_string()),
region_change: RegionChange::Register(engine.name().to_string(), false),
assert: Box::new(|result| {
let current_engine = result.unwrap();
assert_matches!(current_engine, CurrentEngine::EarlyReturn(_));
@@ -1069,7 +1115,7 @@ mod tests {
CurrentEngineTest {
region_id,
current_region_status: Some(RegionEngineWithStatus::Deregistering(engine.clone())),
region_change: RegionChange::Register(engine.name().to_string()),
region_change: RegionChange::Register(engine.name().to_string(), false),
assert: Box::new(|result| {
let err = result.unwrap_err();
assert_eq!(err.status_code(), StatusCode::RegionBusy);
@@ -1078,7 +1124,7 @@ mod tests {
CurrentEngineTest {
region_id,
current_region_status: Some(RegionEngineWithStatus::Ready(engine.clone())),
region_change: RegionChange::Register(engine.name().to_string()),
region_change: RegionChange::Register(engine.name().to_string(), false),
assert: Box::new(|result| {
let current_engine = result.unwrap();
assert_matches!(current_engine, CurrentEngine::Engine(_));

@@ -207,4 +207,8 @@ impl RegionEngine for MockRegionEngine {
}
Some(RegionRole::Leader)
}

fn as_any(&self) -> &dyn Any {
self
}
}

@@ -109,6 +109,11 @@ impl ColumnSchema {
&mut self.metadata
}

/// Retrieve the column comment
pub fn column_comment(&self) -> Option<&String> {
self.metadata.get(COMMENT_KEY)
}

pub fn with_time_index(mut self, is_time_index: bool) -> Self {
self.is_time_index = is_time_index;
if is_time_index {
@@ -315,12 +320,16 @@ mod tests {

#[test]
fn test_column_schema_with_metadata() {
let metadata = Metadata::from([("k1".to_string(), "v1".to_string())]);
let metadata = Metadata::from([
("k1".to_string(), "v1".to_string()),
(COMMENT_KEY.to_string(), "test comment".to_string()),
]);
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
.with_metadata(metadata)
.with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
.unwrap();
assert_eq!("v1", column_schema.metadata().get("k1").unwrap());
assert_eq!("test comment", column_schema.column_comment().unwrap());
assert!(column_schema
.metadata()
.get(DEFAULT_CONSTRAINT_KEY)

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::any::Any;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

@@ -119,6 +120,10 @@ impl RegionEngine for FileRegionEngine {
fn role(&self, region_id: RegionId) -> Option<RegionRole> {
self.inner.state(region_id)
}

fn as_any(&self) -> &dyn Any {
self
}
}

struct EngineInner {

@@ -55,7 +55,7 @@ use query::QueryEngineRef;
use raft_engine::{Config, ReadableSize, RecoveryMode};
use servers::error as server_error;
use servers::error::{AuthSnafu, ExecuteQuerySnafu, ParsePromQLSnafu};
use servers::export_metrics::{ExportMetricsOption, ExportMetricsTask};
use servers::export_metrics::ExportMetricsTask;
use servers::interceptor::{
PromQueryInterceptor, PromQueryInterceptorRef, SqlQueryInterceptor, SqlQueryInterceptorRef,
};
@@ -76,6 +76,7 @@ use sql::statements::statement::Statement;
use sqlparser::ast::ObjectName;
pub use standalone::StandaloneDatanodeManager;

use self::prom_store::ExportMetricHandler;
use crate::error::{
self, Error, ExecLogicalPlanSnafu, ExecutePromqlSnafu, ExternalSnafu, ParseSqlSnafu,
PermissionSnafu, PlanStatementSnafu, Result, SqlExecInterceptedSnafu, StartServerSnafu,
@@ -190,18 +191,16 @@ impl Instance {
&mut self,
opts: impl Into<FrontendOptions> + TomlSerializable,
) -> Result<()> {
let opts: FrontendOptions = opts.into();
self.export_metrics_task =
ExportMetricsTask::try_new(&opts.export_metrics, Some(&self.plugins))
.context(StartServerSnafu)?;
let servers = Services::build(opts, Arc::new(self.clone()), self.plugins.clone()).await?;
self.servers = Arc::new(servers);

Ok(())
}

pub fn build_export_metrics_task(&mut self, opts: &ExportMetricsOption) -> Result<()> {
self.export_metrics_task =
ExportMetricsTask::try_new(opts, Some(&self.plugins)).context(StartServerSnafu)?;
Ok(())
}

pub fn catalog_manager(&self) -> &CatalogManagerRef {
&self.catalog_manager
}
@@ -232,7 +231,15 @@ impl FrontendInstance for Instance {
self.script_executor.start(self)?;

if let Some(t) = self.export_metrics_task.as_ref() {
t.start()
if t.send_by_handler {
let handler = ExportMetricHandler::new_handler(
self.inserter.clone(),
self.statement_executor.clone(),
);
t.start(Some(handler)).context(StartServerSnafu)?
} else {
t.start(None).context(StartServerSnafu)?;
}
}

futures::future::try_join_all(self.servers.iter().map(|(name, handler)| async move {

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use api::prom_store::remote::read_request::ResponseType;
use api::prom_store::remote::{Query, QueryResult, ReadRequest, ReadResponse, WriteRequest};
use async_trait::async_trait;
@@ -21,10 +23,14 @@ use common_error::ext::BoxedError;
use common_query::Output;
use common_recordbatch::RecordBatches;
use common_telemetry::logging;
use operator::insert::InserterRef;
use operator::statement::StatementExecutor;
use prost::Message;
use servers::error::{self, AuthSnafu, Result as ServerResult};
use servers::prom_store::{self, Metrics};
use servers::query_handler::{PromStoreProtocolHandler, PromStoreResponse};
use servers::query_handler::{
PromStoreProtocolHandler, PromStoreProtocolHandlerRef, PromStoreResponse,
};
use session::context::QueryContextRef;
use snafu::{OptionExt, ResultExt};

@@ -209,3 +215,49 @@ impl PromStoreProtocolHandler for Instance {
todo!();
}
}

/// This handler is mainly used for `frontend` or `standalone` to directly import
/// the metrics collected by itself, thereby avoiding importing metrics through the network,
/// thus reducing compression and network transmission overhead,
/// so only implement `PromStoreProtocolHandler::write` method.
pub struct ExportMetricHandler {
inserter: InserterRef,
statement_executor: Arc<StatementExecutor>,
}

impl ExportMetricHandler {
pub fn new_handler(
inserter: InserterRef,
statement_executor: Arc<StatementExecutor>,
) -> PromStoreProtocolHandlerRef {
Arc::new(Self {
inserter,
statement_executor,
})
}
}

#[async_trait]
impl PromStoreProtocolHandler for ExportMetricHandler {
async fn write(&self, request: WriteRequest, ctx: QueryContextRef) -> ServerResult<()> {
let (requests, _) = prom_store::to_grpc_row_insert_requests(request)?;
self.inserter
.handle_row_inserts(requests, ctx, self.statement_executor.as_ref())
.await
.map_err(BoxedError::new)
.context(error::ExecuteGrpcQuerySnafu)?;
Ok(())
}

async fn read(
&self,
_request: ReadRequest,
_ctx: QueryContextRef,
) -> ServerResult<PromStoreResponse> {
unreachable!();
}

async fn ingest_metrics(&self, _metrics: Metrics) -> ServerResult<()> {
unreachable!();
}
}

@@ -22,10 +22,10 @@ use common_recordbatch::SendableRecordBatchStream;
use partition::manager::PartitionRuleManagerRef;
use query::error::{RegionQuerySnafu, Result as QueryResult};
use query::region_query::RegionQueryHandler;
use snafu::{OptionExt, ResultExt};
use snafu::ResultExt;
use store_api::storage::RegionId;

use crate::error::{FindDatanodeSnafu, FindTableRouteSnafu, RequestQuerySnafu, Result};
use crate::error::{FindTableRouteSnafu, RequestQuerySnafu, Result};

pub(crate) struct FrontendRegionQueryHandler {
partition_manager: PartitionRuleManagerRef,
@@ -58,18 +58,13 @@ impl FrontendRegionQueryHandler {
async fn do_get_inner(&self, request: QueryRequest) -> Result<SendableRecordBatchStream> {
let region_id = RegionId::from_u64(request.region_id);

let table_route = self
let peer = &self
.partition_manager
.find_table_route(region_id.table_id())
.find_region_leader(region_id)
.await
.context(FindTableRouteSnafu {
table_id: region_id.table_id(),
})?;
let peer = table_route
.find_region_leader(region_id.region_number())
.context(FindDatanodeSnafu {
region: region_id.region_number(),
})?;

let client = self.datanode_manager.datanode(peer).await;

@@ -12,33 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::sync::Arc;

use api::v1::region::{QueryRequest, RegionRequest, RegionResponse};
use async_trait::async_trait;
use client::region::check_response_header;
use common_catalog::consts::METRIC_ENGINE;
use common_error::ext::BoxedError;
use common_meta::datanode_manager::{AffectedRows, Datanode, DatanodeManager, DatanodeRef};
use common_meta::ddl::{TableMetadata, TableMetadataAllocator, TableMetadataAllocatorContext};
use common_meta::error::{self as meta_error, Result as MetaResult, UnsupportedSnafu};
use common_meta::key::table_route::{
LogicalTableRouteValue, PhysicalTableRouteValue, TableRouteValue,
};
use common_meta::error::{self as meta_error, Result as MetaResult};
use common_meta::peer::Peer;
use common_meta::rpc::ddl::CreateTableTask;
use common_meta::rpc::router::{Region, RegionRoute};
use common_meta::sequence::SequenceRef;
use common_meta::wal::options_allocator::allocate_region_wal_options;
use common_meta::wal::WalOptionsAllocatorRef;
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::tracing;
use common_telemetry::tracing_context::{FutureExt, TracingContext};
use common_telemetry::{debug, info, tracing};
use datanode::region_server::RegionServer;
use servers::grpc::region_server::RegionServerHandler;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::{RegionId, RegionNumber, TableId};
use snafu::{OptionExt, ResultExt};

use crate::error::{InvalidRegionRequestSnafu, InvokeRegionServerSnafu, Result};

@@ -52,7 +40,7 @@ impl DatanodeManager for StandaloneDatanodeManager {
}

/// Relative to [client::region::RegionRequester]
struct RegionInvoker {
pub struct RegionInvoker {
region_server: RegionServer,
}

@@ -109,121 +97,3 @@ impl Datanode for RegionInvoker {
.context(meta_error::ExternalSnafu)
}
}

pub struct StandaloneTableMetadataAllocator {
table_id_sequence: SequenceRef,
wal_options_allocator: WalOptionsAllocatorRef,
}

impl StandaloneTableMetadataAllocator {
pub fn new(
table_id_sequence: SequenceRef,
wal_options_allocator: WalOptionsAllocatorRef,
) -> Self {
Self {
table_id_sequence,
wal_options_allocator,
}
}

async fn allocate_table_id(&self, task: &CreateTableTask) -> MetaResult<TableId> {
let table_id = if let Some(table_id) = &task.create_table.table_id {
let table_id = table_id.id;

ensure!(
!self
.table_id_sequence
.min_max()
.await
.contains(&(table_id as u64)),
UnsupportedSnafu {
operation: format!(
"create table by id {} that is reserved in this node",
table_id
)
}
);

info!(
"Received explicitly allocated table id {}, will use it directly.",
table_id
);

table_id
} else {
self.table_id_sequence.next().await? as TableId
};
Ok(table_id)
}

fn create_wal_options(
&self,
table_route: &TableRouteValue,
) -> MetaResult<HashMap<RegionNumber, String>> {
match table_route {
TableRouteValue::Physical(x) => {
let region_numbers = x
.region_routes
.iter()
.map(|route| route.region.id.region_number())
.collect();
allocate_region_wal_options(region_numbers, &self.wal_options_allocator)
}
TableRouteValue::Logical(_) => Ok(HashMap::new()),
}
}
}

fn create_table_route(table_id: TableId, task: &CreateTableTask) -> TableRouteValue {
if task.create_table.engine == METRIC_ENGINE {
TableRouteValue::Logical(LogicalTableRouteValue {})
} else {
let region_routes = task
.partitions
.iter()
.enumerate()
.map(|(i, partition)| {
let region = Region {
id: RegionId::new(table_id, i as u32),
partition: Some(partition.clone().into()),
..Default::default()
};
// It's only a placeholder.
let peer = Peer::default();
RegionRoute {
region,
leader_peer: Some(peer),
follower_peers: vec![],
leader_status: None,
}
})
.collect::<Vec<_>>();
TableRouteValue::Physical(PhysicalTableRouteValue::new(region_routes))
}
}

#[async_trait]
impl TableMetadataAllocator for StandaloneTableMetadataAllocator {
async fn create(
&self,
_ctx: &TableMetadataAllocatorContext,
task: &CreateTableTask,
) -> MetaResult<TableMetadata> {
let table_id = self.allocate_table_id(task).await?;

let table_route = create_table_route(table_id, task);

let region_wal_options = self.create_wal_options(&table_route)?;

debug!(
"Allocated region wal options {:?} for table {}",
region_wal_options, table_id
);

Ok(TableMetadata {
table_id,
table_route,
region_wal_options,
})
}
}

@@ -17,34 +17,34 @@ use prometheus::*;

lazy_static! {
pub static ref METRIC_HANDLE_SQL_ELAPSED: Histogram =
register_histogram!("frontend_handle_sql_elapsed", "frontend handle sql elapsed").unwrap();
register_histogram!("greptime_frontend_handle_sql_elapsed", "frontend handle sql elapsed").unwrap();
pub static ref METRIC_HANDLE_PROMQL_ELAPSED: Histogram = register_histogram!(
"frontend_handle_promql_elapsed",
"greptime_frontend_handle_promql_elapsed",
"frontend handle promql elapsed"
)
.unwrap();
pub static ref METRIC_EXEC_PLAN_ELAPSED: Histogram =
register_histogram!("frontend_exec_plan_elapsed", "frontend exec plan elapsed").unwrap();
register_histogram!("greptime_frontend_exec_plan_elapsed", "frontend exec plan elapsed").unwrap();
pub static ref METRIC_HANDLE_SCRIPTS_ELAPSED: Histogram = register_histogram!(
"frontend_handle_scripts_elapsed",
"greptime_frontend_handle_scripts_elapsed",
"frontend handle scripts elapsed"
)
.unwrap();
pub static ref METRIC_RUN_SCRIPT_ELAPSED: Histogram =
register_histogram!("frontend_run_script_elapsed", "frontend run script elapsed").unwrap();
register_histogram!("greptime_frontend_run_script_elapsed", "frontend run script elapsed").unwrap();
/// The samples count of Prometheus remote write.
pub static ref PROM_STORE_REMOTE_WRITE_SAMPLES: IntCounter = register_int_counter!(
"frontend_prometheus_remote_write_samples",
"greptime_frontend_prometheus_remote_write_samples",
"frontend prometheus remote write samples"
)
.unwrap();
pub static ref OTLP_METRICS_ROWS: IntCounter = register_int_counter!(
"frontend_otlp_metrics_rows",
"greptime_frontend_otlp_metrics_rows",
"frontend otlp metrics rows"
)
.unwrap();
pub static ref OTLP_TRACES_ROWS: IntCounter = register_int_counter!(
"frontend_otlp_traces_rows",
"greptime_frontend_otlp_traces_rows",
"frontend otlp traces rows"
)
.unwrap();

@@ -113,7 +113,7 @@ pub enum Error {
#[snafu(display("Failed to parse regex DFA"))]
ParseDFA {
#[snafu(source)]
error: regex_automata::Error,
error: Box<regex_automata::dfa::Error>,
location: Location,
},

@@ -48,7 +48,7 @@
//! More detailed information regarding the encoding of the inverted indices can be found in the [RFC].
//!
//! [`InvertedIndexMetas`]: https://github.com/GreptimeTeam/greptime-proto/blob/2aaee38de81047537dfa42af9df63bcfb866e06c/proto/greptime/v1/index/inverted_index.proto#L32-L64
//! [RFC]: https://github.com/GreptimeTeam/greptimedb/blob/develop/docs/rfcs/2023-11-03-inverted-index.md
//! [RFC]: https://github.com/GreptimeTeam/greptimedb/blob/main/docs/rfcs/2023-11-03-inverted-index.md

pub mod reader;
pub mod writer;

@@ -30,4 +30,7 @@ pub trait FstApplier: Send + Sync {
///
/// Returns a `Vec<u64>`, with each u64 being a value from the FstMap.
fn apply(&self, fst: &FstMap) -> Vec<u64>;

/// Returns the memory usage of the applier.
fn memory_usage(&self) -> usize;
}

@@ -12,9 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::mem::size_of;

use fst::map::OpBuilder;
use fst::{IntoStreamer, Streamer};
use regex_automata::DenseDFA;
use regex_automata::dfa::dense::DFA;
use snafu::{ensure, ResultExt};

use crate::inverted_index::error::{
@@ -24,15 +26,13 @@ use crate::inverted_index::search::fst_apply::FstApplier;
use crate::inverted_index::search::predicate::{Predicate, Range};
use crate::inverted_index::FstMap;

type Dfa = DenseDFA<Vec<usize>, usize>;

/// `IntersectionFstApplier` applies intersection operations on an FstMap using specified ranges and regex patterns.
pub struct IntersectionFstApplier {
/// A list of `Range` which define inclusive or exclusive ranges for keys to be queried in the FstMap.
ranges: Vec<Range>,

/// A list of `Dfa` compiled from regular expression patterns.
dfas: Vec<Dfa>,
dfas: Vec<DFA<Vec<u32>>>,
}

impl FstApplier for IntersectionFstApplier {
@@ -70,6 +70,26 @@ impl FstApplier for IntersectionFstApplier {
}
values
}

fn memory_usage(&self) -> usize {
let mut size = self.ranges.capacity() * size_of::<Range>();
for range in &self.ranges {
size += range
.lower
.as_ref()
.map_or(0, |bound| bound.value.capacity());
size += range
.upper
.as_ref()
.map_or(0, |bound| bound.value.capacity());
}

size += self.dfas.capacity() * size_of::<DFA<Vec<u32>>>();
for dfa in &self.dfas {
size += dfa.memory_usage();
}
size
}
}

impl IntersectionFstApplier {
@@ -88,8 +108,8 @@ impl IntersectionFstApplier {
match predicate {
Predicate::Range(range) => ranges.push(range.range),
Predicate::RegexMatch(regex) => {
let dfa = DenseDFA::new(&regex.pattern);
let dfa = dfa.context(ParseDFASnafu)?;
let dfa = DFA::new(&regex.pattern);
let dfa = dfa.map_err(Box::new).context(ParseDFASnafu)?;
dfas.push(dfa);
}
// Rejection of `InList` predicates is enforced here.
@@ -210,47 +230,67 @@ mod tests {

#[test]
fn test_intersection_fst_applier_with_valid_pattern() {
let test_fst = FstMap::from_iter([("aa", 1), ("bb", 2), ("cc", 3)]).unwrap();
let test_fst = FstMap::from_iter([("123", 1), ("abc", 2)]).unwrap();

let applier = create_applier_from_pattern("a.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);
let cases = vec![
("1", vec![1]),
("2", vec![1]),
("3", vec![1]),
("^1", vec![1]),
("^2", vec![]),
("^3", vec![]),
("^1.*", vec![1]),
("^.*2", vec![1]),
("^.*3", vec![1]),
("1$", vec![]),
("2$", vec![]),
("3$", vec![1]),
("1.*$", vec![1]),
("2.*$", vec![1]),
("3.*$", vec![1]),
("^1..$", vec![1]),
("^.2.$", vec![1]),
("^..3$", vec![1]),
("^[0-9]", vec![1]),
("^[0-9]+$", vec![1]),
("^[0-9][0-9]$", vec![]),
("^[0-9][0-9][0-9]$", vec![1]),
("^123$", vec![1]),
("a", vec![2]),
("b", vec![2]),
("c", vec![2]),
("^a", vec![2]),
("^b", vec![]),
("^c", vec![]),
("^a.*", vec![2]),
("^.*b", vec![2]),
("^.*c", vec![2]),
("a$", vec![]),
("b$", vec![]),
("c$", vec![2]),
("a.*$", vec![2]),
("b.*$", vec![2]),
("c.*$", vec![2]),
("^.[a-z]", vec![2]),
("^abc$", vec![2]),
("^ab$", vec![]),
("abc$", vec![2]),
("^a.c$", vec![2]),
("^..c$", vec![2]),
("ab", vec![2]),
(".*", vec![1, 2]),
("", vec![1, 2]),
("^$", vec![]),
("1|a", vec![1, 2]),
("^123$|^abc$", vec![1, 2]),
("^123$|d", vec![1]),
];

let applier = create_applier_from_pattern("b.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![2]);

let applier = create_applier_from_pattern("c.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![3]);

let applier = create_applier_from_pattern("a.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);

let applier = create_applier_from_pattern("b.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![2]);

let applier = create_applier_from_pattern("c.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![3]);

let applier = create_applier_from_pattern("d.?").unwrap();
let results = applier.apply(&test_fst);
assert!(results.is_empty());

let applier = create_applier_from_pattern("a.?|b.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1, 2]);

let applier = create_applier_from_pattern("d.?|a.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);

let applier = create_applier_from_pattern(".*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1, 2, 3]);
for (pattern, expected) in cases {
let applier = create_applier_from_pattern(pattern).unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, expected);
}
}

#[test]
@@ -322,4 +362,36 @@ mod tests {
Err(Error::IntersectionApplierWithInList { .. })
));
}

#[test]
fn test_intersection_fst_applier_memory_usage() {
let applier = IntersectionFstApplier {
ranges: vec![],
dfas: vec![],
};

assert_eq!(applier.memory_usage(), 0);

let dfa = DFA::new("^abc$").unwrap();
assert_eq!(dfa.memory_usage(), 320);

let applier = IntersectionFstApplier {
ranges: vec![Range {
lower: Some(Bound {
value: b"aa".to_vec(),
inclusive: true,
}),
upper: Some(Bound {
value: b"cc".to_vec(),
inclusive: true,
}),
}],
dfas: vec![dfa],
};

assert_eq!(
applier.memory_usage(),
size_of::<Range>() + 4 + size_of::<DFA<Vec<u32>>>() + 320
);
}
}

@@ -13,6 +13,7 @@
// limitations under the License.

use std::collections::HashSet;
use std::mem::size_of;

use snafu::{ensure, ResultExt};

@@ -35,6 +36,11 @@ impl FstApplier for KeysFstApplier {
fn apply(&self, fst: &FstMap) -> Vec<u64> {
self.keys.iter().filter_map(|k| fst.get(k)).collect()
}

fn memory_usage(&self) -> usize {
self.keys.capacity() * size_of::<Bytes>()
+ self.keys.iter().map(|k| k.capacity()).sum::<usize>()
}
}

impl KeysFstApplier {
@@ -302,4 +308,15 @@ mod tests {
let result = KeysFstApplier::try_from(predicates);
assert!(matches!(result, Err(Error::ParseRegex { .. })));
}

#[test]
fn test_keys_fst_applier_memory_usage() {
let applier = KeysFstApplier { keys: vec![] };
assert_eq!(applier.memory_usage(), 0);

let applier = KeysFstApplier {
keys: vec![b("foo"), b("bar")],
};
assert_eq!(applier.memory_usage(), 2 * size_of::<Bytes>() + 6);
}
}

@@ -14,6 +14,8 @@

mod predicates_apply;

use std::collections::BTreeSet;

use async_trait::async_trait;
pub use predicates_apply::PredicatesIndexApplier;

@@ -24,15 +26,19 @@ use crate::inverted_index::format::reader::InvertedIndexReader;
///
/// Applier instances are reusable and work with various `InvertedIndexReader` instances,
/// avoiding repeated compilation of fixed predicates such as regex patterns.
#[mockall::automock]
#[async_trait]
pub trait IndexApplier {
/// Applies the predefined predicates to the data read by the given index reader, returning
/// a list of relevant indices (e.g., post IDs, group IDs, row IDs).
async fn apply(
async fn apply<'a>(
&self,
context: SearchContext,
reader: &mut dyn InvertedIndexReader,
) -> Result<Vec<usize>>;
reader: &mut (dyn InvertedIndexReader + 'a),
) -> Result<BTreeSet<usize>>;

/// Returns the memory usage of the applier.
fn memory_usage(&self) -> usize;
}

/// A context for searching the inverted index.

@@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::BTreeSet;
use std::mem::size_of;

use async_trait::async_trait;
use common_base::BitVec;
use greptime_proto::v1::index::InvertedIndexMetas;
@@ -41,11 +44,11 @@ pub struct PredicatesIndexApplier {
impl IndexApplier for PredicatesIndexApplier {
/// Applies all `FstApplier`s to the data in the inverted index reader, intersecting the individual
/// bitmaps obtained for each index to result in a final set of indices.
async fn apply(
async fn apply<'a>(
&self,
context: SearchContext,
reader: &mut dyn InvertedIndexReader,
) -> Result<Vec<usize>> {
reader: &mut (dyn InvertedIndexReader + 'a),
) -> Result<BTreeSet<usize>> {
let metadata = reader.metadata().await?;

let mut bitmap = Self::bitmap_full_range(&metadata);
@@ -58,7 +61,7 @@ impl IndexApplier for PredicatesIndexApplier {
let Some(meta) = metadata.metas.get(name) else {
match context.index_not_found_strategy {
IndexNotFoundStrategy::ReturnEmpty => {
return Ok(vec![]);
return Ok(BTreeSet::default());
}
IndexNotFoundStrategy::Ignore => {
continue;
@@ -80,6 +83,16 @@ impl IndexApplier for PredicatesIndexApplier {

Ok(bitmap.iter_ones().collect())
}

/// Returns the memory usage of the applier.
fn memory_usage(&self) -> usize {
let mut size = self.fst_appliers.capacity() * size_of::<(IndexName, Box<dyn FstApplier>)>();
for (name, fst_applier) in &self.fst_appliers {
size += name.capacity();
size += fst_applier.memory_usage();
}
size
}
}

impl PredicatesIndexApplier {
@@ -197,7 +210,7 @@ mod tests {
.apply(SearchContext::default(), &mut mock_reader)
.await
.unwrap();
assert_eq!(indices, vec![0, 2, 4, 6]);
assert_eq!(indices, BTreeSet::from_iter([0, 2, 4, 6]));

// An index reader with a single tag "tag-0" but without value "tag-0_value-0"
let mut mock_reader = MockInvertedIndexReader::new();
@@ -251,7 +264,7 @@ mod tests {
.apply(SearchContext::default(), &mut mock_reader)
.await
.unwrap();
assert_eq!(indices, vec![0, 4, 6]);
assert_eq!(indices, BTreeSet::from_iter([0, 4, 6]));
}

#[tokio::test]
@@ -269,7 +282,7 @@ mod tests {
.apply(SearchContext::default(), &mut mock_reader)
.await
.unwrap();
assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]); // full range to scan
assert_eq!(indices, BTreeSet::from_iter([0, 1, 2, 3, 4, 5, 6, 7])); // full range to scan
}

#[tokio::test]
@@ -341,6 +354,21 @@ mod tests {
)
.await
.unwrap();
assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]);
assert_eq!(indices, BTreeSet::from_iter([0, 1, 2, 3, 4, 5, 6, 7]));
}

#[test]
fn test_index_applier_memory_usage() {
let mut mock_fst_applier = MockFstApplier::new();
mock_fst_applier.expect_memory_usage().returning(|| 100);

let applier = PredicatesIndexApplier {
fst_appliers: vec![(s("tag-0"), Box::new(mock_fst_applier))],
};

assert_eq!(
applier.memory_usage(),
size_of::<(IndexName, Box<dyn FstApplier>)>() + 5 + 100
);
}
}

@@ -14,6 +14,7 @@ async-stream.workspace = true
async-trait.workspace = true
byteorder = "1.4"
bytes.workspace = true
chrono.workspace = true
common-base.workspace = true
common-config.workspace = true
common-error.workspace = true
@@ -21,7 +22,6 @@ common-macro.workspace = true
common-meta.workspace = true
common-runtime.workspace = true
common-telemetry.workspace = true
dashmap.workspace = true
futures-util.workspace = true
futures.workspace = true
protobuf = { version = "2", features = ["bytes"] }
@@ -37,4 +37,7 @@ tokio.workspace = true
[dev-dependencies]
common-meta = { workspace = true, features = ["testing"] }
common-test-util.workspace = true
itertools.workspace = true
rand.workspace = true
rand_distr = "0.4"
uuid.workspace = true

@@ -18,6 +18,7 @@ use common_config::wal::KafkaWalTopic;
use common_error::ext::ErrorExt;
use common_macro::stack_trace_debug;
use common_runtime::error::Error as RuntimeError;
use serde_json::error::Error as JsonError;
use snafu::{Location, Snafu};

use crate::kafka::NamespaceImpl as KafkaNamespace;
@@ -123,20 +124,6 @@ pub enum Error {
error: String,
},

#[snafu(display("Failed to encode a record meta"))]
EncodeMeta {
location: Location,
#[snafu(source)]
error: serde_json::Error,
},

#[snafu(display("Failed to decode a record meta"))]
DecodeMeta {
location: Location,
#[snafu(source)]
error: serde_json::Error,
},

#[snafu(display("Missing required key in a record"))]
MissingKey { location: Location },

@@ -146,9 +133,16 @@ pub enum Error {
#[snafu(display("Cannot build a record from empty entries"))]
EmptyEntries { location: Location },

#[snafu(display("Failed to produce records to Kafka, topic: {}", topic))]
#[snafu(display(
"Failed to produce records to Kafka, topic: {}, size: {}, limit: {}",
topic,
size,
limit,
))]
ProduceRecord {
topic: KafkaWalTopic,
size: usize,
limit: usize,
location: Location,
#[snafu(source)]
error: rskafka::client::producer::Error,
@@ -172,6 +166,23 @@ pub enum Error {

#[snafu(display("Failed to do a cast"))]
Cast { location: Location },

#[snafu(display("Failed to encode object into json"))]
EncodeJson {
location: Location,
#[snafu(source)]
error: JsonError,
},

#[snafu(display("Failed to decode object from json"))]
DecodeJson {
location: Location,
#[snafu(source)]
error: JsonError,
},

#[snafu(display("The record sequence is not legal, error: {}", error))]
IllegalSequence { location: Location, error: String },
}

impl ErrorExt for Error {

@@ -12,10 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

mod client_manager;
pub(crate) mod client_manager;
pub mod log_store;
mod offset;
mod record_utils;
pub(crate) mod util;

use std::fmt::Display;

@@ -29,8 +28,8 @@ use crate::error::Error;
/// Kafka Namespace implementation.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Serialize, Deserialize)]
pub struct NamespaceImpl {
region_id: u64,
topic: Topic,
pub region_id: u64,
pub topic: Topic,
}

impl Namespace for NamespaceImpl {
@@ -41,7 +40,7 @@ impl Namespace for NamespaceImpl {

impl Display for NamespaceImpl {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}/{}", self.topic, self.region_id)
write!(f, "[topic: {}, region: {}]", self.topic, self.region_id)
}
}

@@ -49,11 +48,11 @@ impl Display for NamespaceImpl {
#[derive(Debug, PartialEq, Clone)]
pub struct EntryImpl {
/// Entry payload.
data: Vec<u8>,
pub data: Vec<u8>,
/// The logical entry id.
id: EntryId,
pub id: EntryId,
/// The namespace used to identify and isolate log entries from different regions.
ns: NamespaceImpl,
pub ns: NamespaceImpl,
}

impl Entry for EntryImpl {
@@ -77,7 +76,7 @@ impl Display for EntryImpl {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"Entry (ns: {}, id: {}, data_len: {})",
"Entry [ns: {}, id: {}, data_len: {}]",
self.ns,
self.id,
self.data.len()

@@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::sync::Arc;

use common_config::wal::{KafkaConfig, KafkaWalTopic as Topic};
use dashmap::mapref::entry::Entry as DashMapEntry;
use dashmap::DashMap;
use rskafka::client::partition::{PartitionClient, UnknownTopicHandling};
use rskafka::client::producer::aggregator::RecordAggregator;
use rskafka::client::producer::{BatchProducer, BatchProducerBuilder};
use rskafka::client::{Client as RsKafkaClient, ClientBuilder};
use rskafka::BackoffConfig;
use snafu::ResultExt;
use tokio::sync::RwLock;

use crate::error::{BuildClientSnafu, BuildPartitionClientSnafu, Result};

@@ -62,12 +62,12 @@ impl Client {
/// Manages client construction and accesses.
#[derive(Debug)]
pub(crate) struct ClientManager {
config: KafkaConfig,
pub(crate) config: KafkaConfig,
/// Top-level client in kafka. All clients are constructed by this client.
client_factory: RsKafkaClient,
/// A pool maintaining a collection of clients.
/// Key: a topic. Value: the associated client of the topic.
client_pool: DashMap<Topic, Client>,
client_pool: RwLock<HashMap<Topic, Client>>,
}

impl ClientManager {
@@ -91,18 +91,27 @@ impl ClientManager {
Ok(Self {
config: config.clone(),
client_factory: client,
client_pool: DashMap::new(),
client_pool: RwLock::new(HashMap::new()),
})
}

/// Gets the client associated with the topic. If the client does not exist, a new one will
/// be created and returned.
pub(crate) async fn get_or_insert(&self, topic: &Topic) -> Result<Client> {
match self.client_pool.entry(topic.to_string()) {
DashMapEntry::Occupied(entry) => Ok(entry.get().clone()),
DashMapEntry::Vacant(entry) => {
let topic_client = self.try_create_client(topic).await?;
Ok(entry.insert(topic_client).clone())
{
let client_pool = self.client_pool.read().await;
if let Some(client) = client_pool.get(topic) {
return Ok(client.clone());
}
}

let mut client_pool = self.client_pool.write().await;
match client_pool.get(topic) {
Some(client) => Ok(client.clone()),
None => {
let client = self.try_create_client(topic).await?;
client_pool.insert(topic.clone(), client.clone());
Ok(client)
}
}
}
@@ -124,3 +133,95 @@ impl ClientManager {
Ok(Client::new(raw_client, &self.config))
}
}

#[cfg(test)]
mod tests {
use common_meta::wal::kafka::test_util::run_test_with_kafka_wal;
use tokio::sync::Barrier;

use super::*;
use crate::test_util::kafka::create_topics;

/// Prepares for a test in that a collection of topics and a client manager are created.
async fn prepare(
test_name: &str,
num_topics: usize,
broker_endpoints: Vec<String>,
) -> (ClientManager, Vec<Topic>) {
let topics = create_topics(
num_topics,
|i| format!("{test_name}_{}_{}", i, uuid::Uuid::new_v4()),
&broker_endpoints,
)
.await;

let config = KafkaConfig {
broker_endpoints,
..Default::default()
};
let manager = ClientManager::try_new(&config).await.unwrap();

(manager, topics)
}

/// Sends `get_or_insert` requests sequentially to the client manager, and checks if it could handle them correctly.
#[tokio::test]
async fn test_sequential() {
run_test_with_kafka_wal(|broker_endpoints| {
Box::pin(async {
let (manager, topics) = prepare("test_sequential", 128, broker_endpoints).await;
// Assigns multiple regions to a topic.
let region_topic = (0..512)
.map(|region_id| (region_id, &topics[region_id % topics.len()]))
.collect::<HashMap<_, _>>();

// Gets all clients sequentially.
for (_, topic) in region_topic {
manager.get_or_insert(topic).await.unwrap();
}

// Ensures all clients exist.
let client_pool = manager.client_pool.read().await;
let all_exist = topics.iter().all(|topic| client_pool.contains_key(topic));
assert!(all_exist);
})
})
.await;
}

/// Sends `get_or_insert` requests in parallel to the client manager, and checks if it could handle them correctly.
#[tokio::test(flavor = "multi_thread")]
async fn test_parallel() {
run_test_with_kafka_wal(|broker_endpoints| {
Box::pin(async {
let (manager, topics) = prepare("test_parallel", 128, broker_endpoints).await;
// Assigns multiple regions to a topic.
let region_topic = (0..512)
.map(|region_id| (region_id, topics[region_id % topics.len()].clone()))
.collect::<HashMap<_, _>>();

// Gets all clients in parallel.
let manager = Arc::new(manager);
let barrier = Arc::new(Barrier::new(region_topic.len()));
let tasks = region_topic
.into_values()
.map(|topic| {
let manager = manager.clone();
let barrier = barrier.clone();
tokio::spawn(async move {
barrier.wait().await;
assert!(manager.get_or_insert(&topic).await.is_ok());
})
})
.collect::<Vec<_>>();
futures::future::try_join_all(tasks).await.unwrap();

// Ensures all clients exist.
let client_pool = manager.client_pool.read().await;
let all_exist = topics.iter().all(|topic| client_pool.contains_key(topic));
assert!(all_exist);
})
})
.await;
}
}

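The `ClientManager` change above swaps a `DashMap` for a `tokio::sync::RwLock<HashMap<..>>` with a read-then-write ("double-checked") lookup: the common case takes only a shared read lock, and the write lock is taken and the map re-checked only when the client is missing. Below is a minimal, self-contained sketch of that locking shape for readers unfamiliar with the pattern; the `Client` and `ClientPool` types here are placeholders, not GreptimeDB's actual API, and real client construction would be async and fallible.

```rust
use std::collections::HashMap;
use std::sync::Arc;

use tokio::sync::RwLock;

/// Stand-in for a per-topic client; the real type wraps Kafka clients.
#[derive(Clone)]
struct Client;

#[derive(Default)]
struct ClientPool {
    inner: RwLock<HashMap<String, Client>>,
}

impl ClientPool {
    async fn get_or_insert(&self, topic: &str) -> Client {
        // Fast path: a shared read lock suffices when the client already exists.
        {
            let pool = self.inner.read().await;
            if let Some(client) = pool.get(topic) {
                return client.clone();
            }
        }
        // Slow path: take the write lock and re-check, since another task may
        // have inserted the client between the two lock acquisitions.
        let mut pool = self.inner.write().await;
        if let Some(client) = pool.get(topic) {
            return client.clone();
        }
        let client = Client; // placeholder for the real (async, fallible) construction
        pool.insert(topic.to_string(), client.clone());
        client
    }
}

#[tokio::main]
async fn main() {
    let pool = Arc::new(ClientPool::default());
    let first = pool.get_or_insert("topic-a").await;
    let _second = pool.get_or_insert("topic-a").await; // served from the pool
    let _ = first;
}
```

The re-check under the write lock is what keeps concurrent callers from creating duplicate clients, which is the same property the parallel test above exercises.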
@@ -26,10 +26,10 @@ use store_api::logstore::entry_stream::SendableEntryStream;
use store_api::logstore::namespace::Id as NamespaceId;
use store_api::logstore::{AppendBatchResponse, AppendResponse, LogStore};

use crate::error::{ConsumeRecordSnafu, Error, GetOffsetSnafu, Result};
use crate::error::{ConsumeRecordSnafu, Error, GetOffsetSnafu, IllegalSequenceSnafu, Result};
use crate::kafka::client_manager::{ClientManager, ClientManagerRef};
use crate::kafka::offset::Offset;
use crate::kafka::record_utils::{decode_from_record, RecordProducer};
use crate::kafka::util::offset::Offset;
use crate::kafka::util::record::{maybe_emit_entry, Record, RecordProducer};
use crate::kafka::{EntryImpl, NamespaceImpl};

/// A log store backed by Kafka.
@@ -85,8 +85,6 @@ impl LogStore for KafkaLogStore {
/// Appends a batch of entries and returns a response containing a map where the key is a region id
/// while the value is the id of the last successfully written entry of the region.
async fn append_batch(&self, entries: Vec<Self::Entry>) -> Result<AppendBatchResponse> {
debug!("LogStore handles append_batch with entries {:?}", entries);

if entries.is_empty() {
return Ok(AppendBatchResponse::default());
}
@@ -96,29 +94,26 @@ impl LogStore for KafkaLogStore {
for entry in entries {
producers
.entry(entry.ns.region_id)
.or_insert(RecordProducer::new(entry.ns.clone()))
.or_insert_with(|| RecordProducer::new(entry.ns.clone()))
.push(entry);
}

// Builds a record from entries belong to a region and produces them to kafka server.
let region_ids = producers.keys().cloned().collect::<Vec<_>>();
// Produces entries for each region and gets the offset those entries written to.
// The returned offset is then converted into an entry id.
let last_entry_ids = futures::future::try_join_all(producers.into_iter().map(
|(region_id, producer)| async move {
let entry_id = producer
.produce(&self.client_manager)
.await
.map(TryInto::try_into)??;
Ok((region_id, entry_id))
},
))
.await?
.into_iter()
.collect::<HashMap<_, _>>();

let tasks = producers
.into_values()
.map(|producer| producer.produce(&self.client_manager))
.collect::<Vec<_>>();
// Each produce operation returns a kafka offset of the produced record.
// The offsets are then converted to entry ids.
let entry_ids = futures::future::try_join_all(tasks)
.await?
.into_iter()
.map(TryInto::try_into)
.collect::<Result<Vec<_>>>()?;
debug!("The entries are appended at offsets {:?}", entry_ids);

Ok(AppendBatchResponse {
last_entry_ids: region_ids.into_iter().zip(entry_ids).collect(),
})
Ok(AppendBatchResponse { last_entry_ids })
}

/// Creates a new `EntryStream` to asynchronously generates `Entry` with entry ids
@@ -128,13 +123,10 @@ impl LogStore for KafkaLogStore {
ns: &Self::Namespace,
entry_id: EntryId,
) -> Result<SendableEntryStream<Self::Entry, Self::Error>> {
let topic = ns.topic.clone();
let region_id = ns.region_id;

// Gets the client associated with the topic.
let client = self
.client_manager
.get_or_insert(&topic)
.get_or_insert(&ns.topic)
.await?
.raw_client
.clone();
@@ -148,14 +140,19 @@ impl LogStore for KafkaLogStore {
.await
.context(GetOffsetSnafu { ns: ns.clone() })?
- 1;
// Reads entries with offsets in the range [start_offset, end_offset).
// Reads entries with offsets in the range [start_offset, end_offset].
let start_offset = Offset::try_from(entry_id)?.0;

debug!(
"Start reading entries in range [{}, {}] for ns {}",
start_offset, end_offset, ns
);

// Abort if there're no new entries.
// FIXME(niebayes): how come this case happens?
if start_offset > end_offset {
warn!(
"No new entries for ns {} in range [{}, {})",
"No new entries for ns {} in range [{}, {}]",
ns, start_offset, end_offset
);
return Ok(futures_util::stream::empty().boxed());
@@ -163,46 +160,56 @@ impl LogStore for KafkaLogStore {

let mut stream_consumer = StreamConsumerBuilder::new(client, StartOffset::At(start_offset))
.with_max_batch_size(self.config.max_batch_size.as_bytes() as i32)
.with_max_wait_ms(self.config.produce_record_timeout.as_millis() as i32)
.with_max_wait_ms(self.config.consumer_wait_timeout.as_millis() as i32)
.build();

debug!(
"Built a stream consumer for ns {} to consume entries in range [{}, {})",
"Built a stream consumer for ns {} to consume entries in range [{}, {}]",
ns, start_offset, end_offset
);

// Key: entry id, Value: the records associated with the entry.
let mut entry_records: HashMap<_, Vec<_>> = HashMap::new();
let ns_clone = ns.clone();
let stream = async_stream::stream!({
while let Some(consume_result) = stream_consumer.next().await {
// Each next will prdoce a `RecordAndOffset` and a high watermark offset.
// Each next on the stream consumer produces a `RecordAndOffset` and a high watermark offset.
// The `RecordAndOffset` contains the record data and its start offset.
// The high watermark offset is the end offset of the latest record in the partition.
let (record, high_watermark) = consume_result.context(ConsumeRecordSnafu {
ns: ns_clone.clone(),
})?;
let record_offset = record.offset;
// The high watermark offset is the offset of the last record plus one.
let (record_and_offset, high_watermark) =
consume_result.with_context(|_| ConsumeRecordSnafu {
ns: ns_clone.clone(),
})?;
let (kafka_record, offset) = (record_and_offset.record, record_and_offset.offset);

debug!(
"Read a record at offset {} for ns {}, high watermark: {}",
record_offset, ns_clone, high_watermark
offset, ns_clone, high_watermark
);

let entries = decode_from_record(record.record)?;

// Filters entries by region id.
if let Some(entry) = entries.first()
&& entry.ns.region_id == region_id
{
yield Ok(entries);
} else {
yield Ok(vec![]);
// Ignores no-op records.
if kafka_record.value.is_none() {
if check_termination(offset, end_offset, &entry_records)? {
break;
}
continue;
}

// Terminates the stream if the entry with the end offset was read.
if record_offset >= end_offset {
debug!(
"Stream consumer for ns {} terminates at offset {}",
ns_clone, record_offset
);
// Filters records by namespace.
let record = Record::try_from(kafka_record)?;
if record.meta.ns != ns_clone {
if check_termination(offset, end_offset, &entry_records)? {
break;
}
continue;
}

// Tries to construct an entry from records consumed so far.
if let Some(entry) = maybe_emit_entry(record, &mut entry_records)? {
yield Ok(vec![entry]);
}

if check_termination(offset, end_offset, &entry_records)? {
break;
}
}
@@ -251,3 +258,226 @@ impl LogStore for KafkaLogStore {
        Ok(())
    }
}

fn check_termination(
    offset: i64,
    end_offset: i64,
    entry_records: &HashMap<EntryId, Vec<Record>>,
) -> Result<bool> {
    // Terminates the stream if the entry with the end offset was read.
    if offset >= end_offset {
        debug!("Stream consumer terminates at offset {}", offset);
        // There must be no leftover records when the stream terminates.
        if !entry_records.is_empty() {
            return IllegalSequenceSnafu {
                error: "Found records leftover",
            }
            .fail();
        }
        Ok(true)
    } else {
        Ok(false)
    }
}
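`maybe_emit_entry` is called in the consume loop above but its definition is not shown in this hunk. Purely as an illustration of the buffering idea implied by `entry_records: HashMap<EntryId, Vec<Record>>`, here is a hypothetical, simplified version: record parts are accumulated per entry id and an entry is emitted once all of its parts have arrived. The `Part` type and its `seq`/`total` fields are stand-ins and do not reflect the actual `Record` layout.

use std::collections::HashMap;

/// A stand-in for one Kafka record carrying a piece of a (possibly split) entry.
struct Part {
    entry_id: u64,
    seq: usize,   // position of this part within the entry
    total: usize, // total number of parts the entry was split into
    data: Vec<u8>,
}

/// Buffers `part` and returns the reassembled entry data once every part is present.
fn maybe_emit_entry(
    part: Part,
    buffer: &mut HashMap<u64, Vec<Part>>,
) -> Option<(u64, Vec<u8>)> {
    let (entry_id, total) = (part.entry_id, part.total);
    let parts = buffer.entry(entry_id).or_default();
    parts.push(part);
    if parts.len() < total {
        return None;
    }
    // All parts have arrived: take them out of the buffer and stitch the data back together.
    let mut parts = buffer.remove(&entry_id).expect("just inserted");
    parts.sort_by_key(|p| p.seq);
    Some((entry_id, parts.into_iter().flat_map(|p| p.data).collect()))
}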

#[cfg(test)]
mod tests {
    use common_base::readable_size::ReadableSize;
    use common_config::wal::KafkaWalTopic as Topic;
    use rand::seq::IteratorRandom;

    use super::*;
    use crate::test_util::kafka::{
        create_topics, entries_with_random_data, new_namespace, EntryBuilder,
    };

    // Stores test context for a region.
    struct RegionContext {
        ns: NamespaceImpl,
        entry_builder: EntryBuilder,
        expected: Vec<EntryImpl>,
        flushed_entry_id: EntryId,
    }

    /// Prepares for a test: constructs a log store and creates a collection of topics.
    async fn prepare(
        test_name: &str,
        num_topics: usize,
        broker_endpoints: Vec<String>,
    ) -> (KafkaLogStore, Vec<Topic>) {
        let topics = create_topics(
            num_topics,
            |i| format!("{test_name}_{}_{}", i, uuid::Uuid::new_v4()),
            &broker_endpoints,
        )
        .await;

        let config = KafkaConfig {
            broker_endpoints,
            max_batch_size: ReadableSize::kb(32),
            ..Default::default()
        };
        let logstore = KafkaLogStore::try_new(&config).await.unwrap();

        // Appends a no-op record to each topic.
        for topic in topics.iter() {
            let last_entry_id = logstore
                .append(EntryImpl {
                    data: vec![],
                    id: 0,
                    ns: new_namespace(topic, 0),
                })
                .await
                .unwrap()
                .last_entry_id;
            assert_eq!(last_entry_id, 0);
        }

        (logstore, topics)
    }

    /// Creates a vector containing indexes of all regions if `all` is true.
    /// Otherwise, creates a subset of the indexes. The cardinality of the subset
    /// is nearly a quarter of that of the universe set.
    fn all_or_subset(all: bool, num_regions: usize) -> Vec<u64> {
        assert!(num_regions > 0);
        let amount = if all {
            num_regions
        } else {
            (num_regions / 4).max(1)
        };
        (0..num_regions as u64).choose_multiple(&mut rand::thread_rng(), amount)
    }
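A quick usage illustration of the sampling above (added for clarity, not part of the diff): with `all == false` and `num_regions == 20`, `amount` is `20 / 4 == 5`, so five distinct region indexes are drawn from `0..20`; with `all == true` every index is returned.

let subset = all_or_subset(false, 20);
assert_eq!(subset.len(), 5);
assert!(subset.iter().all(|id| *id < 20));

let everything = all_or_subset(true, 20);
assert_eq!(everything.len(), 20);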

    /// Builds entries for regions specified by `which`. Builds large entries if `large` is true.
    /// Returns the aggregated entries.
    fn build_entries(
        region_contexts: &mut HashMap<u64, RegionContext>,
        which: &[u64],
        large: bool,
    ) -> Vec<EntryImpl> {
        let mut aggregated = Vec::with_capacity(which.len());
        for region_id in which {
            let ctx = region_contexts.get_mut(region_id).unwrap();
            // Builds entries for the region.
            ctx.expected = if !large {
                entries_with_random_data(3, &ctx.entry_builder)
            } else {
                // Builds a large entry of size 256KB which is way greater than the configured `max_batch_size` which is 32KB.
                let large_entry = ctx.entry_builder.with_data([b'1'; 256 * 1024]);
                vec![large_entry]
            };
            // Aggregates entries of all regions.
            aggregated.push(ctx.expected.clone());
        }
        aggregated.into_iter().flatten().collect()
    }

    /// Starts a test with:
    /// * `test_name` - The name of the test.
    /// * `num_topics` - Number of topics to be created in the preparation phase.
    /// * `num_regions` - Number of regions involved in the test.
    /// * `num_appends` - Number of append operations to be performed.
    /// * `all` - All regions will be involved in an append operation if `all` is true. Otherwise,
    ///    an append operation will only randomly choose a subset of regions.
    /// * `large` - Builds large entries for each region if `large` is true.
    async fn test_with(
        test_name: &str,
        num_topics: usize,
        num_regions: usize,
        num_appends: usize,
        all: bool,
        large: bool,
    ) {
        let Ok(broker_endpoints) = std::env::var("GT_KAFKA_ENDPOINTS") else {
            warn!("The endpoints is empty, skipping the test {test_name}");
            return;
        };
        let broker_endpoints = broker_endpoints
            .split(',')
            .map(|s| s.trim().to_string())
            .collect::<Vec<_>>();

        let (logstore, topics) = prepare(test_name, num_topics, broker_endpoints).await;
        let mut region_contexts = (0..num_regions)
            .map(|i| {
                let topic = &topics[i % topics.len()];
                let ns = new_namespace(topic, i as u64);
                let entry_builder = EntryBuilder::new(ns.clone());
                (
                    i as u64,
                    RegionContext {
                        ns,
                        entry_builder,
                        expected: Vec::new(),
                        flushed_entry_id: 0,
                    },
                )
            })
            .collect();

        for _ in 0..num_appends {
            // Appends entries for a subset of regions.
            let which = all_or_subset(all, num_regions);
            let entries = build_entries(&mut region_contexts, &which, large);
            let last_entry_ids = logstore.append_batch(entries).await.unwrap().last_entry_ids;

            // Reads entries for regions and checks for each region that the entries read back are identical to the expected ones.
            for region_id in which {
                let ctx = &region_contexts[&region_id];
                let stream = logstore
                    .read(&ctx.ns, ctx.flushed_entry_id + 1)
                    .await
                    .unwrap();
                let got = stream
                    .collect::<Vec<_>>()
                    .await
                    .into_iter()
                    .flat_map(|x| x.unwrap())
                    .collect::<Vec<_>>();
                assert_eq!(ctx.expected, got);
            }

            // Simulates a flush for regions.
            for (region_id, last_entry_id) in last_entry_ids {
                let ctx = region_contexts.get_mut(&region_id).unwrap();
                ctx.flushed_entry_id = last_entry_id;
            }
        }
    }

    /// Appends entries for one region and checks all entries can be read successfully.
    #[tokio::test]
    async fn test_one_region() {
        test_with("test_one_region", 1, 1, 1, true, false).await;
    }

    /// Appends entries for multiple regions and checks entries for each region can be read successfully.
    /// A topic is assigned only a single region.
    #[tokio::test]
    async fn test_multi_regions_disjoint() {
        test_with("test_multi_regions_disjoint", 5, 5, 1, true, false).await;
    }

    /// Appends entries for multiple regions and checks entries for each region can be read successfully.
    /// A topic is assigned multiple regions.
    #[tokio::test]
    async fn test_multi_regions_overlapped() {
        test_with("test_multi_regions_overlapped", 5, 20, 1, true, false).await;
    }

    /// Appends entries for multiple regions and checks entries for each region can be read successfully.
    /// A topic may be assigned multiple regions. The append operation repeats for several iterations.
    /// Each append operation will only append entries for a subset of randomly chosen regions.
    #[tokio::test]
    async fn test_multi_appends() {
        test_with("test_multi_appends", 5, 20, 3, false, false).await;
    }

    /// Appends large entries for multiple regions and checks entries for each region can be read successfully.
    /// A topic may be assigned multiple regions.
    #[tokio::test]
    async fn test_append_large_entries() {
        test_with("test_append_large_entries", 5, 20, 3, true, true).await;
    }
}

@@ -1,188 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use rskafka::record::Record;
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};

use crate::error::{
    DecodeMetaSnafu, EmptyEntriesSnafu, EncodeMetaSnafu, GetClientSnafu, MissingKeySnafu,
    MissingValueSnafu, ProduceRecordSnafu, Result,
};
use crate::kafka::client_manager::ClientManagerRef;
use crate::kafka::offset::Offset;
use crate::kafka::{EntryId, EntryImpl, NamespaceImpl};

/// Record metadata which will be serialized/deserialized to/from the `key` of a Record.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct RecordMeta {
    /// Meta version. Used for backward compatibility.
    version: u32,
    /// The namespace of the entries wrapped in the record.
    ns: NamespaceImpl,
    /// Ids of the entries built into the record.
    entry_ids: Vec<EntryId>,
    /// entry_offsets[i] is the end offset (exclusive) of the data of the i-th entry in the record value.
    entry_offsets: Vec<usize>,
}

impl RecordMeta {
    fn new(ns: NamespaceImpl, entries: &[EntryImpl]) -> Self {
        Self {
            version: 0,
            ns,
            entry_ids: entries.iter().map(|entry| entry.id).collect(),
            entry_offsets: entries
                .iter()
                .map(|entry| entry.data.len())
                .scan(0, |presum, x| {
                    *presum += x;
                    Some(*presum)
                })
                .collect(),
        }
    }
}
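A small worked example of the running sum above, and of how `decode_from_record` later uses it to slice the record value (added for clarity, not part of the original file): entries with data lengths 3, 4 and 5 yield `entry_offsets == [3, 7, 12]`, and entry `i` occupies `value[prev..entry_offsets[i]]` where `prev` is the previous offset (0 for the first entry).

#[test]
fn entry_offsets_illustration() {
    // The same prefix-sum construction as in `RecordMeta::new`.
    let lengths = [3usize, 4, 5];
    let entry_offsets: Vec<usize> = lengths
        .iter()
        .scan(0, |presum, x| {
            *presum += x;
            Some(*presum)
        })
        .collect();
    assert_eq!(entry_offsets, vec![3, 7, 12]);

    // Slicing a 12-byte value with these offsets recovers the three original segments.
    let value: Vec<u8> = (0u8..12).collect();
    let mut start = 0;
    let segment_lens: Vec<usize> = entry_offsets
        .iter()
        .map(|end| {
            let len = value[start..*end].len();
            start = *end;
            len
        })
        .collect();
    assert_eq!(segment_lens, vec![3, 4, 5]);
}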

/// Produces a record to a kafka topic.
pub(crate) struct RecordProducer {
    /// The namespace of the entries.
    ns: NamespaceImpl,
    /// Entries are buffered before being built into a record.
    entries: Vec<EntryImpl>,
}

impl RecordProducer {
    /// Creates a new producer for producing entries with the given namespace.
    pub(crate) fn new(ns: NamespaceImpl) -> Self {
        Self {
            ns,
            entries: Vec::new(),
        }
    }

    /// Populates the entry buffer with the given entries.
    pub(crate) fn with_entries(self, entries: Vec<EntryImpl>) -> Self {
        Self { entries, ..self }
    }

    /// Pushes an entry into the entry buffer.
    pub(crate) fn push(&mut self, entry: EntryImpl) {
        self.entries.push(entry);
    }

    /// Produces the buffered entries to the kafka server as a kafka record.
    /// Returns the kafka offset of the produced record.
    // TODO(niebayes): since the total size of a region's entries may be way-too large,
    // the producer may need to support splitting entries into multiple records.
    pub(crate) async fn produce(self, client_manager: &ClientManagerRef) -> Result<Offset> {
        ensure!(!self.entries.is_empty(), EmptyEntriesSnafu);

        // Produces the record through a client. The client determines when to send the record to the kafka server.
        let client = client_manager
            .get_or_insert(&self.ns.topic)
            .await
            .map_err(|e| {
                GetClientSnafu {
                    topic: &self.ns.topic,
                    error: e.to_string(),
                }
                .build()
            })?;
        client
            .producer
            .produce(encode_to_record(self.ns.clone(), self.entries)?)
            .await
            .map(Offset)
            .context(ProduceRecordSnafu {
                topic: &self.ns.topic,
            })
    }
}
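For context, a hypothetical usage sketch of the API above; this helper is not part of the original file, it simply wires together `new`, `with_entries` and `produce` as shown.

async fn produce_entries_once(
    ns: NamespaceImpl,
    entries: Vec<EntryImpl>,
    client_manager: &ClientManagerRef,
) -> Result<Offset> {
    // Equivalent to pushing the entries one by one with `push` and then calling `produce`.
    RecordProducer::new(ns)
        .with_entries(entries)
        .produce(client_manager)
        .await
}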

fn encode_to_record(ns: NamespaceImpl, entries: Vec<EntryImpl>) -> Result<Record> {
    let meta = RecordMeta::new(ns, &entries);
    let data = entries.into_iter().flat_map(|entry| entry.data).collect();
    Ok(Record {
        key: Some(serde_json::to_vec(&meta).context(EncodeMetaSnafu)?),
        value: Some(data),
        timestamp: rskafka::chrono::Utc::now(),
        headers: Default::default(),
    })
}

pub(crate) fn decode_from_record(record: Record) -> Result<Vec<EntryImpl>> {
    let key = record.key.context(MissingKeySnafu)?;
    let value = record.value.context(MissingValueSnafu)?;
    let meta: RecordMeta = serde_json::from_slice(&key).context(DecodeMetaSnafu)?;

    let mut entries = Vec::with_capacity(meta.entry_ids.len());
    let mut start_offset = 0;
    for (i, end_offset) in meta.entry_offsets.iter().enumerate() {
        entries.push(EntryImpl {
            // TODO(niebayes): try to avoid the clone.
            data: value[start_offset..*end_offset].to_vec(),
            id: meta.entry_ids[i],
            ns: meta.ns.clone(),
        });
        start_offset = *end_offset;
    }
    Ok(entries)
}

#[cfg(test)]
mod tests {
    use super::*;

    fn new_test_entry<D: AsRef<[u8]>>(data: D, entry_id: EntryId, ns: NamespaceImpl) -> EntryImpl {
        EntryImpl {
            data: data.as_ref().to_vec(),
            id: entry_id,
            ns,
        }
    }

    #[test]
    fn test_serde_record_meta() {
        let ns = NamespaceImpl {
            region_id: 1,
            topic: "test_topic".to_string(),
        };
        let entries = vec![
            new_test_entry(b"111", 1, ns.clone()),
            new_test_entry(b"2222", 2, ns.clone()),
            new_test_entry(b"33333", 3, ns.clone()),
        ];
        let meta = RecordMeta::new(ns, &entries);
        let encoded = serde_json::to_vec(&meta).unwrap();
        let decoded: RecordMeta = serde_json::from_slice(&encoded).unwrap();
        assert_eq!(meta, decoded);
    }

    #[test]
    fn test_encdec_record() {
        let ns = NamespaceImpl {
            region_id: 1,
            topic: "test_topic".to_string(),
        };
        let entries = vec![
            new_test_entry(b"111", 1, ns.clone()),
            new_test_entry(b"2222", 2, ns.clone()),
            new_test_entry(b"33333", 3, ns.clone()),
        ];
        let record = encode_to_record(ns, entries.clone()).unwrap();
        let decoded_entries = decode_from_record(record).unwrap();
        assert_eq!(entries, decoded_entries);
    }
}
18
src/log-store/src/kafka/util.rs
Normal file
@@ -0,0 +1,18 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod offset;
pub mod record;
#[cfg(test)]
mod test_util;
Some files were not shown because too many files have changed in this diff.