mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-25 07:30:02 +00:00
Compare commits
152 Commits
v0.8.0-nig
...
create-vie
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
94409967be | ||
|
|
54432df92f | ||
|
|
8f2ce4abe8 | ||
|
|
d077892e1c | ||
|
|
cfed466fcd | ||
|
|
0c5f4801b7 | ||
|
|
2114b153e7 | ||
|
|
314f2704d4 | ||
|
|
510782261d | ||
|
|
20e8c3d864 | ||
|
|
2a2a44883f | ||
|
|
4248dfcf36 | ||
|
|
64945533dd | ||
|
|
ffc8074556 | ||
|
|
7e56bf250b | ||
|
|
7503992d61 | ||
|
|
50ae4dc174 | ||
|
|
16aef70089 | ||
|
|
786f43da91 | ||
|
|
3e9bda3267 | ||
|
|
89d58538c7 | ||
|
|
d12379106e | ||
|
|
64941d848e | ||
|
|
96a40e0300 | ||
|
|
d2e081c1f9 | ||
|
|
cdbdb04d93 | ||
|
|
5af87baeb0 | ||
|
|
d5a948a0a6 | ||
|
|
bbea651d08 | ||
|
|
8060c81e1d | ||
|
|
e6507aaf34 | ||
|
|
87795248dd | ||
|
|
7a04bfe50a | ||
|
|
2f4726f7b5 | ||
|
|
75d85f9915 | ||
|
|
db329f6c80 | ||
|
|
544c4a70f8 | ||
|
|
02f806fba9 | ||
|
|
9459ace33e | ||
|
|
c1e005b148 | ||
|
|
c00c1d95ee | ||
|
|
5d739932c0 | ||
|
|
aab7367804 | ||
|
|
34f935df66 | ||
|
|
fda1523ced | ||
|
|
2c0c7759ee | ||
|
|
2398918adf | ||
|
|
50bea2f107 | ||
|
|
1629435888 | ||
|
|
b3c94a303b | ||
|
|
883b7fce96 | ||
|
|
ea9367f371 | ||
|
|
2896e1f868 | ||
|
|
183fccbbd6 | ||
|
|
b51089fa61 | ||
|
|
682b04cbe4 | ||
|
|
e1d2f9a596 | ||
|
|
2fca45b048 | ||
|
|
3e1a125732 | ||
|
|
34b1427a82 | ||
|
|
28fd0dc276 | ||
|
|
32b9639d7c | ||
|
|
9038e1b769 | ||
|
|
12286f07ac | ||
|
|
e920f95902 | ||
|
|
c4798d1913 | ||
|
|
2ede968c2b | ||
|
|
89db8c18c8 | ||
|
|
aa0af6135d | ||
|
|
87e0189e58 | ||
|
|
7e8e9aba9d | ||
|
|
c93b76ae5f | ||
|
|
097a0371dc | ||
|
|
b9890ab870 | ||
|
|
b32e0bba9c | ||
|
|
fe1a0109d8 | ||
|
|
11995eb52e | ||
|
|
86d377d028 | ||
|
|
ddeb73fbb7 | ||
|
|
d33435fa84 | ||
|
|
a0f243c128 | ||
|
|
a61fb98e4a | ||
|
|
6c316d268f | ||
|
|
5e24448b96 | ||
|
|
d6b2d1dfb8 | ||
|
|
bfd32571d9 | ||
|
|
0eb023bb23 | ||
|
|
4a5bb698a9 | ||
|
|
18d676802a | ||
|
|
93da45f678 | ||
|
|
7a19f66be0 | ||
|
|
500f9f10fc | ||
|
|
f49cd0ca18 | ||
|
|
ffbb132f27 | ||
|
|
14267c2aed | ||
|
|
77cc7216af | ||
|
|
63681f0e4d | ||
|
|
06a90527a3 | ||
|
|
d5ba2fcf9d | ||
|
|
e3b37ee2c9 | ||
|
|
5d7ce08358 | ||
|
|
92a8e863de | ||
|
|
9428cb8e7c | ||
|
|
5addb7d75a | ||
|
|
623c930736 | ||
|
|
5fa01e7a96 | ||
|
|
922b1a9b66 | ||
|
|
653697f1d5 | ||
|
|
83643eb195 | ||
|
|
d83279567b | ||
|
|
150454b1fd | ||
|
|
58c7858cd4 | ||
|
|
dd18d8c97b | ||
|
|
175929426a | ||
|
|
8f9676aad2 | ||
|
|
74565151e9 | ||
|
|
83c1b485ea | ||
|
|
c2dd1136fe | ||
|
|
7c1c6e8b8c | ||
|
|
62d8bbb10c | ||
|
|
bf14d33962 | ||
|
|
0f1747b80d | ||
|
|
992c7ec71b | ||
|
|
2ad0b24efa | ||
|
|
2b2fd80bf4 | ||
|
|
24886b9530 | ||
|
|
8345f1753c | ||
|
|
3420a010e6 | ||
|
|
9f020aa414 | ||
|
|
c9ac72e7f8 | ||
|
|
86fb9d8ac7 | ||
|
|
1f0fc40287 | ||
|
|
8b7a5aaa4a | ||
|
|
856a4e1e4f | ||
|
|
39b69f1e3b | ||
|
|
bbcdb28b7c | ||
|
|
6377982501 | ||
|
|
ddbcff68dd | ||
|
|
5b315c2d40 | ||
|
|
9816d2a08b | ||
|
|
a99d6eb3f9 | ||
|
|
2c115bc22a | ||
|
|
641592644d | ||
|
|
fa0f3555d4 | ||
|
|
3cad844acd | ||
|
|
cf25cf984b | ||
|
|
3acd5bfad0 | ||
|
|
343525dab8 | ||
|
|
0afac58e4d | ||
|
|
393ea44de0 | ||
|
|
44731fd653 | ||
|
|
d36a5a74d3 |
27
.github/CODEOWNERS
vendored
Normal file
27
.github/CODEOWNERS
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
# GreptimeDB CODEOWNERS
|
||||
|
||||
# These owners will be the default owners for everything in the repo.
|
||||
|
||||
* @GreptimeTeam/db-approver
|
||||
|
||||
## [Module] Database Engine
|
||||
/src/index @zhongzc
|
||||
/src/mito2 @evenyag @v0y4g3r @waynexia
|
||||
/src/query @evenyag
|
||||
|
||||
## [Module] Distributed
|
||||
/src/common/meta @MichaelScofield
|
||||
/src/common/procedure @MichaelScofield
|
||||
/src/meta-client @MichaelScofield
|
||||
/src/meta-srv @MichaelScofield
|
||||
|
||||
## [Module] Write Ahead Log
|
||||
/src/log-store @v0y4g3r
|
||||
/src/store-api @v0y4g3r
|
||||
|
||||
## [Module] Metrics Engine
|
||||
/src/metric-engine @waynexia
|
||||
/src/promql @waynexia
|
||||
|
||||
## [Module] Flow
|
||||
/src/flow @zhongzc @waynexia
|
||||
2
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
2
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -39,7 +39,7 @@ body:
|
||||
- Query Engine
|
||||
- Table Engine
|
||||
- Write Protocols
|
||||
- MetaSrv
|
||||
- Metasrv
|
||||
- Frontend
|
||||
- Datanode
|
||||
- Other
|
||||
|
||||
@@ -26,6 +26,8 @@ runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Install rust toolchain
|
||||
uses: dtolnay/rust-toolchain@master
|
||||
|
||||
3
.github/workflows/apidoc.yml
vendored
3
.github/workflows/apidoc.yml
vendored
@@ -13,7 +13,7 @@ on:
|
||||
name: Build API docs
|
||||
|
||||
env:
|
||||
RUST_TOOLCHAIN: nightly-2023-12-19
|
||||
RUST_TOOLCHAIN: nightly-2024-04-18
|
||||
|
||||
jobs:
|
||||
apidoc:
|
||||
@@ -40,3 +40,4 @@ jobs:
|
||||
uses: JamesIves/github-pages-deploy-action@v4
|
||||
with:
|
||||
folder: target/doc
|
||||
single-commit: true
|
||||
|
||||
25
.github/workflows/develop.yml
vendored
25
.github/workflows/develop.yml
vendored
@@ -30,15 +30,20 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
RUST_TOOLCHAIN: nightly-2023-12-19
|
||||
RUST_TOOLCHAIN: nightly-2024-04-18
|
||||
|
||||
jobs:
|
||||
typos:
|
||||
name: Spell Check with Typos
|
||||
check-typos-and-docs:
|
||||
name: Check typos and docs
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: crate-ci/typos@v1.13.10
|
||||
- name: Check the config docs
|
||||
run: |
|
||||
make config-docs && \
|
||||
git diff --name-only --exit-code ./config/config.md \
|
||||
|| (echo "'config/config.md' is not up-to-date, please run 'make config-docs'." && exit 1)
|
||||
|
||||
check:
|
||||
name: Check
|
||||
@@ -93,6 +98,8 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: ${{ env.RUST_TOOLCHAIN }}
|
||||
@@ -123,10 +130,12 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
target: [ "fuzz_create_table", "fuzz_alter_table" ]
|
||||
target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database" ]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: ${{ env.RUST_TOOLCHAIN }}
|
||||
@@ -305,10 +314,10 @@ jobs:
|
||||
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
|
||||
RUST_BACKTRACE: 1
|
||||
CARGO_INCREMENTAL: 0
|
||||
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
|
||||
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
|
||||
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
|
||||
GT_S3_REGION: ${{ secrets.S3_REGION }}
|
||||
GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
|
||||
GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
|
||||
GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}
|
||||
GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
|
||||
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
|
||||
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
|
||||
UNITTEST_LOG_DIR: "__unittest_logs"
|
||||
|
||||
2
.github/workflows/license.yaml
vendored
2
.github/workflows/license.yaml
vendored
@@ -13,4 +13,4 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Check License Header
|
||||
uses: korandoru/hawkeye@v4
|
||||
uses: korandoru/hawkeye@v5
|
||||
|
||||
10
.github/workflows/nightly-ci.yml
vendored
10
.github/workflows/nightly-ci.yml
vendored
@@ -12,7 +12,7 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
RUST_TOOLCHAIN: nightly-2023-12-19
|
||||
RUST_TOOLCHAIN: nightly-2024-04-18
|
||||
|
||||
jobs:
|
||||
sqlness:
|
||||
@@ -85,10 +85,10 @@ jobs:
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
CARGO_INCREMENTAL: 0
|
||||
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
|
||||
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
|
||||
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
|
||||
GT_S3_REGION: ${{ secrets.S3_REGION }}
|
||||
GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
|
||||
GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
|
||||
GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}
|
||||
GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
|
||||
UNITTEST_LOG_DIR: "__unittest_logs"
|
||||
- name: Notify slack if failed
|
||||
if: failure()
|
||||
|
||||
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@@ -82,7 +82,7 @@ on:
|
||||
# Use env variables to control all the release process.
|
||||
env:
|
||||
# The arguments of building greptime.
|
||||
RUST_TOOLCHAIN: nightly-2023-12-19
|
||||
RUST_TOOLCHAIN: nightly-2024-04-18
|
||||
CARGO_PROFILE: nightly
|
||||
|
||||
# Controls whether to run tests, include unit-test, integration-test and sqlness.
|
||||
|
||||
21
.github/workflows/unassign.yml
vendored
Normal file
21
.github/workflows/unassign.yml
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
name: Auto Unassign
|
||||
on:
|
||||
schedule:
|
||||
- cron: '4 2 * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
auto-unassign:
|
||||
name: Auto Unassign
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Auto Unassign
|
||||
uses: tisonspieces/auto-unassign@main
|
||||
with:
|
||||
token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
|
||||
repository: ${{ github.repository }}
|
||||
@@ -1,132 +0,0 @@
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
## Our Pledge
|
||||
|
||||
We as members, contributors, and leaders pledge to make participation in our
|
||||
community a harassment-free experience for everyone, regardless of age, body
|
||||
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
||||
identity and expression, level of experience, education, socio-economic status,
|
||||
nationality, personal appearance, race, caste, color, religion, or sexual
|
||||
identity and orientation.
|
||||
|
||||
We pledge to act and interact in ways that contribute to an open, welcoming,
|
||||
diverse, inclusive, and healthy community.
|
||||
|
||||
## Our Standards
|
||||
|
||||
Examples of behavior that contributes to a positive environment for our
|
||||
community include:
|
||||
|
||||
* Demonstrating empathy and kindness toward other people
|
||||
* Being respectful of differing opinions, viewpoints, and experiences
|
||||
* Giving and gracefully accepting constructive feedback
|
||||
* Accepting responsibility and apologizing to those affected by our mistakes,
|
||||
and learning from the experience
|
||||
* Focusing on what is best not just for us as individuals, but for the overall
|
||||
community
|
||||
|
||||
Examples of unacceptable behavior include:
|
||||
|
||||
* The use of sexualized language or imagery, and sexual attention or advances of
|
||||
any kind
|
||||
* Trolling, insulting or derogatory comments, and personal or political attacks
|
||||
* Public or private harassment
|
||||
* Publishing others' private information, such as a physical or email address,
|
||||
without their explicit permission
|
||||
* Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting
|
||||
|
||||
## Enforcement Responsibilities
|
||||
|
||||
Community leaders are responsible for clarifying and enforcing our standards of
|
||||
acceptable behavior and will take appropriate and fair corrective action in
|
||||
response to any behavior that they deem inappropriate, threatening, offensive,
|
||||
or harmful.
|
||||
|
||||
Community leaders have the right and responsibility to remove, edit, or reject
|
||||
comments, commits, code, wiki edits, issues, and other contributions that are
|
||||
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
||||
decisions when appropriate.
|
||||
|
||||
## Scope
|
||||
|
||||
This Code of Conduct applies within all community spaces, and also applies when
|
||||
an individual is officially representing the community in public spaces.
|
||||
Examples of representing our community include using an official e-mail address,
|
||||
posting via an official social media account, or acting as an appointed
|
||||
representative at an online or offline event.
|
||||
|
||||
## Enforcement
|
||||
|
||||
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
||||
reported to the community leaders responsible for enforcement at
|
||||
info@greptime.com.
|
||||
All complaints will be reviewed and investigated promptly and fairly.
|
||||
|
||||
All community leaders are obligated to respect the privacy and security of the
|
||||
reporter of any incident.
|
||||
|
||||
## Enforcement Guidelines
|
||||
|
||||
Community leaders will follow these Community Impact Guidelines in determining
|
||||
the consequences for any action they deem in violation of this Code of Conduct:
|
||||
|
||||
### 1. Correction
|
||||
|
||||
**Community Impact**: Use of inappropriate language or other behavior deemed
|
||||
unprofessional or unwelcome in the community.
|
||||
|
||||
**Consequence**: A private, written warning from community leaders, providing
|
||||
clarity around the nature of the violation and an explanation of why the
|
||||
behavior was inappropriate. A public apology may be requested.
|
||||
|
||||
### 2. Warning
|
||||
|
||||
**Community Impact**: A violation through a single incident or series of
|
||||
actions.
|
||||
|
||||
**Consequence**: A warning with consequences for continued behavior. No
|
||||
interaction with the people involved, including unsolicited interaction with
|
||||
those enforcing the Code of Conduct, for a specified period of time. This
|
||||
includes avoiding interactions in community spaces as well as external channels
|
||||
like social media. Violating these terms may lead to a temporary or permanent
|
||||
ban.
|
||||
|
||||
### 3. Temporary Ban
|
||||
|
||||
**Community Impact**: A serious violation of community standards, including
|
||||
sustained inappropriate behavior.
|
||||
|
||||
**Consequence**: A temporary ban from any sort of interaction or public
|
||||
communication with the community for a specified period of time. No public or
|
||||
private interaction with the people involved, including unsolicited interaction
|
||||
with those enforcing the Code of Conduct, is allowed during this period.
|
||||
Violating these terms may lead to a permanent ban.
|
||||
|
||||
### 4. Permanent Ban
|
||||
|
||||
**Community Impact**: Demonstrating a pattern of violation of community
|
||||
standards, including sustained inappropriate behavior, harassment of an
|
||||
individual, or aggression toward or disparagement of classes of individuals.
|
||||
|
||||
**Consequence**: A permanent ban from any sort of public interaction within the
|
||||
community.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
||||
version 2.1, available at
|
||||
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
||||
|
||||
Community Impact Guidelines were inspired by
|
||||
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
||||
|
||||
For answers to common questions about this code of conduct, see the FAQ at
|
||||
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
||||
[https://www.contributor-covenant.org/translations][translations].
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
||||
[Mozilla CoC]: https://github.com/mozilla/diversity
|
||||
[FAQ]: https://www.contributor-covenant.org/faq
|
||||
[translations]: https://www.contributor-covenant.org/translations
|
||||
@@ -50,7 +50,7 @@ GreptimeDB uses the [Apache 2.0 license](https://github.com/GreptimeTeam/greptim
|
||||
|
||||
- To ensure that community is free and confident in its ability to use your contributions, please sign the Contributor License Agreement (CLA) which will be incorporated in the pull request process.
|
||||
- Make sure all files have proper license header (running `docker run --rm -v $(pwd):/github/workspace ghcr.io/korandoru/hawkeye-native:v3 format` from the project root).
|
||||
- Make sure all your codes are formatted and follow the [coding style](https://pingcap.github.io/style-guide/rust/).
|
||||
- Make sure all your code is formatted and follows the [coding style](https://pingcap.github.io/style-guide/rust/) and [style guide](http://github.com/greptimeTeam/docs/style-guide.md).
|
||||
- Make sure all unit tests are passed (using `cargo test --workspace` or [nextest](https://nexte.st/index.html) `cargo nextest run`).
|
||||
- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings`).
|
||||
|
||||
|
||||
2933
Cargo.lock
generated
2933
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
62
Cargo.toml
62
Cargo.toml
@@ -62,7 +62,7 @@ members = [
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "0.7.1"
|
||||
version = "0.7.2"
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
@@ -70,16 +70,24 @@ license = "Apache-2.0"
|
||||
clippy.print_stdout = "warn"
|
||||
clippy.print_stderr = "warn"
|
||||
clippy.implicit_clone = "warn"
|
||||
clippy.readonly_write_lock = "allow"
|
||||
rust.unknown_lints = "deny"
|
||||
# Remove this after https://github.com/PyO3/pyo3/issues/4094
|
||||
rust.non_local_definitions = "allow"
|
||||
|
||||
[workspace.dependencies]
|
||||
# We turn off default-features for some dependencies here so the workspaces which inherit them can
|
||||
# selectively turn them on if needed, since we can override default-features = true (from false)
|
||||
# for the inherited dependency but cannot do the reverse (override from true to false).
|
||||
#
|
||||
# See for more details: https://github.com/rust-lang/cargo/issues/11329
|
||||
ahash = { version = "0.8", features = ["compile-time-rng"] }
|
||||
aquamarine = "0.3"
|
||||
arrow = { version = "47.0" }
|
||||
arrow-array = "47.0"
|
||||
arrow-flight = "47.0"
|
||||
arrow-ipc = { version = "47.0", features = ["lz4"] }
|
||||
arrow-schema = { version = "47.0", features = ["serde"] }
|
||||
arrow = { version = "51.0.0", features = ["prettyprint"] }
|
||||
arrow-array = { version = "51.0.0", default-features = false, features = ["chrono-tz"] }
|
||||
arrow-flight = "51.0"
|
||||
arrow-ipc = { version = "51.0.0", default-features = false, features = ["lz4"] }
|
||||
arrow-schema = { version = "51.0", features = ["serde"] }
|
||||
async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
axum = { version = "0.6", features = ["headers"] }
|
||||
@@ -91,33 +99,38 @@ bytes = { version = "1.5", features = ["serde"] }
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
clap = { version = "4.4", features = ["derive"] }
|
||||
dashmap = "5.4"
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
|
||||
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
|
||||
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
|
||||
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
|
||||
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
|
||||
datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
|
||||
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
|
||||
datafusion-functions = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
|
||||
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
|
||||
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
|
||||
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
|
||||
datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
|
||||
derive_builder = "0.12"
|
||||
etcd-client = "0.12"
|
||||
dotenv = "0.15"
|
||||
# TODO(LFC): Wait for https://github.com/etcdv3/etcd-client/pull/76
|
||||
etcd-client = { git = "https://github.com/MichaelScofield/etcd-client.git", rev = "4c371e9b3ea8e0a8ee2f9cbd7ded26e54a45df3b" }
|
||||
fst = "0.4.7"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "96f1f0404f421ee560a4310c73c5071e49168168" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "73ac0207ab71dfea48f30259ffdb611501b5ecb8" }
|
||||
humantime = "2.1"
|
||||
humantime-serde = "1.1"
|
||||
itertools = "0.10"
|
||||
lazy_static = "1.4"
|
||||
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80b72716dcde47ec4161478416a5c6c21343364d" }
|
||||
mockall = "0.11.4"
|
||||
moka = "0.12"
|
||||
notify = "6.1"
|
||||
num_cpus = "1.16"
|
||||
once_cell = "1.18"
|
||||
opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.git", rev = "33841b38dda79b15f2024952be5f32533325ca02", features = [
|
||||
opentelemetry-proto = { version = "0.5", features = [
|
||||
"gen-tonic",
|
||||
"metrics",
|
||||
"trace",
|
||||
] }
|
||||
parquet = "47.0"
|
||||
parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
|
||||
paste = "1.0"
|
||||
pin-project = "1.0"
|
||||
prometheus = { version = "0.13.3", features = ["process"] }
|
||||
@@ -125,32 +138,35 @@ prost = "0.12"
|
||||
raft-engine = { version = "0.4.1", default-features = false }
|
||||
rand = "0.8"
|
||||
regex = "1.8"
|
||||
regex-automata = { version = "0.2", features = ["transducer"] }
|
||||
regex-automata = { version = "0.4" }
|
||||
reqwest = { version = "0.11", default-features = false, features = [
|
||||
"json",
|
||||
"rustls-tls-native-roots",
|
||||
"stream",
|
||||
"multipart",
|
||||
] }
|
||||
rskafka = "0.5"
|
||||
rust_decimal = "1.33"
|
||||
schemars = "0.8"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = { version = "1.0", features = ["float_roundtrip"] }
|
||||
serde_with = "3"
|
||||
smallvec = { version = "1", features = ["serde"] }
|
||||
snafu = "0.7"
|
||||
sysinfo = "0.30"
|
||||
# on branch v0.38.x
|
||||
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "6a93567ae38d42be5c8d08b13c8ff4dde26502ef", features = [
|
||||
# on branch v0.44.x
|
||||
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "c919990bf62ad38d2b0c0a3bc90b26ad919d51b0", features = [
|
||||
"visitor",
|
||||
] }
|
||||
strum = { version = "0.25", features = ["derive"] }
|
||||
tempfile = "3"
|
||||
tokio = { version = "1.28", features = ["full"] }
|
||||
tokio = { version = "1.36", features = ["full"] }
|
||||
tokio-stream = { version = "0.1" }
|
||||
tokio-util = { version = "0.7", features = ["io-util", "compat"] }
|
||||
toml = "0.8.8"
|
||||
tonic = { version = "0.10", features = ["tls"] }
|
||||
uuid = { version = "1", features = ["serde", "v4", "fast-rng"] }
|
||||
tonic = { version = "0.11", features = ["tls"] }
|
||||
uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
|
||||
zstd = "0.13"
|
||||
|
||||
## workspaces members
|
||||
api = { path = "src/api" }
|
||||
|
||||
14
Makefile
14
Makefile
@@ -169,6 +169,10 @@ check: ## Cargo check all the targets.
|
||||
clippy: ## Check clippy rules.
|
||||
cargo clippy --workspace --all-targets --all-features -- -D warnings
|
||||
|
||||
.PHONY: fix-clippy
|
||||
fix-clippy: ## Fix clippy violations.
|
||||
cargo clippy --workspace --all-targets --all-features --fix
|
||||
|
||||
.PHONY: fmt-check
|
||||
fmt-check: ## Check code format.
|
||||
cargo fmt --all -- --check
|
||||
@@ -188,6 +192,16 @@ run-it-in-container: start-etcd ## Run integration tests in dev-builder.
|
||||
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-${BASE_IMAGE}:latest \
|
||||
make test sqlness-test BUILD_JOBS=${BUILD_JOBS}
|
||||
|
||||
##@ Docs
|
||||
config-docs: ## Generate configuration documentation from toml files.
|
||||
docker run --rm \
|
||||
-v ${PWD}:/greptimedb \
|
||||
-w /greptimedb/config \
|
||||
toml2docs/toml2docs:latest \
|
||||
-p '##' \
|
||||
-t ./config-docs-template.md \
|
||||
-o ./config.md
|
||||
|
||||
##@ General
|
||||
|
||||
# The help target prints out all targets with their descriptions organized
|
||||
|
||||
131
README.md
131
README.md
@@ -6,36 +6,90 @@
|
||||
</picture>
|
||||
</p>
|
||||
|
||||
[](https://codecov.io/gh/GrepTimeTeam/greptimedb)
|
||||
[](https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml)
|
||||
[](https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE)
|
||||
<h1 align="center">Cloud-scale, Fast and Efficient Time Series Database</h1>
|
||||
|
||||
<div align="center">
|
||||
<h3 align="center">
|
||||
<a href="https://greptime.com/product/cloud">GreptimeCloud</a> |
|
||||
<a href="https://docs.greptime.com/">User guide</a> |
|
||||
<a href="https://greptimedb.rs/">API Docs</a> |
|
||||
<a href="https://github.com/GreptimeTeam/greptimedb/issues/3412">Roadmap 2024</a>
|
||||
</h3>
|
||||
|
||||
<a href="https://github.com/GreptimeTeam/greptimedb/releases/latest">
|
||||
<img src="https://img.shields.io/github/v/release/GreptimeTeam/greptimedb.svg" alt="Version"/>
|
||||
</a>
|
||||
<a href="https://github.com/GreptimeTeam/greptimedb/releases/latest">
|
||||
<img src="https://img.shields.io/github/release-date/GreptimeTeam/greptimedb.svg" alt="Releases"/>
|
||||
</a>
|
||||
<a href="https://hub.docker.com/r/greptime/greptimedb/">
|
||||
<img src="https://img.shields.io/docker/pulls/greptime/greptimedb.svg" alt="Docker Pulls"/>
|
||||
</a>
|
||||
<a href="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml">
|
||||
<img src="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml/badge.svg" alt="GitHub Actions"/>
|
||||
</a>
|
||||
<a href="https://codecov.io/gh/GrepTimeTeam/greptimedb">
|
||||
<img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/main/graph/badge.svg?token=FITFDI3J3C" alt="Codecov"/>
|
||||
</a>
|
||||
<a href="https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE">
|
||||
<img src="https://img.shields.io/github/license/greptimeTeam/greptimedb" alt="License"/>
|
||||
</a>
|
||||
|
||||
<br/>
|
||||
|
||||
[](https://twitter.com/greptime/)
|
||||
[](https://www.linkedin.com/company/greptime/)
|
||||
[](https://greptime.com/slack)
|
||||
<a href="https://greptime.com/slack">
|
||||
<img src="https://img.shields.io/badge/slack-GreptimeDB-0abd59?logo=slack&style=for-the-badge" alt="Slack"/>
|
||||
</a>
|
||||
<a href="https://twitter.com/greptime">
|
||||
<img src="https://img.shields.io/badge/twitter-follow_us-1d9bf0.svg?style=for-the-badge" alt="Twitter"/>
|
||||
</a>
|
||||
<a href="https://www.linkedin.com/company/greptime/">
|
||||
<img src="https://img.shields.io/badge/linkedin-connect_with_us-0a66c2.svg?style=for-the-badge" alt="LinkedIn"/>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
## What is GreptimeDB
|
||||
## Introduction
|
||||
|
||||
GreptimeDB is an open-source time-series database focusing on efficiency, scalability, and analytical capabilities.
|
||||
It's designed to work on infrastructure of the cloud era, and users benefit from its elasticity and commodity storage.
|
||||
**GreptimeDB** is an open-source time-series database focusing on efficiency, scalability, and analytical capabilities.
|
||||
Designed to work on infrastructure of the cloud era, GreptimeDB benefits users with its elasticity and commodity storage, offering a fast and cost-effective **alternative to InfluxDB** and a **long-term storage for Prometheus**.
|
||||
|
||||
Our core developers have been building time-series data platforms for years. Based on their best-practices, GreptimeDB is born to give you:
|
||||
## Why GreptimeDB
|
||||
|
||||
* **Compatible with InfluxDB, Prometheus and more protocols**: Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc. [Read more](https://docs.greptime.com/user-guide/clients/overview).
|
||||
* **Easy horizontal scaling**: Seamless scalability from a standalone binary at edge to a robust, highly available distributed cluster in cloud, with a transparent experience for both developers and administrators.
|
||||
* **Analyzing time-series data**: Native SQL and PromQL for queries, and Python scripting to facilitate complex analytical tasks.
|
||||
* **Cloud-native distributed database**: Fully open-source distributed cluster architecture that harnesses the power of cloud-native elastic computing resources.
|
||||
* **Performance and Cost-effective**: Flexible indexing capabilities and distributed, parallel-processing query engine, tackling high-cardinality issues. Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends, particularly cloud object storage with 50x cost efficiency.
|
||||
Our core developers have been building time-series data platforms for years. Based on our best-practices, GreptimeDB is born to give you:
|
||||
|
||||
## Quickstart with [GreptimePlay](https://greptime.com/playground)
|
||||
* **Easy horizontal scaling**
|
||||
|
||||
Seamless scalability from a standalone binary at edge to a robust, highly available distributed cluster in cloud, with a transparent experience for both developers and administrators.
|
||||
|
||||
* **Analyzing time-series data**
|
||||
|
||||
Query your time-series data with SQL and PromQL. Use Python scripts to facilitate complex analytical tasks.
|
||||
|
||||
* **Cloud-native distributed database**
|
||||
|
||||
Fully open-source distributed cluster architecture that harnesses the power of cloud-native elastic computing resources.
|
||||
|
||||
* **Performance and Cost-effective**
|
||||
|
||||
Flexible indexing capabilities and distributed, parallel-processing query engine, tackling high-cardinality issues. Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends, particularly cloud object storage with 50x cost efficiency.
|
||||
|
||||
* **Compatible with InfluxDB, Prometheus and more protocols**
|
||||
|
||||
Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc. [Read more](https://docs.greptime.com/user-guide/clients/overview).
|
||||
|
||||
## Try GreptimeDB
|
||||
|
||||
### 1. [GreptimePlay](https://greptime.com/playground)
|
||||
|
||||
Try out the features of GreptimeDB right from your browser.
|
||||
|
||||
## Up & Running
|
||||
### 2. [GreptimeCloud](https://console.greptime.cloud/)
|
||||
|
||||
The recommended way to install GreptimeDB is via Docker:
|
||||
Start instantly with a free cluster.
|
||||
|
||||
### 3. Docker Image
|
||||
|
||||
To install GreptimeDB locally, the recommended way is via Docker:
|
||||
|
||||
```shell
|
||||
docker pull greptime/greptimedb
|
||||
@@ -44,32 +98,13 @@ docker pull greptime/greptimedb
|
||||
Start a GreptimeDB container with:
|
||||
|
||||
```shell
|
||||
docker run -p 4000-4003:4000-4003 \
|
||||
-p 4242:4242 -v "$(pwd)/greptimedb:/tmp/greptimedb" \
|
||||
--name greptime --rm \
|
||||
greptime/greptimedb standalone start \
|
||||
--http-addr 0.0.0.0:4000 \
|
||||
--rpc-addr 0.0.0.0:4001 \
|
||||
--mysql-addr 0.0.0.0:4002 \
|
||||
--postgres-addr 0.0.0.0:4003 \
|
||||
--opentsdb-addr 0.0.0.0:4242
|
||||
docker run --rm --name greptime --net=host greptime/greptimedb standalone start
|
||||
```
|
||||
|
||||
Connect to the server and test:
|
||||
Read more about [Installation](https://docs.greptime.com/getting-started/installation/overview) on docs.
|
||||
|
||||
```shell
|
||||
curl -X POST -d 'sql=SELECT 42&format=csv' http://localhost:4000/v1/sql
|
||||
```
|
||||
## Getting Started
|
||||
|
||||
You should get a reply as:
|
||||
|
||||
```
|
||||
42
|
||||
```
|
||||
|
||||
Read more on docs:
|
||||
|
||||
* [Installation](https://docs.greptime.com/getting-started/installation/overview)
|
||||
* [Quickstart](https://docs.greptime.com/getting-started/quick-start/overview)
|
||||
* [Write Data](https://docs.greptime.com/user-guide/clients/overview)
|
||||
* [Query Data](https://docs.greptime.com/user-guide/query-data/overview)
|
||||
@@ -95,11 +130,6 @@ Run a standalone server:
|
||||
cargo run -- standalone start
|
||||
```
|
||||
|
||||
## Documentation
|
||||
|
||||
- [User guide](https://docs.greptime.com/user-guide/concepts/overview)
|
||||
- [API docs](https://greptimedb.rs)
|
||||
|
||||
## Extension
|
||||
|
||||
### Dashboard
|
||||
@@ -113,7 +143,7 @@ cargo run -- standalone start
|
||||
- [GreptimeDB C++ Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-cpp)
|
||||
- [GreptimeDB Erlang Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-erl)
|
||||
- [GreptimeDB Rust Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-rust)
|
||||
- [GreptimeDB JavaScript Ingester](https://github.com/GreptimeTeam/greptime-ingester-js)
|
||||
- [GreptimeDB JavaScript Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-js)
|
||||
|
||||
### Grafana Dashboard
|
||||
|
||||
@@ -121,12 +151,9 @@ Our official Grafana dashboard is available at [grafana](grafana/README.md) dire
|
||||
|
||||
## Project Status
|
||||
|
||||
This project is in its early stage and under heavy development. We move fast and
|
||||
break things. Benchmark on development branch may not represent its potential
|
||||
performance. We release pre-built binaries constantly for functional
|
||||
evaluation. Do not use it in production at the moment.
|
||||
|
||||
For future plans, check out [GreptimeDB roadmap](https://github.com/GreptimeTeam/greptimedb/issues/3412).
|
||||
The current version has not yet reached General Availability (GA) standards.
|
||||
In line with our Greptime 2024 Roadmap, we plan to achieve a production-level
|
||||
version with the update to v1.0 in August. [[Join Force]](https://github.com/GreptimeTeam/greptimedb/issues/3412)
|
||||
|
||||
## Community
|
||||
|
||||
|
||||
@@ -8,12 +8,31 @@ license.workspace = true
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
api.workspace = true
|
||||
arrow.workspace = true
|
||||
chrono.workspace = true
|
||||
clap.workspace = true
|
||||
client.workspace = true
|
||||
common-base.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-wal.workspace = true
|
||||
dotenv.workspace = true
|
||||
futures.workspace = true
|
||||
futures-util.workspace = true
|
||||
humantime.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
indicatif = "0.17.1"
|
||||
itertools.workspace = true
|
||||
lazy_static.workspace = true
|
||||
log-store.workspace = true
|
||||
mito2.workspace = true
|
||||
num_cpus.workspace = true
|
||||
parquet.workspace = true
|
||||
prometheus.workspace = true
|
||||
rand.workspace = true
|
||||
rskafka.workspace = true
|
||||
serde.workspace = true
|
||||
store-api.workspace = true
|
||||
tokio.workspace = true
|
||||
toml.workspace = true
|
||||
uuid.workspace = true
|
||||
|
||||
11
benchmarks/README.md
Normal file
11
benchmarks/README.md
Normal file
@@ -0,0 +1,11 @@
|
||||
Benchmarkers for GreptimeDB
|
||||
--------------------------------
|
||||
|
||||
## Wal Benchmarker
|
||||
The wal benchmarker serves to evaluate the performance of GreptimeDB's Write-Ahead Log (WAL) component. It meticulously assesses the read/write performance of the WAL under diverse workloads generated by the benchmarker.
|
||||
|
||||
|
||||
### How to use
|
||||
To compile the benchmarker, navigate to the `greptimedb/benchmarks` directory and execute `cargo build --release`. Subsequently, you'll find the compiled target located at `greptimedb/target/release/wal_bench`.
|
||||
|
||||
The `./wal_bench -h` command reveals numerous arguments that the target accepts. Among these, a notable one is the `cfg-file` argument. By utilizing a configuration file in the TOML format, you can bypass the need to repeatedly specify cumbersome arguments.
|
||||
21
benchmarks/config/wal_bench.example.toml
Normal file
21
benchmarks/config/wal_bench.example.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
# Refer to the documentation of `Args` in `benchmarks/src/wal_bench.rs`.
|
||||
wal_provider = "kafka"
|
||||
bootstrap_brokers = ["localhost:9092"]
|
||||
num_workers = 10
|
||||
num_topics = 32
|
||||
num_regions = 1000
|
||||
num_scrapes = 1000
|
||||
num_rows = 5
|
||||
col_types = "ifs"
|
||||
max_batch_size = "512KB"
|
||||
linger = "1ms"
|
||||
backoff_init = "10ms"
|
||||
backoff_max = "1ms"
|
||||
backoff_base = 2
|
||||
backoff_deadline = "3s"
|
||||
compression = "zstd"
|
||||
rng_seed = 42
|
||||
skip_read = false
|
||||
skip_write = false
|
||||
random_topics = true
|
||||
report_metrics = false
|
||||
@@ -215,37 +215,7 @@ fn build_values(column: &ArrayRef) -> (Values, ColumnDataType) {
|
||||
ColumnDataType::String,
|
||||
)
|
||||
}
|
||||
DataType::Null
|
||||
| DataType::Boolean
|
||||
| DataType::Int8
|
||||
| DataType::Int16
|
||||
| DataType::Int32
|
||||
| DataType::UInt8
|
||||
| DataType::UInt16
|
||||
| DataType::UInt32
|
||||
| DataType::UInt64
|
||||
| DataType::Float16
|
||||
| DataType::Float32
|
||||
| DataType::Date32
|
||||
| DataType::Date64
|
||||
| DataType::Time32(_)
|
||||
| DataType::Time64(_)
|
||||
| DataType::Duration(_)
|
||||
| DataType::Interval(_)
|
||||
| DataType::Binary
|
||||
| DataType::FixedSizeBinary(_)
|
||||
| DataType::LargeBinary
|
||||
| DataType::LargeUtf8
|
||||
| DataType::List(_)
|
||||
| DataType::FixedSizeList(_, _)
|
||||
| DataType::LargeList(_)
|
||||
| DataType::Struct(_)
|
||||
| DataType::Union(_, _)
|
||||
| DataType::Dictionary(_, _)
|
||||
| DataType::Decimal128(_, _)
|
||||
| DataType::Decimal256(_, _)
|
||||
| DataType::RunEndEncoded(_, _)
|
||||
| DataType::Map(_, _) => todo!(),
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -444,7 +414,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
fn query_set(table_name: &str) -> HashMap<String, String> {
|
||||
HashMap::from([
|
||||
(
|
||||
"count_all".to_string(),
|
||||
"count_all".to_string(),
|
||||
format!("SELECT COUNT(*) FROM {table_name};"),
|
||||
),
|
||||
(
|
||||
|
||||
326
benchmarks/src/bin/wal_bench.rs
Normal file
326
benchmarks/src/bin/wal_bench.rs
Normal file
@@ -0,0 +1,326 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(int_roundings)]
|
||||
|
||||
use std::fs;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use api::v1::{ColumnDataType, ColumnSchema, SemanticType};
|
||||
use benchmarks::metrics;
|
||||
use benchmarks::wal_bench::{Args, Config, Region, WalProvider};
|
||||
use clap::Parser;
|
||||
use common_telemetry::info;
|
||||
use common_wal::config::kafka::common::BackoffConfig;
|
||||
use common_wal::config::kafka::DatanodeKafkaConfig as KafkaConfig;
|
||||
use common_wal::config::raft_engine::RaftEngineConfig;
|
||||
use common_wal::options::{KafkaWalOptions, WalOptions};
|
||||
use itertools::Itertools;
|
||||
use log_store::kafka::log_store::KafkaLogStore;
|
||||
use log_store::raft_engine::log_store::RaftEngineLogStore;
|
||||
use mito2::wal::Wal;
|
||||
use prometheus::{Encoder, TextEncoder};
|
||||
use rand::distributions::{Alphanumeric, DistString};
|
||||
use rand::rngs::SmallRng;
|
||||
use rand::SeedableRng;
|
||||
use rskafka::client::partition::Compression;
|
||||
use rskafka::client::ClientBuilder;
|
||||
use store_api::logstore::LogStore;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
async fn run_benchmarker<S: LogStore>(cfg: &Config, topics: &[String], wal: Arc<Wal<S>>) {
|
||||
let chunk_size = cfg.num_regions.div_ceil(cfg.num_workers);
|
||||
let region_chunks = (0..cfg.num_regions)
|
||||
.map(|id| {
|
||||
build_region(
|
||||
id as u64,
|
||||
topics,
|
||||
&mut SmallRng::seed_from_u64(cfg.rng_seed),
|
||||
cfg,
|
||||
)
|
||||
})
|
||||
.chunks(chunk_size as usize)
|
||||
.into_iter()
|
||||
.map(|chunk| Arc::new(chunk.collect::<Vec<_>>()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut write_elapsed = 0;
|
||||
let mut read_elapsed = 0;
|
||||
|
||||
if !cfg.skip_write {
|
||||
info!("Benchmarking write ...");
|
||||
|
||||
let num_scrapes = cfg.num_scrapes;
|
||||
let timer = Instant::now();
|
||||
futures::future::join_all((0..cfg.num_workers).map(|i| {
|
||||
let wal = wal.clone();
|
||||
let regions = region_chunks[i as usize].clone();
|
||||
tokio::spawn(async move {
|
||||
for _ in 0..num_scrapes {
|
||||
let mut wal_writer = wal.writer();
|
||||
regions
|
||||
.iter()
|
||||
.for_each(|region| region.add_wal_entry(&mut wal_writer));
|
||||
wal_writer.write_to_wal().await.unwrap();
|
||||
}
|
||||
})
|
||||
}))
|
||||
.await;
|
||||
write_elapsed += timer.elapsed().as_millis();
|
||||
}
|
||||
|
||||
if !cfg.skip_read {
|
||||
info!("Benchmarking read ...");
|
||||
|
||||
let timer = Instant::now();
|
||||
futures::future::join_all((0..cfg.num_workers).map(|i| {
|
||||
let wal = wal.clone();
|
||||
let regions = region_chunks[i as usize].clone();
|
||||
tokio::spawn(async move {
|
||||
for region in regions.iter() {
|
||||
region.replay(&wal).await;
|
||||
}
|
||||
})
|
||||
}))
|
||||
.await;
|
||||
read_elapsed = timer.elapsed().as_millis();
|
||||
}
|
||||
|
||||
dump_report(cfg, write_elapsed, read_elapsed);
|
||||
}
|
||||
|
||||
fn build_region(id: u64, topics: &[String], rng: &mut SmallRng, cfg: &Config) -> Region {
|
||||
let wal_options = match cfg.wal_provider {
|
||||
WalProvider::Kafka => {
|
||||
assert!(!topics.is_empty());
|
||||
WalOptions::Kafka(KafkaWalOptions {
|
||||
topic: topics.get(id as usize % topics.len()).cloned().unwrap(),
|
||||
})
|
||||
}
|
||||
WalProvider::RaftEngine => WalOptions::RaftEngine,
|
||||
};
|
||||
Region::new(
|
||||
RegionId::from_u64(id),
|
||||
build_schema(&parse_col_types(&cfg.col_types), rng),
|
||||
wal_options,
|
||||
cfg.num_rows,
|
||||
cfg.rng_seed,
|
||||
)
|
||||
}
|
||||
|
||||
fn build_schema(col_types: &[ColumnDataType], mut rng: &mut SmallRng) -> Vec<ColumnSchema> {
|
||||
col_types
|
||||
.iter()
|
||||
.map(|col_type| ColumnSchema {
|
||||
column_name: Alphanumeric.sample_string(&mut rng, 5),
|
||||
datatype: *col_type as i32,
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
datatype_extension: None,
|
||||
})
|
||||
.chain(vec![ColumnSchema {
|
||||
column_name: "ts".to_string(),
|
||||
datatype: ColumnDataType::TimestampMillisecond as i32,
|
||||
semantic_type: SemanticType::Tag as i32,
|
||||
datatype_extension: None,
|
||||
}])
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn dump_report(cfg: &Config, write_elapsed: u128, read_elapsed: u128) {
|
||||
let cost_report = format!(
|
||||
"write costs: {} ms, read costs: {} ms",
|
||||
write_elapsed, read_elapsed,
|
||||
);
|
||||
|
||||
let total_written_bytes = metrics::METRIC_WAL_WRITE_BYTES_TOTAL.get() as u128;
|
||||
let write_throughput = if write_elapsed > 0 {
|
||||
(total_written_bytes * 1000).div_floor(write_elapsed)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let total_read_bytes = metrics::METRIC_WAL_READ_BYTES_TOTAL.get() as u128;
|
||||
let read_throughput = if read_elapsed > 0 {
|
||||
(total_read_bytes * 1000).div_floor(read_elapsed)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let throughput_report = format!(
|
||||
"total written bytes: {} bytes, total read bytes: {} bytes, write throuput: {} bytes/s ({} mb/s), read throughput: {} bytes/s ({} mb/s)",
|
||||
total_written_bytes,
|
||||
total_read_bytes,
|
||||
write_throughput,
|
||||
write_throughput.div_floor(1 << 20),
|
||||
read_throughput,
|
||||
read_throughput.div_floor(1 << 20),
|
||||
);
|
||||
|
||||
let metrics_report = if cfg.report_metrics {
|
||||
let mut buffer = Vec::new();
|
||||
let encoder = TextEncoder::new();
|
||||
let metrics = prometheus::gather();
|
||||
encoder.encode(&metrics, &mut buffer).unwrap();
|
||||
String::from_utf8(buffer).unwrap()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
info!(
|
||||
r#"
|
||||
Benchmark config:
|
||||
{cfg:?}
|
||||
|
||||
Benchmark report:
|
||||
{cost_report}
|
||||
{throughput_report}
|
||||
{metrics_report}"#
|
||||
);
|
||||
}
|
||||
|
||||
async fn create_topics(cfg: &Config) -> Vec<String> {
|
||||
// Creates topics.
|
||||
let client = ClientBuilder::new(cfg.bootstrap_brokers.clone())
|
||||
.build()
|
||||
.await
|
||||
.unwrap();
|
||||
let ctrl_client = client.controller_client().unwrap();
|
||||
let (topics, tasks): (Vec<_>, Vec<_>) = (0..cfg.num_topics)
|
||||
.map(|i| {
|
||||
let topic = if cfg.random_topics {
|
||||
format!(
|
||||
"greptime_wal_bench_topic_{}_{}",
|
||||
uuid::Uuid::new_v4().as_u128(),
|
||||
i
|
||||
)
|
||||
} else {
|
||||
format!("greptime_wal_bench_topic_{}", i)
|
||||
};
|
||||
let task = ctrl_client.create_topic(
|
||||
topic.clone(),
|
||||
1,
|
||||
cfg.bootstrap_brokers.len() as i16,
|
||||
2000,
|
||||
);
|
||||
(topic, task)
|
||||
})
|
||||
.unzip();
|
||||
// Must ignore errors since we allow topics being created more than once.
|
||||
let _ = futures::future::try_join_all(tasks).await;
|
||||
|
||||
topics
|
||||
}
|
||||
|
||||
fn parse_compression(comp: &str) -> Compression {
|
||||
match comp {
|
||||
"no" => Compression::NoCompression,
|
||||
"gzip" => Compression::Gzip,
|
||||
"lz4" => Compression::Lz4,
|
||||
"snappy" => Compression::Snappy,
|
||||
"zstd" => Compression::Zstd,
|
||||
other => unreachable!("Unrecognized compression {other}"),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_col_types(col_types: &str) -> Vec<ColumnDataType> {
|
||||
let parts = col_types.split('x').collect::<Vec<_>>();
|
||||
assert!(parts.len() <= 2);
|
||||
|
||||
let pattern = parts[0];
|
||||
let repeat = parts
|
||||
.get(1)
|
||||
.map(|r| r.parse::<usize>().unwrap())
|
||||
.unwrap_or(1);
|
||||
|
||||
pattern
|
||||
.chars()
|
||||
.map(|c| match c {
|
||||
'i' | 'I' => ColumnDataType::Int64,
|
||||
'f' | 'F' => ColumnDataType::Float64,
|
||||
's' | 'S' => ColumnDataType::String,
|
||||
other => unreachable!("Cannot parse {other} as a column data type"),
|
||||
})
|
||||
.cycle()
|
||||
.take(pattern.len() * repeat)
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// Sets the global logging to INFO and suppress loggings from rskafka other than ERROR and upper ones.
|
||||
std::env::set_var("UNITTEST_LOG_LEVEL", "info,rskafka=error");
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let args = Args::parse();
|
||||
let cfg = if !args.cfg_file.is_empty() {
|
||||
toml::from_str(&fs::read_to_string(&args.cfg_file).unwrap()).unwrap()
|
||||
} else {
|
||||
Config::from(args)
|
||||
};
|
||||
|
||||
// Validates arguments.
|
||||
if cfg.num_regions < cfg.num_workers {
|
||||
panic!("num_regions must be greater than or equal to num_workers");
|
||||
}
|
||||
if cfg
|
||||
.num_workers
|
||||
.min(cfg.num_topics)
|
||||
.min(cfg.num_regions)
|
||||
.min(cfg.num_scrapes)
|
||||
.min(cfg.max_batch_size.as_bytes() as u32)
|
||||
.min(cfg.bootstrap_brokers.len() as u32)
|
||||
== 0
|
||||
{
|
||||
panic!("Invalid arguments");
|
||||
}
|
||||
|
||||
tokio::runtime::Builder::new_multi_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.unwrap()
|
||||
.block_on(async {
|
||||
match cfg.wal_provider {
|
||||
WalProvider::Kafka => {
|
||||
let topics = create_topics(&cfg).await;
|
||||
let kafka_cfg = KafkaConfig {
|
||||
broker_endpoints: cfg.bootstrap_brokers.clone(),
|
||||
max_batch_size: cfg.max_batch_size,
|
||||
linger: cfg.linger,
|
||||
backoff: BackoffConfig {
|
||||
init: cfg.backoff_init,
|
||||
max: cfg.backoff_max,
|
||||
base: cfg.backoff_base,
|
||||
deadline: Some(cfg.backoff_deadline),
|
||||
},
|
||||
compression: parse_compression(&cfg.compression),
|
||||
..Default::default()
|
||||
};
|
||||
let store = Arc::new(KafkaLogStore::try_new(&kafka_cfg).await.unwrap());
|
||||
let wal = Arc::new(Wal::new(store));
|
||||
run_benchmarker(&cfg, &topics, wal).await;
|
||||
}
|
||||
WalProvider::RaftEngine => {
|
||||
// The benchmarker assumes the raft engine directory exists.
|
||||
let store = RaftEngineLogStore::try_new(
|
||||
"/tmp/greptimedb/raft-engine-wal".to_string(),
|
||||
RaftEngineConfig::default(),
|
||||
)
|
||||
.await
|
||||
.map(Arc::new)
|
||||
.unwrap();
|
||||
let wal = Arc::new(Wal::new(store));
|
||||
run_benchmarker(&cfg, &[], wal).await;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -11,3 +11,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod metrics;
|
||||
pub mod wal_bench;
|
||||
39
benchmarks/src/metrics.rs
Normal file
39
benchmarks/src/metrics.rs
Normal file
@@ -0,0 +1,39 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use prometheus::*;
|
||||
|
||||
/// Logstore label.
|
||||
pub const LOGSTORE_LABEL: &str = "logstore";
|
||||
/// Operation type label.
|
||||
pub const OPTYPE_LABEL: &str = "optype";
|
||||
|
||||
lazy_static! {
|
||||
/// Counters of bytes of each operation on a logstore.
|
||||
pub static ref METRIC_WAL_OP_BYTES_TOTAL: IntCounterVec = register_int_counter_vec!(
|
||||
"greptime_bench_wal_op_bytes_total",
|
||||
"wal operation bytes total",
|
||||
&[OPTYPE_LABEL],
|
||||
)
|
||||
.unwrap();
|
||||
/// Counter of bytes of the append_batch operation.
|
||||
pub static ref METRIC_WAL_WRITE_BYTES_TOTAL: IntCounter = METRIC_WAL_OP_BYTES_TOTAL.with_label_values(
|
||||
&["write"],
|
||||
);
|
||||
/// Counter of bytes of the read operation.
|
||||
pub static ref METRIC_WAL_READ_BYTES_TOTAL: IntCounter = METRIC_WAL_OP_BYTES_TOTAL.with_label_values(
|
||||
&["read"],
|
||||
);
|
||||
}
|
||||
361
benchmarks/src/wal_bench.rs
Normal file
361
benchmarks/src/wal_bench.rs
Normal file
@@ -0,0 +1,361 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::mem::size_of;
|
||||
use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::{ColumnDataType, ColumnSchema, Mutation, OpType, Row, Rows, Value, WalEntry};
|
||||
use clap::{Parser, ValueEnum};
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_wal::options::WalOptions;
|
||||
use futures::StreamExt;
|
||||
use mito2::wal::{Wal, WalWriter};
|
||||
use rand::distributions::{Alphanumeric, DistString, Uniform};
|
||||
use rand::rngs::SmallRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use store_api::logstore::LogStore;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::metrics;
|
||||
|
||||
/// The wal provider.
|
||||
#[derive(Clone, ValueEnum, Default, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum WalProvider {
|
||||
#[default]
|
||||
RaftEngine,
|
||||
Kafka,
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
pub struct Args {
|
||||
/// The provided configuration file.
|
||||
/// The example configuration file can be found at `greptimedb/benchmarks/config/wal_bench.example.toml`.
|
||||
#[clap(long, short = 'c')]
|
||||
pub cfg_file: String,
|
||||
|
||||
/// The wal provider.
|
||||
#[clap(long, value_enum, default_value_t = WalProvider::default())]
|
||||
pub wal_provider: WalProvider,
|
||||
|
||||
/// The advertised addresses of the kafka brokers.
|
||||
/// If there're multiple bootstrap brokers, their addresses should be separated by comma, for e.g. "localhost:9092,localhost:9093".
|
||||
#[clap(long, short = 'b', default_value = "localhost:9092")]
|
||||
pub bootstrap_brokers: String,
|
||||
|
||||
/// The number of workers each running in a dedicated thread.
|
||||
#[clap(long, default_value_t = num_cpus::get() as u32)]
|
||||
pub num_workers: u32,
|
||||
|
||||
/// The number of kafka topics to be created.
|
||||
#[clap(long, default_value_t = 32)]
|
||||
pub num_topics: u32,
|
||||
|
||||
/// The number of regions.
|
||||
#[clap(long, default_value_t = 1000)]
|
||||
pub num_regions: u32,
|
||||
|
||||
/// The number of times each region is scraped.
|
||||
#[clap(long, default_value_t = 1000)]
|
||||
pub num_scrapes: u32,
|
||||
|
||||
/// The number of rows in each wal entry.
|
||||
/// Each time a region is scraped, a wal entry containing will be produced.
|
||||
#[clap(long, default_value_t = 5)]
|
||||
pub num_rows: u32,
|
||||
|
||||
/// The column types of the schema for each region.
|
||||
/// Currently, three column types are supported:
|
||||
/// - i = ColumnDataType::Int64
|
||||
/// - f = ColumnDataType::Float64
|
||||
/// - s = ColumnDataType::String
|
||||
/// For e.g., "ifs" will be parsed as three columns: i64, f64, and string.
|
||||
///
|
||||
/// Additionally, a "x" sign can be provided to repeat the column types for a given number of times.
|
||||
/// For e.g., "iix2" will be parsed as 4 columns: i64, i64, i64, and i64.
|
||||
/// This feature is useful if you want to specify many columns.
|
||||
#[clap(long, default_value = "ifs")]
|
||||
pub col_types: String,
|
||||
|
||||
/// The maximum size of a batch of kafka records.
|
||||
/// The default value is 1mb.
|
||||
#[clap(long, default_value = "512KB")]
|
||||
pub max_batch_size: ReadableSize,
|
||||
|
||||
/// The minimum latency the kafka client issues a batch of kafka records.
|
||||
/// However, a batch of kafka records would be immediately issued if a record cannot be fit into the batch.
|
||||
#[clap(long, default_value = "1ms")]
|
||||
pub linger: String,
|
||||
|
||||
/// The initial backoff delay of the kafka consumer.
|
||||
#[clap(long, default_value = "10ms")]
|
||||
pub backoff_init: String,
|
||||
|
||||
/// The maximum backoff delay of the kafka consumer.
|
||||
#[clap(long, default_value = "1s")]
|
||||
pub backoff_max: String,
|
||||
|
||||
/// The exponential backoff rate of the kafka consumer. The next back off = base * the current backoff.
|
||||
#[clap(long, default_value_t = 2)]
|
||||
pub backoff_base: u32,
|
||||
|
||||
/// The deadline of backoff. The backoff ends if the total backoff delay reaches the deadline.
|
||||
#[clap(long, default_value = "3s")]
|
||||
pub backoff_deadline: String,
|
||||
|
||||
/// The client-side compression algorithm for kafka records.
|
||||
#[clap(long, default_value = "zstd")]
|
||||
pub compression: String,
|
||||
|
||||
/// The seed of random number generators.
|
||||
#[clap(long, default_value_t = 42)]
|
||||
pub rng_seed: u64,
|
||||
|
||||
/// Skips the read phase, aka. region replay, if set to true.
|
||||
#[clap(long, default_value_t = false)]
|
||||
pub skip_read: bool,
|
||||
|
||||
/// Skips the write phase if set to true.
|
||||
#[clap(long, default_value_t = false)]
|
||||
pub skip_write: bool,
|
||||
|
||||
/// Randomly generates topic names if set to true.
|
||||
/// Useful when you want to run the benchmarker without worrying about the topics created before.
|
||||
#[clap(long, default_value_t = false)]
|
||||
pub random_topics: bool,
|
||||
|
||||
/// Logs out the gathered prometheus metrics when the benchmarker ends.
|
||||
#[clap(long, default_value_t = false)]
|
||||
pub report_metrics: bool,
|
||||
}
|
||||
|
||||
/// Benchmarker config.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Config {
|
||||
pub wal_provider: WalProvider,
|
||||
pub bootstrap_brokers: Vec<String>,
|
||||
pub num_workers: u32,
|
||||
pub num_topics: u32,
|
||||
pub num_regions: u32,
|
||||
pub num_scrapes: u32,
|
||||
pub num_rows: u32,
|
||||
pub col_types: String,
|
||||
pub max_batch_size: ReadableSize,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub linger: Duration,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub backoff_init: Duration,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub backoff_max: Duration,
|
||||
pub backoff_base: u32,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub backoff_deadline: Duration,
|
||||
pub compression: String,
|
||||
pub rng_seed: u64,
|
||||
pub skip_read: bool,
|
||||
pub skip_write: bool,
|
||||
pub random_topics: bool,
|
||||
pub report_metrics: bool,
|
||||
}
|
||||
|
||||
impl From<Args> for Config {
|
||||
fn from(args: Args) -> Self {
|
||||
let cfg = Self {
|
||||
wal_provider: args.wal_provider,
|
||||
bootstrap_brokers: args
|
||||
.bootstrap_brokers
|
||||
.split(',')
|
||||
.map(ToString::to_string)
|
||||
.collect::<Vec<_>>(),
|
||||
num_workers: args.num_workers.min(num_cpus::get() as u32),
|
||||
num_topics: args.num_topics,
|
||||
num_regions: args.num_regions,
|
||||
num_scrapes: args.num_scrapes,
|
||||
num_rows: args.num_rows,
|
||||
col_types: args.col_types,
|
||||
max_batch_size: args.max_batch_size,
|
||||
linger: humantime::parse_duration(&args.linger).unwrap(),
|
||||
backoff_init: humantime::parse_duration(&args.backoff_init).unwrap(),
|
||||
backoff_max: humantime::parse_duration(&args.backoff_max).unwrap(),
|
||||
backoff_base: args.backoff_base,
|
||||
backoff_deadline: humantime::parse_duration(&args.backoff_deadline).unwrap(),
|
||||
compression: args.compression,
|
||||
rng_seed: args.rng_seed,
|
||||
skip_read: args.skip_read,
|
||||
skip_write: args.skip_write,
|
||||
random_topics: args.random_topics,
|
||||
report_metrics: args.report_metrics,
|
||||
};
|
||||
|
||||
cfg
|
||||
}
|
||||
}
|
||||
|
||||
/// The region used for wal benchmarker.
|
||||
pub struct Region {
|
||||
id: RegionId,
|
||||
schema: Vec<ColumnSchema>,
|
||||
wal_options: WalOptions,
|
||||
next_sequence: AtomicU64,
|
||||
next_entry_id: AtomicU64,
|
||||
next_timestamp: AtomicI64,
|
||||
rng: Mutex<Option<SmallRng>>,
|
||||
num_rows: u32,
|
||||
}
|
||||
|
||||
impl Region {
|
||||
/// Creates a new region.
|
||||
pub fn new(
|
||||
id: RegionId,
|
||||
schema: Vec<ColumnSchema>,
|
||||
wal_options: WalOptions,
|
||||
num_rows: u32,
|
||||
rng_seed: u64,
|
||||
) -> Self {
|
||||
Self {
|
||||
id,
|
||||
schema,
|
||||
wal_options,
|
||||
next_sequence: AtomicU64::new(1),
|
||||
next_entry_id: AtomicU64::new(1),
|
||||
next_timestamp: AtomicI64::new(1655276557000),
|
||||
rng: Mutex::new(Some(SmallRng::seed_from_u64(rng_seed))),
|
||||
num_rows,
|
||||
}
|
||||
}
|
||||
|
||||
/// Scrapes the region and adds the generated entry to wal.
|
||||
pub fn add_wal_entry<S: LogStore>(&self, wal_writer: &mut WalWriter<S>) {
|
||||
let mutation = Mutation {
|
||||
op_type: OpType::Put as i32,
|
||||
sequence: self
|
||||
.next_sequence
|
||||
.fetch_add(self.num_rows as u64, Ordering::Relaxed),
|
||||
rows: Some(self.build_rows()),
|
||||
};
|
||||
let entry = WalEntry {
|
||||
mutations: vec![mutation],
|
||||
};
|
||||
metrics::METRIC_WAL_WRITE_BYTES_TOTAL.inc_by(Self::entry_estimated_size(&entry) as u64);
|
||||
|
||||
wal_writer
|
||||
.add_entry(
|
||||
self.id,
|
||||
self.next_entry_id.fetch_add(1, Ordering::Relaxed),
|
||||
&entry,
|
||||
&self.wal_options,
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// Replays the region.
|
||||
pub async fn replay<S: LogStore>(&self, wal: &Arc<Wal<S>>) {
|
||||
let mut wal_stream = wal.scan(self.id, 0, &self.wal_options).unwrap();
|
||||
while let Some(res) = wal_stream.next().await {
|
||||
let (_, entry) = res.unwrap();
|
||||
metrics::METRIC_WAL_READ_BYTES_TOTAL.inc_by(Self::entry_estimated_size(&entry) as u64);
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the estimated size in bytes of the entry.
|
||||
pub fn entry_estimated_size(entry: &WalEntry) -> usize {
|
||||
let wrapper_size = size_of::<WalEntry>()
|
||||
+ entry.mutations.capacity() * size_of::<Mutation>()
|
||||
+ size_of::<Rows>();
|
||||
|
||||
let rows = entry.mutations[0].rows.as_ref().unwrap();
|
||||
|
||||
let schema_size = rows.schema.capacity() * size_of::<ColumnSchema>()
|
||||
+ rows
|
||||
.schema
|
||||
.iter()
|
||||
.map(|s| s.column_name.capacity())
|
||||
.sum::<usize>();
|
||||
let values_size = (rows.rows.capacity() * size_of::<Row>())
|
||||
+ rows
|
||||
.rows
|
||||
.iter()
|
||||
.map(|r| r.values.capacity() * size_of::<Value>())
|
||||
.sum::<usize>();
|
||||
|
||||
wrapper_size + schema_size + values_size
|
||||
}
|
||||
|
||||
fn build_rows(&self) -> Rows {
|
||||
let cols = self
|
||||
.schema
|
||||
.iter()
|
||||
.map(|col_schema| {
|
||||
let col_data_type = ColumnDataType::try_from(col_schema.datatype).unwrap();
|
||||
self.build_col(&col_data_type, self.num_rows)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let rows = (0..self.num_rows)
|
||||
.map(|i| {
|
||||
let values = cols.iter().map(|col| col[i as usize].clone()).collect();
|
||||
Row { values }
|
||||
})
|
||||
.collect();
|
||||
|
||||
Rows {
|
||||
schema: self.schema.clone(),
|
||||
rows,
|
||||
}
|
||||
}
|
||||
|
||||
fn build_col(&self, col_data_type: &ColumnDataType, num_rows: u32) -> Vec<Value> {
|
||||
let mut rng_guard = self.rng.lock().unwrap();
|
||||
let rng = rng_guard.as_mut().unwrap();
|
||||
match col_data_type {
|
||||
ColumnDataType::TimestampMillisecond => (0..num_rows)
|
||||
.map(|_| {
|
||||
let ts = self.next_timestamp.fetch_add(1000, Ordering::Relaxed);
|
||||
Value {
|
||||
value_data: Some(ValueData::TimestampMillisecondValue(ts)),
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
ColumnDataType::Int64 => (0..num_rows)
|
||||
.map(|_| {
|
||||
let v = rng.sample(Uniform::new(0, 10_000));
|
||||
Value {
|
||||
value_data: Some(ValueData::I64Value(v)),
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
ColumnDataType::Float64 => (0..num_rows)
|
||||
.map(|_| {
|
||||
let v = rng.sample(Uniform::new(0.0, 5000.0));
|
||||
Value {
|
||||
value_data: Some(ValueData::F64Value(v)),
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
ColumnDataType::String => (0..num_rows)
|
||||
.map(|_| {
|
||||
let v = Alphanumeric.sample_string(rng, 10);
|
||||
Value {
|
||||
value_data: Some(ValueData::StringValue(v)),
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
127
cliff.toml
Normal file
127
cliff.toml
Normal file
@@ -0,0 +1,127 @@
|
||||
# https://git-cliff.org/docs/configuration
|
||||
|
||||
[remote.github]
|
||||
owner = "GreptimeTeam"
|
||||
repo = "greptimedb"
|
||||
|
||||
[changelog]
|
||||
header = ""
|
||||
footer = ""
|
||||
# template for the changelog body
|
||||
# https://keats.github.io/tera/docs/#introduction
|
||||
body = """
|
||||
# {{ version }}
|
||||
|
||||
Release date: {{ timestamp | date(format="%B %d, %Y") }}
|
||||
|
||||
{%- set breakings = commits | filter(attribute="breaking", value=true) -%}
|
||||
{%- if breakings | length > 0 %}
|
||||
|
||||
## Breaking changes
|
||||
{% for commit in breakings %}
|
||||
* {{ commit.github.pr_title }}\
|
||||
{% if commit.github.username %} by \
|
||||
{% set author = commit.github.username -%}
|
||||
[@{{ author }}](https://github.com/{{ author }})
|
||||
{%- endif -%}
|
||||
{% if commit.github.pr_number %} in \
|
||||
{% set number = commit.github.pr_number -%}
|
||||
[#{{ number }}]({{ self::remote_url() }}/pull/{{ number }})
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endif -%}
|
||||
|
||||
{%- set grouped_commits = commits | filter(attribute="breaking", value=false) | group_by(attribute="group") -%}
|
||||
{% for group, commits in grouped_commits %}
|
||||
|
||||
### {{ group | striptags | trim | upper_first }}
|
||||
{% for commit in commits %}
|
||||
* {{ commit.github.pr_title }}\
|
||||
{% if commit.github.username %} by \
|
||||
{% set author = commit.github.username -%}
|
||||
[@{{ author }}](https://github.com/{{ author }})
|
||||
{%- endif -%}
|
||||
{% if commit.github.pr_number %} in \
|
||||
{% set number = commit.github.pr_number -%}
|
||||
[#{{ number }}]({{ self::remote_url() }}/pull/{{ number }})
|
||||
{%- endif %}
|
||||
{%- endfor -%}
|
||||
{% endfor %}
|
||||
|
||||
{%- if github.contributors | filter(attribute="is_first_time", value=true) | length != 0 %}
|
||||
{% raw %}\n{% endraw -%}
|
||||
## New Contributors
|
||||
{% endif -%}
|
||||
{% for contributor in github.contributors | filter(attribute="is_first_time", value=true) %}
|
||||
* [@{{ contributor.username }}](https://github.com/{{ contributor.username }}) made their first contribution
|
||||
{%- if contributor.pr_number %} in \
|
||||
[#{{ contributor.pr_number }}]({{ self::remote_url() }}/pull/{{ contributor.pr_number }}) \
|
||||
{%- endif %}
|
||||
{%- endfor -%}
|
||||
|
||||
{% if github.contributors | length != 0 %}
|
||||
{% raw %}\n{% endraw -%}
|
||||
## All Contributors
|
||||
|
||||
We would like to thank the following contributors from the GreptimeDB community:
|
||||
|
||||
{%- set contributors = github.contributors | sort(attribute="username") | map(attribute="username") -%}
|
||||
{%- set bots = ['dependabot[bot]'] %}
|
||||
|
||||
{% for contributor in contributors %}
|
||||
{%- if bots is containing(contributor) -%}{% continue %}{%- endif -%}
|
||||
{%- if loop.first -%}
|
||||
[@{{ contributor }}](https://github.com/{{ contributor }})
|
||||
{%- else -%}
|
||||
, [@{{ contributor }}](https://github.com/{{ contributor }})
|
||||
{%- endif -%}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{% raw %}\n{% endraw %}
|
||||
|
||||
{%- macro remote_url() -%}
|
||||
https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}
|
||||
{%- endmacro -%}
|
||||
"""
|
||||
trim = true
|
||||
|
||||
[git]
|
||||
# parse the commits based on https://www.conventionalcommits.org
|
||||
conventional_commits = true
|
||||
# filter out the commits that are not conventional
|
||||
filter_unconventional = true
|
||||
# process each line of a commit as an individual commit
|
||||
split_commits = false
|
||||
# regex for parsing and grouping commits
|
||||
commit_parsers = [
|
||||
{ message = "^feat", group = "<!-- 0 -->🚀 Features" },
|
||||
{ message = "^fix", group = "<!-- 1 -->🐛 Bug Fixes" },
|
||||
{ message = "^doc", group = "<!-- 3 -->📚 Documentation" },
|
||||
{ message = "^perf", group = "<!-- 4 -->⚡ Performance" },
|
||||
{ message = "^refactor", group = "<!-- 2 -->🚜 Refactor" },
|
||||
{ message = "^style", group = "<!-- 5 -->🎨 Styling" },
|
||||
{ message = "^test", group = "<!-- 6 -->🧪 Testing" },
|
||||
{ message = "^chore\\(release\\): prepare for", skip = true },
|
||||
{ message = "^chore\\(deps.*\\)", skip = true },
|
||||
{ message = "^chore\\(pr\\)", skip = true },
|
||||
{ message = "^chore\\(pull\\)", skip = true },
|
||||
{ message = "^chore|^ci", group = "<!-- 7 -->⚙️ Miscellaneous Tasks" },
|
||||
{ body = ".*security", group = "<!-- 8 -->🛡️ Security" },
|
||||
{ message = "^revert", group = "<!-- 9 -->◀️ Revert" },
|
||||
]
|
||||
# protect breaking changes from being skipped due to matching a skipping commit_parser
|
||||
protect_breaking_commits = false
|
||||
# filter out the commits that are not matched by commit parsers
|
||||
filter_commits = false
|
||||
# regex for matching git tags
|
||||
# tag_pattern = "v[0-9].*"
|
||||
# regex for skipping tags
|
||||
# skip_tags = ""
|
||||
# regex for ignoring tags
|
||||
ignore_tags = ".*-nightly-.*"
|
||||
# sort the tags topologically
|
||||
topo_order = false
|
||||
# sort the commits inside sections by oldest/newest order
|
||||
sort_commits = "oldest"
|
||||
# limit the number of commits included in the changelog.
|
||||
# limit_commits = 42
|
||||
19
config/config-docs-template.md
Normal file
19
config/config-docs-template.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# Configurations
|
||||
|
||||
## Standalone Mode
|
||||
|
||||
{{ toml2docs "./standalone.example.toml" }}
|
||||
|
||||
## Cluster Mode
|
||||
|
||||
### Frontend
|
||||
|
||||
{{ toml2docs "./frontend.example.toml" }}
|
||||
|
||||
### Metasrv
|
||||
|
||||
{{ toml2docs "./metasrv.example.toml" }}
|
||||
|
||||
### Datanode
|
||||
|
||||
{{ toml2docs "./datanode.example.toml" }}
|
||||
376
config/config.md
Normal file
376
config/config.md
Normal file
@@ -0,0 +1,376 @@
|
||||
# Configurations
|
||||
|
||||
## Standalone Mode
|
||||
|
||||
| Key | Type | Default | Descriptions |
|
||||
| --- | -----| ------- | ----------- |
|
||||
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
|
||||
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
|
||||
| `default_timezone` | String | `None` | The default timezone of the server. |
|
||||
| `http` | -- | -- | The HTTP server options. |
|
||||
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
|
||||
| `http.timeout` | String | `30s` | HTTP request timeout. |
|
||||
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. |
|
||||
| `grpc` | -- | -- | The gRPC server options. |
|
||||
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
|
||||
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
|
||||
| `mysql` | -- | -- | MySQL server options. |
|
||||
| `mysql.enable` | Bool | `true` | Whether to enable. |
|
||||
| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
|
||||
| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
|
||||
| `mysql.tls` | -- | -- | -- |
|
||||
| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
|
||||
| `mysql.tls.cert_path` | String | `None` | Certificate file path. |
|
||||
| `mysql.tls.key_path` | String | `None` | Private key file path. |
|
||||
| `mysql.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
|
||||
| `postgres` | -- | -- | PostgreSQL server options. |
|
||||
| `postgres.enable` | Bool | `true` | Whether to enable |
|
||||
| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgreSQL server. |
|
||||
| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
|
||||
| `postgres.tls` | -- | -- | PostgreSQL server TLS options, see the `mysql.tls` section. |
|
||||
| `postgres.tls.mode` | String | `disable` | TLS mode. |
|
||||
| `postgres.tls.cert_path` | String | `None` | Certificate file path. |
|
||||
| `postgres.tls.key_path` | String | `None` | Private key file path. |
|
||||
| `postgres.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
|
||||
| `opentsdb` | -- | -- | OpenTSDB protocol options. |
|
||||
| `opentsdb.enable` | Bool | `true` | Whether to enable |
|
||||
| `opentsdb.addr` | String | `127.0.0.1:4242` | OpenTSDB telnet API server address. |
|
||||
| `opentsdb.runtime_size` | Integer | `2` | The number of server worker threads. |
|
||||
| `influxdb` | -- | -- | InfluxDB protocol options. |
|
||||
| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
|
||||
| `prom_store` | -- | -- | Prometheus remote storage options |
|
||||
| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
|
||||
| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
|
||||
| `wal` | -- | -- | The WAL options. |
|
||||
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
|
||||
| `wal.dir` | String | `None` | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.max_batch_size` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.linger` | String | `200ms` | The linger duration of a kafka batch producer.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_init` | String | `500ms` | The initial backoff delay.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `metadata_store` | -- | -- | Metadata storage options. |
|
||||
| `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. |
|
||||
| `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. |
|
||||
| `procedure` | -- | -- | Procedure storage options. |
|
||||
| `procedure.max_retry_times` | Integer | `3` | Procedure max retry time. |
|
||||
| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
|
||||
| `storage` | -- | -- | The data storage options. |
|
||||
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
|
||||
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
|
||||
| `storage.cache_path` | String | `None` | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
|
||||
| `storage.cache_capacity` | String | `None` | The local file cache capacity in bytes. |
|
||||
| `storage.bucket` | String | `None` | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
|
||||
| `storage.root` | String | `None` | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
|
||||
| `storage.access_key_id` | String | `None` | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
|
||||
| `storage.secret_access_key` | String | `None` | The secret access key of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3`**. |
|
||||
| `storage.access_key_secret` | String | `None` | The secret access key of the aliyun account.<br/>**It's only used when the storage type is `Oss`**. |
|
||||
| `storage.account_name` | String | `None` | The account name of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.account_key` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.scope` | String | `None` | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
|
||||
| `storage.credential_path` | String | `None` | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
|
||||
| `storage.container` | String | `None` | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.sas_token` | String | `None` | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.endpoint` | String | `None` | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `storage.region` | String | `None` | The region of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
|
||||
| `region_engine.mito` | -- | -- | The Mito engine options. |
|
||||
| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
|
||||
| `region_engine.mito.worker_channel_size` | Integer | `128` | Request channel size of each worker. |
|
||||
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
|
||||
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
|
||||
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
|
||||
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
|
||||
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
|
||||
| `region_engine.mito.global_write_buffer_size` | String | `1GB` | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
|
||||
| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
|
||||
| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
|
||||
| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
|
||||
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
|
||||
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
|
||||
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
|
||||
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
|
||||
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
|
||||
| `region_engine.mito.inverted_index` | -- | -- | The options for inverted index in Mito engine. |
|
||||
| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically<br/>- `disable`: never |
|
||||
| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically<br/>- `disable`: never |
|
||||
| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query<br/>- `auto`: automatically<br/>- `disable`: never |
|
||||
| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `64M` | Memory threshold for performing an external sort during index creation.<br/>Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. |
|
||||
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). |
|
||||
| `region_engine.mito.memtable` | -- | -- | -- |
|
||||
| `region_engine.mito.memtable.type` | String | `time_series` | Memtable type.<br/>- `time_series`: time-series memtable<br/>- `partition_tree`: partition tree memtable (experimental) |
|
||||
| `region_engine.mito.memtable.index_max_keys_per_shard` | Integer | `8192` | The max number of keys in one shard.<br/>Only available for `partition_tree` memtable. |
|
||||
| `region_engine.mito.memtable.data_freeze_threshold` | Integer | `32768` | The max rows of data inside the actively writing buffer in one shard.<br/>Only available for `partition_tree` memtable. |
|
||||
| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes.<br/>Only available for `partition_tree` memtable. |
|
||||
| `logging` | -- | -- | The logging options. |
|
||||
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
|
||||
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of traces that will be sampled and exported.<br/>Valid range is `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.<br/>Ratios > 1 are treated as 1. Ratios < 0 are treated as 0. |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
|
||||
| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
|
||||
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
|
||||
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommended to collect metrics generated by itself |
|
||||
| `export_metrics.self_import.db` | String | `None` | -- |
|
||||
| `export_metrics.remote_write` | -- | -- | -- |
|
||||
| `export_metrics.remote_write.url` | String | `""` | The URL to send the metrics to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
|
||||
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers carried by the Prometheus remote-write requests. |
|
||||
|
||||
|
||||
## Cluster Mode
|
||||
|
||||
### Frontend
|
||||
|
||||
| Key | Type | Default | Descriptions |
|
||||
| --- | -----| ------- | ----------- |
|
||||
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
|
||||
| `default_timezone` | String | `None` | The default timezone of the server. |
|
||||
| `heartbeat` | -- | -- | The heartbeat options. |
|
||||
| `heartbeat.interval` | String | `18s` | Interval for sending heartbeat messages to the metasrv. |
|
||||
| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |
|
||||
| `http` | -- | -- | The HTTP server options. |
|
||||
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
|
||||
| `http.timeout` | String | `30s` | HTTP request timeout. |
|
||||
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. |
|
||||
| `grpc` | -- | -- | The gRPC server options. |
|
||||
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
|
||||
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
|
||||
| `mysql` | -- | -- | MySQL server options. |
|
||||
| `mysql.enable` | Bool | `true` | Whether to enable. |
|
||||
| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
|
||||
| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
|
||||
| `mysql.tls` | -- | -- | -- |
|
||||
| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
|
||||
| `mysql.tls.cert_path` | String | `None` | Certificate file path. |
|
||||
| `mysql.tls.key_path` | String | `None` | Private key file path. |
|
||||
| `mysql.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
|
||||
| `postgres` | -- | -- | PostgreSQL server options. |
|
||||
| `postgres.enable` | Bool | `true` | Whether to enable |
|
||||
| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgreSQL server. |
|
||||
| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
|
||||
| `postgres.tls` | -- | -- | PostgreSQL server TLS options, see the `mysql.tls` section. |
|
||||
| `postgres.tls.mode` | String | `disable` | TLS mode. |
|
||||
| `postgres.tls.cert_path` | String | `None` | Certificate file path. |
|
||||
| `postgres.tls.key_path` | String | `None` | Private key file path. |
|
||||
| `postgres.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
|
||||
| `opentsdb` | -- | -- | OpenTSDB protocol options. |
|
||||
| `opentsdb.enable` | Bool | `true` | Whether to enable |
|
||||
| `opentsdb.addr` | String | `127.0.0.1:4242` | OpenTSDB telnet API server address. |
|
||||
| `opentsdb.runtime_size` | Integer | `2` | The number of server worker threads. |
|
||||
| `influxdb` | -- | -- | InfluxDB protocol options. |
|
||||
| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
|
||||
| `prom_store` | -- | -- | Prometheus remote storage options |
|
||||
| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
|
||||
| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
|
||||
| `meta_client` | -- | -- | The metasrv client options. |
|
||||
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
|
||||
| `meta_client.timeout` | String | `3s` | Operation timeout. |
|
||||
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
|
||||
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
|
||||
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
|
||||
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
|
||||
| `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. |
|
||||
| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. |
|
||||
| `meta_client.metadata_cache_tti` | String | `5m` | -- |
|
||||
| `datanode` | -- | -- | Datanode options. |
|
||||
| `datanode.client` | -- | -- | Datanode client options. |
|
||||
| `datanode.client.timeout` | String | `10s` | -- |
|
||||
| `datanode.client.connect_timeout` | String | `10s` | -- |
|
||||
| `datanode.client.tcp_nodelay` | Bool | `true` | -- |
|
||||
| `logging` | -- | -- | The logging options. |
|
||||
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
|
||||
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of traces that will be sampled and exported.<br/>Valid range is `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.<br/>Ratios > 1 are treated as 1. Ratios < 0 are treated as 0. |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
|
||||
| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
|
||||
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
|
||||
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommended to collect metrics generated by itself |
|
||||
| `export_metrics.self_import.db` | String | `None` | -- |
|
||||
| `export_metrics.remote_write` | -- | -- | -- |
|
||||
| `export_metrics.remote_write.url` | String | `""` | The URL to send the metrics to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
|
||||
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers carried by the Prometheus remote-write requests. |
|
||||
|
||||
|
||||
### Metasrv
|
||||
|
||||
| Key | Type | Default | Descriptions |
|
||||
| --- | -----| ------- | ----------- |
|
||||
| `data_home` | String | `/tmp/metasrv/` | The working home directory. |
|
||||
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
|
||||
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
|
||||
| `store_addr` | String | `127.0.0.1:2379` | Etcd server address. |
|
||||
| `selector` | String | `lease_based` | Datanode selector type.<br/>- `lease_based` (default value).<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
|
||||
| `use_memory_store` | Bool | `false` | Store data in memory. |
|
||||
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. |
|
||||
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
|
||||
| `procedure` | -- | -- | Procedure storage options. |
|
||||
| `procedure.max_retry_times` | Integer | `12` | Procedure max retry time. |
|
||||
| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
|
||||
| `procedure.max_metadata_value_size` | String | `1500KiB` | Automatically split large values.<br/>GreptimeDB procedure uses etcd as the default metadata storage backend.<br/>The maximum size of any etcd request is 1.5 MiB.<br/>1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key)<br/>Comment out `max_metadata_value_size` to avoid splitting large values (no limit). |
|
||||
| `failure_detector` | -- | -- | -- |
|
||||
| `failure_detector.threshold` | Float | `8.0` | -- |
|
||||
| `failure_detector.min_std_deviation` | String | `100ms` | -- |
|
||||
| `failure_detector.acceptable_heartbeat_pause` | String | `3000ms` | -- |
|
||||
| `failure_detector.first_heartbeat_estimate` | String | `1000ms` | -- |
|
||||
| `datanode` | -- | -- | Datanode options. |
|
||||
| `datanode.client` | -- | -- | Datanode client options. |
|
||||
| `datanode.client.timeout` | String | `10s` | -- |
|
||||
| `datanode.client.connect_timeout` | String | `10s` | -- |
|
||||
| `datanode.client.tcp_nodelay` | Bool | `true` | -- |
|
||||
| `wal` | -- | -- | -- |
|
||||
| `wal.provider` | String | `raft_engine` | -- |
|
||||
| `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. |
|
||||
| `wal.num_topics` | Integer | `64` | Number of topics to be created upon start. |
|
||||
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) |
|
||||
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`. |
|
||||
| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition. |
|
||||
| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled. |
|
||||
| `wal.backoff_init` | String | `500ms` | The initial backoff for kafka clients. |
|
||||
| `wal.backoff_max` | String | `10s` | The maximum backoff for kafka clients. |
|
||||
| `wal.backoff_base` | Integer | `2` | Exponential backoff rate, i.e. next backoff = base * current backoff. |
|
||||
| `wal.backoff_deadline` | String | `5mins` | Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate. |
|
||||
| `logging` | -- | -- | The logging options. |
|
||||
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
|
||||
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The ratio of traces that will be sampled and exported.<br/>Valid range is `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled. The default value is 1.<br/>Ratios > 1 are treated as 1. Ratios < 0 are treated as 0. |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `export_metrics` | -- | -- | The metasrv can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from Prometheus scraping. |
|
||||
| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
|
||||
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
|
||||
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommended to collect metrics generated by itself. |
|
||||
| `export_metrics.self_import.db` | String | `None` | -- |
|
||||
| `export_metrics.remote_write` | -- | -- | -- |
|
||||
| `export_metrics.remote_write.url` | String | `""` | The URL that the metrics are sent to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
|
||||
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers carried by the Prometheus remote-write requests. |
|
||||
|
||||
|
||||
### Datanode
|
||||
|
||||
| Key | Type | Default | Descriptions |
|
||||
| --- | -----| ------- | ----------- |
|
||||
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
|
||||
| `node_id` | Integer | `None` | The datanode identifier and should be unique in the cluster. |
|
||||
| `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.<br/>It will block the datanode start if it can't receive leases in the heartbeat from metasrv. |
|
||||
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
|
||||
| `rpc_addr` | String | `127.0.0.1:3001` | The gRPC address of the datanode. |
|
||||
| `rpc_hostname` | String | `None` | The hostname of the datanode. |
|
||||
| `rpc_runtime_size` | Integer | `8` | The number of gRPC server worker threads. |
|
||||
| `rpc_max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
|
||||
| `rpc_max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
|
||||
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
|
||||
| `heartbeat` | -- | -- | The heartbeat options. |
|
||||
| `heartbeat.interval` | String | `3s` | Interval for sending heartbeat messages to the metasrv. |
|
||||
| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |
|
||||
| `meta_client` | -- | -- | The metasrv client options. |
|
||||
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
|
||||
| `meta_client.timeout` | String | `3s` | Operation timeout. |
|
||||
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
|
||||
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
|
||||
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
|
||||
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
|
||||
| `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. |
|
||||
| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. |
|
||||
| `meta_client.metadata_cache_tti` | String | `5m` | TTI of the metadata cache. |
|
||||
| `wal` | -- | -- | The WAL options. |
|
||||
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
|
||||
| `wal.dir` | String | `None` | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.max_batch_size` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.linger` | String | `200ms` | The linger duration of a kafka batch producer.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_init` | String | `500ms` | The initial backoff delay.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `storage` | -- | -- | The data storage options. |
|
||||
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
|
||||
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
|
||||
| `storage.cache_path` | String | `None` | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
|
||||
| `storage.cache_capacity` | String | `None` | The local file cache capacity in bytes. |
|
||||
| `storage.bucket` | String | `None` | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
|
||||
| `storage.root` | String | `None` | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
|
||||
| `storage.access_key_id` | String | `None` | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
|
||||
| `storage.secret_access_key` | String | `None` | The secret access key of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3`**. |
|
||||
| `storage.access_key_secret` | String | `None` | The secret access key of the aliyun account.<br/>**It's only used when the storage type is `Oss`**. |
|
||||
| `storage.account_name` | String | `None` | The account name of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.account_key` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.scope` | String | `None` | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
|
||||
| `storage.credential_path` | String | `None` | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
|
||||
| `storage.container` | String | `None` | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.sas_token` | String | `None` | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.endpoint` | String | `None` | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `storage.region` | String | `None` | The region of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
|
||||
| `region_engine.mito` | -- | -- | The Mito engine options. |
|
||||
| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
|
||||
| `region_engine.mito.worker_channel_size` | Integer | `128` | Request channel size of each worker. |
|
||||
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
|
||||
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
|
||||
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
|
||||
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
|
||||
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
|
||||
| `region_engine.mito.global_write_buffer_size` | String | `1GB` | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
|
||||
| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
|
||||
| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
|
||||
| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
|
||||
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
|
||||
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
|
||||
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
|
||||
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
|
||||
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
|
||||
| `region_engine.mito.inverted_index` | -- | -- | The options for inverted index in Mito engine. |
|
||||
| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically<br/>- `disable`: never |
|
||||
| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically<br/>- `disable`: never |
|
||||
| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query.<br/>- `auto`: automatically<br/>- `disable`: never |
|
||||
| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `64M` | Memory threshold for performing an external sort during index creation.<br/>Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. |
|
||||
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). |
|
||||
| `region_engine.mito.memtable` | -- | -- | -- |
|
||||
| `region_engine.mito.memtable.type` | String | `time_series` | Memtable type.<br/>- `time_series`: time-series memtable<br/>- `partition_tree`: partition tree memtable (experimental) |
|
||||
| `region_engine.mito.memtable.index_max_keys_per_shard` | Integer | `8192` | The max number of keys in one shard.<br/>Only available for `partition_tree` memtable. |
|
||||
| `region_engine.mito.memtable.data_freeze_threshold` | Integer | `32768` | The max rows of data inside the actively writing buffer in one shard.<br/>Only available for `partition_tree` memtable. |
|
||||
| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes.<br/>Only available for `partition_tree` memtable. |
|
||||
| `logging` | -- | -- | The logging options. |
|
||||
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
|
||||
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The ratio of traces that will be sampled and exported.<br/>Valid range is `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled. The default value is 1.<br/>Ratios > 1 are treated as 1. Ratios < 0 are treated as 0. |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `export_metrics` | -- | -- | The datanode can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from Prometheus scraping. |
|
||||
| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
|
||||
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
|
||||
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommended to collect metrics generated by itself. |
|
||||
| `export_metrics.self_import.db` | String | `None` | -- |
|
||||
| `export_metrics.remote_write` | -- | -- | -- |
|
||||
| `export_metrics.remote_write.url` | String | `""` | The URL that the metrics are sent to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
|
||||
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers carried by the Prometheus remote-write requests. |
|
||||
@@ -1,171 +1,430 @@
|
||||
# Node running mode, see `standalone.example.toml`.
|
||||
mode = "distributed"
|
||||
# The datanode identifier, should be unique.
|
||||
## The running mode of the datanode. It can be `standalone` or `distributed`.
|
||||
mode = "standalone"
|
||||
|
||||
## The datanode identifier and should be unique in the cluster.
|
||||
## +toml2docs:none-default
|
||||
node_id = 42
|
||||
# gRPC server address, "127.0.0.1:3001" by default.
|
||||
rpc_addr = "127.0.0.1:3001"
|
||||
# Hostname of this node.
|
||||
rpc_hostname = "127.0.0.1"
|
||||
# The number of gRPC server worker threads, 8 by default.
|
||||
rpc_runtime_size = 8
|
||||
# Start services after regions have obtained leases.
|
||||
# It will block the datanode start if it can't receive leases in the heartbeat from metasrv.
|
||||
|
||||
## Start services after regions have obtained leases.
|
||||
## It will block the datanode start if it can't receive leases in the heartbeat from metasrv.
|
||||
require_lease_before_startup = false
|
||||
|
||||
# Initialize all regions in the background during the startup.
|
||||
# By default, it provides services after all regions have been initialized.
|
||||
## Initialize all regions in the background during the startup.
|
||||
## By default, it provides services after all regions have been initialized.
|
||||
init_regions_in_background = false
|
||||
|
||||
## The gRPC address of the datanode.
|
||||
rpc_addr = "127.0.0.1:3001"
|
||||
|
||||
## The hostname of the datanode.
|
||||
## +toml2docs:none-default
|
||||
rpc_hostname = "127.0.0.1"
|
||||
|
||||
## The number of gRPC server worker threads.
|
||||
rpc_runtime_size = 8
|
||||
|
||||
## The maximum receive message size for gRPC server.
|
||||
rpc_max_recv_message_size = "512MB"
|
||||
|
||||
## The maximum send message size for gRPC server.
|
||||
rpc_max_send_message_size = "512MB"
|
||||
|
||||
## Enable telemetry to collect anonymous usage data.
|
||||
enable_telemetry = true
|
||||
|
||||
## The heartbeat options.
|
||||
[heartbeat]
|
||||
# Interval for sending heartbeat messages to the Metasrv, 3 seconds by default.
|
||||
## Interval for sending heartbeat messages to the metasrv.
|
||||
interval = "3s"
|
||||
|
||||
# Metasrv client options.
|
||||
## Interval for retrying to send heartbeat messages to the metasrv.
|
||||
retry_interval = "3s"
|
||||
|
||||
## The metasrv client options.
|
||||
[meta_client]
|
||||
# Metasrv address list.
|
||||
## The addresses of the metasrv.
|
||||
metasrv_addrs = ["127.0.0.1:3002"]
|
||||
# Heartbeat timeout, 500 milliseconds by default.
|
||||
heartbeat_timeout = "500ms"
|
||||
# Operation timeout, 3 seconds by default.
|
||||
|
||||
## Operation timeout.
|
||||
timeout = "3s"
|
||||
# Connect server timeout, 1 second by default.
|
||||
|
||||
## Heartbeat timeout.
|
||||
heartbeat_timeout = "500ms"
|
||||
|
||||
## DDL timeout.
|
||||
ddl_timeout = "10s"
|
||||
|
||||
## Connect server timeout.
|
||||
connect_timeout = "1s"
|
||||
# `TCP_NODELAY` option for accepted connections, true by default.
|
||||
|
||||
## `TCP_NODELAY` option for accepted connections.
|
||||
tcp_nodelay = true
|
||||
|
||||
# WAL options.
|
||||
## The configuration about the cache of the metadata.
|
||||
metadata_cache_max_capacity = 100000
|
||||
|
||||
## TTL of the metadata cache.
|
||||
metadata_cache_ttl = "10m"
|
||||
|
||||
## TTI of the metadata cache.
|
||||
metadata_cache_tti = "5m"
|
||||
|
||||
## The WAL options.
|
||||
[wal]
|
||||
## The provider of the WAL.
|
||||
## - `raft_engine`: the wal is stored in the local file system by raft-engine.
|
||||
## - `kafka`: it's remote wal that data is stored in Kafka.
|
||||
provider = "raft_engine"
|
||||
|
||||
# Raft-engine wal options, see `standalone.example.toml`.
|
||||
# dir = "/tmp/greptimedb/wal"
|
||||
## The directory to store the WAL files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
## +toml2docs:none-default
|
||||
dir = "/tmp/greptimedb/wal"
|
||||
|
||||
## The size of the WAL segment file.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
file_size = "256MB"
|
||||
|
||||
## The threshold of the WAL size to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_threshold = "4GB"
|
||||
|
||||
## The interval to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_interval = "10m"
|
||||
|
||||
## The read batch size.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
read_batch_size = 128
|
||||
|
||||
## Whether to use sync write.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_write = false
|
||||
|
||||
# Kafka wal options, see `standalone.example.toml`.
|
||||
# broker_endpoints = ["127.0.0.1:9092"]
|
||||
# Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
# max_batch_size = "1MB"
|
||||
# linger = "200ms"
|
||||
# consumer_wait_timeout = "100ms"
|
||||
# backoff_init = "500ms"
|
||||
# backoff_max = "10s"
|
||||
# backoff_base = 2
|
||||
# backoff_deadline = "5mins"
|
||||
## Whether to reuse logically truncated log files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
enable_log_recycle = true
|
||||
|
||||
# Storage options, see `standalone.example.toml`.
|
||||
## Whether to pre-create log files on start up.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
prefill_log_files = false
|
||||
|
||||
## Duration for fsyncing log files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_period = "10s"
|
||||
|
||||
## The Kafka broker endpoints.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
## The max size of a single producer batch.
|
||||
## Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
max_batch_size = "1MB"
|
||||
|
||||
## The linger duration of a kafka batch producer.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
linger = "200ms"
|
||||
|
||||
## The consumer wait timeout.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
consumer_wait_timeout = "100ms"
|
||||
|
||||
## The initial backoff delay.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_init = "500ms"
|
||||
|
||||
## The maximum backoff delay.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_max = "10s"
|
||||
|
||||
## The exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_base = 2
|
||||
|
||||
## The deadline of retries.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_deadline = "5mins"
|
||||
|
||||
# Example of using S3 as the storage.
|
||||
# [storage]
|
||||
# type = "S3"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# access_key_id = "test"
|
||||
# secret_access_key = "123456"
|
||||
# endpoint = "https://s3.amazonaws.com"
|
||||
# region = "us-west-2"
|
||||
|
||||
# Example of using Oss as the storage.
|
||||
# [storage]
|
||||
# type = "Oss"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# access_key_id = "test"
|
||||
# access_key_secret = "123456"
|
||||
# endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
|
||||
|
||||
# Example of using Azblob as the storage.
|
||||
# [storage]
|
||||
# type = "Azblob"
|
||||
# container = "greptimedb"
|
||||
# root = "data"
|
||||
# account_name = "test"
|
||||
# account_key = "123456"
|
||||
# endpoint = "https://greptimedb.blob.core.windows.net"
|
||||
# sas_token = ""
|
||||
|
||||
# Example of using Gcs as the storage.
|
||||
# [storage]
|
||||
# type = "Gcs"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# scope = "test"
|
||||
# credential_path = "123456"
|
||||
# endpoint = "https://storage.googleapis.com"
|
||||
|
||||
## The data storage options.
|
||||
[storage]
|
||||
# The working home directory.
|
||||
## The working home directory.
|
||||
data_home = "/tmp/greptimedb/"
|
||||
# Storage type.
|
||||
type = "File"
|
||||
# TTL for all tables. Disabled by default.
|
||||
# global_ttl = "7d"
|
||||
|
||||
# Cache configuration for object storage such as 'S3' etc.
|
||||
# The local file cache directory
|
||||
# cache_path = "/path/local_cache"
|
||||
# The local file cache capacity in bytes.
|
||||
# cache_capacity = "256MB"
|
||||
## The storage type used to store the data.
|
||||
## - `File`: the data is stored in the local file system.
|
||||
## - `S3`: the data is stored in the S3 object storage.
|
||||
## - `Gcs`: the data is stored in the Google Cloud Storage.
|
||||
## - `Azblob`: the data is stored in the Azure Blob Storage.
|
||||
## - `Oss`: the data is stored in the Aliyun OSS.
|
||||
type = "File"
|
||||
|
||||
## Cache configuration for object storage such as 'S3' etc.
|
||||
## The local file cache directory.
|
||||
## +toml2docs:none-default
|
||||
cache_path = "/path/local_cache"
|
||||
|
||||
## The local file cache capacity in bytes.
|
||||
## +toml2docs:none-default
|
||||
cache_capacity = "256MB"
|
||||
|
||||
## The S3 bucket name.
|
||||
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
bucket = "greptimedb"
|
||||
|
||||
## The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
|
||||
## **It's only used when the storage type is `S3`, `Oss` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
root = "greptimedb"
|
||||
|
||||
## The access key id of the aws account.
|
||||
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
|
||||
## **It's only used when the storage type is `S3` and `Oss`**.
|
||||
## +toml2docs:none-default
|
||||
access_key_id = "test"
|
||||
|
||||
## The secret access key of the aws account.
|
||||
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
|
||||
## **It's only used when the storage type is `S3`**.
|
||||
## +toml2docs:none-default
|
||||
secret_access_key = "test"
|
||||
|
||||
## The secret access key of the aliyun account.
|
||||
## **It's only used when the storage type is `Oss`**.
|
||||
## +toml2docs:none-default
|
||||
access_key_secret = "test"
|
||||
|
||||
## The account name of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
account_name = "test"
|
||||
|
||||
## The account key of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
account_key = "test"
|
||||
|
||||
## The scope of the google cloud storage.
|
||||
## **It's only used when the storage type is `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
scope = "test"
|
||||
|
||||
## The credential path of the google cloud storage.
|
||||
## **It's only used when the storage type is `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
credential_path = "test"
|
||||
|
||||
## The container of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
container = "greptimedb"
|
||||
|
||||
## The sas token of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
sas_token = ""
|
||||
|
||||
## The endpoint of the S3 service.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
endpoint = "https://s3.amazonaws.com"
|
||||
|
||||
## The region of the S3 service.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
region = "us-west-2"
|
||||
|
||||
# Custom storage options
|
||||
#[[storage.providers]]
|
||||
#type = "S3"
|
||||
#[[storage.providers]]
|
||||
#type = "Gcs"
|
||||
# [[storage.providers]]
|
||||
# type = "S3"
|
||||
# [[storage.providers]]
|
||||
# type = "Gcs"
|
||||
|
||||
# Mito engine options
|
||||
## The region engine options. You can configure multiple region engines.
|
||||
[[region_engine]]
|
||||
|
||||
## The Mito engine options.
|
||||
[region_engine.mito]
|
||||
# Number of region workers
|
||||
|
||||
## Number of region workers.
|
||||
num_workers = 8
|
||||
# Request channel size of each worker
|
||||
|
||||
## Request channel size of each worker.
|
||||
worker_channel_size = 128
|
||||
# Max batch size for a worker to handle requests
|
||||
|
||||
## Max batch size for a worker to handle requests.
|
||||
worker_request_batch_size = 64
|
||||
# Number of meta action updated to trigger a new checkpoint for the manifest
|
||||
|
||||
## Number of meta action updated to trigger a new checkpoint for the manifest.
|
||||
manifest_checkpoint_distance = 10
|
||||
# Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
|
||||
## Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
compress_manifest = false
|
||||
# Max number of running background jobs
|
||||
|
||||
## Max number of running background jobs
|
||||
max_background_jobs = 4
|
||||
# Interval to auto flush a region if it has not flushed yet.
|
||||
|
||||
## Interval to auto flush a region if it has not flushed yet.
|
||||
auto_flush_interval = "1h"
|
||||
# Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
|
||||
|
||||
## Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
|
||||
global_write_buffer_size = "1GB"
|
||||
# Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
|
||||
|
||||
## Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
|
||||
global_write_buffer_reject_size = "2GB"
|
||||
# Cache size for SST metadata. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
|
||||
|
||||
## Cache size for SST metadata. Setting it to 0 to disable the cache.
|
||||
## If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
|
||||
sst_meta_cache_size = "128MB"
|
||||
# Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
|
||||
## Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
|
||||
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
vector_cache_size = "512MB"
|
||||
# Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
|
||||
## Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
|
||||
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
page_cache_size = "512MB"
|
||||
# Buffer size for SST writing.
|
||||
|
||||
## Buffer size for SST writing.
|
||||
sst_write_buffer_size = "8MB"
|
||||
# Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
# - 0: using the default value (1/4 of cpu cores).
|
||||
# - 1: scan in current thread.
|
||||
# - n: scan in parallelism n.
|
||||
|
||||
## Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
## - `0`: using the default value (1/4 of cpu cores).
|
||||
## - `1`: scan in current thread.
|
||||
## - `n`: scan in parallelism n.
|
||||
scan_parallelism = 0
|
||||
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
|
||||
|
||||
## Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
parallel_scan_channel_size = 32
|
||||
# Whether to allow stale WAL entries read during replay.
|
||||
|
||||
## Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
|
||||
## The options for inverted index in Mito engine.
|
||||
[region_engine.mito.inverted_index]
|
||||
# Whether to create the index on flush.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to create the index on flush.
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
create_on_flush = "auto"
|
||||
# Whether to create the index on compaction.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to create the index on compaction.
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
create_on_compaction = "auto"
|
||||
# Whether to apply the index on query
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to apply the index on query
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
apply_on_query = "auto"
|
||||
# Memory threshold for performing an external sort during index creation.
|
||||
# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
|
||||
## Memory threshold for performing an external sort during index creation.
|
||||
## Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
mem_threshold_on_create = "64M"
|
||||
# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
|
||||
## File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
intermediate_path = ""
|
||||
|
||||
[region_engine.mito.memtable]
|
||||
# Memtable type.
|
||||
# - "experimental": experimental memtable
|
||||
# - "time_series": time-series memtable (deprecated)
|
||||
type = "experimental"
|
||||
# The max number of keys in one shard.
|
||||
## Memtable type.
|
||||
## - `time_series`: time-series memtable
|
||||
## - `partition_tree`: partition tree memtable (experimental)
|
||||
type = "time_series"
|
||||
|
||||
## The max number of keys in one shard.
|
||||
## Only available for `partition_tree` memtable.
|
||||
index_max_keys_per_shard = 8192
|
||||
# The max rows of data inside the actively writing buffer in one shard.
|
||||
|
||||
## The max rows of data inside the actively writing buffer in one shard.
|
||||
## Only available for `partition_tree` memtable.
|
||||
data_freeze_threshold = 32768
|
||||
# Max dictionary bytes.
|
||||
|
||||
## Max dictionary bytes.
|
||||
## Only available for `partition_tree` memtable.
|
||||
fork_dictionary_bytes = "1GiB"
|
||||
|
||||
# Log options, see `standalone.example.toml`
|
||||
# [logging]
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# level = "info"
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files.
|
||||
dir = "/tmp/greptimedb/logs"
|
||||
|
||||
# Datanode export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# [export_metrics.remote_write]
|
||||
# The url the metrics send to. The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
|
||||
# url = ""
|
||||
# HTTP headers of Prometheus remote-write carry
|
||||
# headers = {}
|
||||
## The log level. Can be `info`/`debug`/`warn`/`error`.
|
||||
## +toml2docs:none-default
|
||||
level = "info"
|
||||
|
||||
## Enable OTLP tracing.
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
## The percentage of traces that will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## Ratios > 1 are treated as 1. Ratios < 0 are treated as 0.
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
## Whether to enable exporting metrics.
|
||||
enable = false
|
||||
|
||||
## The interval of export metrics.
|
||||
write_interval = "30s"
|
||||
|
||||
## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself.
|
||||
[export_metrics.self_import]
|
||||
## +toml2docs:none-default
|
||||
db = "information_schema"
|
||||
|
||||
[export_metrics.remote_write]
|
||||
## The URL the metrics are sent to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
|
||||
url = ""
|
||||
|
||||
## HTTP headers carried by the Prometheus remote-write requests.
|
||||
headers = { }
|
||||
|
||||
@@ -1,106 +1,192 @@
|
||||
# Node running mode, see `standalone.example.toml`.
|
||||
mode = "distributed"
|
||||
# The default timezone of the server
|
||||
# default_timezone = "UTC"
|
||||
## The running mode of the datanode. It can be `standalone` or `distributed`.
|
||||
mode = "standalone"
|
||||
|
||||
## The default timezone of the server.
|
||||
## +toml2docs:none-default
|
||||
default_timezone = "UTC"
|
||||
|
||||
## The heartbeat options.
|
||||
[heartbeat]
|
||||
# Interval for sending heartbeat task to the Metasrv, 5 seconds by default.
|
||||
interval = "5s"
|
||||
# Interval for retry sending heartbeat task, 5 seconds by default.
|
||||
retry_interval = "5s"
|
||||
## Interval for sending heartbeat messages to the metasrv.
|
||||
interval = "18s"
|
||||
|
||||
# HTTP server options, see `standalone.example.toml`.
|
||||
## Interval for retrying to send heartbeat messages to the metasrv.
|
||||
retry_interval = "3s"
|
||||
|
||||
## The HTTP server options.
|
||||
[http]
|
||||
## The address to bind the HTTP server.
|
||||
addr = "127.0.0.1:4000"
|
||||
## HTTP request timeout.
|
||||
timeout = "30s"
|
||||
## HTTP request body limit.
|
||||
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
|
||||
body_limit = "64MB"
|
||||
|
||||
# gRPC server options, see `standalone.example.toml`.
|
||||
## The gRPC server options.
|
||||
[grpc]
|
||||
## The address to bind the gRPC server.
|
||||
addr = "127.0.0.1:4001"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 8
|
||||
|
||||
# MySQL server options, see `standalone.example.toml`.
|
||||
## MySQL server options.
|
||||
[mysql]
|
||||
## Whether to enable.
|
||||
enable = true
|
||||
## The addr to bind the MySQL server.
|
||||
addr = "127.0.0.1:4002"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# MySQL server TLS options, see `standalone.example.toml`.
|
||||
# MySQL server TLS options.
|
||||
[mysql.tls]
|
||||
|
||||
## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
|
||||
## - `disable` (default value)
|
||||
## - `prefer`
|
||||
## - `require`
|
||||
## - `verify-ca`
|
||||
## - `verify-full`
|
||||
mode = "disable"
|
||||
|
||||
## Certificate file path.
|
||||
## +toml2docs:none-default
|
||||
cert_path = ""
|
||||
|
||||
## Private key file path.
|
||||
## +toml2docs:none-default
|
||||
key_path = ""
|
||||
|
||||
## Watch for Certificate and key file change and auto reload
|
||||
watch = false
|
||||
|
||||
# PostgresSQL server options, see `standalone.example.toml`.
|
||||
## PostgreSQL server options.
|
||||
[postgres]
|
||||
## Whether to enable
|
||||
enable = true
|
||||
## The address to bind the PostgreSQL server.
|
||||
addr = "127.0.0.1:4003"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# PostgresSQL server TLS options, see `standalone.example.toml`.
|
||||
## PostgreSQL server TLS options, see the `[mysql.tls]` section.
|
||||
[postgres.tls]
|
||||
## TLS mode.
|
||||
mode = "disable"
|
||||
|
||||
## Certificate file path.
|
||||
## +toml2docs:none-default
|
||||
cert_path = ""
|
||||
|
||||
## Private key file path.
|
||||
## +toml2docs:none-default
|
||||
key_path = ""
|
||||
|
||||
## Watch for Certificate and key file change and auto reload
|
||||
watch = false
|
||||
|
||||
# OpenTSDB protocol options, see `standalone.example.toml`.
|
||||
## OpenTSDB protocol options.
|
||||
[opentsdb]
|
||||
## Whether to enable
|
||||
enable = true
|
||||
## OpenTSDB telnet API server address.
|
||||
addr = "127.0.0.1:4242"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# InfluxDB protocol options, see `standalone.example.toml`.
|
||||
## InfluxDB protocol options.
|
||||
[influxdb]
|
||||
## Whether to enable InfluxDB protocol in HTTP API.
|
||||
enable = true
|
||||
|
||||
# Prometheus remote storage options, see `standalone.example.toml`.
|
||||
## Prometheus remote storage options
|
||||
[prom_store]
|
||||
## Whether to enable Prometheus remote write and read in HTTP API.
|
||||
enable = true
|
||||
# Whether to store the data from Prometheus remote write in metric engine.
|
||||
# true by default
|
||||
## Whether to store the data from Prometheus remote write in metric engine.
|
||||
with_metric_engine = true
|
||||
|
||||
# Metasrv client options, see `datanode.example.toml`.
|
||||
## The metasrv client options.
|
||||
[meta_client]
|
||||
## The addresses of the metasrv.
|
||||
metasrv_addrs = ["127.0.0.1:3002"]
|
||||
|
||||
## Operation timeout.
|
||||
timeout = "3s"
|
||||
# DDL timeouts options.
|
||||
|
||||
## Heartbeat timeout.
|
||||
heartbeat_timeout = "500ms"
|
||||
|
||||
## DDL timeout.
|
||||
ddl_timeout = "10s"
|
||||
|
||||
## Connect server timeout.
|
||||
connect_timeout = "1s"
|
||||
|
||||
## `TCP_NODELAY` option for accepted connections.
|
||||
tcp_nodelay = true
|
||||
# The configuration about the cache of the Metadata.
|
||||
# default: 100000
|
||||
|
||||
## The configuration about the cache of the metadata.
|
||||
metadata_cache_max_capacity = 100000
|
||||
# default: 10m
|
||||
|
||||
## TTL of the metadata cache.
|
||||
metadata_cache_ttl = "10m"
|
||||
# default: 5m
|
||||
|
||||
# TTI of the metadata cache.
|
||||
metadata_cache_tti = "5m"
|
||||
|
||||
# Log options, see `standalone.example.toml`
|
||||
# [logging]
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# level = "info"
|
||||
|
||||
# Datanode options.
|
||||
## Datanode options.
|
||||
[datanode]
|
||||
# Datanode client options.
|
||||
## Datanode client options.
|
||||
[datanode.client]
|
||||
timeout = "10s"
|
||||
connect_timeout = "10s"
|
||||
tcp_nodelay = true
|
||||
|
||||
# Frontend export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# for `frontend`, `self_import` is recommend to collect metrics generated by itself
|
||||
# [export_metrics.self_import]
|
||||
# db = "information_schema"
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files.
|
||||
dir = "/tmp/greptimedb/logs"
|
||||
|
||||
## The log level. Can be `info`/`debug`/`warn`/`error`.
|
||||
## +toml2docs:none-default
|
||||
level = "info"
|
||||
|
||||
## Enable OTLP tracing.
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
## The percentage of traces that will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## Ratios > 1 are treated as 1. Ratios < 0 are treated as 0.
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The frontend can export its metrics and send them to a Prometheus compatible service (e.g. `greptimedb` itself) through the remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
## Whether to enable exporting metrics.
|
||||
enable = false
|
||||
|
||||
## The interval of export metrics.
|
||||
write_interval = "30s"
|
||||
|
||||
## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself.
|
||||
[export_metrics.self_import]
|
||||
## +toml2docs:none-default
|
||||
db = "information_schema"
|
||||
|
||||
[export_metrics.remote_write]
|
||||
## The URL the metrics are sent to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
|
||||
url = ""
|
||||
|
||||
## HTTP headers carried by the Prometheus remote-write requests.
|
||||
headers = { }
|
||||
|
||||
@@ -1,35 +1,46 @@
|
||||
# The working home directory.
|
||||
## The working home directory.
|
||||
data_home = "/tmp/metasrv/"
|
||||
# The bind address of metasrv, "127.0.0.1:3002" by default.
|
||||
|
||||
## The bind address of metasrv.
|
||||
bind_addr = "127.0.0.1:3002"
|
||||
# The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
|
||||
|
||||
## The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
|
||||
server_addr = "127.0.0.1:3002"
|
||||
# Etcd server address, "127.0.0.1:2379" by default.
|
||||
|
||||
## Etcd server address.
|
||||
store_addr = "127.0.0.1:2379"
|
||||
# Datanode selector type.
|
||||
# - "lease_based" (default value).
|
||||
# - "load_based"
|
||||
# For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
|
||||
|
||||
## Datanode selector type.
|
||||
## - `lease_based` (default value).
|
||||
## - `load_based`
|
||||
## For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
|
||||
selector = "lease_based"
|
||||
# Store data in memory, false by default.
|
||||
|
||||
## Store data in memory.
|
||||
use_memory_store = false
|
||||
# Whether to enable greptimedb telemetry, true by default.
|
||||
|
||||
## Whether to enable greptimedb telemetry.
|
||||
enable_telemetry = true
|
||||
# If it's not empty, the metasrv will store all data with this key prefix.
|
||||
|
||||
## If it's not empty, the metasrv will store all data with this key prefix.
|
||||
store_key_prefix = ""
|
||||
|
||||
# Log options, see `standalone.example.toml`
|
||||
# [logging]
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# level = "info"
|
||||
|
||||
# Procedure storage options.
|
||||
## Procedure storage options.
|
||||
[procedure]
|
||||
# Procedure max retry time.
|
||||
|
||||
## Procedure max retry time.
|
||||
max_retry_times = 12
|
||||
# Initial retry delay of procedures, increases exponentially
|
||||
|
||||
## Initial retry delay of procedures, increases exponentially
|
||||
retry_delay = "500ms"
|
||||
|
||||
## Auto split large value
|
||||
## GreptimeDB procedure uses etcd as the default metadata storage backend.
|
||||
## The maximum size of any etcd request is 1.5 MiB.
|
||||
## 1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key)
|
||||
## Comment out `max_metadata_value_size` to disable splitting large values (no limit).
|
||||
max_metadata_value_size = "1500KiB"
|
||||
|
||||
# Failure detectors options.
|
||||
[failure_detector]
|
||||
threshold = 8.0
|
||||
@@ -37,57 +48,96 @@ min_std_deviation = "100ms"
|
||||
acceptable_heartbeat_pause = "3000ms"
|
||||
first_heartbeat_estimate = "1000ms"
|
||||
|
||||
# # Datanode options.
|
||||
# [datanode]
|
||||
# # Datanode client options.
|
||||
# [datanode.client_options]
|
||||
# timeout = "10s"
|
||||
# connect_timeout = "10s"
|
||||
# tcp_nodelay = true
|
||||
## Datanode options.
|
||||
[datanode]
|
||||
## Datanode client options.
|
||||
[datanode.client]
|
||||
timeout = "10s"
|
||||
connect_timeout = "10s"
|
||||
tcp_nodelay = true
|
||||
|
||||
[wal]
|
||||
# Available wal providers:
|
||||
# - "raft_engine" (default)
|
||||
# - "kafka"
|
||||
# - `raft_engine` (default): there is no raft-engine wal config since metasrv is only involved in remote wal currently.
|
||||
# - `kafka`: metasrv **has to be** configured with kafka wal config when using kafka wal provider in datanode.
|
||||
provider = "raft_engine"
|
||||
|
||||
# There're none raft-engine wal config since meta srv only involves in remote wal currently.
|
||||
|
||||
# Kafka wal config.
|
||||
# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default.
|
||||
# broker_endpoints = ["127.0.0.1:9092"]
|
||||
# Number of topics to be created upon start.
|
||||
# num_topics = 64
|
||||
# Topic selector type.
|
||||
# Available selector types:
|
||||
# - "round_robin" (default)
|
||||
# selector_type = "round_robin"
|
||||
# A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
|
||||
# topic_name_prefix = "greptimedb_wal_topic"
|
||||
# Expected number of replicas of each partition.
|
||||
# replication_factor = 1
|
||||
# Above which a topic creation operation will be cancelled.
|
||||
# create_topic_timeout = "30s"
|
||||
# The initial backoff for kafka clients.
|
||||
# backoff_init = "500ms"
|
||||
# The maximum backoff for kafka clients.
|
||||
# backoff_max = "10s"
|
||||
# Exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
# backoff_base = 2
|
||||
# Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
|
||||
# backoff_deadline = "5mins"
|
||||
|
||||
# Metasrv export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# [export_metrics.remote_write]
|
||||
# The url the metrics send to. The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
|
||||
# url = ""
|
||||
# HTTP headers of Prometheus remote-write carry
|
||||
# headers = {}
|
||||
## The broker endpoints of the Kafka cluster.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
## Number of topics to be created upon start.
|
||||
num_topics = 64
|
||||
|
||||
## Topic selector type.
|
||||
## Available selector types:
|
||||
## - `round_robin` (default)
|
||||
selector_type = "round_robin"
|
||||
|
||||
## A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
|
||||
topic_name_prefix = "greptimedb_wal_topic"
|
||||
|
||||
## Expected number of replicas of each partition.
|
||||
replication_factor = 1
|
||||
|
||||
## Above which a topic creation operation will be cancelled.
|
||||
create_topic_timeout = "30s"
|
||||
## The initial backoff for kafka clients.
|
||||
backoff_init = "500ms"
|
||||
|
||||
## The maximum backoff for kafka clients.
|
||||
backoff_max = "10s"
|
||||
|
||||
## Exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
backoff_base = 2
|
||||
|
||||
## Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
|
||||
backoff_deadline = "5mins"
|
||||
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files.
|
||||
dir = "/tmp/greptimedb/logs"
|
||||
|
||||
## The log level. Can be `info`/`debug`/`warn`/`error`.
|
||||
## +toml2docs:none-default
|
||||
level = "info"
|
||||
|
||||
## Enable OTLP tracing.
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
## The percentage of traces that will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## Ratios > 1 are treated as 1. Ratios < 0 are treated as 0.
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The metasrv can export its metrics and send them to a Prometheus compatible service (e.g. `greptimedb` itself) through the remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
## Whether to enable exporting metrics.
|
||||
enable = false
|
||||
|
||||
## The interval of export metrics.
|
||||
write_interval = "30s"
|
||||
|
||||
## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself.
|
||||
[export_metrics.self_import]
|
||||
## +toml2docs:none-default
|
||||
db = "information_schema"
|
||||
|
||||
[export_metrics.remote_write]
|
||||
## The URL the metrics are sent to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
|
||||
url = ""
|
||||
|
||||
## HTTP headers carried by the Prometheus remote-write requests.
|
||||
headers = { }
|
||||
|
||||
@@ -1,286 +1,477 @@
|
||||
# Node running mode, "standalone" or "distributed".
|
||||
## The running mode of the datanode. It can be `standalone` or `distributed`.
|
||||
mode = "standalone"
|
||||
# Whether to enable greptimedb telemetry, true by default.
|
||||
enable_telemetry = true
|
||||
# The default timezone of the server
|
||||
# default_timezone = "UTC"
|
||||
|
||||
# HTTP server options.
|
||||
## Enable telemetry to collect anonymous usage data.
|
||||
enable_telemetry = true
|
||||
|
||||
## The default timezone of the server.
|
||||
## +toml2docs:none-default
|
||||
default_timezone = "UTC"
|
||||
|
||||
## The HTTP server options.
|
||||
[http]
|
||||
# Server address, "127.0.0.1:4000" by default.
|
||||
## The address to bind the HTTP server.
|
||||
addr = "127.0.0.1:4000"
|
||||
# HTTP request timeout, 30s by default.
|
||||
## HTTP request timeout.
|
||||
timeout = "30s"
|
||||
# HTTP request body limit, 64Mb by default.
|
||||
# the following units are supported: B, KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, PiB
|
||||
## HTTP request body limit.
|
||||
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
|
||||
body_limit = "64MB"
|
||||
|
||||
# gRPC server options.
|
||||
## The gRPC server options.
|
||||
[grpc]
|
||||
# Server address, "127.0.0.1:4001" by default.
|
||||
## The address to bind the gRPC server.
|
||||
addr = "127.0.0.1:4001"
|
||||
# The number of server worker threads, 8 by default.
|
||||
## The number of server worker threads.
|
||||
runtime_size = 8
|
||||
|
||||
# MySQL server options.
|
||||
## MySQL server options.
|
||||
[mysql]
|
||||
# Whether to enable
|
||||
## Whether to enable.
|
||||
enable = true
|
||||
# Server address, "127.0.0.1:4002" by default.
|
||||
## The addr to bind the MySQL server.
|
||||
addr = "127.0.0.1:4002"
|
||||
# The number of server worker threads, 2 by default.
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# MySQL server TLS options.
|
||||
[mysql.tls]
|
||||
# TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
|
||||
# - "disable" (default value)
|
||||
# - "prefer"
|
||||
# - "require"
|
||||
# - "verify-ca"
|
||||
# - "verify-full"
|
||||
|
||||
## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
|
||||
## - `disable` (default value)
|
||||
## - `prefer`
|
||||
## - `require`
|
||||
## - `verify-ca`
|
||||
## - `verify-full`
|
||||
mode = "disable"
|
||||
# Certificate file path.
|
||||
|
||||
## Certificate file path.
|
||||
## +toml2docs:none-default
|
||||
cert_path = ""
|
||||
# Private key file path.
|
||||
|
||||
## Private key file path.
|
||||
## +toml2docs:none-default
|
||||
key_path = ""
|
||||
# Watch for Certificate and key file change and auto reload
|
||||
|
||||
## Watch for Certificate and key file change and auto reload
|
||||
watch = false
|
||||
|
||||
# PostgresSQL server options.
|
||||
## PostgreSQL server options.
|
||||
[postgres]
|
||||
# Whether to enable
|
||||
## Whether to enable
|
||||
enable = true
|
||||
# Server address, "127.0.0.1:4003" by default.
|
||||
## The address to bind the PostgreSQL server.
|
||||
addr = "127.0.0.1:4003"
|
||||
# The number of server worker threads, 2 by default.
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# PostgresSQL server TLS options, see `[mysql_options.tls]` section.
|
||||
## PostgreSQL server TLS options, see the `[mysql.tls]` section.
|
||||
[postgres.tls]
|
||||
# TLS mode.
|
||||
## TLS mode.
|
||||
mode = "disable"
|
||||
# certificate file path.
|
||||
|
||||
## Certificate file path.
|
||||
## +toml2docs:none-default
|
||||
cert_path = ""
|
||||
# private key file path.
|
||||
|
||||
## Private key file path.
|
||||
## +toml2docs:none-default
|
||||
key_path = ""
|
||||
# Watch for Certificate and key file change and auto reload
|
||||
|
||||
## Watch for Certificate and key file change and auto reload
|
||||
watch = false
|
||||
|
||||
# OpenTSDB protocol options.
|
||||
## OpenTSDB protocol options.
|
||||
[opentsdb]
|
||||
# Whether to enable
|
||||
## Whether to enable
|
||||
enable = true
|
||||
# OpenTSDB telnet API server address, "127.0.0.1:4242" by default.
|
||||
## OpenTSDB telnet API server address.
|
||||
addr = "127.0.0.1:4242"
|
||||
# The number of server worker threads, 2 by default.
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# InfluxDB protocol options.
|
||||
## InfluxDB protocol options.
|
||||
[influxdb]
|
||||
# Whether to enable InfluxDB protocol in HTTP API, true by default.
|
||||
## Whether to enable InfluxDB protocol in HTTP API.
|
||||
enable = true
|
||||
|
||||
# Prometheus remote storage options
|
||||
## Prometheus remote storage options
|
||||
[prom_store]
|
||||
# Whether to enable Prometheus remote write and read in HTTP API, true by default.
|
||||
## Whether to enable Prometheus remote write and read in HTTP API.
|
||||
enable = true
|
||||
# Whether to store the data from Prometheus remote write in metric engine.
|
||||
# true by default
|
||||
## Whether to store the data from Prometheus remote write in metric engine.
|
||||
with_metric_engine = true
|
||||
|
||||
## The WAL options.
|
||||
[wal]
|
||||
# Available wal providers:
|
||||
# - "raft_engine" (default)
|
||||
# - "kafka"
|
||||
## The provider of the WAL.
|
||||
## - `raft_engine`: the wal is stored in the local file system by raft-engine.
|
||||
## - `kafka`: it's remote wal that data is stored in Kafka.
|
||||
provider = "raft_engine"
|
||||
|
||||
# Raft-engine wal options.
|
||||
# WAL data directory
|
||||
# dir = "/tmp/greptimedb/wal"
|
||||
# WAL file size in bytes.
|
||||
## The directory to store the WAL files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
## +toml2docs:none-default
|
||||
dir = "/tmp/greptimedb/wal"
|
||||
|
||||
## The size of the WAL segment file.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
file_size = "256MB"
|
||||
# WAL purge threshold.
|
||||
|
||||
## The threshold of the WAL size to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_threshold = "4GB"
|
||||
# WAL purge interval in seconds.
|
||||
|
||||
## The interval to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_interval = "10m"
|
||||
# WAL read batch size.
|
||||
|
||||
## The read batch size.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
read_batch_size = 128
|
||||
# Whether to sync log file after every write.
|
||||
|
||||
## Whether to use sync write.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_write = false
|
||||
# Whether to reuse logically truncated log files.
|
||||
|
||||
## Whether to reuse logically truncated log files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
enable_log_recycle = true
|
||||
# Whether to pre-create log files on start up
|
||||
|
||||
## Whether to pre-create log files on start up.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
prefill_log_files = false
|
||||
# Duration for fsyncing log files.
|
||||
sync_period = "1000ms"
|
||||
|
||||
# Kafka wal options.
|
||||
# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default.
|
||||
# broker_endpoints = ["127.0.0.1:9092"]
|
||||
## Duration for fsyncing log files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_period = "10s"
|
||||
|
||||
# Number of topics to be created upon start.
|
||||
# num_topics = 64
|
||||
# Topic selector type.
|
||||
# Available selector types:
|
||||
# - "round_robin" (default)
|
||||
# selector_type = "round_robin"
|
||||
# The prefix of topic name.
|
||||
# topic_name_prefix = "greptimedb_wal_topic"
|
||||
# The number of replicas of each partition.
|
||||
# Warning: the replication factor must be positive and must not be greater than the number of broker endpoints.
|
||||
# replication_factor = 1
|
||||
## The Kafka broker endpoints.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
# The max size of a single producer batch.
|
||||
# Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
# max_batch_size = "1MB"
|
||||
# The linger duration.
|
||||
# linger = "200ms"
|
||||
# The consumer wait timeout.
|
||||
# consumer_wait_timeout = "100ms"
|
||||
# Create topic timeout.
|
||||
# create_topic_timeout = "30s"
|
||||
## The max size of a single producer batch.
|
||||
## Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
max_batch_size = "1MB"
|
||||
|
||||
# The initial backoff delay.
|
||||
# backoff_init = "500ms"
|
||||
# The maximum backoff delay.
|
||||
# backoff_max = "10s"
|
||||
# Exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
# backoff_base = 2
|
||||
# The deadline of retries.
|
||||
# backoff_deadline = "5mins"
|
||||
## The linger duration of a kafka batch producer.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
linger = "200ms"
|
||||
|
||||
# Metadata storage options.
|
||||
## The consumer wait timeout.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
consumer_wait_timeout = "100ms"
|
||||
|
||||
## The initial backoff delay.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_init = "500ms"
|
||||
|
||||
## The maximum backoff delay.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_max = "10s"
|
||||
|
||||
## The exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_base = 2
|
||||
|
||||
## The deadline of retries.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_deadline = "5mins"
|
||||
|
||||
## Metadata storage options.
|
||||
[metadata_store]
|
||||
# Kv file size in bytes.
|
||||
## Kv file size in bytes.
|
||||
file_size = "256MB"
|
||||
# Kv purge threshold.
|
||||
## Kv purge threshold.
|
||||
purge_threshold = "4GB"
|
||||
|
||||
# Procedure storage options.
|
||||
## Procedure storage options.
|
||||
[procedure]
|
||||
# Procedure max retry time.
|
||||
## Procedure max retry time.
|
||||
max_retry_times = 3
|
||||
# Initial retry delay of procedures, increases exponentially
|
||||
## Initial retry delay of procedures, increases exponentially
|
||||
retry_delay = "500ms"
|
||||
|
||||
# Storage options.
|
||||
# Example of using S3 as the storage.
|
||||
# [storage]
|
||||
# type = "S3"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# access_key_id = "test"
|
||||
# secret_access_key = "123456"
|
||||
# endpoint = "https://s3.amazonaws.com"
|
||||
# region = "us-west-2"
|
||||
|
||||
# Example of using Oss as the storage.
|
||||
# [storage]
|
||||
# type = "Oss"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# access_key_id = "test"
|
||||
# access_key_secret = "123456"
|
||||
# endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
|
||||
|
||||
# Example of using Azblob as the storage.
|
||||
# [storage]
|
||||
# type = "Azblob"
|
||||
# container = "greptimedb"
|
||||
# root = "data"
|
||||
# account_name = "test"
|
||||
# account_key = "123456"
|
||||
# endpoint = "https://greptimedb.blob.core.windows.net"
|
||||
# sas_token = ""
|
||||
|
||||
# Example of using Gcs as the storage.
|
||||
# [storage]
|
||||
# type = "Gcs"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# scope = "test"
|
||||
# credential_path = "123456"
|
||||
# endpoint = "https://storage.googleapis.com"
|
||||
|
||||
## The data storage options.
|
||||
[storage]
|
||||
# The working home directory.
|
||||
## The working home directory.
|
||||
data_home = "/tmp/greptimedb/"
|
||||
# Storage type.
|
||||
|
||||
## The storage type used to store the data.
|
||||
## - `File`: the data is stored in the local file system.
|
||||
## - `S3`: the data is stored in the S3 object storage.
|
||||
## - `Gcs`: the data is stored in the Google Cloud Storage.
|
||||
## - `Azblob`: the data is stored in the Azure Blob Storage.
|
||||
## - `Oss`: the data is stored in the Aliyun OSS.
|
||||
type = "File"
|
||||
# TTL for all tables. Disabled by default.
|
||||
# global_ttl = "7d"
|
||||
# Cache configuration for object storage such as 'S3' etc.
|
||||
# cache_path = "/path/local_cache"
|
||||
# The local file cache capacity in bytes.
|
||||
# cache_capacity = "256MB"
|
||||
|
||||
## Cache configuration for object storage such as 'S3' etc.
|
||||
## The local file cache directory.
|
||||
## +toml2docs:none-default
|
||||
cache_path = "/path/local_cache"
|
||||
|
||||
## The local file cache capacity in bytes.
|
||||
## +toml2docs:none-default
|
||||
cache_capacity = "256MB"
|
||||
|
||||
## The S3 bucket name.
|
||||
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
bucket = "greptimedb"
|
||||
|
||||
## The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
|
||||
## **It's only used when the storage type is `S3`, `Oss` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
root = "greptimedb"
|
||||
|
||||
## The access key id of the aws account.
|
||||
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
|
||||
## **It's only used when the storage type is `S3` and `Oss`**.
|
||||
## +toml2docs:none-default
|
||||
access_key_id = "test"
|
||||
|
||||
## The secret access key of the aws account.
|
||||
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
|
||||
## **It's only used when the storage type is `S3`**.
|
||||
## +toml2docs:none-default
|
||||
secret_access_key = "test"
|
||||
|
||||
## The secret access key of the aliyun account.
|
||||
## **It's only used when the storage type is `Oss`**.
|
||||
## +toml2docs:none-default
|
||||
access_key_secret = "test"
|
||||
|
||||
## The account name of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
account_name = "test"
|
||||
|
||||
## The account key of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
account_key = "test"
|
||||
|
||||
## The scope of the google cloud storage.
|
||||
## **It's only used when the storage type is `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
scope = "test"
|
||||
|
||||
## The credential path of the google cloud storage.
|
||||
## **It's only used when the storage type is `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
credential_path = "test"
|
||||
|
||||
## The container of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
container = "greptimedb"
|
||||
|
||||
## The sas token of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
sas_token = ""
|
||||
|
||||
## The endpoint of the S3 service.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
endpoint = "https://s3.amazonaws.com"
|
||||
|
||||
## The region of the S3 service.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
region = "us-west-2"
|
||||
|
||||
# Custom storage options
|
||||
#[[storage.providers]]
|
||||
#type = "S3"
|
||||
#[[storage.providers]]
|
||||
#type = "Gcs"
|
||||
# [[storage.providers]]
|
||||
# type = "S3"
|
||||
# [[storage.providers]]
|
||||
# type = "Gcs"
|
||||
|
||||
# Mito engine options
|
||||
## The region engine options. You can configure multiple region engines.
|
||||
[[region_engine]]
|
||||
|
||||
## The Mito engine options.
|
||||
[region_engine.mito]
|
||||
# Number of region workers
|
||||
|
||||
## Number of region workers.
|
||||
num_workers = 8
|
||||
# Request channel size of each worker
|
||||
|
||||
## Request channel size of each worker.
|
||||
worker_channel_size = 128
|
||||
# Max batch size for a worker to handle requests
|
||||
|
||||
## Max batch size for a worker to handle requests.
|
||||
worker_request_batch_size = 64
|
||||
# Number of meta action updated to trigger a new checkpoint for the manifest
|
||||
|
||||
## Number of meta action updated to trigger a new checkpoint for the manifest.
|
||||
manifest_checkpoint_distance = 10
|
||||
# Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
|
||||
## Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
compress_manifest = false
|
||||
# Max number of running background jobs
|
||||
|
||||
## Max number of running background jobs
|
||||
max_background_jobs = 4
|
||||
# Interval to auto flush a region if it has not flushed yet.
|
||||
|
||||
## Interval to auto flush a region if it has not flushed yet.
|
||||
auto_flush_interval = "1h"
|
||||
# Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
|
||||
|
||||
## Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
|
||||
global_write_buffer_size = "1GB"
|
||||
# Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
|
||||
|
||||
## Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
|
||||
global_write_buffer_reject_size = "2GB"
|
||||
# Cache size for SST metadata. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
|
||||
|
||||
## Cache size for SST metadata. Setting it to 0 to disable the cache.
|
||||
## If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
|
||||
sst_meta_cache_size = "128MB"
|
||||
# Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
|
||||
## Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
|
||||
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
vector_cache_size = "512MB"
|
||||
# Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
|
||||
## Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
|
||||
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
page_cache_size = "512MB"
|
||||
# Buffer size for SST writing.
|
||||
|
||||
## Buffer size for SST writing.
|
||||
sst_write_buffer_size = "8MB"
|
||||
# Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
# - 0: using the default value (1/4 of cpu cores).
|
||||
# - 1: scan in current thread.
|
||||
# - n: scan in parallelism n.
|
||||
|
||||
## Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
## - `0`: using the default value (1/4 of cpu cores).
|
||||
## - `1`: scan in current thread.
|
||||
## - `n`: scan in parallelism n.
|
||||
scan_parallelism = 0
|
||||
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
|
||||
|
||||
## Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
parallel_scan_channel_size = 32
|
||||
# Whether to allow stale WAL entries read during replay.
|
||||
|
||||
## Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
|
||||
## The options for inverted index in Mito engine.
|
||||
[region_engine.mito.inverted_index]
|
||||
# Whether to create the index on flush.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to create the index on flush.
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
create_on_flush = "auto"
|
||||
# Whether to create the index on compaction.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to create the index on compaction.
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
create_on_compaction = "auto"
|
||||
# Whether to apply the index on query
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to apply the index on query
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
apply_on_query = "auto"
|
||||
# Memory threshold for performing an external sort during index creation.
|
||||
# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
|
||||
## Memory threshold for performing an external sort during index creation.
|
||||
## Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
mem_threshold_on_create = "64M"
|
||||
# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
|
||||
## File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
intermediate_path = ""
|
||||
|
||||
[region_engine.mito.memtable]
|
||||
# Memtable type.
|
||||
# - "experimental": experimental memtable
|
||||
# - "time_series": time-series memtable (deprecated)
|
||||
type = "experimental"
|
||||
# The max number of keys in one shard.
|
||||
## Memtable type.
|
||||
## - `time_series`: time-series memtable
|
||||
## - `partition_tree`: partition tree memtable (experimental)
|
||||
type = "time_series"
|
||||
|
||||
## The max number of keys in one shard.
|
||||
## Only available for `partition_tree` memtable.
|
||||
index_max_keys_per_shard = 8192
|
||||
# The max rows of data inside the actively writing buffer in one shard.
|
||||
|
||||
## The max rows of data inside the actively writing buffer in one shard.
|
||||
## Only available for `partition_tree` memtable.
|
||||
data_freeze_threshold = 32768
|
||||
# Max dictionary bytes.
|
||||
|
||||
## Max dictionary bytes.
|
||||
## Only available for `partition_tree` memtable.
|
||||
fork_dictionary_bytes = "1GiB"
|
||||
|
||||
# Log options
|
||||
# [logging]
|
||||
# Specify logs directory.
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# Specify the log level [info | debug | error | warn]
|
||||
# level = "info"
|
||||
# whether enable tracing, default is false
|
||||
# enable_otlp_tracing = false
|
||||
# tracing exporter endpoint with format `ip:port`, we use grpc oltp as exporter, default endpoint is `localhost:4317`
|
||||
# otlp_endpoint = "localhost:4317"
|
||||
# Whether to append logs to stdout. Defaults to true.
|
||||
# append_stdout = true
|
||||
# The percentage of tracing will be sampled and exported. Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1. ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
# [logging.tracing_sample_ratio]
|
||||
# default_ratio = 0.0
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files.
|
||||
dir = "/tmp/greptimedb/logs"
|
||||
|
||||
# Standalone export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# for `standalone`, `self_import` is recommend to collect metrics generated by itself
|
||||
# [export_metrics.self_import]
|
||||
# db = "information_schema"
|
||||
## The log level. Can be `info`/`debug`/`warn`/`error`.
|
||||
## +toml2docs:none-default
|
||||
level = "info"
|
||||
|
||||
## Enable OTLP tracing.
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
## Whether to enable exporting metrics.
|
||||
enable = false
|
||||
|
||||
## The interval of export metrics.
|
||||
write_interval = "30s"
|
||||
|
||||
## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself.
|
||||
[export_metrics.self_import]
|
||||
## +toml2docs:none-default
|
||||
db = "information_schema"
|
||||
|
||||
[export_metrics.remote_write]
|
||||
## The URL that the metrics are sent to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
|
||||
url = ""
|
||||
|
||||
## HTTP headers carried by the Prometheus remote-write requests.
|
||||
headers = { }
|
||||
|
||||
@@ -27,8 +27,8 @@ subgraph Frontend["Frontend"]
|
||||
end
|
||||
end
|
||||
|
||||
MyTable --> MetaSrv
|
||||
MetaSrv --> ETCD
|
||||
MyTable --> Metasrv
|
||||
Metasrv --> ETCD
|
||||
|
||||
MyTable-->TableEngine0
|
||||
MyTable-->TableEngine1
|
||||
@@ -95,8 +95,8 @@ subgraph Frontend["Frontend"]
|
||||
end
|
||||
end
|
||||
|
||||
MyTable --> MetaSrv
|
||||
MetaSrv --> ETCD
|
||||
MyTable --> Metasrv
|
||||
Metasrv --> ETCD
|
||||
|
||||
MyTable-->RegionEngine
|
||||
MyTable-->RegionEngine1
|
||||
|
||||
@@ -36,7 +36,7 @@ Hence, we choose the third option, and use a simple logical plan that's anagonis
|
||||
## Deploy mode and protocol
|
||||
- Greptime Flow is an independent streaming compute component. It can be used either within a standalone node or as a dedicated node at the same level as frontend in distributed mode.
|
||||
- It accepts insert request Rows, which is used between frontend and datanode.
|
||||
- New flow job is submitted in the format of modified SQL query like snowflake do, like: `CREATE TASK avg_over_5m WINDOW_SIZE = "5m" AS SELECT avg(value) FROM table WHERE time > now() - 5m GROUP BY time(1m)`. Flow job then got stored in MetaSrv.
|
||||
- A new flow job is submitted as a modified SQL query, similar to what Snowflake does, e.g.: `CREATE TASK avg_over_5m WINDOW_SIZE = "5m" AS SELECT avg(value) FROM table WHERE time > now() - 5m GROUP BY time(1m)`. The flow job is then stored in Metasrv.
|
||||
- It also persists results in the format of Rows to frontend.
|
||||
- The query plan uses Substrait as codec format. It's the same with GreptimeDB's query engine.
|
||||
- Greptime Flow needs a WAL for recovering. It's possible to reuse datanode's.
|
||||
|
||||
46
docs/style-guide.md
Normal file
46
docs/style-guide.md
Normal file
@@ -0,0 +1,46 @@
|
||||
# GreptimeDB Style Guide
|
||||
|
||||
This style guide is intended to help contributors to GreptimeDB write code that is consistent with the rest of the codebase. It is a living document and will be updated as the codebase evolves.
|
||||
|
||||
It's mainly a complement to the [Rust Style Guide](https://pingcap.github.io/style-guide/rust/).
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- Formatting
|
||||
- Modules
|
||||
- Comments
|
||||
|
||||
## Formatting
|
||||
|
||||
- Place all `mod` declarations before any `use`.
|
||||
- Use `unimplemented!()` instead of `todo!()` for things that aren't likely to be implemented.
|
||||
- Add an empty line before and after declaration blocks.
|
||||
- Place comment before attributes (`#[]`) and derive (`#[derive]`).
|
||||
|
||||
## Modules
|
||||
|
||||
- Use a file with the same name as the module instead of `mod.rs` to define a module. E.g.:
|
||||
|
||||
```
|
||||
.
|
||||
├── cache
|
||||
│ ├── cache_size.rs
|
||||
│ └── write_cache.rs
|
||||
└── cache.rs
|
||||
```
|
||||
|
||||
## Comments
|
||||
|
||||
- Add comments for public functions and structs.
|
||||
- Prefer documentation comments (`///`) over normal comments (`//`) for structs, fields, functions, etc.
|
||||
- Add a link (`[]`) to any struct, method, or other item you reference, and make sure that the link works.
|
||||
|
||||
## Error handling
|
||||
|
||||
- Define a custom error type for the module if needed.
|
||||
- Prefer `with_context()` over `context()` when allocation is needed to construct an error.
|
||||
- Use `error!()` or `warn!()` macros in the `common_telemetry` crate to log errors. E.g.:
|
||||
|
||||
```rust
|
||||
error!(e; "Failed to do something");
|
||||
```
|
||||
@@ -19,6 +19,12 @@ includes = [
|
||||
"*.py",
|
||||
]
|
||||
|
||||
excludes = [
|
||||
# copied sources
|
||||
"src/common/base/src/readable_size.rs",
|
||||
"src/servers/src/repeated_field.rs",
|
||||
]
|
||||
|
||||
[properties]
|
||||
inceptionYear = 2023
|
||||
copyrightOwner = "Greptime Team"
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
[toolchain]
|
||||
channel = "nightly-2023-12-19"
|
||||
channel = "nightly-2024-04-18"
|
||||
|
||||
@@ -27,7 +27,7 @@ function retry_fetch() {
|
||||
echo "Failed to download $url"
|
||||
echo "You may try to set http_proxy and https_proxy environment variables."
|
||||
if [[ -z "$GITHUB_PROXY_URL" ]]; then
|
||||
echo "You may try to set GITHUB_PROXY_URL=http://mirror.ghproxy.com/"
|
||||
echo "You may try to set GITHUB_PROXY_URL=http://mirror.ghproxy.com/https://github.com/"
|
||||
fi
|
||||
exit 1
|
||||
}
|
||||
@@ -39,7 +39,7 @@ function retry_fetch() {
|
||||
retry_fetch "${GITHUB_URL}/GreptimeTeam/dashboard/releases/download/${RELEASE_VERSION}/sha256.txt" sha256.txt
|
||||
|
||||
# Download the tar file containing the built dashboard assets.
|
||||
retry_fetch "${GITHUB_URL}/GreptimeTeam/dashboard/releases/download/$RELEASE_VERSION/build.tar.gz" build.tar.gz
|
||||
retry_fetch "${GITHUB_URL}/GreptimeTeam/dashboard/releases/download/${RELEASE_VERSION}/build.tar.gz" build.tar.gz
|
||||
|
||||
# Verify the checksums match; exit if they don't.
|
||||
case "$(uname -s)" in
|
||||
|
||||
@@ -18,7 +18,6 @@ greptime-proto.workspace = true
|
||||
paste = "1.0"
|
||||
prost.workspace = true
|
||||
snafu.workspace = true
|
||||
tonic.workspace = true
|
||||
|
||||
[build-dependencies]
|
||||
tonic-build = "0.9"
|
||||
|
||||
@@ -707,7 +707,6 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
|
||||
}
|
||||
|
||||
pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
|
||||
// TODO(fys): use macros to optimize code
|
||||
match data_type {
|
||||
ConcreteDataType::Int64(_) => values
|
||||
.i64_values
|
||||
|
||||
@@ -21,6 +21,7 @@ pub mod prom_store {
|
||||
}
|
||||
}
|
||||
|
||||
pub mod region;
|
||||
pub mod v1;
|
||||
|
||||
pub use greptime_proto;
|
||||
|
||||
42
src/api/src/region.rs
Normal file
42
src/api/src/region.rs
Normal file
@@ -0,0 +1,42 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use common_base::AffectedRows;
|
||||
use greptime_proto::v1::region::RegionResponse as RegionResponseV1;
|
||||
|
||||
/// This result struct is derived from [RegionResponseV1]
|
||||
#[derive(Debug)]
|
||||
pub struct RegionResponse {
|
||||
pub affected_rows: AffectedRows,
|
||||
pub extension: HashMap<String, Vec<u8>>,
|
||||
}
|
||||
|
||||
impl RegionResponse {
|
||||
pub fn from_region_response(region_response: RegionResponseV1) -> Self {
|
||||
Self {
|
||||
affected_rows: region_response.affected_rows as _,
|
||||
extension: region_response.extension,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates one response without extension
|
||||
pub fn new(affected_rows: AffectedRows) -> Self {
|
||||
Self {
|
||||
affected_rows,
|
||||
extension: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -16,8 +16,9 @@ api.workspace = true
|
||||
async-trait.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
digest = "0.10"
|
||||
hex = { version = "0.4" }
|
||||
notify.workspace = true
|
||||
secrecy = { version = "0.8", features = ["serde", "alloc"] }
|
||||
sha1 = "0.10"
|
||||
snafu.workspace = true
|
||||
|
||||
@@ -22,6 +22,9 @@ use snafu::{ensure, OptionExt};
|
||||
use crate::error::{IllegalParamSnafu, InvalidConfigSnafu, Result, UserPasswordMismatchSnafu};
|
||||
use crate::user_info::DefaultUserInfo;
|
||||
use crate::user_provider::static_user_provider::{StaticUserProvider, STATIC_USER_PROVIDER};
|
||||
use crate::user_provider::watch_file_user_provider::{
|
||||
WatchFileUserProvider, WATCH_FILE_USER_PROVIDER,
|
||||
};
|
||||
use crate::{UserInfoRef, UserProviderRef};
|
||||
|
||||
pub(crate) const DEFAULT_USERNAME: &str = "greptime";
|
||||
@@ -40,9 +43,12 @@ pub fn user_provider_from_option(opt: &String) -> Result<UserProviderRef> {
|
||||
match name {
|
||||
STATIC_USER_PROVIDER => {
|
||||
let provider =
|
||||
StaticUserProvider::try_from(content).map(|p| Arc::new(p) as UserProviderRef)?;
|
||||
StaticUserProvider::new(content).map(|p| Arc::new(p) as UserProviderRef)?;
|
||||
Ok(provider)
|
||||
}
|
||||
WATCH_FILE_USER_PROVIDER => {
|
||||
WatchFileUserProvider::new(content).map(|p| Arc::new(p) as UserProviderRef)
|
||||
}
|
||||
_ => InvalidConfigSnafu {
|
||||
value: name.to_string(),
|
||||
msg: "Invalid UserProviderOption",
|
||||
|
||||
@@ -64,6 +64,13 @@ pub enum Error {
|
||||
username: String,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to initialize a watcher for file {}", path))]
|
||||
FileWatch {
|
||||
path: String,
|
||||
#[snafu(source)]
|
||||
error: notify::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("User is not authorized to perform this action"))]
|
||||
PermissionDenied { location: Location },
|
||||
}
|
||||
@@ -73,6 +80,7 @@ impl ErrorExt for Error {
|
||||
match self {
|
||||
Error::InvalidConfig { .. } => StatusCode::InvalidArguments,
|
||||
Error::IllegalParam { .. } => StatusCode::InvalidArguments,
|
||||
Error::FileWatch { .. } => StatusCode::InvalidArguments,
|
||||
Error::InternalState { .. } => StatusCode::Unexpected,
|
||||
Error::Io { .. } => StatusCode::Internal,
|
||||
Error::AuthBackend { .. } => StatusCode::Internal,
|
||||
|
||||
@@ -45,9 +45,9 @@ impl Default for MockUserProvider {
|
||||
|
||||
impl MockUserProvider {
|
||||
pub fn set_authorization_info(&mut self, info: DatabaseAuthInfo) {
|
||||
self.catalog = info.catalog.to_owned();
|
||||
self.schema = info.schema.to_owned();
|
||||
self.username = info.username.to_owned();
|
||||
info.catalog.clone_into(&mut self.catalog);
|
||||
info.schema.clone_into(&mut self.schema);
|
||||
info.username.clone_into(&mut self.username);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,10 +13,24 @@
|
||||
// limitations under the License.
|
||||
|
||||
pub(crate) mod static_user_provider;
|
||||
pub(crate) mod watch_file_user_provider;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::BufRead;
|
||||
use std::path::Path;
|
||||
|
||||
use secrecy::ExposeSecret;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
use crate::common::{Identity, Password};
|
||||
use crate::error::Result;
|
||||
use crate::UserInfoRef;
|
||||
use crate::error::{
|
||||
IllegalParamSnafu, InvalidConfigSnafu, IoSnafu, Result, UnsupportedPasswordTypeSnafu,
|
||||
UserNotFoundSnafu, UserPasswordMismatchSnafu,
|
||||
};
|
||||
use crate::user_info::DefaultUserInfo;
|
||||
use crate::{auth_mysql, UserInfoRef};
|
||||
|
||||
#[async_trait::async_trait]
|
||||
pub trait UserProvider: Send + Sync {
|
||||
@@ -44,3 +58,88 @@ pub trait UserProvider: Send + Sync {
|
||||
Ok(user_info)
|
||||
}
|
||||
}
|
||||
|
||||
fn load_credential_from_file(filepath: &str) -> Result<Option<HashMap<String, Vec<u8>>>> {
|
||||
// check valid path
|
||||
let path = Path::new(filepath);
|
||||
if !path.exists() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
ensure!(
|
||||
path.is_file(),
|
||||
InvalidConfigSnafu {
|
||||
value: filepath,
|
||||
msg: "UserProvider file must be a file",
|
||||
}
|
||||
);
|
||||
let file = File::open(path).context(IoSnafu)?;
|
||||
let credential = io::BufReader::new(file)
|
||||
.lines()
|
||||
.map_while(std::result::Result::ok)
|
||||
.filter_map(|line| {
|
||||
if let Some((k, v)) = line.split_once('=') {
|
||||
Some((k.to_string(), v.as_bytes().to_vec()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<HashMap<String, Vec<u8>>>();
|
||||
|
||||
ensure!(
|
||||
!credential.is_empty(),
|
||||
InvalidConfigSnafu {
|
||||
value: filepath,
|
||||
msg: "UserProvider's file must contains at least one valid credential",
|
||||
}
|
||||
);
|
||||
|
||||
Ok(Some(credential))
|
||||
}
|
||||
|
||||
fn authenticate_with_credential(
|
||||
users: &HashMap<String, Vec<u8>>,
|
||||
input_id: Identity<'_>,
|
||||
input_pwd: Password<'_>,
|
||||
) -> Result<UserInfoRef> {
|
||||
match input_id {
|
||||
Identity::UserId(username, _) => {
|
||||
ensure!(
|
||||
!username.is_empty(),
|
||||
IllegalParamSnafu {
|
||||
msg: "blank username"
|
||||
}
|
||||
);
|
||||
let save_pwd = users.get(username).context(UserNotFoundSnafu {
|
||||
username: username.to_string(),
|
||||
})?;
|
||||
|
||||
match input_pwd {
|
||||
Password::PlainText(pwd) => {
|
||||
ensure!(
|
||||
!pwd.expose_secret().is_empty(),
|
||||
IllegalParamSnafu {
|
||||
msg: "blank password"
|
||||
}
|
||||
);
|
||||
if save_pwd == pwd.expose_secret().as_bytes() {
|
||||
Ok(DefaultUserInfo::with_name(username))
|
||||
} else {
|
||||
UserPasswordMismatchSnafu {
|
||||
username: username.to_string(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
Password::MysqlNativePassword(auth_data, salt) => {
|
||||
auth_mysql(auth_data, salt, username, save_pwd)
|
||||
.map(|_| DefaultUserInfo::with_name(username))
|
||||
}
|
||||
Password::PgMD5(_, _) => UnsupportedPasswordTypeSnafu {
|
||||
password_type: "pg_md5",
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,60 +13,34 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::BufRead;
|
||||
use std::path::Path;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use secrecy::ExposeSecret;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::error::{
|
||||
Error, IllegalParamSnafu, InvalidConfigSnafu, IoSnafu, Result, UnsupportedPasswordTypeSnafu,
|
||||
UserNotFoundSnafu, UserPasswordMismatchSnafu,
|
||||
};
|
||||
use crate::user_info::DefaultUserInfo;
|
||||
use crate::{auth_mysql, Identity, Password, UserInfoRef, UserProvider};
|
||||
use crate::error::{InvalidConfigSnafu, Result};
|
||||
use crate::user_provider::{authenticate_with_credential, load_credential_from_file};
|
||||
use crate::{Identity, Password, UserInfoRef, UserProvider};
|
||||
|
||||
pub(crate) const STATIC_USER_PROVIDER: &str = "static_user_provider";
|
||||
|
||||
impl TryFrom<&str> for StaticUserProvider {
|
||||
type Error = Error;
|
||||
pub(crate) struct StaticUserProvider {
|
||||
users: HashMap<String, Vec<u8>>,
|
||||
}
|
||||
|
||||
fn try_from(value: &str) -> Result<Self> {
|
||||
impl StaticUserProvider {
|
||||
pub(crate) fn new(value: &str) -> Result<Self> {
|
||||
let (mode, content) = value.split_once(':').context(InvalidConfigSnafu {
|
||||
value: value.to_string(),
|
||||
msg: "StaticUserProviderOption must be in format `<option>:<value>`",
|
||||
})?;
|
||||
return match mode {
|
||||
"file" => {
|
||||
// check valid path
|
||||
let path = Path::new(content);
|
||||
ensure!(path.exists() && path.is_file(), InvalidConfigSnafu {
|
||||
value: content.to_string(),
|
||||
msg: "StaticUserProviderOption file must be a valid file path",
|
||||
});
|
||||
|
||||
let file = File::open(path).context(IoSnafu)?;
|
||||
let credential = io::BufReader::new(file)
|
||||
.lines()
|
||||
.map_while(std::result::Result::ok)
|
||||
.filter_map(|line| {
|
||||
if let Some((k, v)) = line.split_once('=') {
|
||||
Some((k.to_string(), v.as_bytes().to_vec()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<HashMap<String, Vec<u8>>>();
|
||||
|
||||
ensure!(!credential.is_empty(), InvalidConfigSnafu {
|
||||
value: content.to_string(),
|
||||
msg: "StaticUserProviderOption file must contains at least one valid credential",
|
||||
});
|
||||
|
||||
Ok(StaticUserProvider { users: credential, })
|
||||
let users = load_credential_from_file(content)?
|
||||
.context(InvalidConfigSnafu {
|
||||
value: content.to_string(),
|
||||
msg: "StaticFileUserProvider must be a valid file path",
|
||||
})?;
|
||||
Ok(StaticUserProvider { users })
|
||||
}
|
||||
"cmd" => content
|
||||
.split(',')
|
||||
@@ -83,66 +57,19 @@ impl TryFrom<&str> for StaticUserProvider {
|
||||
value: mode.to_string(),
|
||||
msg: "StaticUserProviderOption must be in format `file:<path>` or `cmd:<values>`",
|
||||
}
|
||||
.fail(),
|
||||
.fail(),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct StaticUserProvider {
|
||||
users: HashMap<String, Vec<u8>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl UserProvider for StaticUserProvider {
|
||||
fn name(&self) -> &str {
|
||||
STATIC_USER_PROVIDER
|
||||
}
|
||||
|
||||
async fn authenticate(
|
||||
&self,
|
||||
input_id: Identity<'_>,
|
||||
input_pwd: Password<'_>,
|
||||
) -> Result<UserInfoRef> {
|
||||
match input_id {
|
||||
Identity::UserId(username, _) => {
|
||||
ensure!(
|
||||
!username.is_empty(),
|
||||
IllegalParamSnafu {
|
||||
msg: "blank username"
|
||||
}
|
||||
);
|
||||
let save_pwd = self.users.get(username).context(UserNotFoundSnafu {
|
||||
username: username.to_string(),
|
||||
})?;
|
||||
|
||||
match input_pwd {
|
||||
Password::PlainText(pwd) => {
|
||||
ensure!(
|
||||
!pwd.expose_secret().is_empty(),
|
||||
IllegalParamSnafu {
|
||||
msg: "blank password"
|
||||
}
|
||||
);
|
||||
return if save_pwd == pwd.expose_secret().as_bytes() {
|
||||
Ok(DefaultUserInfo::with_name(username))
|
||||
} else {
|
||||
UserPasswordMismatchSnafu {
|
||||
username: username.to_string(),
|
||||
}
|
||||
.fail()
|
||||
};
|
||||
}
|
||||
Password::MysqlNativePassword(auth_data, salt) => {
|
||||
auth_mysql(auth_data, salt, username, save_pwd)
|
||||
.map(|_| DefaultUserInfo::with_name(username))
|
||||
}
|
||||
Password::PgMD5(_, _) => UnsupportedPasswordTypeSnafu {
|
||||
password_type: "pg_md5",
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
async fn authenticate(&self, id: Identity<'_>, pwd: Password<'_>) -> Result<UserInfoRef> {
|
||||
authenticate_with_credential(&self.users, id, pwd)
|
||||
}
|
||||
|
||||
async fn authorize(
|
||||
@@ -181,7 +108,7 @@ pub mod test {
|
||||
#[tokio::test]
|
||||
async fn test_authorize() {
|
||||
let user_info = DefaultUserInfo::with_name("root");
|
||||
let provider = StaticUserProvider::try_from("cmd:root=123456,admin=654321").unwrap();
|
||||
let provider = StaticUserProvider::new("cmd:root=123456,admin=654321").unwrap();
|
||||
provider
|
||||
.authorize("catalog", "schema", &user_info)
|
||||
.await
|
||||
@@ -190,7 +117,7 @@ pub mod test {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_inline_provider() {
|
||||
let provider = StaticUserProvider::try_from("cmd:root=123456,admin=654321").unwrap();
|
||||
let provider = StaticUserProvider::new("cmd:root=123456,admin=654321").unwrap();
|
||||
test_authenticate(&provider, "root", "123456").await;
|
||||
test_authenticate(&provider, "admin", "654321").await;
|
||||
}
|
||||
@@ -214,7 +141,7 @@ admin=654321",
|
||||
}
|
||||
|
||||
let param = format!("file:{file_path}");
|
||||
let provider = StaticUserProvider::try_from(param.as_str()).unwrap();
|
||||
let provider = StaticUserProvider::new(param.as_str()).unwrap();
|
||||
test_authenticate(&provider, "root", "123456").await;
|
||||
test_authenticate(&provider, "admin", "654321").await;
|
||||
}
|
||||
|
||||
215
src/auth/src/user_provider/watch_file_user_provider.rs
Normal file
215
src/auth/src/user_provider/watch_file_user_provider.rs
Normal file
@@ -0,0 +1,215 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::sync::mpsc::channel;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_telemetry::{info, warn};
|
||||
use notify::{EventKind, RecursiveMode, Watcher};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::error::{FileWatchSnafu, InvalidConfigSnafu, Result};
|
||||
use crate::user_info::DefaultUserInfo;
|
||||
use crate::user_provider::{authenticate_with_credential, load_credential_from_file};
|
||||
use crate::{Identity, Password, UserInfoRef, UserProvider};
|
||||
|
||||
pub(crate) const WATCH_FILE_USER_PROVIDER: &str = "watch_file_user_provider";
|
||||
|
||||
type WatchedCredentialRef = Arc<Mutex<Option<HashMap<String, Vec<u8>>>>>;
|
||||
|
||||
/// A user provider that reads user credential from a file and watches the file for changes.
|
||||
///
|
||||
/// Empty file is invalid; but file not exist means every user can be authenticated.
|
||||
pub(crate) struct WatchFileUserProvider {
|
||||
users: WatchedCredentialRef,
|
||||
}
|
||||
|
||||
impl WatchFileUserProvider {
|
||||
pub fn new(filepath: &str) -> Result<Self> {
|
||||
let credential = load_credential_from_file(filepath)?;
|
||||
let users = Arc::new(Mutex::new(credential));
|
||||
let this = WatchFileUserProvider {
|
||||
users: users.clone(),
|
||||
};
|
||||
|
||||
let (tx, rx) = channel::<notify::Result<notify::Event>>();
|
||||
let mut debouncer =
|
||||
notify::recommended_watcher(tx).context(FileWatchSnafu { path: "<none>" })?;
|
||||
let mut dir = Path::new(filepath).to_path_buf();
|
||||
ensure!(
|
||||
dir.pop(),
|
||||
InvalidConfigSnafu {
|
||||
value: filepath,
|
||||
msg: "UserProvider path must be a file path",
|
||||
}
|
||||
);
|
||||
debouncer
|
||||
.watch(&dir, RecursiveMode::NonRecursive)
|
||||
.context(FileWatchSnafu { path: filepath })?;
|
||||
|
||||
let filepath = filepath.to_string();
|
||||
std::thread::spawn(move || {
|
||||
let filename = Path::new(&filepath).file_name();
|
||||
let _hold = debouncer;
|
||||
while let Ok(res) = rx.recv() {
|
||||
if let Ok(event) = res {
|
||||
let is_this_file = event.paths.iter().any(|p| p.file_name() == filename);
|
||||
let is_relevant_event = matches!(
|
||||
event.kind,
|
||||
EventKind::Modify(_) | EventKind::Create(_) | EventKind::Remove(_)
|
||||
);
|
||||
if is_this_file && is_relevant_event {
|
||||
info!(?event.kind, "User provider file {} changed", &filepath);
|
||||
match load_credential_from_file(&filepath) {
|
||||
Ok(credential) => {
|
||||
let mut users =
|
||||
users.lock().expect("users credential must be valid");
|
||||
#[cfg(not(test))]
|
||||
info!("User provider file {filepath} reloaded");
|
||||
#[cfg(test)]
|
||||
info!("User provider file {filepath} reloaded: {credential:?}");
|
||||
*users = credential;
|
||||
}
|
||||
Err(err) => {
|
||||
warn!(
|
||||
?err,
|
||||
"Fail to load credential from file {filepath}; keep the old one",
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(this)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl UserProvider for WatchFileUserProvider {
|
||||
fn name(&self) -> &str {
|
||||
WATCH_FILE_USER_PROVIDER
|
||||
}
|
||||
|
||||
async fn authenticate(&self, id: Identity<'_>, password: Password<'_>) -> Result<UserInfoRef> {
|
||||
let users = self.users.lock().expect("users credential must be valid");
|
||||
if let Some(users) = users.as_ref() {
|
||||
authenticate_with_credential(users, id, password)
|
||||
} else {
|
||||
match id {
|
||||
Identity::UserId(id, _) => {
|
||||
warn!(id, "User provider file not exist, allow all users");
|
||||
Ok(DefaultUserInfo::with_name(id))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn authorize(&self, _: &str, _: &str, _: &UserInfoRef) -> Result<()> {
|
||||
// default allow all
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use tokio::time::sleep;
|
||||
|
||||
use crate::user_provider::watch_file_user_provider::WatchFileUserProvider;
|
||||
use crate::user_provider::{Identity, Password};
|
||||
use crate::UserProvider;
|
||||
|
||||
async fn test_authenticate(
|
||||
provider: &dyn UserProvider,
|
||||
username: &str,
|
||||
password: &str,
|
||||
ok: bool,
|
||||
timeout: Option<Duration>,
|
||||
) {
|
||||
if let Some(timeout) = timeout {
|
||||
let deadline = Instant::now().checked_add(timeout).unwrap();
|
||||
loop {
|
||||
let re = provider
|
||||
.authenticate(
|
||||
Identity::UserId(username, None),
|
||||
Password::PlainText(password.to_string().into()),
|
||||
)
|
||||
.await;
|
||||
if re.is_ok() == ok {
|
||||
break;
|
||||
} else if Instant::now() < deadline {
|
||||
sleep(Duration::from_millis(100)).await;
|
||||
} else {
|
||||
panic!("timeout (username: {username}, password: {password}, expected: {ok})");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let re = provider
|
||||
.authenticate(
|
||||
Identity::UserId(username, None),
|
||||
Password::PlainText(password.to_string().into()),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(
|
||||
re.is_ok(),
|
||||
ok,
|
||||
"username: {}, password: {}",
|
||||
username,
|
||||
password
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_file_provider() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let dir = create_temp_dir("test_file_provider");
|
||||
let file_path = format!("{}/test_file_provider", dir.path().to_str().unwrap());
|
||||
|
||||
// write a tmp file
|
||||
assert!(std::fs::write(&file_path, "root=123456\nadmin=654321\n").is_ok());
|
||||
let provider = WatchFileUserProvider::new(file_path.as_str()).unwrap();
|
||||
let timeout = Duration::from_secs(60);
|
||||
|
||||
test_authenticate(&provider, "root", "123456", true, None).await;
|
||||
test_authenticate(&provider, "admin", "654321", true, None).await;
|
||||
test_authenticate(&provider, "root", "654321", false, None).await;
|
||||
|
||||
// update the tmp file
|
||||
assert!(std::fs::write(&file_path, "root=654321\n").is_ok());
|
||||
test_authenticate(&provider, "root", "123456", false, Some(timeout)).await;
|
||||
test_authenticate(&provider, "root", "654321", true, Some(timeout)).await;
|
||||
test_authenticate(&provider, "admin", "654321", false, Some(timeout)).await;
|
||||
|
||||
// remove the tmp file
|
||||
assert!(std::fs::remove_file(&file_path).is_ok());
|
||||
test_authenticate(&provider, "root", "123456", true, Some(timeout)).await;
|
||||
test_authenticate(&provider, "root", "654321", true, Some(timeout)).await;
|
||||
test_authenticate(&provider, "admin", "654321", true, Some(timeout)).await;
|
||||
|
||||
// recreate the tmp file
|
||||
assert!(std::fs::write(&file_path, "root=123456\n").is_ok());
|
||||
test_authenticate(&provider, "root", "123456", true, Some(timeout)).await;
|
||||
test_authenticate(&provider, "root", "654321", false, Some(timeout)).await;
|
||||
test_authenticate(&provider, "admin", "654321", false, Some(timeout)).await;
|
||||
}
|
||||
}
|
||||
@@ -12,19 +12,16 @@ workspace = true
|
||||
|
||||
[dependencies]
|
||||
api.workspace = true
|
||||
arc-swap = "1.0"
|
||||
arrow.workspace = true
|
||||
arrow-schema.workspace = true
|
||||
async-stream.workspace = true
|
||||
async-trait = "0.1"
|
||||
common-catalog.workspace = true
|
||||
common-error.workspace = true
|
||||
common-grpc.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-query.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
common-runtime.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-time.workspace = true
|
||||
common-version.workspace = true
|
||||
@@ -37,15 +34,13 @@ itertools.workspace = true
|
||||
lazy_static.workspace = true
|
||||
meta-client.workspace = true
|
||||
moka = { workspace = true, features = ["future", "sync"] }
|
||||
parking_lot = "0.12"
|
||||
partition.workspace = true
|
||||
paste = "1.0"
|
||||
prometheus.workspace = true
|
||||
regex.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
session.workspace = true
|
||||
snafu.workspace = true
|
||||
sql.workspace = true
|
||||
store-api.workspace = true
|
||||
table.workspace = true
|
||||
tokio.workspace = true
|
||||
|
||||
@@ -216,7 +216,7 @@ pub enum Error {
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to perform metasrv operation"))]
|
||||
MetaSrv {
|
||||
Metasrv {
|
||||
location: Location,
|
||||
source: meta_client::error::Error,
|
||||
},
|
||||
@@ -304,7 +304,7 @@ impl ErrorExt for Error {
|
||||
| Error::CreateTable { source, .. }
|
||||
| Error::TableSchemaMismatch { source, .. } => source.status_code(),
|
||||
|
||||
Error::MetaSrv { source, .. } => source.status_code(),
|
||||
Error::Metasrv { source, .. } => source.status_code(),
|
||||
Error::SystemCatalogTableScan { source, .. } => source.status_code(),
|
||||
Error::SystemCatalogTableScanExec { source, .. } => source.status_code(),
|
||||
Error::InvalidTableInfoInCatalog { source, .. } => source.status_code(),
|
||||
|
||||
@@ -12,14 +12,15 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod columns;
|
||||
mod key_column_usage;
|
||||
pub mod columns;
|
||||
pub mod key_column_usage;
|
||||
mod memory_table;
|
||||
mod partitions;
|
||||
mod predicate;
|
||||
mod region_peers;
|
||||
mod runtime_metrics;
|
||||
pub mod schemata;
|
||||
mod table_constraints;
|
||||
mod table_names;
|
||||
pub mod tables;
|
||||
|
||||
@@ -41,8 +42,7 @@ use table::error::{SchemaConversionSnafu, TablesRecordBatchSnafu};
|
||||
use table::metadata::{
|
||||
FilterPushDownType, TableInfoBuilder, TableInfoRef, TableMetaBuilder, TableType,
|
||||
};
|
||||
use table::thin_table::{ThinTable, ThinTableAdapter};
|
||||
use table::TableRef;
|
||||
use table::{Table, TableRef};
|
||||
pub use table_names::*;
|
||||
|
||||
use self::columns::InformationSchemaColumns;
|
||||
@@ -53,6 +53,7 @@ use crate::information_schema::partitions::InformationSchemaPartitions;
|
||||
use crate::information_schema::region_peers::InformationSchemaRegionPeers;
|
||||
use crate::information_schema::runtime_metrics::InformationSchemaMetrics;
|
||||
use crate::information_schema::schemata::InformationSchemaSchemata;
|
||||
use crate::information_schema::table_constraints::InformationSchemaTableConstraints;
|
||||
use crate::information_schema::tables::InformationSchemaTables;
|
||||
use crate::CatalogManager;
|
||||
|
||||
@@ -174,6 +175,10 @@ impl InformationSchemaProvider {
|
||||
KEY_COLUMN_USAGE.to_string(),
|
||||
self.build_table(KEY_COLUMN_USAGE).unwrap(),
|
||||
);
|
||||
tables.insert(
|
||||
TABLE_CONSTRAINTS.to_string(),
|
||||
self.build_table(TABLE_CONSTRAINTS).unwrap(),
|
||||
);
|
||||
|
||||
// Add memory tables
|
||||
for name in MEMORY_TABLES.iter() {
|
||||
@@ -187,10 +192,9 @@ impl InformationSchemaProvider {
|
||||
self.information_table(name).map(|table| {
|
||||
let table_info = Self::table_info(self.catalog_name.clone(), &table);
|
||||
let filter_pushdown = FilterPushDownType::Inexact;
|
||||
let thin_table = ThinTable::new(table_info, filter_pushdown);
|
||||
|
||||
let data_source = Arc::new(InformationTableDataSource::new(table));
|
||||
Arc::new(ThinTableAdapter::new(thin_table, data_source)) as _
|
||||
let table = Table::new(table_info, filter_pushdown, data_source);
|
||||
Arc::new(table)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -243,6 +247,10 @@ impl InformationSchemaProvider {
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)) as _),
|
||||
TABLE_CONSTRAINTS => Some(Arc::new(InformationSchemaTableConstraints::new(
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)) as _),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,13 +26,16 @@ use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, DataType};
|
||||
use datatypes::prelude::{ConcreteDataType, DataType, MutableVector};
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{StringVectorBuilder, VectorRef};
|
||||
use datatypes::vectors::{
|
||||
ConstantVector, Int64Vector, Int64VectorBuilder, StringVector, StringVectorBuilder, VectorRef,
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use sql::statements;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::{InformationTable, COLUMNS};
|
||||
@@ -48,18 +51,42 @@ pub(super) struct InformationSchemaColumns {
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
}
|
||||
|
||||
const TABLE_CATALOG: &str = "table_catalog";
|
||||
const TABLE_SCHEMA: &str = "table_schema";
|
||||
const TABLE_NAME: &str = "table_name";
|
||||
const COLUMN_NAME: &str = "column_name";
|
||||
const DATA_TYPE: &str = "data_type";
|
||||
const SEMANTIC_TYPE: &str = "semantic_type";
|
||||
const COLUMN_DEFAULT: &str = "column_default";
|
||||
const IS_NULLABLE: &str = "is_nullable";
|
||||
pub const TABLE_CATALOG: &str = "table_catalog";
|
||||
pub const TABLE_SCHEMA: &str = "table_schema";
|
||||
pub const TABLE_NAME: &str = "table_name";
|
||||
pub const COLUMN_NAME: &str = "column_name";
|
||||
const ORDINAL_POSITION: &str = "ordinal_position";
|
||||
const CHARACTER_MAXIMUM_LENGTH: &str = "character_maximum_length";
|
||||
const CHARACTER_OCTET_LENGTH: &str = "character_octet_length";
|
||||
const NUMERIC_PRECISION: &str = "numeric_precision";
|
||||
const NUMERIC_SCALE: &str = "numeric_scale";
|
||||
const DATETIME_PRECISION: &str = "datetime_precision";
|
||||
const CHARACTER_SET_NAME: &str = "character_set_name";
|
||||
pub const COLLATION_NAME: &str = "collation_name";
|
||||
pub const COLUMN_KEY: &str = "column_key";
|
||||
pub const EXTRA: &str = "extra";
|
||||
pub const PRIVILEGES: &str = "privileges";
|
||||
const GENERATION_EXPRESSION: &str = "generation_expression";
|
||||
// Extension field to keep greptime data type name
|
||||
pub const GREPTIME_DATA_TYPE: &str = "greptime_data_type";
|
||||
pub const DATA_TYPE: &str = "data_type";
|
||||
pub const SEMANTIC_TYPE: &str = "semantic_type";
|
||||
pub const COLUMN_DEFAULT: &str = "column_default";
|
||||
pub const IS_NULLABLE: &str = "is_nullable";
|
||||
const COLUMN_TYPE: &str = "column_type";
|
||||
const COLUMN_COMMENT: &str = "column_comment";
|
||||
pub const COLUMN_COMMENT: &str = "column_comment";
|
||||
const SRS_ID: &str = "srs_id";
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
// The maximum length of string type
|
||||
const MAX_STRING_LENGTH: i64 = 2147483647;
|
||||
const UTF8_CHARSET_NAME: &str = "utf8";
|
||||
const UTF8_COLLATE_NAME: &str = "utf8_bin";
|
||||
const PRI_COLUMN_KEY: &str = "PRI";
|
||||
const TIME_INDEX_COLUMN_KEY: &str = "TIME INDEX";
|
||||
const DEFAULT_PRIVILEGES: &str = "select,insert";
|
||||
const EMPTY_STR: &str = "";
|
||||
|
||||
impl InformationSchemaColumns {
|
||||
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
@@ -75,12 +102,46 @@ impl InformationSchemaColumns {
|
||||
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(ORDINAL_POSITION, ConcreteDataType::int64_datatype(), false),
|
||||
ColumnSchema::new(
|
||||
CHARACTER_MAXIMUM_LENGTH,
|
||||
ConcreteDataType::int64_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
CHARACTER_OCTET_LENGTH,
|
||||
ConcreteDataType::int64_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(NUMERIC_PRECISION, ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new(NUMERIC_SCALE, ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new(DATETIME_PRECISION, ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new(
|
||||
CHARACTER_SET_NAME,
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(COLLATION_NAME, ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(COLUMN_KEY, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(EXTRA, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(PRIVILEGES, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(
|
||||
GENERATION_EXPRESSION,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
GREPTIME_DATA_TYPE,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(DATA_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(SEMANTIC_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(COLUMN_DEFAULT, ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(IS_NULLABLE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(COLUMN_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(COLUMN_COMMENT, ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(SRS_ID, ConcreteDataType::int64_datatype(), true),
|
||||
]))
|
||||
}
|
||||
|
||||
@@ -136,9 +197,18 @@ struct InformationSchemaColumnsBuilder {
|
||||
schema_names: StringVectorBuilder,
|
||||
table_names: StringVectorBuilder,
|
||||
column_names: StringVectorBuilder,
|
||||
ordinal_positions: Int64VectorBuilder,
|
||||
character_maximum_lengths: Int64VectorBuilder,
|
||||
character_octet_lengths: Int64VectorBuilder,
|
||||
numeric_precisions: Int64VectorBuilder,
|
||||
numeric_scales: Int64VectorBuilder,
|
||||
datetime_precisions: Int64VectorBuilder,
|
||||
character_set_names: StringVectorBuilder,
|
||||
collation_names: StringVectorBuilder,
|
||||
column_keys: StringVectorBuilder,
|
||||
greptime_data_types: StringVectorBuilder,
|
||||
data_types: StringVectorBuilder,
|
||||
semantic_types: StringVectorBuilder,
|
||||
|
||||
column_defaults: StringVectorBuilder,
|
||||
is_nullables: StringVectorBuilder,
|
||||
column_types: StringVectorBuilder,
|
||||
@@ -159,6 +229,16 @@ impl InformationSchemaColumnsBuilder {
|
||||
schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
column_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
character_maximum_lengths: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
character_octet_lengths: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
numeric_precisions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
numeric_scales: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
datetime_precisions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
character_set_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
collation_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
column_keys: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
greptime_data_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
data_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
semantic_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
column_defaults: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
@@ -195,6 +275,7 @@ impl InformationSchemaColumnsBuilder {
|
||||
|
||||
self.add_column(
|
||||
&predicates,
|
||||
idx,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table.table_info().name,
|
||||
@@ -208,16 +289,27 @@ impl InformationSchemaColumnsBuilder {
|
||||
self.finish()
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn add_column(
|
||||
&mut self,
|
||||
predicates: &Predicates,
|
||||
index: usize,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
table_name: &str,
|
||||
semantic_type: &str,
|
||||
column_schema: &ColumnSchema,
|
||||
) {
|
||||
let data_type = &column_schema.data_type.name();
|
||||
// Use sql data type name
|
||||
let data_type = statements::concrete_data_type_to_sql_data_type(&column_schema.data_type)
|
||||
.map(|dt| dt.to_string().to_lowercase())
|
||||
.unwrap_or_else(|_| column_schema.data_type.name());
|
||||
|
||||
let column_key = match semantic_type {
|
||||
SEMANTIC_TYPE_PRIMARY_KEY => PRI_COLUMN_KEY,
|
||||
SEMANTIC_TYPE_TIME_INDEX => TIME_INDEX_COLUMN_KEY,
|
||||
_ => EMPTY_STR,
|
||||
};
|
||||
|
||||
let row = [
|
||||
(TABLE_CATALOG, &Value::from(catalog_name)),
|
||||
@@ -226,6 +318,8 @@ impl InformationSchemaColumnsBuilder {
|
||||
(COLUMN_NAME, &Value::from(column_schema.name.as_str())),
|
||||
(DATA_TYPE, &Value::from(data_type.as_str())),
|
||||
(SEMANTIC_TYPE, &Value::from(semantic_type)),
|
||||
(ORDINAL_POSITION, &Value::from((index + 1) as i64)),
|
||||
(COLUMN_KEY, &Value::from(column_key)),
|
||||
];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
@@ -236,7 +330,63 @@ impl InformationSchemaColumnsBuilder {
|
||||
self.schema_names.push(Some(schema_name));
|
||||
self.table_names.push(Some(table_name));
|
||||
self.column_names.push(Some(&column_schema.name));
|
||||
self.data_types.push(Some(data_type));
|
||||
// Starts from 1
|
||||
self.ordinal_positions.push(Some((index + 1) as i64));
|
||||
|
||||
if column_schema.data_type.is_string() {
|
||||
self.character_maximum_lengths.push(Some(MAX_STRING_LENGTH));
|
||||
self.character_octet_lengths.push(Some(MAX_STRING_LENGTH));
|
||||
self.numeric_precisions.push(None);
|
||||
self.numeric_scales.push(None);
|
||||
self.datetime_precisions.push(None);
|
||||
self.character_set_names.push(Some(UTF8_CHARSET_NAME));
|
||||
self.collation_names.push(Some(UTF8_COLLATE_NAME));
|
||||
} else if column_schema.data_type.is_numeric() || column_schema.data_type.is_decimal() {
|
||||
self.character_maximum_lengths.push(None);
|
||||
self.character_octet_lengths.push(None);
|
||||
|
||||
self.numeric_precisions.push(
|
||||
column_schema
|
||||
.data_type
|
||||
.numeric_precision()
|
||||
.map(|x| x as i64),
|
||||
);
|
||||
self.numeric_scales
|
||||
.push(column_schema.data_type.numeric_scale().map(|x| x as i64));
|
||||
|
||||
self.datetime_precisions.push(None);
|
||||
self.character_set_names.push(None);
|
||||
self.collation_names.push(None);
|
||||
} else {
|
||||
self.character_maximum_lengths.push(None);
|
||||
self.character_octet_lengths.push(None);
|
||||
self.numeric_precisions.push(None);
|
||||
self.numeric_scales.push(None);
|
||||
|
||||
match &column_schema.data_type {
|
||||
ConcreteDataType::DateTime(datetime_type) => {
|
||||
self.datetime_precisions
|
||||
.push(Some(datetime_type.precision() as i64));
|
||||
}
|
||||
ConcreteDataType::Timestamp(ts_type) => {
|
||||
self.datetime_precisions
|
||||
.push(Some(ts_type.precision() as i64));
|
||||
}
|
||||
ConcreteDataType::Time(time_type) => {
|
||||
self.datetime_precisions
|
||||
.push(Some(time_type.precision() as i64));
|
||||
}
|
||||
_ => self.datetime_precisions.push(None),
|
||||
}
|
||||
|
||||
self.character_set_names.push(None);
|
||||
self.collation_names.push(None);
|
||||
}
|
||||
|
||||
self.column_keys.push(Some(column_key));
|
||||
self.greptime_data_types
|
||||
.push(Some(&column_schema.data_type.name()));
|
||||
self.data_types.push(Some(&data_type));
|
||||
self.semantic_types.push(Some(semantic_type));
|
||||
self.column_defaults.push(
|
||||
column_schema
|
||||
@@ -249,23 +399,52 @@ impl InformationSchemaColumnsBuilder {
|
||||
} else {
|
||||
self.is_nullables.push(Some("No"));
|
||||
}
|
||||
self.column_types.push(Some(data_type));
|
||||
self.column_types.push(Some(&data_type));
|
||||
self.column_comments
|
||||
.push(column_schema.column_comment().map(|x| x.as_ref()));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let rows_num = self.collation_names.len();
|
||||
|
||||
let privileges = Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from(vec![DEFAULT_PRIVILEGES])),
|
||||
rows_num,
|
||||
));
|
||||
let empty_string = Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from(vec![EMPTY_STR])),
|
||||
rows_num,
|
||||
));
|
||||
let srs_ids = Arc::new(ConstantVector::new(
|
||||
Arc::new(Int64Vector::from(vec![None])),
|
||||
rows_num,
|
||||
));
|
||||
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
Arc::new(self.catalog_names.finish()),
|
||||
Arc::new(self.schema_names.finish()),
|
||||
Arc::new(self.table_names.finish()),
|
||||
Arc::new(self.column_names.finish()),
|
||||
Arc::new(self.ordinal_positions.finish()),
|
||||
Arc::new(self.character_maximum_lengths.finish()),
|
||||
Arc::new(self.character_octet_lengths.finish()),
|
||||
Arc::new(self.numeric_precisions.finish()),
|
||||
Arc::new(self.numeric_scales.finish()),
|
||||
Arc::new(self.datetime_precisions.finish()),
|
||||
Arc::new(self.character_set_names.finish()),
|
||||
Arc::new(self.collation_names.finish()),
|
||||
Arc::new(self.column_keys.finish()),
|
||||
empty_string.clone(),
|
||||
privileges,
|
||||
empty_string,
|
||||
Arc::new(self.greptime_data_types.finish()),
|
||||
Arc::new(self.data_types.finish()),
|
||||
Arc::new(self.semantic_types.finish()),
|
||||
Arc::new(self.column_defaults.finish()),
|
||||
Arc::new(self.is_nullables.finish()),
|
||||
Arc::new(self.column_types.finish()),
|
||||
Arc::new(self.column_comments.finish()),
|
||||
srs_ids,
|
||||
];
|
||||
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
|
||||
@@ -37,15 +37,23 @@ use crate::error::{
|
||||
use crate::information_schema::{InformationTable, Predicates};
|
||||
use crate::CatalogManager;
|
||||
|
||||
const CONSTRAINT_SCHEMA: &str = "constraint_schema";
|
||||
const CONSTRAINT_NAME: &str = "constraint_name";
|
||||
const TABLE_CATALOG: &str = "table_catalog";
|
||||
const TABLE_SCHEMA: &str = "table_schema";
|
||||
const TABLE_NAME: &str = "table_name";
|
||||
const COLUMN_NAME: &str = "column_name";
|
||||
const ORDINAL_POSITION: &str = "ordinal_position";
|
||||
pub const CONSTRAINT_SCHEMA: &str = "constraint_schema";
|
||||
pub const CONSTRAINT_NAME: &str = "constraint_name";
|
||||
// It's always `def` in MySQL
|
||||
pub const TABLE_CATALOG: &str = "table_catalog";
|
||||
// The real catalog name for this key column.
|
||||
pub const REAL_TABLE_CATALOG: &str = "real_table_catalog";
|
||||
pub const TABLE_SCHEMA: &str = "table_schema";
|
||||
pub const TABLE_NAME: &str = "table_name";
|
||||
pub const COLUMN_NAME: &str = "column_name";
|
||||
pub const ORDINAL_POSITION: &str = "ordinal_position";
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
/// Primary key constraint name
|
||||
pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY";
|
||||
/// Time index constraint name
|
||||
pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX";
|
||||
|
||||
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
|
||||
pub(super) struct InformationSchemaKeyColumnUsage {
|
||||
schema: SchemaRef,
|
||||
@@ -76,6 +84,11 @@ impl InformationSchemaKeyColumnUsage {
|
||||
),
|
||||
ColumnSchema::new(CONSTRAINT_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(
|
||||
REAL_TABLE_CATALOG,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
|
||||
@@ -158,6 +171,7 @@ struct InformationSchemaKeyColumnUsageBuilder {
|
||||
constraint_schema: StringVectorBuilder,
|
||||
constraint_name: StringVectorBuilder,
|
||||
table_catalog: StringVectorBuilder,
|
||||
real_table_catalog: StringVectorBuilder,
|
||||
table_schema: StringVectorBuilder,
|
||||
table_name: StringVectorBuilder,
|
||||
column_name: StringVectorBuilder,
|
||||
@@ -179,6 +193,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
constraint_schema: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
constraint_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_catalog: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
real_table_catalog: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_schema: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
column_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
@@ -222,7 +237,8 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
"TIME INDEX",
|
||||
TIME_INDEX_CONSTRAINT_NAME,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
&column.name,
|
||||
@@ -231,6 +247,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
}
|
||||
if keys.contains(&idx) {
|
||||
primary_constraints.push((
|
||||
catalog_name.clone(),
|
||||
schema_name.clone(),
|
||||
table_name.clone(),
|
||||
column.name.clone(),
|
||||
@@ -244,13 +261,14 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
for (i, (schema_name, table_name, column_name)) in
|
||||
for (i, (catalog_name, schema_name, table_name, column_name)) in
|
||||
primary_constraints.into_iter().enumerate()
|
||||
{
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
"PRIMARY",
|
||||
PRI_CONSTRAINT_NAME,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
&column_name,
|
||||
@@ -269,6 +287,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
predicates: &Predicates,
|
||||
constraint_schema: &str,
|
||||
constraint_name: &str,
|
||||
table_catalog: &str,
|
||||
table_schema: &str,
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
@@ -277,6 +296,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
let row = [
|
||||
(CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
|
||||
(CONSTRAINT_NAME, &Value::from(constraint_name)),
|
||||
(REAL_TABLE_CATALOG, &Value::from(table_catalog)),
|
||||
(TABLE_SCHEMA, &Value::from(table_schema)),
|
||||
(TABLE_NAME, &Value::from(table_name)),
|
||||
(COLUMN_NAME, &Value::from(column_name)),
|
||||
@@ -291,6 +311,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
self.constraint_schema.push(Some(constraint_schema));
|
||||
self.constraint_name.push(Some(constraint_name));
|
||||
self.table_catalog.push(Some("def"));
|
||||
self.real_table_catalog.push(Some(table_catalog));
|
||||
self.table_schema.push(Some(table_schema));
|
||||
self.table_name.push(Some(table_name));
|
||||
self.column_name.push(Some(column_name));
|
||||
@@ -310,6 +331,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
Arc::new(self.constraint_schema.finish()),
|
||||
Arc::new(self.constraint_name.finish()),
|
||||
Arc::new(self.table_catalog.finish()),
|
||||
Arc::new(self.real_table_catalog.finish()),
|
||||
Arc::new(self.table_schema.finish()),
|
||||
Arc::new(self.table_name.finish()),
|
||||
Arc::new(self.column_name.finish()),
|
||||
|
||||
@@ -14,13 +14,15 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_catalog::consts::MITO_ENGINE;
|
||||
use common_catalog::consts::{METRIC_ENGINE, MITO_ENGINE};
|
||||
use datatypes::prelude::{ConcreteDataType, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::vectors::{Int64Vector, StringVector};
|
||||
|
||||
use crate::information_schema::table_names::*;
|
||||
|
||||
const NO_VALUE: &str = "NO";
|
||||
|
||||
/// Find the schema and columns by the table_name, only valid for memory tables.
|
||||
/// Safety: the user MUST ensure the table schema exists, panic otherwise.
|
||||
pub fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>) {
|
||||
@@ -59,14 +61,15 @@ pub fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>) {
|
||||
"SAVEPOINTS",
|
||||
]),
|
||||
vec![
|
||||
Arc::new(StringVector::from(vec![MITO_ENGINE])),
|
||||
Arc::new(StringVector::from(vec!["DEFAULT"])),
|
||||
Arc::new(StringVector::from(vec![MITO_ENGINE, METRIC_ENGINE])),
|
||||
Arc::new(StringVector::from(vec!["DEFAULT", "YES"])),
|
||||
Arc::new(StringVector::from(vec![
|
||||
"Storage engine for time-series data",
|
||||
"Storage engine for observability scenarios, which is adept at handling a large number of small tables, making it particularly suitable for cloud-native monitoring",
|
||||
])),
|
||||
Arc::new(StringVector::from(vec!["NO"])),
|
||||
Arc::new(StringVector::from(vec!["NO"])),
|
||||
Arc::new(StringVector::from(vec!["NO"])),
|
||||
Arc::new(StringVector::from(vec![NO_VALUE, NO_VALUE])),
|
||||
Arc::new(StringVector::from(vec![NO_VALUE, NO_VALUE])),
|
||||
Arc::new(StringVector::from(vec![NO_VALUE, NO_VALUE])),
|
||||
],
|
||||
),
|
||||
|
||||
|
||||
@@ -109,11 +109,7 @@ impl Predicate {
|
||||
};
|
||||
}
|
||||
Predicate::Not(p) => {
|
||||
let Some(b) = p.eval(row) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
return Some(!b);
|
||||
return Some(!p.eval(row)?);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,13 +121,7 @@ impl Predicate {
|
||||
fn from_expr(expr: DfExpr) -> Option<Predicate> {
|
||||
match expr {
|
||||
// NOT expr
|
||||
DfExpr::Not(expr) => {
|
||||
let Some(p) = Self::from_expr(*expr) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some(Predicate::Not(Box::new(p)))
|
||||
}
|
||||
DfExpr::Not(expr) => Some(Predicate::Not(Box::new(Self::from_expr(*expr)?))),
|
||||
// expr LIKE pattern
|
||||
DfExpr::Like(Like {
|
||||
negated,
|
||||
@@ -178,25 +168,15 @@ impl Predicate {
|
||||
}
|
||||
// left AND right
|
||||
(left, Operator::And, right) => {
|
||||
let Some(left) = Self::from_expr(left) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let Some(right) = Self::from_expr(right) else {
|
||||
return None;
|
||||
};
|
||||
let left = Self::from_expr(left)?;
|
||||
let right = Self::from_expr(right)?;
|
||||
|
||||
Some(Predicate::And(Box::new(left), Box::new(right)))
|
||||
}
|
||||
// left OR right
|
||||
(left, Operator::Or, right) => {
|
||||
let Some(left) = Self::from_expr(left) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let Some(right) = Self::from_expr(right) else {
|
||||
return None;
|
||||
};
|
||||
let left = Self::from_expr(left)?;
|
||||
let right = Self::from_expr(right)?;
|
||||
|
||||
Some(Predicate::Or(Box::new(left), Box::new(right)))
|
||||
}
|
||||
|
||||
286
src/catalog/src/information_schema/table_constraints.rs
Normal file
286
src/catalog/src/information_schema/table_constraints.rs
Normal file
@@ -0,0 +1,286 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_TABLE_CONSTRAINTS_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, MutableVector};
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, VectorRef};
|
||||
use futures::TryStreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::{InformationTable, TABLE_CONSTRAINTS};
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::key_column_usage::{
|
||||
PRI_CONSTRAINT_NAME, TIME_INDEX_CONSTRAINT_NAME,
|
||||
};
|
||||
use crate::information_schema::Predicates;
|
||||
use crate::CatalogManager;
|
||||
|
||||
/// The `TABLE_CONSTRAINTS` table describes which tables have constraints.
|
||||
pub(super) struct InformationSchemaTableConstraints {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
}
|
||||
|
||||
// Column names of the `table_constraints` table, in schema order.

/// Column holding the catalog the constraint belongs to.
const CONSTRAINT_CATALOG: &str = "constraint_catalog";
/// Column holding the schema the constraint belongs to.
const CONSTRAINT_SCHEMA: &str = "constraint_schema";
/// Column holding the constraint name.
const CONSTRAINT_NAME: &str = "constraint_name";
/// Column holding the schema of the constrained table.
const TABLE_SCHEMA: &str = "table_schema";
/// Column holding the name of the constrained table.
const TABLE_NAME: &str = "table_name";
/// Column holding the kind of constraint.
const CONSTRAINT_TYPE: &str = "constraint_type";
/// Column telling whether the constraint is enforced.
const ENFORCED: &str = "enforced";

/// Initial capacity reserved in each per-column vector builder.
const INIT_CAPACITY: usize = 42;

/// `constraint_type` value reported for time index constraints.
const TIME_INDEX_CONSTRAINT_TYPE: &str = "TIME INDEX";
/// `constraint_type` value reported for primary key constraints.
const PRI_KEY_CONSTRAINT_TYPE: &str = "PRIMARY KEY";
|
||||
|
||||
impl InformationSchemaTableConstraints {
|
||||
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
}
|
||||
}
|
||||
|
||||
fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(
|
||||
CONSTRAINT_CATALOG,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
CONSTRAINT_SCHEMA,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(CONSTRAINT_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(CONSTRAINT_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(ENFORCED, ConcreteDataType::string_datatype(), false),
|
||||
]))
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaTableConstraintsBuilder {
|
||||
InformationSchemaTableConstraintsBuilder::new(
|
||||
self.schema.clone(),
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for InformationSchemaTableConstraints {
|
||||
fn table_id(&self) -> TableId {
|
||||
INFORMATION_SCHEMA_TABLE_CONSTRAINTS_TABLE_ID
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
TABLE_CONSTRAINTS
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_table_constraints(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
struct InformationSchemaTableConstraintsBuilder {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
|
||||
constraint_schemas: StringVectorBuilder,
|
||||
constraint_names: StringVectorBuilder,
|
||||
table_schemas: StringVectorBuilder,
|
||||
table_names: StringVectorBuilder,
|
||||
constraint_types: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaTableConstraintsBuilder {
|
||||
fn new(
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
constraint_schemas: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
constraint_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_schemas: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
constraint_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.table_constraints` virtual table
|
||||
async fn make_table_constraints(
|
||||
&mut self,
|
||||
request: Option<ScanRequest>,
|
||||
) -> Result<RecordBatch> {
|
||||
let catalog_name = self.catalog_name.clone();
|
||||
let catalog_manager = self
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name).await;
|
||||
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let keys = &table.table_info().meta.primary_key_indices;
|
||||
let schema = table.schema();
|
||||
|
||||
if schema.timestamp_index().is_some() {
|
||||
self.add_table_constraint(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
TIME_INDEX_CONSTRAINT_NAME,
|
||||
&schema_name,
|
||||
&table.table_info().name,
|
||||
TIME_INDEX_CONSTRAINT_TYPE,
|
||||
);
|
||||
}
|
||||
|
||||
if !keys.is_empty() {
|
||||
self.add_table_constraint(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
PRI_CONSTRAINT_NAME,
|
||||
&schema_name,
|
||||
&table.table_info().name,
|
||||
PRI_KEY_CONSTRAINT_TYPE,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
fn add_table_constraint(
|
||||
&mut self,
|
||||
predicates: &Predicates,
|
||||
constraint_schema: &str,
|
||||
constraint_name: &str,
|
||||
table_schema: &str,
|
||||
table_name: &str,
|
||||
constraint_type: &str,
|
||||
) {
|
||||
let row = [
|
||||
(CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
|
||||
(CONSTRAINT_NAME, &Value::from(constraint_name)),
|
||||
(TABLE_SCHEMA, &Value::from(table_schema)),
|
||||
(TABLE_NAME, &Value::from(table_name)),
|
||||
(CONSTRAINT_TYPE, &Value::from(constraint_type)),
|
||||
];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.constraint_schemas.push(Some(constraint_schema));
|
||||
self.constraint_names.push(Some(constraint_name));
|
||||
self.table_schemas.push(Some(table_schema));
|
||||
self.table_names.push(Some(table_name));
|
||||
self.constraint_types.push(Some(constraint_type));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let rows_num = self.constraint_names.len();
|
||||
|
||||
let constraint_catalogs = Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from(vec!["def"])),
|
||||
rows_num,
|
||||
));
|
||||
let enforceds = Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from(vec!["YES"])),
|
||||
rows_num,
|
||||
));
|
||||
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
constraint_catalogs,
|
||||
Arc::new(self.constraint_schemas.finish()),
|
||||
Arc::new(self.constraint_names.finish()),
|
||||
Arc::new(self.table_schemas.finish()),
|
||||
Arc::new(self.table_names.finish()),
|
||||
Arc::new(self.constraint_types.finish()),
|
||||
enforceds,
|
||||
];
|
||||
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl DfPartitionStream for InformationSchemaTableConstraints {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_table_constraints(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
@@ -41,3 +41,4 @@ pub const SESSION_STATUS: &str = "session_status";
|
||||
pub const RUNTIME_METRICS: &str = "runtime_metrics";
|
||||
pub const PARTITIONS: &str = "partitions";
|
||||
pub const REGION_PEERS: &str = "greptime_region_peers";
|
||||
pub const TABLE_CONSTRAINTS: &str = "table_constraints";
|
||||
|
||||
@@ -17,7 +17,6 @@ use std::fmt::Debug;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Duration;
|
||||
use std::usize;
|
||||
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cache_invalidator::KvCacheInvalidator;
|
||||
@@ -364,6 +363,10 @@ impl KvBackend for MetaKvBackend {
|
||||
"MetaKvBackend"
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
|
||||
self.client
|
||||
.range(req)
|
||||
@@ -372,27 +375,6 @@ impl KvBackend for MetaKvBackend {
|
||||
.context(ExternalSnafu)
|
||||
}
|
||||
|
||||
async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
|
||||
let mut response = self
|
||||
.client
|
||||
.range(RangeRequest::new().with_key(key))
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
Ok(response.take_kvs().get_mut(0).map(|kv| KeyValue {
|
||||
key: kv.take_key(),
|
||||
value: kv.take_value(),
|
||||
}))
|
||||
}
|
||||
|
||||
async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
|
||||
self.client
|
||||
.batch_put(req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)
|
||||
}
|
||||
|
||||
async fn put(&self, req: PutRequest) -> Result<PutResponse> {
|
||||
self.client
|
||||
.put(req)
|
||||
@@ -401,17 +383,9 @@ impl KvBackend for MetaKvBackend {
|
||||
.context(ExternalSnafu)
|
||||
}
|
||||
|
||||
async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
|
||||
async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
|
||||
self.client
|
||||
.delete_range(req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)
|
||||
}
|
||||
|
||||
async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
|
||||
self.client
|
||||
.batch_delete(req)
|
||||
.batch_put(req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)
|
||||
@@ -436,8 +410,33 @@ impl KvBackend for MetaKvBackend {
|
||||
.context(ExternalSnafu)
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
|
||||
self.client
|
||||
.delete_range(req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)
|
||||
}
|
||||
|
||||
async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
|
||||
self.client
|
||||
.batch_delete(req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)
|
||||
}
|
||||
|
||||
async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
|
||||
let mut response = self
|
||||
.client
|
||||
.range(RangeRequest::new().with_key(key))
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
Ok(response.take_kvs().get_mut(0).map(|kv| KeyValue {
|
||||
key: kv.take_key(),
|
||||
value: kv.take_value(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -506,32 +505,32 @@ mod tests {
|
||||
}
|
||||
|
||||
async fn range(&self, _req: RangeRequest) -> Result<RangeResponse, Self::Error> {
|
||||
todo!()
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn batch_put(&self, _req: BatchPutRequest) -> Result<BatchPutResponse, Self::Error> {
|
||||
todo!()
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn compare_and_put(
|
||||
&self,
|
||||
_req: CompareAndPutRequest,
|
||||
) -> Result<CompareAndPutResponse, Self::Error> {
|
||||
todo!()
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn delete_range(
|
||||
&self,
|
||||
_req: DeleteRangeRequest,
|
||||
) -> Result<DeleteRangeResponse, Self::Error> {
|
||||
todo!()
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn batch_delete(
|
||||
&self,
|
||||
_req: BatchDeleteRequest,
|
||||
) -> Result<BatchDeleteResponse, Self::Error> {
|
||||
todo!()
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -23,15 +23,14 @@ use common_catalog::consts::{
|
||||
};
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cache_invalidator::{CacheInvalidator, CacheInvalidatorRef, Context};
|
||||
use common_meta::error::Result as MetaResult;
|
||||
use common_meta::cache_invalidator::{CacheInvalidator, Context, MultiCacheInvalidator};
|
||||
use common_meta::instruction::CacheIdent;
|
||||
use common_meta::key::catalog_name::CatalogNameKey;
|
||||
use common_meta::key::schema_name::SchemaNameKey;
|
||||
use common_meta::key::table_info::TableInfoValue;
|
||||
use common_meta::key::table_name::TableNameKey;
|
||||
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::table_name::TableName;
|
||||
use futures_util::stream::BoxStream;
|
||||
use futures_util::{StreamExt, TryStreamExt};
|
||||
use moka::future::{Cache as AsyncCache, CacheBuilder};
|
||||
@@ -39,14 +38,13 @@ use moka::sync::Cache;
|
||||
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
|
||||
use snafu::prelude::*;
|
||||
use table::dist_table::DistTable;
|
||||
use table::metadata::TableId;
|
||||
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
|
||||
use table::TableRef;
|
||||
|
||||
use crate::error::Error::{GetTableCache, TableCacheNotGet};
|
||||
use crate::error::{
|
||||
self as catalog_err, ListCatalogsSnafu, ListSchemasSnafu, ListTablesSnafu,
|
||||
Result as CatalogResult, TableCacheNotGetSnafu, TableMetadataManagerSnafu,
|
||||
InvalidTableInfoInCatalogSnafu, ListCatalogsSnafu, ListSchemasSnafu, ListTablesSnafu, Result,
|
||||
TableCacheNotGetSnafu, TableMetadataManagerSnafu,
|
||||
};
|
||||
use crate::information_schema::InformationSchemaProvider;
|
||||
use crate::CatalogManager;
|
||||
@@ -58,10 +56,6 @@ use crate::CatalogManager;
|
||||
/// comes from `SystemCatalog`, which is static and read-only.
|
||||
#[derive(Clone)]
|
||||
pub struct KvBackendCatalogManager {
|
||||
// TODO(LFC): Maybe use a real implementation for Standalone mode.
|
||||
// Now we use `NoopKvCacheInvalidator` for Standalone mode. In Standalone mode, the KV backend
|
||||
// is implemented by RaftEngine. Maybe we need a cache for it?
|
||||
cache_invalidator: CacheInvalidatorRef,
|
||||
partition_manager: PartitionRuleManagerRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
/// A sub-CatalogManager that handles system tables
|
||||
@@ -69,33 +63,33 @@ pub struct KvBackendCatalogManager {
|
||||
table_cache: AsyncCache<String, TableRef>,
|
||||
}
|
||||
|
||||
fn make_table(table_info_value: TableInfoValue) -> CatalogResult<TableRef> {
|
||||
let table_info = table_info_value
|
||||
.table_info
|
||||
.try_into()
|
||||
.context(catalog_err::InvalidTableInfoInCatalogSnafu)?;
|
||||
Ok(DistTable::table(Arc::new(table_info)))
|
||||
struct TableCacheInvalidator {
|
||||
table_cache: AsyncCache<String, TableRef>,
|
||||
}
|
||||
|
||||
impl TableCacheInvalidator {
|
||||
pub fn new(table_cache: AsyncCache<String, TableRef>) -> Self {
|
||||
Self { table_cache }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl CacheInvalidator for KvBackendCatalogManager {
|
||||
async fn invalidate_table_id(&self, ctx: &Context, table_id: TableId) -> MetaResult<()> {
|
||||
self.cache_invalidator
|
||||
.invalidate_table_id(ctx, table_id)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn invalidate_table_name(&self, ctx: &Context, table_name: TableName) -> MetaResult<()> {
|
||||
let table_cache_key = format_full_table_name(
|
||||
&table_name.catalog_name,
|
||||
&table_name.schema_name,
|
||||
&table_name.table_name,
|
||||
);
|
||||
self.cache_invalidator
|
||||
.invalidate_table_name(ctx, table_name)
|
||||
.await?;
|
||||
self.table_cache.invalidate(&table_cache_key).await;
|
||||
|
||||
impl CacheInvalidator for TableCacheInvalidator {
|
||||
async fn invalidate(
|
||||
&self,
|
||||
_ctx: &Context,
|
||||
caches: Vec<CacheIdent>,
|
||||
) -> common_meta::error::Result<()> {
|
||||
for cache in caches {
|
||||
if let CacheIdent::TableName(table_name) = cache {
|
||||
let table_cache_key = format_full_table_name(
|
||||
&table_name.catalog_name,
|
||||
&table_name.schema_name,
|
||||
&table_name.table_name,
|
||||
);
|
||||
self.table_cache.invalidate(&table_cache_key).await;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -106,11 +100,21 @@ const TABLE_CACHE_TTL: Duration = Duration::from_secs(10 * 60);
|
||||
const TABLE_CACHE_TTI: Duration = Duration::from_secs(5 * 60);
|
||||
|
||||
impl KvBackendCatalogManager {
|
||||
pub fn new(backend: KvBackendRef, cache_invalidator: CacheInvalidatorRef) -> Arc<Self> {
|
||||
pub async fn new(
|
||||
backend: KvBackendRef,
|
||||
multi_cache_invalidator: Arc<MultiCacheInvalidator>,
|
||||
) -> Arc<Self> {
|
||||
let table_cache: AsyncCache<String, TableRef> = CacheBuilder::new(TABLE_CACHE_MAX_CAPACITY)
|
||||
.time_to_live(TABLE_CACHE_TTL)
|
||||
.time_to_idle(TABLE_CACHE_TTI)
|
||||
.build();
|
||||
multi_cache_invalidator
|
||||
.add_invalidator(Arc::new(TableCacheInvalidator::new(table_cache.clone())))
|
||||
.await;
|
||||
|
||||
Arc::new_cyclic(|me| Self {
|
||||
partition_manager: Arc::new(PartitionRuleManager::new(backend.clone())),
|
||||
table_metadata_manager: Arc::new(TableMetadataManager::new(backend)),
|
||||
cache_invalidator,
|
||||
system_catalog: SystemCatalog {
|
||||
catalog_manager: me.clone(),
|
||||
catalog_cache: Cache::new(CATALOG_CACHE_MAX_CAPACITY),
|
||||
@@ -119,10 +123,7 @@ impl KvBackendCatalogManager {
|
||||
me.clone(),
|
||||
)),
|
||||
},
|
||||
table_cache: CacheBuilder::new(TABLE_CACHE_MAX_CAPACITY)
|
||||
.time_to_live(TABLE_CACHE_TTL)
|
||||
.time_to_idle(TABLE_CACHE_TTI)
|
||||
.build(),
|
||||
table_cache,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -141,12 +142,11 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
self
|
||||
}
|
||||
|
||||
async fn catalog_names(&self) -> CatalogResult<Vec<String>> {
|
||||
async fn catalog_names(&self) -> Result<Vec<String>> {
|
||||
let stream = self
|
||||
.table_metadata_manager
|
||||
.catalog_manager()
|
||||
.catalog_names()
|
||||
.await;
|
||||
.catalog_names();
|
||||
|
||||
let keys = stream
|
||||
.try_collect::<Vec<_>>()
|
||||
@@ -157,12 +157,11 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
Ok(keys)
|
||||
}
|
||||
|
||||
async fn schema_names(&self, catalog: &str) -> CatalogResult<Vec<String>> {
|
||||
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>> {
|
||||
let stream = self
|
||||
.table_metadata_manager
|
||||
.schema_manager()
|
||||
.schema_names(catalog)
|
||||
.await;
|
||||
.schema_names(catalog);
|
||||
let mut keys = stream
|
||||
.try_collect::<BTreeSet<_>>()
|
||||
.await
|
||||
@@ -174,12 +173,11 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
Ok(keys.into_iter().collect())
|
||||
}
|
||||
|
||||
async fn table_names(&self, catalog: &str, schema: &str) -> CatalogResult<Vec<String>> {
|
||||
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>> {
|
||||
let stream = self
|
||||
.table_metadata_manager
|
||||
.table_name_manager()
|
||||
.tables(catalog, schema)
|
||||
.await;
|
||||
.tables(catalog, schema);
|
||||
let mut tables = stream
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
@@ -193,7 +191,7 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
Ok(tables.into_iter().collect())
|
||||
}
|
||||
|
||||
async fn catalog_exists(&self, catalog: &str) -> CatalogResult<bool> {
|
||||
async fn catalog_exists(&self, catalog: &str) -> Result<bool> {
|
||||
self.table_metadata_manager
|
||||
.catalog_manager()
|
||||
.exists(CatalogNameKey::new(catalog))
|
||||
@@ -201,7 +199,7 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
.context(TableMetadataManagerSnafu)
|
||||
}
|
||||
|
||||
async fn schema_exists(&self, catalog: &str, schema: &str) -> CatalogResult<bool> {
|
||||
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool> {
|
||||
if self.system_catalog.schema_exist(schema) {
|
||||
return Ok(true);
|
||||
}
|
||||
@@ -213,7 +211,7 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
.context(TableMetadataManagerSnafu)
|
||||
}
|
||||
|
||||
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> CatalogResult<bool> {
|
||||
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
|
||||
if self.system_catalog.table_exist(schema, table) {
|
||||
return Ok(true);
|
||||
}
|
||||
@@ -232,7 +230,7 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
table_name: &str,
|
||||
) -> CatalogResult<Option<TableRef>> {
|
||||
) -> Result<Option<TableRef>> {
|
||||
if let Some(table) = self.system_catalog.table(catalog, schema, table_name) {
|
||||
return Ok(Some(table));
|
||||
}
|
||||
@@ -266,7 +264,7 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
}
|
||||
.fail();
|
||||
};
|
||||
make_table(table_info_value)
|
||||
build_table(table_info_value)
|
||||
};
|
||||
|
||||
match self
|
||||
@@ -289,7 +287,7 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
&'a self,
|
||||
catalog: &'a str,
|
||||
schema: &'a str,
|
||||
) -> BoxStream<'a, CatalogResult<TableRef>> {
|
||||
) -> BoxStream<'a, Result<TableRef>> {
|
||||
let sys_tables = try_stream!({
|
||||
// System tables
|
||||
let sys_table_names = self.system_catalog.table_names(schema);
|
||||
@@ -304,7 +302,6 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
.table_metadata_manager
|
||||
.table_name_manager()
|
||||
.tables(catalog, schema)
|
||||
.await
|
||||
.map_ok(|(_, v)| v.table_id());
|
||||
const BATCH_SIZE: usize = 128;
|
||||
let user_tables = try_stream!({
|
||||
@@ -314,7 +311,7 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
while let Some(table_ids) = table_id_chunks.next().await {
|
||||
let table_ids = table_ids
|
||||
.into_iter()
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.collect::<std::result::Result<Vec<_>, _>>()
|
||||
.map_err(BoxedError::new)
|
||||
.context(ListTablesSnafu { catalog, schema })?;
|
||||
|
||||
@@ -326,7 +323,7 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
for table_info_value in table_info_values.into_values() {
|
||||
yield make_table(table_info_value)?;
|
||||
yield build_table(table_info_value)?;
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -335,6 +332,14 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
}
|
||||
}
|
||||
|
||||
fn build_table(table_info_value: TableInfoValue) -> Result<TableRef> {
|
||||
let table_info = table_info_value
|
||||
.table_info
|
||||
.try_into()
|
||||
.context(InvalidTableInfoInCatalogSnafu)?;
|
||||
Ok(DistTable::table(Arc::new(table_info)))
|
||||
}
|
||||
|
||||
// TODO: This struct can hold a static map of all system tables when
|
||||
// the upper layer (e.g., procedure) can inform the catalog manager
|
||||
// a new catalog is created.
|
||||
|
||||
@@ -19,10 +19,10 @@ use std::any::Any;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::CreateTableExpr;
|
||||
use futures::future::BoxFuture;
|
||||
use futures_util::stream::BoxStream;
|
||||
use table::metadata::TableId;
|
||||
use table::requests::CreateTableRequest;
|
||||
use table::TableRef;
|
||||
|
||||
use crate::error::Result;
|
||||
@@ -75,9 +75,9 @@ pub type OpenSystemTableHook =
|
||||
/// Register system table request:
|
||||
/// - When system table is already created and registered, the hook will be called
|
||||
/// with table ref after opening the system table
|
||||
/// - When system table is not exists, create and register the table by create_table_request and calls open_hook with the created table.
|
||||
/// - When system table is not exists, create and register the table by `create_table_expr` and calls `open_hook` with the created table.
|
||||
pub struct RegisterSystemTableRequest {
|
||||
pub create_table_request: CreateTableRequest,
|
||||
pub create_table_expr: CreateTableExpr,
|
||||
pub open_hook: Option<OpenSystemTableHook>,
|
||||
}
|
||||
|
||||
|
||||
@@ -49,10 +49,7 @@ impl DfTableSourceProvider {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn resolve_table_ref<'a>(
|
||||
&'a self,
|
||||
table_ref: TableReference<'a>,
|
||||
) -> Result<ResolvedTableReference<'a>> {
|
||||
pub fn resolve_table_ref(&self, table_ref: TableReference) -> Result<ResolvedTableReference> {
|
||||
if self.disallow_cross_catalog_query {
|
||||
match &table_ref {
|
||||
TableReference::Bare { .. } => (),
|
||||
@@ -76,7 +73,7 @@ impl DfTableSourceProvider {
|
||||
|
||||
pub async fn resolve_table(
|
||||
&mut self,
|
||||
table_ref: TableReference<'_>,
|
||||
table_ref: TableReference,
|
||||
) -> Result<Arc<dyn TableSource>> {
|
||||
let table_ref = self.resolve_table_ref(table_ref)?;
|
||||
|
||||
@@ -106,8 +103,6 @@ impl DfTableSourceProvider {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::borrow::Cow;
|
||||
|
||||
use session::context::QueryContext;
|
||||
|
||||
use super::*;
|
||||
@@ -120,68 +115,37 @@ mod tests {
|
||||
let table_provider =
|
||||
DfTableSourceProvider::new(MemoryCatalogManager::with_default_setup(), true, query_ctx);
|
||||
|
||||
let table_ref = TableReference::Bare {
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let table_ref = TableReference::bare("table_name");
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Partial {
|
||||
schema: Cow::Borrowed("public"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let table_ref = TableReference::partial("public", "table_name");
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Partial {
|
||||
schema: Cow::Borrowed("wrong_schema"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let table_ref = TableReference::partial("wrong_schema", "table_name");
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("greptime"),
|
||||
schema: Cow::Borrowed("public"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let table_ref = TableReference::full("greptime", "public", "table_name");
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("wrong_catalog"),
|
||||
schema: Cow::Borrowed("public"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let table_ref = TableReference::full("wrong_catalog", "public", "table_name");
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_err());
|
||||
|
||||
let table_ref = TableReference::Partial {
|
||||
schema: Cow::Borrowed("information_schema"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
let table_ref = TableReference::partial("information_schema", "columns");
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("greptime"),
|
||||
schema: Cow::Borrowed("information_schema"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
let table_ref = TableReference::full("greptime", "information_schema", "columns");
|
||||
assert!(table_provider.resolve_table_ref(table_ref).is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("dummy"),
|
||||
schema: Cow::Borrowed("information_schema"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
let table_ref = TableReference::full("dummy", "information_schema", "columns");
|
||||
assert!(table_provider.resolve_table_ref(table_ref).is_err());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("greptime"),
|
||||
schema: Cow::Borrowed("greptime_private"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
let table_ref = TableReference::full("greptime", "greptime_private", "columns");
|
||||
assert!(table_provider.resolve_table_ref(table_ref).is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@ arc-swap = "1.6"
|
||||
arrow-flight.workspace = true
|
||||
async-stream.workspace = true
|
||||
async-trait.workspace = true
|
||||
common-base.workspace = true
|
||||
common-catalog.workspace = true
|
||||
common-error.workspace = true
|
||||
common-grpc.workspace = true
|
||||
@@ -25,10 +24,6 @@ common-meta.workspace = true
|
||||
common-query.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-time.workspace = true
|
||||
datafusion.workspace = true
|
||||
datatypes.workspace = true
|
||||
derive_builder.workspace = true
|
||||
enum_dispatch = "0.3"
|
||||
futures-util.workspace = true
|
||||
lazy_static.workspace = true
|
||||
@@ -37,9 +32,7 @@ parking_lot = "0.12"
|
||||
prometheus.workspace = true
|
||||
prost.workspace = true
|
||||
rand.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
session.workspace = true
|
||||
snafu.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-stream = { workspace = true, features = ["net"] }
|
||||
|
||||
@@ -37,6 +37,8 @@ use snafu::{ensure, ResultExt};
|
||||
use crate::error::{ConvertFlightDataSnafu, Error, IllegalFlightMessagesSnafu, ServerSnafu};
|
||||
use crate::{error, from_grpc_response, metrics, Client, Result, StreamInserter};
|
||||
|
||||
pub const DEFAULT_LOOKBACK_STRING: &str = "5m";
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct Database {
|
||||
// The "catalog" and "schema" to be used in processing the requests at the server side.
|
||||
@@ -215,6 +217,7 @@ impl Database {
|
||||
start: start.to_string(),
|
||||
end: end.to_string(),
|
||||
step: step.to_string(),
|
||||
lookback: DEFAULT_LOOKBACK_STRING.to_string(),
|
||||
})),
|
||||
}))
|
||||
.await
|
||||
|
||||
@@ -14,7 +14,8 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::region::{QueryRequest, RegionRequest, RegionResponse};
|
||||
use api::region::RegionResponse;
|
||||
use api::v1::region::{QueryRequest, RegionRequest};
|
||||
use api::v1::ResponseHeader;
|
||||
use arc_swap::ArcSwapOption;
|
||||
use arrow_flight::Ticket;
|
||||
@@ -23,7 +24,7 @@ use async_trait::async_trait;
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_grpc::flight::{FlightDecoder, FlightMessage};
|
||||
use common_meta::datanode_manager::{AffectedRows, Datanode};
|
||||
use common_meta::datanode_manager::Datanode;
|
||||
use common_meta::error::{self as meta_error, Result as MetaResult};
|
||||
use common_recordbatch::error::ExternalSnafu;
|
||||
use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
|
||||
@@ -46,7 +47,7 @@ pub struct RegionRequester {
|
||||
|
||||
#[async_trait]
|
||||
impl Datanode for RegionRequester {
|
||||
async fn handle(&self, request: RegionRequest) -> MetaResult<AffectedRows> {
|
||||
async fn handle(&self, request: RegionRequest) -> MetaResult<RegionResponse> {
|
||||
self.handle_inner(request).await.map_err(|err| {
|
||||
if err.should_retry() {
|
||||
meta_error::Error::RetryLater {
|
||||
@@ -165,7 +166,7 @@ impl RegionRequester {
|
||||
Ok(Box::pin(record_batch_stream))
|
||||
}
|
||||
|
||||
async fn handle_inner(&self, request: RegionRequest) -> Result<AffectedRows> {
|
||||
async fn handle_inner(&self, request: RegionRequest) -> Result<RegionResponse> {
|
||||
let request_type = request
|
||||
.body
|
||||
.as_ref()
|
||||
@@ -178,10 +179,7 @@ impl RegionRequester {
|
||||
|
||||
let mut client = self.client.raw_region_client()?;
|
||||
|
||||
let RegionResponse {
|
||||
header,
|
||||
affected_rows,
|
||||
} = client
|
||||
let response = client
|
||||
.handle(request)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
@@ -195,19 +193,20 @@ impl RegionRequester {
|
||||
})?
|
||||
.into_inner();
|
||||
|
||||
check_response_header(header)?;
|
||||
check_response_header(&response.header)?;
|
||||
|
||||
Ok(affected_rows as _)
|
||||
Ok(RegionResponse::from_region_response(response))
|
||||
}
|
||||
|
||||
pub async fn handle(&self, request: RegionRequest) -> Result<AffectedRows> {
|
||||
pub async fn handle(&self, request: RegionRequest) -> Result<RegionResponse> {
|
||||
self.handle_inner(request).await
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_response_header(header: Option<ResponseHeader>) -> Result<()> {
|
||||
pub fn check_response_header(header: &Option<ResponseHeader>) -> Result<()> {
|
||||
let status = header
|
||||
.and_then(|header| header.status)
|
||||
.as_ref()
|
||||
.and_then(|header| header.status.as_ref())
|
||||
.context(IllegalDatabaseResponseSnafu {
|
||||
err_msg: "either response header or status is missing",
|
||||
})?;
|
||||
@@ -221,7 +220,7 @@ pub fn check_response_header(header: Option<ResponseHeader>) -> Result<()> {
|
||||
})?;
|
||||
ServerSnafu {
|
||||
code,
|
||||
msg: status.err_msg,
|
||||
msg: status.err_msg.clone(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
@@ -236,19 +235,19 @@ mod test {
|
||||
|
||||
#[test]
|
||||
fn test_check_response_header() {
|
||||
let result = check_response_header(None);
|
||||
let result = check_response_header(&None);
|
||||
assert!(matches!(
|
||||
result.unwrap_err(),
|
||||
IllegalDatabaseResponse { .. }
|
||||
));
|
||||
|
||||
let result = check_response_header(Some(ResponseHeader { status: None }));
|
||||
let result = check_response_header(&Some(ResponseHeader { status: None }));
|
||||
assert!(matches!(
|
||||
result.unwrap_err(),
|
||||
IllegalDatabaseResponse { .. }
|
||||
));
|
||||
|
||||
let result = check_response_header(Some(ResponseHeader {
|
||||
let result = check_response_header(&Some(ResponseHeader {
|
||||
status: Some(PbStatus {
|
||||
status_code: StatusCode::Success as u32,
|
||||
err_msg: String::default(),
|
||||
@@ -256,7 +255,7 @@ mod test {
|
||||
}));
|
||||
assert!(result.is_ok());
|
||||
|
||||
let result = check_response_header(Some(ResponseHeader {
|
||||
let result = check_response_header(&Some(ResponseHeader {
|
||||
status: Some(PbStatus {
|
||||
status_code: u32::MAX,
|
||||
err_msg: String::default(),
|
||||
@@ -267,7 +266,7 @@ mod test {
|
||||
IllegalDatabaseResponse { .. }
|
||||
));
|
||||
|
||||
let result = check_response_header(Some(ResponseHeader {
|
||||
let result = check_response_header(&Some(ResponseHeader {
|
||||
status: Some(PbStatus {
|
||||
status_code: StatusCode::Internal as u32,
|
||||
err_msg: "blabla".to_string(),
|
||||
|
||||
@@ -16,7 +16,6 @@ tokio-console = ["common-telemetry/tokio-console"]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
anymap = "1.0.0-beta.2"
|
||||
async-trait.workspace = true
|
||||
auth.workspace = true
|
||||
catalog.workspace = true
|
||||
@@ -52,7 +51,6 @@ meta-client.workspace = true
|
||||
meta-srv.workspace = true
|
||||
mito2.workspace = true
|
||||
nu-ansi-term = "0.46"
|
||||
partition.workspace = true
|
||||
plugins.workspace = true
|
||||
prometheus.workspace = true
|
||||
prost.workspace = true
|
||||
@@ -78,6 +76,7 @@ tikv-jemallocator = "0.5"
|
||||
common-test-util.workspace = true
|
||||
serde.workspace = true
|
||||
temp-env = "0.3"
|
||||
tempfile.workspace = true
|
||||
|
||||
[target.'cfg(not(windows))'.dev-dependencies]
|
||||
rexpect = "0.5"
|
||||
|
||||
@@ -13,5 +13,8 @@
|
||||
// limitations under the License.
|
||||
|
||||
fn main() {
|
||||
// Trigger this script if the git branch/commit changes
|
||||
println!("cargo:rerun-if-changed=.git/refs/heads");
|
||||
|
||||
common_version::setup_build_info();
|
||||
}
|
||||
|
||||
@@ -84,10 +84,10 @@ impl Command {
|
||||
let mut logging_opts = LoggingOptions::default();
|
||||
|
||||
if let Some(dir) = &cli_options.log_dir {
|
||||
logging_opts.dir = dir.clone();
|
||||
logging_opts.dir.clone_from(dir);
|
||||
}
|
||||
|
||||
logging_opts.level = cli_options.log_level.clone();
|
||||
logging_opts.level.clone_from(&cli_options.log_level);
|
||||
|
||||
Ok(Options::Cli(Box::new(logging_opts)))
|
||||
}
|
||||
|
||||
@@ -106,9 +106,12 @@ impl TableMetadataBencher {
|
||||
.await
|
||||
.unwrap();
|
||||
let start = Instant::now();
|
||||
let table_info = table_info.unwrap();
|
||||
let table_route = table_route.unwrap();
|
||||
let table_id = table_info.table_info.ident.table_id;
|
||||
let _ = self
|
||||
.table_metadata_manager
|
||||
.delete_table_metadata(&table_info.unwrap(), &table_route.unwrap())
|
||||
.delete_table_metadata(table_id, &table_info.table_name(), &table_route)
|
||||
.await;
|
||||
start.elapsed()
|
||||
},
|
||||
@@ -134,7 +137,7 @@ impl TableMetadataBencher {
|
||||
let start = Instant::now();
|
||||
let _ = self
|
||||
.table_metadata_manager
|
||||
.rename_table(table_info.unwrap(), new_table_name)
|
||||
.rename_table(&table_info.unwrap(), new_table_name)
|
||||
.await;
|
||||
|
||||
start.elapsed()
|
||||
|
||||
@@ -226,7 +226,10 @@ impl Export {
|
||||
}
|
||||
|
||||
async fn show_create_table(&self, catalog: &str, schema: &str, table: &str) -> Result<String> {
|
||||
let sql = format!("show create table {}.{}.{}", catalog, schema, table);
|
||||
let sql = format!(
|
||||
r#"show create table "{}"."{}"."{}""#,
|
||||
catalog, schema, table
|
||||
);
|
||||
let mut client = self.client.clone();
|
||||
client.set_catalog(catalog);
|
||||
client.set_schema(schema);
|
||||
@@ -273,7 +276,7 @@ impl Export {
|
||||
for (c, s, t) in table_list {
|
||||
match self.show_create_table(&c, &s, &t).await {
|
||||
Err(e) => {
|
||||
error!(e; "Failed to export table {}.{}.{}", c, s, t)
|
||||
error!(e; r#"Failed to export table "{}"."{}"."{}""#, c, s, t)
|
||||
}
|
||||
Ok(create_table) => {
|
||||
file.write_all(create_table.as_bytes())
|
||||
@@ -417,3 +420,84 @@ fn split_database(database: &str) -> Result<(String, Option<String>)> {
|
||||
Ok((catalog.to_string(), Some(schema.to_string())))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use clap::Parser;
|
||||
use client::{Client, Database};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::options::{CliOptions, Options};
|
||||
use crate::{cli, standalone, App};
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_export_create_table_with_quoted_names() -> Result<()> {
|
||||
let output_dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let standalone = standalone::Command::parse_from([
|
||||
"standalone",
|
||||
"start",
|
||||
"--data-home",
|
||||
&*output_dir.path().to_string_lossy(),
|
||||
]);
|
||||
let Options::Standalone(standalone_opts) =
|
||||
standalone.load_options(&CliOptions::default())?
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
let mut instance = standalone.build(*standalone_opts).await?;
|
||||
instance.start().await?;
|
||||
|
||||
let client = Client::with_urls(["127.0.0.1:4001"]);
|
||||
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
|
||||
database
|
||||
.sql(r#"CREATE DATABASE "cli.export.create_table";"#)
|
||||
.await
|
||||
.unwrap();
|
||||
database
|
||||
.sql(
|
||||
r#"CREATE TABLE "cli.export.create_table"."a.b.c"(
|
||||
ts TIMESTAMP,
|
||||
TIME INDEX (ts)
|
||||
) engine=mito;
|
||||
"#,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let output_dir = tempfile::tempdir().unwrap();
|
||||
let cli = cli::Command::parse_from([
|
||||
"cli",
|
||||
"export",
|
||||
"--addr",
|
||||
"127.0.0.1:4001",
|
||||
"--output-dir",
|
||||
&*output_dir.path().to_string_lossy(),
|
||||
"--target",
|
||||
"create-table",
|
||||
]);
|
||||
let mut cli_app = cli.build().await?;
|
||||
cli_app.start().await?;
|
||||
|
||||
instance.stop().await?;
|
||||
|
||||
let output_file = output_dir
|
||||
.path()
|
||||
.join("greptime-cli.export.create_table.sql");
|
||||
let res = std::fs::read_to_string(output_file).unwrap();
|
||||
let expect = r#"CREATE TABLE IF NOT EXISTS "a.b.c" (
|
||||
"ts" TIMESTAMP(3) NOT NULL,
|
||||
TIME INDEX ("ts")
|
||||
)
|
||||
|
||||
ENGINE=mito
|
||||
WITH(
|
||||
regions = 1
|
||||
);
|
||||
"#;
|
||||
assert_eq!(res.trim(), expect.trim());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ use catalog::kvbackend::{
|
||||
use client::{Client, Database, OutputData, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_base::Plugins;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_meta::cache_invalidator::MultiCacheInvalidator;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use common_telemetry::logging;
|
||||
@@ -252,9 +253,11 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
|
||||
|
||||
let cached_meta_backend =
|
||||
Arc::new(CachedMetaKvBackendBuilder::new(meta_client.clone()).build());
|
||||
|
||||
let multi_cache_invalidator = Arc::new(MultiCacheInvalidator::with_invalidators(vec![
|
||||
cached_meta_backend.clone(),
|
||||
]));
|
||||
let catalog_list =
|
||||
KvBackendCatalogManager::new(cached_meta_backend.clone(), cached_meta_backend);
|
||||
KvBackendCatalogManager::new(cached_meta_backend.clone(), multi_cache_invalidator).await;
|
||||
let plugins: Plugins = Default::default();
|
||||
let state = Arc::new(QueryEngineState::new(
|
||||
catalog_list,
|
||||
|
||||
@@ -139,19 +139,19 @@ impl StartCommand {
|
||||
)?;
|
||||
|
||||
if let Some(dir) = &cli_options.log_dir {
|
||||
opts.logging.dir = dir.clone();
|
||||
opts.logging.dir.clone_from(dir);
|
||||
}
|
||||
|
||||
if cli_options.log_level.is_some() {
|
||||
opts.logging.level = cli_options.log_level.clone();
|
||||
opts.logging.level.clone_from(&cli_options.log_level);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.rpc_addr {
|
||||
opts.rpc_addr = addr.clone();
|
||||
opts.rpc_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if self.rpc_hostname.is_some() {
|
||||
opts.rpc_hostname = self.rpc_hostname.clone();
|
||||
opts.rpc_hostname.clone_from(&self.rpc_hostname);
|
||||
}
|
||||
|
||||
if let Some(node_id) = self.node_id {
|
||||
@@ -161,7 +161,8 @@ impl StartCommand {
|
||||
if let Some(metasrv_addrs) = &self.metasrv_addr {
|
||||
opts.meta_client
|
||||
.get_or_insert_with(MetaClientOptions::default)
|
||||
.metasrv_addrs = metasrv_addrs.clone();
|
||||
.metasrv_addrs
|
||||
.clone_from(metasrv_addrs);
|
||||
opts.mode = Mode::Distributed;
|
||||
}
|
||||
|
||||
@@ -173,7 +174,7 @@ impl StartCommand {
|
||||
}
|
||||
|
||||
if let Some(data_home) = &self.data_home {
|
||||
opts.storage.data_home = data_home.clone();
|
||||
opts.storage.data_home.clone_from(data_home);
|
||||
}
|
||||
|
||||
// `wal_dir` only affects raft-engine config.
|
||||
@@ -191,7 +192,7 @@ impl StartCommand {
|
||||
}
|
||||
|
||||
if let Some(http_addr) = &self.http_addr {
|
||||
opts.http.addr = http_addr.clone();
|
||||
opts.http.addr.clone_from(http_addr);
|
||||
}
|
||||
|
||||
if let Some(http_timeout) = self.http_timeout {
|
||||
|
||||
@@ -16,9 +16,10 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use catalog::kvbackend::CachedMetaKvBackendBuilder;
|
||||
use catalog::kvbackend::{CachedMetaKvBackendBuilder, KvBackendCatalogManager};
|
||||
use clap::Parser;
|
||||
use client::client_manager::DatanodeClients;
|
||||
use common_meta::cache_invalidator::MultiCacheInvalidator;
|
||||
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
|
||||
use common_meta::heartbeat::handler::HandlerGroupExecutor;
|
||||
use common_telemetry::logging;
|
||||
@@ -156,11 +157,11 @@ impl StartCommand {
|
||||
)?;
|
||||
|
||||
if let Some(dir) = &cli_options.log_dir {
|
||||
opts.logging.dir = dir.clone();
|
||||
opts.logging.dir.clone_from(dir);
|
||||
}
|
||||
|
||||
if cli_options.log_level.is_some() {
|
||||
opts.logging.level = cli_options.log_level.clone();
|
||||
opts.logging.level.clone_from(&cli_options.log_level);
|
||||
}
|
||||
|
||||
let tls_opts = TlsOption::new(
|
||||
@@ -170,7 +171,7 @@ impl StartCommand {
|
||||
);
|
||||
|
||||
if let Some(addr) = &self.http_addr {
|
||||
opts.http.addr = addr.clone()
|
||||
opts.http.addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(http_timeout) = self.http_timeout {
|
||||
@@ -182,24 +183,24 @@ impl StartCommand {
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.rpc_addr {
|
||||
opts.grpc.addr = addr.clone()
|
||||
opts.grpc.addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.mysql_addr {
|
||||
opts.mysql.enable = true;
|
||||
opts.mysql.addr = addr.clone();
|
||||
opts.mysql.addr.clone_from(addr);
|
||||
opts.mysql.tls = tls_opts.clone();
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.postgres_addr {
|
||||
opts.postgres.enable = true;
|
||||
opts.postgres.addr = addr.clone();
|
||||
opts.postgres.addr.clone_from(addr);
|
||||
opts.postgres.tls = tls_opts;
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.opentsdb_addr {
|
||||
opts.opentsdb.enable = true;
|
||||
opts.opentsdb.addr = addr.clone();
|
||||
opts.opentsdb.addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(enable) = self.influxdb_enable {
|
||||
@@ -209,11 +210,12 @@ impl StartCommand {
|
||||
if let Some(metasrv_addrs) = &self.metasrv_addr {
|
||||
opts.meta_client
|
||||
.get_or_insert_with(MetaClientOptions::default)
|
||||
.metasrv_addrs = metasrv_addrs.clone();
|
||||
.metasrv_addrs
|
||||
.clone_from(metasrv_addrs);
|
||||
opts.mode = Mode::Distributed;
|
||||
}
|
||||
|
||||
opts.user_provider = self.user_provider.clone();
|
||||
opts.user_provider.clone_from(&self.user_provider);
|
||||
|
||||
Ok(Options::Frontend(Box::new(opts)))
|
||||
}
|
||||
@@ -247,11 +249,19 @@ impl StartCommand {
|
||||
.cache_tti(cache_tti)
|
||||
.build();
|
||||
let cached_meta_backend = Arc::new(cached_meta_backend);
|
||||
let multi_cache_invalidator = Arc::new(MultiCacheInvalidator::with_invalidators(vec![
|
||||
cached_meta_backend.clone(),
|
||||
]));
|
||||
let catalog_manager = KvBackendCatalogManager::new(
|
||||
cached_meta_backend.clone(),
|
||||
multi_cache_invalidator.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
let executor = HandlerGroupExecutor::new(vec![
|
||||
Arc::new(ParseMailboxMessageHandler),
|
||||
Arc::new(InvalidateTableCacheHandler::new(
|
||||
cached_meta_backend.clone(),
|
||||
multi_cache_invalidator.clone(),
|
||||
)),
|
||||
]);
|
||||
|
||||
@@ -263,11 +273,12 @@ impl StartCommand {
|
||||
|
||||
let mut instance = FrontendBuilder::new(
|
||||
cached_meta_backend.clone(),
|
||||
catalog_manager,
|
||||
Arc::new(DatanodeClients::default()),
|
||||
meta_client,
|
||||
)
|
||||
.with_cache_invalidator(cached_meta_backend)
|
||||
.with_plugin(plugins.clone())
|
||||
.with_cache_invalidator(multi_cache_invalidator)
|
||||
.with_heartbeat_task(heartbeat_task)
|
||||
.try_build()
|
||||
.await
|
||||
|
||||
@@ -17,8 +17,8 @@ use std::time::Duration;
|
||||
use async_trait::async_trait;
|
||||
use clap::Parser;
|
||||
use common_telemetry::logging;
|
||||
use meta_srv::bootstrap::MetaSrvInstance;
|
||||
use meta_srv::metasrv::MetaSrvOptions;
|
||||
use meta_srv::bootstrap::MetasrvInstance;
|
||||
use meta_srv::metasrv::MetasrvOptions;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result, StartMetaServerSnafu};
|
||||
@@ -26,11 +26,11 @@ use crate::options::{CliOptions, Options};
|
||||
use crate::App;
|
||||
|
||||
pub struct Instance {
|
||||
instance: MetaSrvInstance,
|
||||
instance: MetasrvInstance,
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
fn new(instance: MetaSrvInstance) -> Self {
|
||||
fn new(instance: MetasrvInstance) -> Self {
|
||||
Self { instance }
|
||||
}
|
||||
}
|
||||
@@ -42,7 +42,7 @@ impl App for Instance {
|
||||
}
|
||||
|
||||
async fn start(&mut self) -> Result<()> {
|
||||
plugins::start_meta_srv_plugins(self.instance.plugins())
|
||||
plugins::start_metasrv_plugins(self.instance.plugins())
|
||||
.await
|
||||
.context(StartMetaServerSnafu)?;
|
||||
|
||||
@@ -64,7 +64,7 @@ pub struct Command {
|
||||
}
|
||||
|
||||
impl Command {
|
||||
pub async fn build(self, opts: MetaSrvOptions) -> Result<Instance> {
|
||||
pub async fn build(self, opts: MetasrvOptions) -> Result<Instance> {
|
||||
self.subcmd.build(opts).await
|
||||
}
|
||||
|
||||
@@ -79,7 +79,7 @@ enum SubCommand {
|
||||
}
|
||||
|
||||
impl SubCommand {
|
||||
async fn build(self, opts: MetaSrvOptions) -> Result<Instance> {
|
||||
async fn build(self, opts: MetasrvOptions) -> Result<Instance> {
|
||||
match self {
|
||||
SubCommand::Start(cmd) => cmd.build(opts).await,
|
||||
}
|
||||
@@ -127,30 +127,30 @@ struct StartCommand {
|
||||
|
||||
impl StartCommand {
|
||||
fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
|
||||
let mut opts: MetaSrvOptions = Options::load_layered_options(
|
||||
let mut opts: MetasrvOptions = Options::load_layered_options(
|
||||
self.config_file.as_deref(),
|
||||
self.env_prefix.as_ref(),
|
||||
MetaSrvOptions::env_list_keys(),
|
||||
MetasrvOptions::env_list_keys(),
|
||||
)?;
|
||||
|
||||
if let Some(dir) = &cli_options.log_dir {
|
||||
opts.logging.dir = dir.clone();
|
||||
opts.logging.dir.clone_from(dir);
|
||||
}
|
||||
|
||||
if cli_options.log_level.is_some() {
|
||||
opts.logging.level = cli_options.log_level.clone();
|
||||
opts.logging.level.clone_from(&cli_options.log_level);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.bind_addr {
|
||||
opts.bind_addr = addr.clone();
|
||||
opts.bind_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.server_addr {
|
||||
opts.server_addr = addr.clone();
|
||||
opts.server_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.store_addr {
|
||||
opts.store_addr = addr.clone();
|
||||
opts.store_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(selector_type) = &self.selector {
|
||||
@@ -168,7 +168,7 @@ impl StartCommand {
|
||||
}
|
||||
|
||||
if let Some(http_addr) = &self.http_addr {
|
||||
opts.http.addr = http_addr.clone();
|
||||
opts.http.addr.clone_from(http_addr);
|
||||
}
|
||||
|
||||
if let Some(http_timeout) = self.http_timeout {
|
||||
@@ -176,11 +176,11 @@ impl StartCommand {
|
||||
}
|
||||
|
||||
if let Some(data_home) = &self.data_home {
|
||||
opts.data_home = data_home.clone();
|
||||
opts.data_home.clone_from(data_home);
|
||||
}
|
||||
|
||||
if !self.store_key_prefix.is_empty() {
|
||||
opts.store_key_prefix = self.store_key_prefix.clone()
|
||||
opts.store_key_prefix.clone_from(&self.store_key_prefix)
|
||||
}
|
||||
|
||||
if let Some(max_txn_ops) = self.max_txn_ops {
|
||||
@@ -193,20 +193,20 @@ impl StartCommand {
|
||||
Ok(Options::Metasrv(Box::new(opts)))
|
||||
}
|
||||
|
||||
async fn build(self, mut opts: MetaSrvOptions) -> Result<Instance> {
|
||||
let plugins = plugins::setup_meta_srv_plugins(&mut opts)
|
||||
async fn build(self, mut opts: MetasrvOptions) -> Result<Instance> {
|
||||
let plugins = plugins::setup_metasrv_plugins(&mut opts)
|
||||
.await
|
||||
.context(StartMetaServerSnafu)?;
|
||||
|
||||
logging::info!("MetaSrv start command: {:#?}", self);
|
||||
logging::info!("MetaSrv options: {:#?}", opts);
|
||||
logging::info!("Metasrv start command: {:#?}", self);
|
||||
logging::info!("Metasrv options: {:#?}", opts);
|
||||
|
||||
let builder = meta_srv::bootstrap::metasrv_builder(&opts, plugins.clone(), None)
|
||||
.await
|
||||
.context(error::BuildMetaServerSnafu)?;
|
||||
let metasrv = builder.build().await.context(error::BuildMetaServerSnafu)?;
|
||||
|
||||
let instance = MetaSrvInstance::new(opts, plugins, metasrv)
|
||||
let instance = MetasrvInstance::new(opts, plugins, metasrv)
|
||||
.await
|
||||
.context(error::BuildMetaServerSnafu)?;
|
||||
|
||||
@@ -218,6 +218,7 @@ impl StartCommand {
|
||||
mod tests {
|
||||
use std::io::Write;
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_test_util::temp_dir::create_named_temp_file;
|
||||
use meta_srv::selector::SelectorType;
|
||||
|
||||
@@ -297,6 +298,10 @@ mod tests {
|
||||
.first_heartbeat_estimate
|
||||
.as_millis()
|
||||
);
|
||||
assert_eq!(
|
||||
options.procedure.max_metadata_value_size,
|
||||
Some(ReadableSize::kb(1500))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -15,12 +15,12 @@
|
||||
use clap::ArgMatches;
|
||||
use common_config::KvBackendConfig;
|
||||
use common_telemetry::logging::{LoggingOptions, TracingOptions};
|
||||
use common_wal::config::MetaSrvWalConfig;
|
||||
use common_wal::config::MetasrvWalConfig;
|
||||
use config::{Config, Environment, File, FileFormat};
|
||||
use datanode::config::{DatanodeOptions, ProcedureConfig};
|
||||
use frontend::error::{Result as FeResult, TomlFormatSnafu};
|
||||
use frontend::frontend::{FrontendOptions, TomlSerializable};
|
||||
use meta_srv::metasrv::MetaSrvOptions;
|
||||
use meta_srv::metasrv::MetasrvOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -38,7 +38,7 @@ pub struct MixOptions {
|
||||
pub frontend: FrontendOptions,
|
||||
pub datanode: DatanodeOptions,
|
||||
pub logging: LoggingOptions,
|
||||
pub wal_meta: MetaSrvWalConfig,
|
||||
pub wal_meta: MetasrvWalConfig,
|
||||
}
|
||||
|
||||
impl From<MixOptions> for FrontendOptions {
|
||||
@@ -56,7 +56,7 @@ impl TomlSerializable for MixOptions {
|
||||
pub enum Options {
|
||||
Datanode(Box<DatanodeOptions>),
|
||||
Frontend(Box<FrontendOptions>),
|
||||
Metasrv(Box<MetaSrvOptions>),
|
||||
Metasrv(Box<MetasrvOptions>),
|
||||
Standalone(Box<MixOptions>),
|
||||
Cli(Box<LoggingOptions>),
|
||||
}
|
||||
|
||||
@@ -16,10 +16,11 @@ use std::sync::Arc;
|
||||
use std::{fs, path};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use catalog::kvbackend::KvBackendCatalogManager;
|
||||
use clap::Parser;
|
||||
use common_catalog::consts::MIN_USER_TABLE_ID;
|
||||
use common_config::{metadata_store_dir, KvBackendConfig};
|
||||
use common_meta::cache_invalidator::DummyCacheInvalidator;
|
||||
use common_meta::cache_invalidator::{CacheInvalidatorRef, MultiCacheInvalidator};
|
||||
use common_meta::datanode_manager::DatanodeManagerRef;
|
||||
use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef};
|
||||
use common_meta::ddl::ProcedureExecutorRef;
|
||||
@@ -292,11 +293,11 @@ impl StartCommand {
|
||||
opts.mode = Mode::Standalone;
|
||||
|
||||
if let Some(dir) = &cli_options.log_dir {
|
||||
opts.logging.dir = dir.clone();
|
||||
opts.logging.dir.clone_from(dir);
|
||||
}
|
||||
|
||||
if cli_options.log_level.is_some() {
|
||||
opts.logging.level = cli_options.log_level.clone();
|
||||
opts.logging.level.clone_from(&cli_options.log_level);
|
||||
}
|
||||
|
||||
let tls_opts = TlsOption::new(
|
||||
@@ -306,11 +307,11 @@ impl StartCommand {
|
||||
);
|
||||
|
||||
if let Some(addr) = &self.http_addr {
|
||||
opts.http.addr = addr.clone()
|
||||
opts.http.addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(data_home) = &self.data_home {
|
||||
opts.storage.data_home = data_home.clone();
|
||||
opts.storage.data_home.clone_from(data_home);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.rpc_addr {
|
||||
@@ -324,31 +325,31 @@ impl StartCommand {
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
opts.grpc.addr = addr.clone()
|
||||
opts.grpc.addr.clone_from(addr)
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.mysql_addr {
|
||||
opts.mysql.enable = true;
|
||||
opts.mysql.addr = addr.clone();
|
||||
opts.mysql.addr.clone_from(addr);
|
||||
opts.mysql.tls = tls_opts.clone();
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.postgres_addr {
|
||||
opts.postgres.enable = true;
|
||||
opts.postgres.addr = addr.clone();
|
||||
opts.postgres.addr.clone_from(addr);
|
||||
opts.postgres.tls = tls_opts;
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.opentsdb_addr {
|
||||
opts.opentsdb.enable = true;
|
||||
opts.opentsdb.addr = addr.clone();
|
||||
opts.opentsdb.addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if self.influxdb_enable {
|
||||
opts.influxdb.enable = self.influxdb_enable;
|
||||
}
|
||||
|
||||
opts.user_provider = self.user_provider.clone();
|
||||
opts.user_provider.clone_from(&self.user_provider);
|
||||
|
||||
let metadata_store = opts.metadata_store.clone();
|
||||
let procedure = opts.procedure.clone();
|
||||
@@ -399,6 +400,10 @@ impl StartCommand {
|
||||
.await
|
||||
.context(StartFrontendSnafu)?;
|
||||
|
||||
let multi_cache_invalidator = Arc::new(MultiCacheInvalidator::default());
|
||||
let catalog_manager =
|
||||
KvBackendCatalogManager::new(kv_backend.clone(), multi_cache_invalidator.clone()).await;
|
||||
|
||||
let builder =
|
||||
DatanodeBuilder::new(dn_opts, fe_plugins.clone()).with_kv_backend(kv_backend.clone());
|
||||
let datanode = builder.build().await.context(StartDatanodeSnafu)?;
|
||||
@@ -422,22 +427,27 @@ impl StartCommand {
|
||||
let table_meta_allocator = Arc::new(TableMetadataAllocator::new(
|
||||
table_id_sequence,
|
||||
wal_options_allocator.clone(),
|
||||
table_metadata_manager.table_name_manager().clone(),
|
||||
));
|
||||
|
||||
let ddl_task_executor = Self::create_ddl_task_executor(
|
||||
table_metadata_manager,
|
||||
procedure_manager.clone(),
|
||||
datanode_manager.clone(),
|
||||
multi_cache_invalidator,
|
||||
table_meta_allocator,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut frontend = FrontendBuilder::new(kv_backend, datanode_manager, ddl_task_executor)
|
||||
.with_plugin(fe_plugins.clone())
|
||||
.try_build()
|
||||
.await
|
||||
.context(StartFrontendSnafu)?;
|
||||
let mut frontend = FrontendBuilder::new(
|
||||
kv_backend,
|
||||
catalog_manager,
|
||||
datanode_manager,
|
||||
ddl_task_executor,
|
||||
)
|
||||
.with_plugin(fe_plugins.clone())
|
||||
.try_build()
|
||||
.await
|
||||
.context(StartFrontendSnafu)?;
|
||||
|
||||
let servers = Services::new(fe_opts.clone(), Arc::new(frontend.clone()), fe_plugins)
|
||||
.build()
|
||||
@@ -459,16 +469,18 @@ impl StartCommand {
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
datanode_manager: DatanodeManagerRef,
|
||||
cache_invalidator: CacheInvalidatorRef,
|
||||
table_meta_allocator: TableMetadataAllocatorRef,
|
||||
) -> Result<ProcedureExecutorRef> {
|
||||
let procedure_executor: ProcedureExecutorRef = Arc::new(
|
||||
DdlManager::try_new(
|
||||
procedure_manager,
|
||||
datanode_manager,
|
||||
Arc::new(DummyCacheInvalidator),
|
||||
cache_invalidator,
|
||||
table_metadata_manager,
|
||||
table_meta_allocator,
|
||||
Arc::new(MemoryRegionKeeper::default()),
|
||||
true,
|
||||
)
|
||||
.context(InitDdlManagerSnafu)?,
|
||||
);
|
||||
|
||||
@@ -1,20 +1,6 @@
|
||||
// Copyright (c) 2017-present, PingCAP, Inc. Licensed under Apache-2.0.
|
||||
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// This file is copied from https://github.com/tikv/raft-engine/blob/8dd2a39f359ff16f5295f35343f626e0c10132fa/src/util.rs
|
||||
// This file is copied from https://github.com/tikv/raft-engine/blob/0.3.0/src/util.rs
|
||||
|
||||
use std::fmt::{self, Debug, Display, Write};
|
||||
use std::ops::{Div, Mul};
|
||||
|
||||
@@ -86,6 +86,8 @@ pub const INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID: u32 = 27;
|
||||
pub const INFORMATION_SCHEMA_PARTITIONS_TABLE_ID: u32 = 28;
|
||||
/// id for information_schema.REGION_PEERS
|
||||
pub const INFORMATION_SCHEMA_REGION_PEERS_TABLE_ID: u32 = 29;
|
||||
/// id for information_schema.TABLE_CONSTRAINTS
|
||||
pub const INFORMATION_SCHEMA_TABLE_CONSTRAINTS_TABLE_ID: u32 = 30;
|
||||
/// ----- End of information_schema tables -----
|
||||
|
||||
pub const MITO_ENGINE: &str = "mito";
|
||||
|
||||
@@ -55,10 +55,10 @@ pub fn build_db_string(catalog: &str, schema: &str) -> String {
|
||||
/// schema name
|
||||
/// - if `[<catalog>-]` is provided, we split database name with `-` and use
|
||||
/// `<catalog>` and `<schema>`.
|
||||
pub fn parse_catalog_and_schema_from_db_string(db: &str) -> (&str, &str) {
|
||||
pub fn parse_catalog_and_schema_from_db_string(db: &str) -> (String, String) {
|
||||
match parse_optional_catalog_and_schema_from_db_string(db) {
|
||||
(Some(catalog), schema) => (catalog, schema),
|
||||
(None, schema) => (DEFAULT_CATALOG_NAME, schema),
|
||||
(None, schema) => (DEFAULT_CATALOG_NAME.to_string(), schema),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,12 +66,12 @@ pub fn parse_catalog_and_schema_from_db_string(db: &str) -> (&str, &str) {
|
||||
///
|
||||
/// Similar to [`parse_catalog_and_schema_from_db_string`] but returns an optional
|
||||
/// catalog if it's not provided in the database name.
|
||||
pub fn parse_optional_catalog_and_schema_from_db_string(db: &str) -> (Option<&str>, &str) {
|
||||
pub fn parse_optional_catalog_and_schema_from_db_string(db: &str) -> (Option<String>, String) {
|
||||
let parts = db.splitn(2, '-').collect::<Vec<&str>>();
|
||||
if parts.len() == 2 {
|
||||
(Some(parts[0]), parts[1])
|
||||
(Some(parts[0].to_lowercase()), parts[1].to_lowercase())
|
||||
} else {
|
||||
(None, db)
|
||||
(None, db.to_lowercase())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,32 +88,37 @@ mod tests {
|
||||
#[test]
|
||||
fn test_parse_catalog_and_schema() {
|
||||
assert_eq!(
|
||||
(DEFAULT_CATALOG_NAME, "fullschema"),
|
||||
(DEFAULT_CATALOG_NAME.to_string(), "fullschema".to_string()),
|
||||
parse_catalog_and_schema_from_db_string("fullschema")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
("catalog", "schema"),
|
||||
("catalog".to_string(), "schema".to_string()),
|
||||
parse_catalog_and_schema_from_db_string("catalog-schema")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
("catalog", "schema1-schema2"),
|
||||
("catalog".to_string(), "schema1-schema2".to_string()),
|
||||
parse_catalog_and_schema_from_db_string("catalog-schema1-schema2")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
(None, "fullschema"),
|
||||
(None, "fullschema".to_string()),
|
||||
parse_optional_catalog_and_schema_from_db_string("fullschema")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
(Some("catalog"), "schema"),
|
||||
(Some("catalog".to_string()), "schema".to_string()),
|
||||
parse_optional_catalog_and_schema_from_db_string("catalog-schema")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
(Some("catalog"), "schema1-schema2"),
|
||||
(Some("catalog".to_string()), "schema".to_string()),
|
||||
parse_optional_catalog_and_schema_from_db_string("CATALOG-SCHEMA")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
(Some("catalog".to_string()), "schema1-schema2".to_string()),
|
||||
parse_optional_catalog_and_schema_from_db_string("catalog-schema1-schema2")
|
||||
);
|
||||
}
|
||||
|
||||
@@ -9,7 +9,6 @@ workspace = true
|
||||
|
||||
[dependencies]
|
||||
common-base.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
num_cpus.workspace = true
|
||||
serde.workspace = true
|
||||
sysinfo.workspace = true
|
||||
|
||||
@@ -30,7 +30,7 @@ derive_builder.workspace = true
|
||||
futures.workspace = true
|
||||
lazy_static.workspace = true
|
||||
object-store.workspace = true
|
||||
orc-rust = "0.2"
|
||||
orc-rust = { git = "https://github.com/MichaelScofield/orc-rs.git", rev = "17347f5f084ac937863317df882218055c4ea8c1" }
|
||||
parquet.workspace = true
|
||||
paste = "1.0"
|
||||
regex = "1.7"
|
||||
|
||||
@@ -60,12 +60,6 @@ impl<
|
||||
.context(error::BufferedWriterClosedSnafu)?;
|
||||
let metadata = encoder.close().await?;
|
||||
|
||||
// Use `rows_written` to keep track of whether any rows have been written.
|
||||
// If no row's been written, then we can simply close the underlying
|
||||
// writer without flush so that no file will be actually created.
|
||||
if self.rows_written != 0 {
|
||||
self.bytes_written += self.try_flush(true).await?;
|
||||
}
|
||||
// It's important to shut down the writer: doing so flushes all pending writes.
|
||||
self.close_inner_writer().await?;
|
||||
Ok((metadata, self.bytes_written))
|
||||
@@ -79,8 +73,15 @@ impl<
|
||||
Fut: Future<Output = Result<T>>,
|
||||
> LazyBufferedWriter<T, U, F>
|
||||
{
|
||||
/// Closes the writer without flushing the buffer data.
|
||||
/// Closes the writer and flushes the buffer data.
|
||||
pub async fn close_inner_writer(&mut self) -> Result<()> {
|
||||
// Use `rows_written` to keep track of whether any rows have been written.
|
||||
// If no row's been written, then we can simply close the underlying
|
||||
// writer without flush so that no file will be actually created.
|
||||
if self.rows_written != 0 {
|
||||
self.bytes_written += self.try_flush(true).await?;
|
||||
}
|
||||
|
||||
if let Some(writer) = &mut self.writer {
|
||||
writer.shutdown().await.context(error::AsyncWriteSnafu)?;
|
||||
}
|
||||
@@ -117,7 +118,7 @@ impl<
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn try_flush(&mut self, all: bool) -> Result<u64> {
|
||||
async fn try_flush(&mut self, all: bool) -> Result<u64> {
|
||||
let mut bytes_written: u64 = 0;
|
||||
|
||||
// Once buffered data size reaches threshold, split the data in chunks (typically 4MB)
|
||||
|
||||
@@ -213,10 +213,6 @@ pub async fn stream_to_file<T: DfRecordBatchEncoder, U: Fn(SharedBuffer) -> T>(
|
||||
writer.write(&batch).await?;
|
||||
rows += batch.num_rows();
|
||||
}
|
||||
|
||||
// Flushes all pending writes
|
||||
let _ = writer.try_flush(true).await?;
|
||||
writer.close_inner_writer().await?;
|
||||
|
||||
Ok(rows)
|
||||
}
|
||||
|
||||
@@ -117,7 +117,7 @@ impl CsvConfig {
|
||||
let mut builder = csv::ReaderBuilder::new(self.file_schema.clone())
|
||||
.with_delimiter(self.delimiter)
|
||||
.with_batch_size(self.batch_size)
|
||||
.has_header(self.has_header);
|
||||
.with_header(self.has_header);
|
||||
|
||||
if let Some(proj) = &self.file_projection {
|
||||
builder = builder.with_projection(proj.clone());
|
||||
|
||||
@@ -215,10 +215,7 @@ impl BufferedWriter {
|
||||
|
||||
/// Write a record batch to stream writer.
|
||||
pub async fn write(&mut self, arrow_batch: &RecordBatch) -> error::Result<()> {
|
||||
self.inner.write(arrow_batch).await?;
|
||||
self.inner.try_flush(false).await?;
|
||||
|
||||
Ok(())
|
||||
self.inner.write(arrow_batch).await
|
||||
}
|
||||
|
||||
/// Close parquet writer.
|
||||
|
||||
@@ -19,6 +19,7 @@ use std::vec;
|
||||
|
||||
use common_test_util::find_workspace_path;
|
||||
use datafusion::assert_batches_eq;
|
||||
use datafusion::config::TableParquetOptions;
|
||||
use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStream, ParquetExec};
|
||||
use datafusion::execution::context::TaskContext;
|
||||
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
|
||||
@@ -166,7 +167,7 @@ async fn test_parquet_exec() {
|
||||
.to_string();
|
||||
let base_config = scan_config(schema.clone(), None, path);
|
||||
|
||||
let exec = ParquetExec::new(base_config, None, None)
|
||||
let exec = ParquetExec::new(base_config, None, None, TableParquetOptions::default())
|
||||
.with_parquet_file_reader_factory(Arc::new(DefaultParquetFileReaderFactory::new(store)));
|
||||
|
||||
let ctx = SessionContext::new();
|
||||
|
||||
@@ -16,6 +16,7 @@ use std::sync::Arc;
|
||||
|
||||
use arrow_schema::{DataType, Field, Schema, SchemaRef};
|
||||
use common_test_util::temp_dir::{create_temp_dir, TempDir};
|
||||
use datafusion::common::Statistics;
|
||||
use datafusion::datasource::listing::PartitionedFile;
|
||||
use datafusion::datasource::object_store::ObjectStoreUrl;
|
||||
use datafusion::datasource::physical_plan::{FileScanConfig, FileStream};
|
||||
@@ -72,17 +73,16 @@ pub fn test_basic_schema() -> SchemaRef {
|
||||
pub fn scan_config(file_schema: SchemaRef, limit: Option<usize>, filename: &str) -> FileScanConfig {
|
||||
// object_store only recognizes Unix-style paths, so convert backslashes to forward slashes.
|
||||
let filename = &filename.replace('\\', "/");
|
||||
|
||||
let statistics = Statistics::new_unknown(file_schema.as_ref());
|
||||
FileScanConfig {
|
||||
object_store_url: ObjectStoreUrl::parse("empty://").unwrap(), // won't be used
|
||||
file_schema,
|
||||
file_groups: vec![vec![PartitionedFile::new(filename.to_string(), 10)]],
|
||||
statistics: Default::default(),
|
||||
statistics,
|
||||
projection: None,
|
||||
limit,
|
||||
table_partition_cols: vec![],
|
||||
output_ordering: vec![],
|
||||
infinite_source: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ license.workspace = true
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
arrow.workspace = true
|
||||
bigdecimal.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
|
||||
@@ -11,7 +11,6 @@ workspace = true
|
||||
api.workspace = true
|
||||
arc-swap = "1.0"
|
||||
async-trait.workspace = true
|
||||
chrono-tz = "0.6"
|
||||
common-base.workspace = true
|
||||
common-catalog.workspace = true
|
||||
common-error.workspace = true
|
||||
@@ -24,7 +23,6 @@ common-time.workspace = true
|
||||
common-version.workspace = true
|
||||
datafusion.workspace = true
|
||||
datatypes.workspace = true
|
||||
libc = "0.2"
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
once_cell.workspace = true
|
||||
|
||||
@@ -18,6 +18,7 @@ use async_trait::async_trait;
|
||||
use common_base::AffectedRows;
|
||||
use common_meta::rpc::procedure::{MigrateRegionRequest, ProcedureStateResponse};
|
||||
use common_query::error::Result;
|
||||
use common_query::Output;
|
||||
use session::context::QueryContextRef;
|
||||
use store_api::storage::RegionId;
|
||||
use table::requests::{CompactTableRequest, DeleteRequest, FlushTableRequest, InsertRequest};
|
||||
@@ -26,7 +27,7 @@ use table::requests::{CompactTableRequest, DeleteRequest, FlushTableRequest, Ins
|
||||
#[async_trait]
|
||||
pub trait TableMutationHandler: Send + Sync {
|
||||
/// Inserts rows into the table.
|
||||
async fn insert(&self, request: InsertRequest, ctx: QueryContextRef) -> Result<AffectedRows>;
|
||||
async fn insert(&self, request: InsertRequest, ctx: QueryContextRef) -> Result<Output>;
|
||||
|
||||
/// Delete rows from the table.
|
||||
async fn delete(&self, request: DeleteRequest, ctx: QueryContextRef) -> Result<AffectedRows>;
|
||||
|
||||
@@ -56,7 +56,7 @@ where
|
||||
.map(|&n| n.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
nums,
|
||||
I::LogicalType::build_data_type(),
|
||||
))])
|
||||
}
|
||||
@@ -120,10 +120,7 @@ where
|
||||
O::from_native(native).into()
|
||||
})
|
||||
.collect::<Vec<Value>>();
|
||||
let diff = Value::List(ListValue::new(
|
||||
Some(Box::new(diff)),
|
||||
O::LogicalType::build_data_type(),
|
||||
));
|
||||
let diff = Value::List(ListValue::new(diff, O::LogicalType::build_data_type()));
|
||||
Ok(diff)
|
||||
}
|
||||
}
|
||||
@@ -218,10 +215,7 @@ mod test {
|
||||
let values = vec![Value::from(2_i64), Value::from(1_i64)];
|
||||
diff.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(values)),
|
||||
ConcreteDataType::int64_datatype()
|
||||
)),
|
||||
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
|
||||
diff.evaluate().unwrap()
|
||||
);
|
||||
|
||||
@@ -236,10 +230,7 @@ mod test {
|
||||
let values = vec![Value::from(5_i64), Value::from(1_i64)];
|
||||
diff.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(values)),
|
||||
ConcreteDataType::int64_datatype()
|
||||
)),
|
||||
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
|
||||
diff.evaluate().unwrap()
|
||||
);
|
||||
|
||||
@@ -252,10 +243,7 @@ mod test {
|
||||
let values = vec![Value::from(0_i64), Value::from(0_i64), Value::from(0_i64)];
|
||||
diff.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(values)),
|
||||
ConcreteDataType::int64_datatype()
|
||||
)),
|
||||
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
|
||||
diff.evaluate().unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -104,10 +104,7 @@ where
|
||||
.map(|&n| n.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::LogicalType::build_data_type(),
|
||||
)),
|
||||
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
|
||||
self.p.into(),
|
||||
])
|
||||
}
|
||||
|
||||
@@ -72,10 +72,7 @@ where
|
||||
.map(|&n| n.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::LogicalType::build_data_type(),
|
||||
)),
|
||||
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
|
||||
self.x.into(),
|
||||
])
|
||||
}
|
||||
|
||||
@@ -56,10 +56,7 @@ where
|
||||
.map(|&x| x.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::LogicalType::build_data_type(),
|
||||
)),
|
||||
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
|
||||
self.x.into(),
|
||||
])
|
||||
}
|
||||
|
||||
@@ -56,10 +56,7 @@ where
|
||||
.map(|&x| x.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::LogicalType::build_data_type(),
|
||||
)),
|
||||
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
|
||||
self.x.into(),
|
||||
])
|
||||
}
|
||||
|
||||
@@ -77,7 +77,7 @@ impl Function for RangeFunction {
|
||||
/// `range_fn` will never be used. As long as a legal signature is returned, the specific content of the signature does not matter.
|
||||
/// In fact, the arguments loaded by `range_fn` are very complicated, and it is difficult to use `Signature` to describe them.
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::any(0, Volatility::Immutable)
|
||||
Signature::variadic_any(Volatility::Immutable)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
|
||||
@@ -23,7 +23,7 @@ use datatypes::prelude::VectorRef;
|
||||
use datatypes::types::TimestampType;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
|
||||
Int64Vector, StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
|
||||
TimestampNanosecondVector, TimestampSecondVector, Vector,
|
||||
};
|
||||
use snafu::{ensure, OptionExt};
|
||||
@@ -43,6 +43,7 @@ fn convert_to_timezone(arg: &str) -> Option<Timezone> {
|
||||
fn convert_to_timestamp(arg: &Value) -> Option<Timestamp> {
|
||||
match arg {
|
||||
Value::Timestamp(ts) => Some(*ts),
|
||||
Value::Int64(i) => Some(Timestamp::new_millisecond(*i)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -66,6 +67,8 @@ impl Function for ToTimezoneFunction {
|
||||
fn signature(&self) -> Signature {
|
||||
helper::one_of_sigs2(
|
||||
vec![
|
||||
ConcreteDataType::int32_datatype(),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
@@ -86,39 +89,45 @@ impl Function for ToTimezoneFunction {
|
||||
}
|
||||
);
|
||||
|
||||
// TODO: maybe support epoch timestamp? https://github.com/GreptimeTeam/greptimedb/issues/3477
|
||||
let ts = columns[0].data_type().as_timestamp().with_context(|| {
|
||||
UnsupportedInputDataTypeSnafu {
|
||||
let array = columns[0].to_arrow_array();
|
||||
let times = match columns[0].data_type() {
|
||||
ConcreteDataType::Int64(_) | ConcreteDataType::Int32(_) => {
|
||||
let vector = Int64Vector::try_from_arrow_array(array).unwrap();
|
||||
(0..vector.len())
|
||||
.map(|i| convert_to_timestamp(&vector.get(i)))
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
ConcreteDataType::Timestamp(ts) => match ts {
|
||||
TimestampType::Second(_) => {
|
||||
let vector = TimestampSecondVector::try_from_arrow_array(array).unwrap();
|
||||
(0..vector.len())
|
||||
.map(|i| convert_to_timestamp(&vector.get(i)))
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
TimestampType::Millisecond(_) => {
|
||||
let vector = TimestampMillisecondVector::try_from_arrow_array(array).unwrap();
|
||||
(0..vector.len())
|
||||
.map(|i| convert_to_timestamp(&vector.get(i)))
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
TimestampType::Microsecond(_) => {
|
||||
let vector = TimestampMicrosecondVector::try_from_arrow_array(array).unwrap();
|
||||
(0..vector.len())
|
||||
.map(|i| convert_to_timestamp(&vector.get(i)))
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
TimestampType::Nanosecond(_) => {
|
||||
let vector = TimestampNanosecondVector::try_from_arrow_array(array).unwrap();
|
||||
(0..vector.len())
|
||||
.map(|i| convert_to_timestamp(&vector.get(i)))
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
},
|
||||
_ => UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
})?;
|
||||
let array = columns[0].to_arrow_array();
|
||||
let times = match ts {
|
||||
TimestampType::Second(_) => {
|
||||
let vector = TimestampSecondVector::try_from_arrow_array(array).unwrap();
|
||||
(0..vector.len())
|
||||
.map(|i| convert_to_timestamp(&vector.get(i)))
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
TimestampType::Millisecond(_) => {
|
||||
let vector = TimestampMillisecondVector::try_from_arrow_array(array).unwrap();
|
||||
(0..vector.len())
|
||||
.map(|i| convert_to_timestamp(&vector.get(i)))
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
TimestampType::Microsecond(_) => {
|
||||
let vector = TimestampMicrosecondVector::try_from_arrow_array(array).unwrap();
|
||||
(0..vector.len())
|
||||
.map(|i| convert_to_timestamp(&vector.get(i)))
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
TimestampType::Nanosecond(_) => {
|
||||
let vector = TimestampNanosecondVector::try_from_arrow_array(array).unwrap();
|
||||
(0..vector.len())
|
||||
.map(|i| convert_to_timestamp(&vector.get(i)))
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
.fail()?,
|
||||
};
|
||||
|
||||
let tzs = {
|
||||
@@ -153,7 +162,7 @@ mod tests {
|
||||
use datatypes::timestamp::{
|
||||
TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond,
|
||||
};
|
||||
use datatypes::vectors::StringVector;
|
||||
use datatypes::vectors::{Int64Vector, StringVector};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -257,4 +266,48 @@ mod tests {
|
||||
let expect_times: VectorRef = Arc::new(StringVector::from(results));
|
||||
assert_eq!(expect_times, vector);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_numerical_to_timezone() {
|
||||
let f = ToTimezoneFunction;
|
||||
let results = vec![
|
||||
Some("1969-12-31 19:00:00.001"),
|
||||
None,
|
||||
Some("1970-01-01 03:00:00.001"),
|
||||
None,
|
||||
Some("2024-03-26 23:01:50"),
|
||||
None,
|
||||
Some("2024-03-27 06:02:00"),
|
||||
None,
|
||||
];
|
||||
let times: Vec<Option<i64>> = vec![
|
||||
Some(1),
|
||||
None,
|
||||
Some(1),
|
||||
None,
|
||||
Some(1711508510000),
|
||||
None,
|
||||
Some(1711508520000),
|
||||
None,
|
||||
];
|
||||
let ts_vector: Int64Vector = Int64Vector::from_owned_iterator(times.into_iter());
|
||||
let tzs = vec![
|
||||
Some("America/New_York"),
|
||||
None,
|
||||
Some("Europe/Moscow"),
|
||||
None,
|
||||
Some("America/New_York"),
|
||||
None,
|
||||
Some("Europe/Moscow"),
|
||||
None,
|
||||
];
|
||||
let args: Vec<VectorRef> = vec![
|
||||
Arc::new(ts_vector),
|
||||
Arc::new(StringVector::from(tzs.clone())),
|
||||
];
|
||||
let vector = f.eval(FunctionContext::default(), &args).unwrap();
|
||||
assert_eq!(8, vector.len());
|
||||
let expect_times: VectorRef = Arc::new(StringVector::from(results));
|
||||
assert_eq!(expect_times, vector);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,6 +35,7 @@ impl FunctionState {
|
||||
use common_base::AffectedRows;
|
||||
use common_meta::rpc::procedure::{MigrateRegionRequest, ProcedureStateResponse};
|
||||
use common_query::error::Result;
|
||||
use common_query::Output;
|
||||
use session::context::QueryContextRef;
|
||||
use store_api::storage::RegionId;
|
||||
use table::requests::{
|
||||
@@ -70,8 +71,8 @@ impl FunctionState {
|
||||
&self,
|
||||
_request: InsertRequest,
|
||||
_ctx: QueryContextRef,
|
||||
) -> Result<AffectedRows> {
|
||||
Ok(ROWS)
|
||||
) -> Result<Output> {
|
||||
Ok(Output::new_with_affected_rows(ROWS))
|
||||
}
|
||||
|
||||
async fn delete(
|
||||
|
||||
@@ -9,12 +9,10 @@ workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-trait.workspace = true
|
||||
common-error.workspace = true
|
||||
common-runtime.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
reqwest.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
tokio.workspace = true
|
||||
uuid.workspace = true
|
||||
|
||||
|
||||
@@ -9,13 +9,11 @@ workspace = true
|
||||
|
||||
[dependencies]
|
||||
api.workspace = true
|
||||
async-trait.workspace = true
|
||||
common-base.workspace = true
|
||||
common-catalog.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-query.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-time.workspace = true
|
||||
datatypes.workspace = true
|
||||
snafu.workspace = true
|
||||
|
||||
@@ -10,8 +10,6 @@ workspace = true
|
||||
[dependencies]
|
||||
api.workspace = true
|
||||
arrow-flight.workspace = true
|
||||
async-trait = "0.1"
|
||||
backtrace = "0.3"
|
||||
common-base.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
@@ -20,10 +18,8 @@ common-runtime.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-time.workspace = true
|
||||
dashmap.workspace = true
|
||||
datafusion.workspace = true
|
||||
datatypes.workspace = true
|
||||
flatbuffers = "23.1"
|
||||
futures = "0.3"
|
||||
lazy_static.workspace = true
|
||||
prost.workspace = true
|
||||
snafu.workspace = true
|
||||
|
||||
@@ -56,6 +56,18 @@ pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenSt
|
||||
} = &sig;
|
||||
let arg_types = ok!(extract_input_types(inputs));
|
||||
|
||||
// with format like Float64Array
|
||||
let array_types = arg_types
|
||||
.iter()
|
||||
.map(|ty| {
|
||||
if let Type::Reference(TypeReference { elem, .. }) = ty {
|
||||
elem.as_ref().clone()
|
||||
} else {
|
||||
ty.clone()
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// build the struct and its impl block
|
||||
// only do this when `display_name` is specified
|
||||
if let Ok(display_name) = get_ident(&arg_map, "display_name", arg_span) {
|
||||
@@ -64,6 +76,8 @@ pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenSt
|
||||
vis,
|
||||
ok!(get_ident(&arg_map, "name", arg_span)),
|
||||
display_name,
|
||||
array_types,
|
||||
ok!(get_ident(&arg_map, "ret", arg_span)),
|
||||
);
|
||||
result.extend(struct_code);
|
||||
}
|
||||
@@ -90,6 +104,8 @@ fn build_struct(
|
||||
vis: Visibility,
|
||||
name: Ident,
|
||||
display_name_ident: Ident,
|
||||
array_types: Vec<Type>,
|
||||
return_array_type: Ident,
|
||||
) -> TokenStream {
|
||||
let display_name = display_name_ident.to_string();
|
||||
quote! {
|
||||
@@ -103,29 +119,25 @@ fn build_struct(
|
||||
}
|
||||
|
||||
pub fn scalar_udf() -> ScalarUDF {
|
||||
ScalarUDF {
|
||||
name: Self::name().to_string(),
|
||||
signature: Signature::new(
|
||||
// TODO(LFC): Use the new Datafusion UDF impl.
|
||||
#[allow(deprecated)]
|
||||
ScalarUDF::new(
|
||||
Self::name(),
|
||||
&Signature::new(
|
||||
TypeSignature::Exact(Self::input_type()),
|
||||
Volatility::Immutable,
|
||||
),
|
||||
return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))),
|
||||
fun: Arc::new(Self::calc),
|
||||
}
|
||||
&(Arc::new(|_: &_| Ok(Arc::new(Self::return_type()))) as _),
|
||||
&(Arc::new(Self::calc) as _),
|
||||
)
|
||||
}
|
||||
|
||||
// TODO(ruihang): this should be parameterized
|
||||
// time index column and value column
|
||||
fn input_type() -> Vec<DataType> {
|
||||
vec![
|
||||
RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)),
|
||||
RangeArray::convert_data_type(DataType::Float64),
|
||||
]
|
||||
vec![#( RangeArray::convert_data_type(#array_types::new_null(0).data_type().clone()), )*]
|
||||
}
|
||||
|
||||
// TODO(ruihang): this should be parameterized
|
||||
fn return_type() -> DataType {
|
||||
DataType::Float64
|
||||
#return_array_type::new_null(0).data_type().clone()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -160,6 +172,7 @@ fn build_calc_fn(
|
||||
.map(|name| Ident::new(&format!("{}_range_array", name), name.span()))
|
||||
.collect::<Vec<_>>();
|
||||
let first_range_array_name = range_array_names.first().unwrap().clone();
|
||||
let first_param_name = param_names.first().unwrap().clone();
|
||||
|
||||
quote! {
|
||||
impl #name {
|
||||
@@ -168,13 +181,29 @@ fn build_calc_fn(
|
||||
|
||||
#( let #range_array_names = RangeArray::try_new(extract_array(&input[#param_numbers])?.to_data().into())?; )*
|
||||
|
||||
// TODO(ruihang): add ensure!()
|
||||
// check arrays len
|
||||
{
|
||||
let len_first = #first_range_array_name.len();
|
||||
#(
|
||||
if len_first != #range_array_names.len() {
|
||||
return Err(DataFusionError::Execution(format!("RangeArray have different lengths in PromQL function {}: array1={}, array2={}", #name::name(), len_first, #range_array_names.len())));
|
||||
}
|
||||
)*
|
||||
}
|
||||
|
||||
let mut result_array = Vec::new();
|
||||
for index in 0..#first_range_array_name.len(){
|
||||
#( let #param_names = #range_array_names.get(index).unwrap().as_any().downcast_ref::<#unref_param_types>().unwrap().clone(); )*
|
||||
|
||||
// TODO(ruihang): add ensure!() to check length
|
||||
// check element len
|
||||
{
|
||||
let len_first = #first_param_name.len();
|
||||
#(
|
||||
if len_first != #param_names.len() {
|
||||
return Err(DataFusionError::Execution(format!("RangeArray's element {} have different lengths in PromQL function {}: array1={}, array2={}", index, #name::name(), len_first, #param_names.len())));
|
||||
}
|
||||
)*
|
||||
}
|
||||
|
||||
let result = #fn_name(#( &#param_names, )*);
|
||||
result_array.push(result);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user