Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2025-12-22 22:20:02 +00:00)

Compare commits: v0.9.1-nig...v0.9.2 (59 commits)
| SHA1 |
|---|
| 975b8c69e5 |
| 8036b44347 |
| 4c72b3f3fe |
| 76dc906574 |
| 2a73e0937f |
| c8de8b80f4 |
| ec59ce5c9a |
| f578155602 |
| d1472782d0 |
| 93be81c041 |
| 2c3fccb516 |
| c1b1be47ba |
| 0f85037024 |
| f88705080b |
| cbb06cd0c6 |
| b59a93dfbc |
| 202c730363 |
| 63e1892dc1 |
| 216bce6973 |
| 4466fee580 |
| 5aa4c70057 |
| 72a1732fb4 |
| c821d21111 |
| 2e2eacf3b2 |
| 9bcaeaaa0e |
| 90cfe276b4 |
| 6694d2a930 |
| 9532ffb954 |
| 665b7e5c6e |
| 27d9aa0f3b |
| 8f3293d4fb |
| 7dd20b0348 |
| 4c1a3f29c0 |
| 0d70961448 |
| a75cfaa516 |
| aa3f53f08a |
| 8f0959fa9f |
| 4a3982ca60 |
| 559219496d |
| 685aa7dd8f |
| be5364a056 |
| a25d9f736f |
| 2cd4a78f17 |
| 188e182d75 |
| d64cc79ab4 |
| e6cc4df8c8 |
| 803780030d |
| 79f10d0415 |
| 3937e67694 |
| 4c93fe6c2d |
| c4717abb68 |
| 3b701d8f5e |
| cb4cffe636 |
| cc7f33c90c |
| fe1cfbf2b3 |
| ded874da04 |
| fe2d29a2a0 |
| b388829a96 |
| 8e7c027bf5 |
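Roughly the same commit range can be inspected locally with git; the left-hand tag name is truncated above, so a placeholder is used:

    # List the commits between the two tags (replace <v0.9.1-nightly-tag> with the full tag name)
    git log --oneline <v0.9.1-nightly-tag>..v0.9.2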
@@ -14,10 +14,11 @@ GT_AZBLOB_CONTAINER=AZBLOB container
 GT_AZBLOB_ACCOUNT_NAME=AZBLOB account name
 GT_AZBLOB_ACCOUNT_KEY=AZBLOB account key
 GT_AZBLOB_ENDPOINT=AZBLOB endpoint
 # Settings for gcs test
 GT_GCS_BUCKET = GCS bucket
 GT_GCS_SCOPE = GCS scope
 GT_GCS_CREDENTIAL_PATH = GCS credential path
+GT_GCS_CREDENTIAL = GCS credential
 GT_GCS_ENDPOINT = GCS end point
 # Settings for kafka wal test
 GT_KAFKA_ENDPOINTS = localhost:9092
@@ -69,7 +69,7 @@ runs:
         uses: actions/upload-artifact@v4
         with:
           name: sqlness-logs
-          path: C:\tmp\greptime-*.log
+          path: C:\Users\RUNNER~1\AppData\Local\Temp\sqlness*
           retention-days: 3

     - name: Build greptime binary
@@ -1,13 +1,13 @@
 meta:
-  config: |-
+  configData: |-
     [runtime]
     global_rt_size = 4
 datanode:
-  config: |-
+  configData: |-
    [runtime]
    global_rt_size = 4
    compact_rt_size = 2
 frontend:
-  config: |-
+  configData: |-
    [runtime]
    global_rt_size = 4
@@ -1,5 +1,5 @@
 meta:
-  config: |-
+  configData: |-
    [runtime]
    global_rt_size = 4
@@ -7,7 +7,7 @@ meta:
    [datanode.client]
    timeout = "60s"
 datanode:
-  config: |-
+  configData: |-
    [runtime]
    global_rt_size = 4
    compact_rt_size = 2
@@ -16,7 +16,7 @@ datanode:
    cache_path = "/data/greptimedb/s3cache"
    cache_capacity = "256MB"
 frontend:
-  config: |-
+  configData: |-
    [runtime]
    global_rt_size = 4
@@ -1,5 +1,5 @@
 meta:
-  config: |-
+  configData: |-
    [runtime]
    global_rt_size = 4
@@ -7,12 +7,12 @@ meta:
    [datanode.client]
    timeout = "60s"
 datanode:
-  config: |-
+  configData: |-
    [runtime]
    global_rt_size = 4
    compact_rt_size = 2
 frontend:
-  config: |-
+  configData: |-
    [runtime]
    global_rt_size = 4
@@ -1,5 +1,5 @@
 meta:
-  config: |-
+  configData: |-
    [runtime]
    global_rt_size = 4
@@ -13,7 +13,7 @@ meta:
    [datanode.client]
    timeout = "60s"
 datanode:
-  config: |-
+  configData: |-
    [runtime]
    global_rt_size = 4
    compact_rt_size = 2
@@ -23,7 +23,7 @@ datanode:
    broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
    linger = "2ms"
 frontend:
-  config: |-
+  configData: |-
    [runtime]
    global_rt_size = 4
.github/actions/setup-postgres-cluster/action.yml (new file, 30 lines)
@@ -0,0 +1,30 @@
+name: Setup PostgreSQL
+description: Deploy PostgreSQL on Kubernetes
+inputs:
+  postgres-replicas:
+    default: 1
+    description: "Number of PostgreSQL replicas"
+  namespace:
+    default: "postgres-namespace"
+  postgres-version:
+    default: "14.2"
+    description: "PostgreSQL version"
+  storage-size:
+    default: "1Gi"
+    description: "Storage size for PostgreSQL"
+
+runs:
+  using: composite
+  steps:
+    - name: Install PostgreSQL
+      shell: bash
+      run: |
+        helm upgrade \
+          --install postgresql oci://registry-1.docker.io/bitnamicharts/postgresql \
+          --set replicaCount=${{ inputs.postgres-replicas }} \
+          --set image.tag=${{ inputs.postgres-version }} \
+          --set persistence.size=${{ inputs.storage-size }} \
+          --set postgresql.username=greptimedb \
+          --set postgresql.password=admin \
+          --create-namespace \
+          -n ${{ inputs.namespace }}
.github/workflows/develop.yml (79 changed lines)
@@ -145,6 +145,18 @@ jobs:
       matrix:
         target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database", "fuzz_create_logical_table", "fuzz_alter_logical_table", "fuzz_insert", "fuzz_insert_logical_table" ]
     steps:
+      - name: Remove unused software
+        run: |
+          echo "Disk space before:"
+          df -h
+          [[ -d /usr/share/dotnet ]] && sudo rm -rf /usr/share/dotnet
+          [[ -d /usr/local/lib/android ]] && sudo rm -rf /usr/local/lib/android
+          [[ -d /opt/ghc ]] && sudo rm -rf /opt/ghc
+          [[ -d /opt/hostedtoolcache/CodeQL ]] && sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+          sudo docker builder prune -a
+          echo "Disk space after:"
+          df -h
      - uses: actions/checkout@v4
      - uses: arduino/setup-protoc@v3
        with:
@@ -193,6 +205,18 @@ jobs:
       matrix:
         target: [ "unstable_fuzz_create_table_standalone" ]
     steps:
+      - name: Remove unused software
+        run: |
+          echo "Disk space before:"
+          df -h
+          [[ -d /usr/share/dotnet ]] && sudo rm -rf /usr/share/dotnet
+          [[ -d /usr/local/lib/android ]] && sudo rm -rf /usr/local/lib/android
+          [[ -d /opt/ghc ]] && sudo rm -rf /opt/ghc
+          [[ -d /opt/hostedtoolcache/CodeQL ]] && sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+          sudo docker builder prune -a
+          echo "Disk space after:"
+          df -h
      - uses: actions/checkout@v4
      - uses: arduino/setup-protoc@v3
        with:
@@ -285,24 +309,24 @@ jobs:
     strategy:
       matrix:
         target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database", "fuzz_create_logical_table", "fuzz_alter_logical_table", "fuzz_insert", "fuzz_insert_logical_table" ]
         mode:
-          - name: "Disk"
-            minio: false
-            kafka: false
-            values: "with-disk.yaml"
-          - name: "Minio"
-            minio: true
-            kafka: false
-            values: "with-minio.yaml"
-          - name: "Minio with Cache"
-            minio: true
-            kafka: false
-            values: "with-minio-and-cache.yaml"
          - name: "Remote WAL"
            minio: true
            kafka: true
            values: "with-remote-wal.yaml"
     steps:
+      - name: Remove unused software
+        run: |
+          echo "Disk space before:"
+          df -h
+          [[ -d /usr/share/dotnet ]] && sudo rm -rf /usr/share/dotnet
+          [[ -d /usr/local/lib/android ]] && sudo rm -rf /usr/local/lib/android
+          [[ -d /opt/ghc ]] && sudo rm -rf /opt/ghc
+          [[ -d /opt/hostedtoolcache/CodeQL ]] && sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+          sudo docker builder prune -a
+          echo "Disk space after:"
+          df -h
      - uses: actions/checkout@v4
      - name: Setup Kind
        uses: ./.github/actions/setup-kind
@@ -314,6 +338,8 @@ jobs:
        uses: ./.github/actions/setup-kafka-cluster
      - name: Setup Etcd cluser
        uses: ./.github/actions/setup-etcd-cluster
+      - name: Setup Postgres cluser
+        uses: ./.github/actions/setup-postgres-cluster
      # Prepares for fuzz tests
      - uses: arduino/setup-protoc@v3
        with:
@@ -427,6 +453,18 @@ jobs:
            kafka: true
            values: "with-remote-wal.yaml"
     steps:
+      - name: Remove unused software
+        run: |
+          echo "Disk space before:"
+          df -h
+          [[ -d /usr/share/dotnet ]] && sudo rm -rf /usr/share/dotnet
+          [[ -d /usr/local/lib/android ]] && sudo rm -rf /usr/local/lib/android
+          [[ -d /opt/ghc ]] && sudo rm -rf /opt/ghc
+          [[ -d /opt/hostedtoolcache/CodeQL ]] && sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+          sudo docker builder prune -a
+          echo "Disk space after:"
+          df -h
      - uses: actions/checkout@v4
      - name: Setup Kind
        uses: ./.github/actions/setup-kind
@@ -440,6 +478,8 @@ jobs:
        uses: ./.github/actions/setup-kafka-cluster
      - name: Setup Etcd cluser
        uses: ./.github/actions/setup-etcd-cluster
+      - name: Setup Postgres cluser
+        uses: ./.github/actions/setup-postgres-cluster
      # Prepares for fuzz tests
      - uses: arduino/setup-protoc@v3
        with:
@@ -557,6 +597,10 @@ jobs:
     timeout-minutes: 60
     steps:
      - uses: actions/checkout@v4
+      - if: matrix.mode.kafka
+        name: Setup kafka server
+        working-directory: tests-integration/fixtures/kafka
+        run: docker compose -f docker-compose-standalone.yml up -d --wait
      - name: Download pre-built binaries
        uses: actions/download-artifact@v4
        with:
@@ -564,10 +608,6 @@ jobs:
          path: .
      - name: Unzip binaries
        run: tar -xvf ./bins.tar.gz
-      - if: matrix.mode.kafka
-        name: Setup kafka server
-        working-directory: tests-integration/fixtures/kafka
-        run: docker compose -f docker-compose-standalone.yml up -d --wait
      - name: Run sqlness
        run: RUST_BACKTRACE=1 ./bins/sqlness-runner ${{ matrix.mode.opts }} -c ./tests/cases --bins-dir ./bins --preserve-state
      - name: Upload sqlness logs
@@ -666,6 +706,9 @@ jobs:
      - name: Setup minio
        working-directory: tests-integration/fixtures/minio
        run: docker compose -f docker-compose-standalone.yml up -d --wait
+      - name: Setup postgres server
+        working-directory: tests-integration/fixtures/postgres
+        run: docker compose -f docker-compose-standalone.yml up -d --wait
      - name: Run nextest cases
        run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F pyo3_backend -F dashboard
        env:
@@ -682,7 +725,9 @@ jobs:
          GT_MINIO_REGION: us-west-2
          GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
          GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
+          GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
          GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
+          GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
          UNITTEST_LOG_DIR: "__unittest_logs"
      - name: Codecov upload
        uses: codecov/codecov-action@v4
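For local debugging, a quick way one might verify the Postgres fixture before running the tests is a plain client call (a hypothetical check; it only reuses the connection string from GT_POSTGRES_ENDPOINTS above and assumes psql is installed):

    # Connect to the docker-compose Postgres used for the metadata-store tests
    psql postgres://greptimedb:admin@127.0.0.1:5432/postgres -c 'SELECT version();'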
.github/workflows/nightly-ci.yml (11 changed lines)
@@ -33,6 +33,13 @@ jobs:
          aws-region: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
          aws-access-key-id: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}
+      - name: Upload sqlness logs
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: sqlness-logs-kind
+          path: /tmp/kind/
+          retention-days: 3

  sqlness-windows:
    name: Sqlness tests on Windows
@@ -55,11 +62,11 @@ jobs:
        env:
          SQLNESS_OPTS: "--preserve-state"
      - name: Upload sqlness logs
-        if: always()
+        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: sqlness-logs
-          path: C:\tmp\greptime-*.log
+          path: C:\Users\RUNNER~1\AppData\Local\Temp\sqlness*
          retention-days: 3

  test-on-windows:
Cargo.lock (generated, 442 changed lines): file diff suppressed because it is too large.
Cargo.toml (32 changed lines)
@@ -64,7 +64,7 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "0.9.0"
+version = "0.9.2"
 edition = "2021"
 license = "Apache-2.0"
@@ -104,15 +104,15 @@ clap = { version = "4.4", features = ["derive"] }
 config = "0.13.0"
 crossbeam-utils = "0.8"
 dashmap = "5.4"
-datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
+datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
 derive_builder = "0.12"
 dotenv = "0.15"
 etcd-client = { version = "0.13" }
@@ -124,7 +124,7 @@ humantime = "2.1"
 humantime-serde = "1.1"
 itertools = "0.10"
 lazy_static = "1.4"
-meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "049171eb16cb4249d8099751a0c46750d1fe88e7" }
+meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd" }
 mockall = "0.11.4"
 moka = "0.12"
 notify = "6.1"
@@ -151,14 +151,19 @@ reqwest = { version = "0.12", default-features = false, features = [
     "stream",
     "multipart",
 ] }
-rskafka = "0.5"
+# SCRAM-SHA-512 requires https://github.com/dequbed/rsasl/pull/48, https://github.com/influxdata/rskafka/pull/247
+rskafka = { git = "https://github.com/WenyXu/rskafka.git", rev = "940c6030012c5b746fad819fb72e3325b26e39de", features = [
+    "transport-tls",
+] }
 rstest = "0.21"
 rstest_reuse = "0.7"
 rust_decimal = "1.33"
+rustc-hash = "2.0"
 schemars = "0.8"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = { version = "1.0", features = ["float_roundtrip"] }
 serde_with = "3"
 shadow-rs = "0.31"
 smallvec = { version = "1", features = ["serde"] }
 snafu = "0.8"
 sysinfo = "0.30"
@@ -169,6 +174,7 @@ sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "5
 strum = { version = "0.25", features = ["derive"] }
 tempfile = "3"
 tokio = { version = "1.36", features = ["full"] }
+tokio-postgres = "0.7"
 tokio-stream = { version = "0.1" }
 tokio-util = { version = "0.7", features = ["io-util", "compat"] }
 toml = "0.8.8"
@@ -238,7 +244,7 @@ table = { path = "src/table" }

 [workspace.dependencies.meter-macros]
 git = "https://github.com/GreptimeTeam/greptime-meter.git"
-rev = "049171eb16cb4249d8099751a0c46750d1fe88e7"
+rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd"

 [profile.release]
 debug = 1
@@ -67,6 +67,11 @@
 | `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.num_topics` | Integer | `64` | Number of topics to be created upon start.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default)<br/>**It's only used when the provider is `kafka`**. |
+| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.max_batch_bytes` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.backoff_init` | String | `500ms` | The initial backoff delay.<br/>**It's only used when the provider is `kafka`**. |
@@ -93,6 +98,7 @@
 | `storage.account_key` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
 | `storage.scope` | String | `None` | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
 | `storage.credential_path` | String | `None` | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
+| `storage.credential` | String | `None` | The credential of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
 | `storage.container` | String | `None` | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
 | `storage.sas_token` | String | `None` | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
 | `storage.endpoint` | String | `None` | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
@@ -145,7 +151,7 @@
 | `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
 | `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
@@ -230,7 +236,7 @@
 | `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
 | `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
@@ -254,7 +260,7 @@
 | `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
 | `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
 | `store_addr` | String | `127.0.0.1:2379` | Etcd server address. |
-| `selector` | String | `lease_based` | Datanode selector type.<br/>- `lease_based` (default value).<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
+| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
 | `use_memory_store` | Bool | `false` | Store data in memory. |
 | `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. |
 | `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
@@ -292,7 +298,7 @@
 | `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
 | `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
@@ -368,6 +374,8 @@
 | `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
 | `storage` | -- | -- | The data storage options. |
 | `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
 | `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
@@ -382,6 +390,7 @@
 | `storage.account_key` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
 | `storage.scope` | String | `None` | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
 | `storage.credential_path` | String | `None` | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
+| `storage.credential` | String | `None` | The credential of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
 | `storage.container` | String | `None` | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
 | `storage.sas_token` | String | `None` | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
 | `storage.endpoint` | String | `None` | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
@@ -432,7 +441,7 @@
 | `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
 | `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
@@ -477,7 +486,7 @@
 | `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
 | `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
@@ -187,6 +187,32 @@ backoff_base = 2
 ## **It's only used when the provider is `kafka`**.
 backoff_deadline = "5mins"

+## Whether to enable WAL index creation.
+## **It's only used when the provider is `kafka`**.
+create_index = true
+
+## The interval for dumping WAL indexes.
+## **It's only used when the provider is `kafka`**.
+dump_index_interval = "60s"
+
+# The Kafka SASL configuration.
+# **It's only used when the provider is `kafka`**.
+# Available SASL mechanisms:
+# - `PLAIN`
+# - `SCRAM-SHA-256`
+# - `SCRAM-SHA-512`
+# [wal.sasl]
+# type = "SCRAM-SHA-512"
+# username = "user_kafka"
+# password = "secret"
+
+# The Kafka TLS configuration.
+# **It's only used when the provider is `kafka`**.
+# [wal.tls]
+# server_ca_cert_path = "/path/to/server_cert"
+# client_cert_path = "/path/to/client_cert"
+# client_key_path = "/path/to/key"
+
 # Example of using S3 as the storage.
 # [storage]
 # type = "S3"
@@ -223,6 +249,7 @@ backoff_deadline = "5mins"
 # root = "data"
 # scope = "test"
 # credential_path = "123456"
+# credential = "base64-credential"
 # endpoint = "https://storage.googleapis.com"

 ## The data storage options.
@@ -294,6 +321,11 @@ scope = "test"
 ## +toml2docs:none-default
 credential_path = "test"

+## The credential of the google cloud storage.
+## **It's only used when the storage type is `Gcs`**.
+## +toml2docs:none-default
+credential= "base64-credential"
+
 ## The container of the azure account.
 ## **It's only used when the storage type is `Azblob`**.
 ## +toml2docs:none-default
@@ -493,8 +525,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-## +toml2docs:none-default
-otlp_endpoint = ""
+otlp_endpoint = "http://localhost:4317"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -70,8 +70,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
otlp_endpoint = "http://localhost:4317"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
@@ -177,8 +177,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
otlp_endpoint = "http://localhost:4317"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
@@ -11,10 +11,11 @@ server_addr = "127.0.0.1:3002"
|
||||
store_addr = "127.0.0.1:2379"
|
||||
|
||||
## Datanode selector type.
|
||||
## - `lease_based` (default value).
|
||||
## - `round_robin` (default value)
|
||||
## - `lease_based`
|
||||
## - `load_based`
|
||||
## For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
|
||||
selector = "lease_based"
|
||||
selector = "round_robin"
|
||||
|
||||
## Store data in memory.
|
||||
use_memory_store = false
|
||||
@@ -123,6 +124,24 @@ backoff_base = 2
|
||||
## Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
|
||||
backoff_deadline = "5mins"
|
||||
|
||||
# The Kafka SASL configuration.
|
||||
# **It's only used when the provider is `kafka`**.
|
||||
# Available SASL mechanisms:
|
||||
# - `PLAIN`
|
||||
# - `SCRAM-SHA-256`
|
||||
# - `SCRAM-SHA-512`
|
||||
# [wal.sasl]
|
||||
# type = "SCRAM-SHA-512"
|
||||
# username = "user_kafka"
|
||||
# password = "secret"
|
||||
|
||||
# The Kafka TLS configuration.
|
||||
# **It's only used when the provider is `kafka`**.
|
||||
# [wal.tls]
|
||||
# server_ca_cert_path = "/path/to/server_cert"
|
||||
# client_cert_path = "/path/to/client_cert"
|
||||
# client_key_path = "/path/to/key"
|
||||
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files.
|
||||
@@ -136,8 +155,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
otlp_endpoint = "http://localhost:4317"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
@@ -171,6 +171,28 @@ sync_period = "10s"
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
## Number of topics to be created upon start.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
num_topics = 64
|
||||
|
||||
## Topic selector type.
|
||||
## Available selector types:
|
||||
## - `round_robin` (default)
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
selector_type = "round_robin"
|
||||
|
||||
## A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
topic_name_prefix = "greptimedb_wal_topic"
|
||||
|
||||
## Expected number of replicas of each partition.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
replication_factor = 1
|
||||
|
||||
## Above which a topic creation operation will be cancelled.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
create_topic_timeout = "30s"
|
||||
|
||||
## The max size of a single producer batch.
|
||||
## Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
@@ -196,6 +218,24 @@ backoff_base = 2
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_deadline = "5mins"
|
||||
|
||||
# The Kafka SASL configuration.
|
||||
# **It's only used when the provider is `kafka`**.
|
||||
# Available SASL mechanisms:
|
||||
# - `PLAIN`
|
||||
# - `SCRAM-SHA-256`
|
||||
# - `SCRAM-SHA-512`
|
||||
# [wal.sasl]
|
||||
# type = "SCRAM-SHA-512"
|
||||
# username = "user_kafka"
|
||||
# password = "secret"
|
||||
|
||||
# The Kafka TLS configuration.
|
||||
# **It's only used when the provider is `kafka`**.
|
||||
# [wal.tls]
|
||||
# server_ca_cert_path = "/path/to/server_cert"
|
||||
# client_cert_path = "/path/to/client_cert"
|
||||
# client_key_path = "/path/to/key"
|
||||
|
||||
## Metadata storage options.
|
||||
[metadata_store]
|
||||
## Kv file size in bytes.
|
||||
@@ -246,6 +286,7 @@ retry_delay = "500ms"
|
||||
# root = "data"
|
||||
# scope = "test"
|
||||
# credential_path = "123456"
|
||||
# credential = "base64-credential"
|
||||
# endpoint = "https://storage.googleapis.com"
|
||||
|
||||
## The data storage options.
|
||||
@@ -317,6 +358,11 @@ scope = "test"
|
||||
## +toml2docs:none-default
|
||||
credential_path = "test"
|
||||
|
||||
## The credential of the google cloud storage.
|
||||
## **It's only used when the storage type is `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
credential = "base64-credential"
|
||||
|
||||
## The container of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
@@ -522,8 +568,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
otlp_endpoint = "http://localhost:4317"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
@@ -1,9 +1,9 @@
 x-custom:
   etcd_initial_cluster_token: &etcd_initial_cluster_token "--initial-cluster-token=etcd-cluster"
   etcd_common_settings: &etcd_common_settings
-    image: quay.io/coreos/etcd:v3.5.10
+    image: "${ETCD_REGISTRY:-quay.io}/${ETCD_NAMESPACE:-coreos}/etcd:${ETCD_VERSION:-v3.5.10}"
     entrypoint: /usr/local/bin/etcd
-  greptimedb_image: &greptimedb_image docker.io/greptimedb/greptimedb:latest
+  greptimedb_image: &greptimedb_image "${GREPTIMEDB_REGISTRY:-docker.io}/${GREPTIMEDB_NAMESPACE:-greptime}/greptimedb:${GREPTIMEDB_VERSION:-latest}"

 services:
   etcd0:
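A minimal sketch of how these new substitution variables might be used when bringing the cluster up (the registry value below is purely an example; the variable names come from the compose file above, and the command is run from the directory containing it):

    # Pull the etcd and GreptimeDB images from a mirror registry and pin the GreptimeDB tag
    ETCD_REGISTRY=registry.example.com \
    GREPTIMEDB_REGISTRY=registry.example.com \
    GREPTIMEDB_VERSION=v0.9.2 \
    docker compose up -d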
@@ -25,7 +25,7 @@ Please ensure the following configuration before importing the dashboard into Gr

 __1. Prometheus scrape config__

-Assign `greptime_pod` label to each host target. We use this label to identify each node instance.
+Configure Prometheus to scrape the cluster.

 ```yml
 # example config
@@ -34,27 +34,15 @@
 scrape_configs:
   - job_name: metasrv
     static_configs:
-      - targets: ['<ip>:<port>']
-        labels:
-          greptime_pod: metasrv
+      - targets: ['<metasrv-ip>:<port>']

   - job_name: datanode
     static_configs:
-      - targets: ['<ip>:<port>']
-        labels:
-          greptime_pod: datanode1
-      - targets: ['<ip>:<port>']
-        labels:
-          greptime_pod: datanode2
-      - targets: ['<ip>:<port>']
-        labels:
-          greptime_pod: datanode3
+      - targets: ['<datanode0-ip>:<port>', '<datanode1-ip>:<port>', '<datanode2-ip>:<port>']

   - job_name: frontend
     static_configs:
-      - targets: ['<ip>:<port>']
-        labels:
-          greptime_pod: frontend
+      - targets: ['<frontend-ip>:<port>']
 ```

 __2. Grafana config__
@@ -63,4 +51,4 @@ Create a Prometheus data source in Grafana before using this dashboard. We use `

 ### Usage

-Use `datasource` or `greptime_pod` on the upper-left corner to filter data from certain node.
+Use `datasource` or `instance` on the upper-left corner to filter data from certain node.
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,62 +1,72 @@
-#!/bin/sh
+#!/usr/bin/env bash

 set -ue

 OS_TYPE=
 ARCH_TYPE=

+# Set the GitHub token to avoid GitHub API rate limit.
+# You can run with `GITHUB_TOKEN`:
+# GITHUB_TOKEN=<your_token> ./scripts/install.sh
+GITHUB_TOKEN=${GITHUB_TOKEN:-}
+
 VERSION=${1:-latest}
 GITHUB_ORG=GreptimeTeam
 GITHUB_REPO=greptimedb
 BIN=greptime

-get_os_type() {
+function get_os_type() {
   os_type="$(uname -s)"

   case "$os_type" in
     Darwin)
       OS_TYPE=darwin
       ;;
     Linux)
       OS_TYPE=linux
       ;;
     *)
       echo "Error: Unknown OS type: $os_type"
       exit 1
   esac
 }

-get_arch_type() {
+function get_arch_type() {
   arch_type="$(uname -m)"

   case "$arch_type" in
     arm64)
       ARCH_TYPE=arm64
       ;;
     aarch64)
       ARCH_TYPE=arm64
       ;;
     x86_64)
       ARCH_TYPE=amd64
       ;;
     amd64)
       ARCH_TYPE=amd64
       ;;
     *)
       echo "Error: Unknown CPU type: $arch_type"
       exit 1
   esac
 }

-get_os_type
-get_arch_type
-
-if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
-  # Use the latest nightly version.
+function download_artifact() {
+  if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
+    # Use the latest stable released version.
+    # GitHub API reference: https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#get-the-latest-release.
     if [ "${VERSION}" = "latest" ]; then
-      VERSION=$(curl -s -XGET "https://api.github.com/repos/${GITHUB_ORG}/${GITHUB_REPO}/releases" | grep tag_name | grep nightly | cut -d: -f 2 | sed 's/.*"\(.*\)".*/\1/' | uniq | sort -r | head -n 1)
-      if [ -z "${VERSION}" ]; then
-        echo "Failed to get the latest version."
-        exit 1
+      # To avoid other tools dependency, we choose to use `curl` to get the version metadata and parsed by `sed`.
+      VERSION=$(curl -sL \
+        -H "Accept: application/vnd.github+json" \
+        -H "X-GitHub-Api-Version: 2022-11-28" \
+        ${GITHUB_TOKEN:+-H "Authorization: Bearer $GITHUB_TOKEN"} \
+        "https://api.github.com/repos/${GITHUB_ORG}/${GITHUB_REPO}/releases/latest" | sed -n 's/.*"tag_name": "\([^"]*\)".*/\1/p')
+      if [ -z "${VERSION}" ]; then
+        echo "Failed to get the latest stable released version."
+        exit 1
       fi
     fi

@@ -73,4 +83,9 @@ if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
     rm -r "${PACKAGE_NAME%.tar.gz}" && \
     echo "Run './${BIN} --help' to get started"
   fi
   fi
+fi
+}
+
+get_os_type
+get_arch_type
+download_artifact
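A sketch of how the reworked installer might be invoked, based only on the variables shown above (`VERSION` defaults to `latest`, and `GITHUB_TOKEN` is optional):

    # Install the latest stable release into the current directory
    ./scripts/install.sh

    # Pin a specific release and pass a token to avoid GitHub API rate limits
    GITHUB_TOKEN=<your_token> ./scripts/install.sh v0.9.2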
@@ -40,6 +40,7 @@ moka = { workspace = true, features = ["future", "sync"] }
|
||||
partition.workspace = true
|
||||
paste = "1.0"
|
||||
prometheus.workspace = true
|
||||
rustc-hash.workspace = true
|
||||
serde_json.workspace = true
|
||||
session.workspace = true
|
||||
snafu.workspace = true
|
||||
|
||||
@@ -30,6 +30,7 @@ use pg_namespace::PGNamespace;
 use table::TableRef;
 pub use table_names::*;

+use self::pg_namespace::oid_map::{PGNamespaceOidMap, PGNamespaceOidMapRef};
 use super::memory_table::MemoryTable;
 use super::utils::tables::u32_column;
 use super::{SystemSchemaProvider, SystemSchemaProviderInner, SystemTableRef};
@@ -52,6 +53,9 @@ pub struct PGCatalogProvider {
     catalog_name: String,
     catalog_manager: Weak<dyn CatalogManager>,
     tables: HashMap<String, TableRef>,
+
+    // Workaround to store mapping of schema_name to a numeric id
+    namespace_oid_map: PGNamespaceOidMapRef,
 }

 impl SystemSchemaProvider for PGCatalogProvider {
@@ -85,6 +89,7 @@ impl PGCatalogProvider {
             catalog_name,
             catalog_manager,
             tables: HashMap::new(),
+            namespace_oid_map: Arc::new(PGNamespaceOidMap::new()),
         };
         provider.build_tables();
         provider
@@ -122,10 +127,12 @@ impl SystemSchemaProviderInner for PGCatalogProvider {
             table_names::PG_NAMESPACE => Some(Arc::new(PGNamespace::new(
                 self.catalog_name.clone(),
                 self.catalog_manager.clone(),
+                self.namespace_oid_map.clone(),
             ))),
             table_names::PG_CLASS => Some(Arc::new(PGClass::new(
                 self.catalog_name.clone(),
                 self.catalog_manager.clone(),
+                self.namespace_oid_map.clone(),
             ))),
             _ => None,
         }
@@ -31,6 +31,7 @@ use snafu::{OptionExt, ResultExt};
 use store_api::storage::ScanRequest;
 use table::metadata::TableType;

+use super::pg_namespace::oid_map::PGNamespaceOidMapRef;
 use super::{OID_COLUMN_NAME, PG_CLASS};
 use crate::error::{
     CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
@@ -60,14 +61,22 @@ pub(super) struct PGClass {
     schema: SchemaRef,
     catalog_name: String,
     catalog_manager: Weak<dyn CatalogManager>,
+
+    // Workaround to convert schema_name to a numeric id
+    namespace_oid_map: PGNamespaceOidMapRef,
 }

 impl PGClass {
-    pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
+    pub(super) fn new(
+        catalog_name: String,
+        catalog_manager: Weak<dyn CatalogManager>,
+        namespace_oid_map: PGNamespaceOidMapRef,
+    ) -> Self {
         Self {
             schema: Self::schema(),
             catalog_name,
             catalog_manager,
+            namespace_oid_map,
         }
     }
@@ -75,7 +84,7 @@ impl PGClass {
         Arc::new(Schema::new(vec![
             u32_column(OID_COLUMN_NAME),
             string_column(RELNAME),
-            string_column(RELNAMESPACE),
+            u32_column(RELNAMESPACE),
             string_column(RELKIND),
             u32_column(RELOWNER),
         ]))
@@ -86,6 +95,7 @@ impl PGClass {
             self.schema.clone(),
             self.catalog_name.clone(),
             self.catalog_manager.clone(),
+            self.namespace_oid_map.clone(),
         )
     }
 }
@@ -155,10 +165,11 @@ struct PGClassBuilder {
     schema: SchemaRef,
     catalog_name: String,
     catalog_manager: Weak<dyn CatalogManager>,
+    namespace_oid_map: PGNamespaceOidMapRef,

     oid: UInt32VectorBuilder,
     relname: StringVectorBuilder,
-    relnamespace: StringVectorBuilder,
+    relnamespace: UInt32VectorBuilder,
     relkind: StringVectorBuilder,
     relowner: UInt32VectorBuilder,
 }
@@ -168,15 +179,17 @@ impl PGClassBuilder {
         schema: SchemaRef,
         catalog_name: String,
         catalog_manager: Weak<dyn CatalogManager>,
+        namespace_oid_map: PGNamespaceOidMapRef,
     ) -> Self {
         Self {
             schema,
             catalog_name,
             catalog_manager,
+            namespace_oid_map,

             oid: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
             relname: StringVectorBuilder::with_capacity(INIT_CAPACITY),
-            relnamespace: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            relnamespace: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
             relkind: StringVectorBuilder::with_capacity(INIT_CAPACITY),
             relowner: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
         }
@@ -217,6 +230,7 @@ impl PGClassBuilder {
         table: &str,
         kind: &str,
     ) {
+        let namespace_oid = self.namespace_oid_map.get_oid(schema);
         let row = [
             (OID_COLUMN_NAME, &Value::from(oid)),
             (RELNAMESPACE, &Value::from(schema)),
@@ -230,7 +244,7 @@ impl PGClassBuilder {
         }

         self.oid.push(Some(oid));
-        self.relnamespace.push(Some(schema));
+        self.relnamespace.push(Some(namespace_oid));
         self.relname.push(Some(table));
         self.relkind.push(Some(kind));
         self.relowner.push(Some(DUMMY_OWNER_ID));
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+pub(super) mod oid_map;
+
 use std::sync::{Arc, Weak};

 use arrow_schema::SchemaRef as ArrowSchemaRef;
@@ -25,16 +27,16 @@ use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
 use datatypes::scalars::ScalarVectorBuilder;
 use datatypes::schema::{Schema, SchemaRef};
 use datatypes::value::Value;
-use datatypes::vectors::{StringVectorBuilder, VectorRef};
+use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, VectorRef};
 use snafu::{OptionExt, ResultExt};
 use store_api::storage::ScanRequest;

-use super::{OID_COLUMN_NAME, PG_NAMESPACE};
+use super::{PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE};
 use crate::error::{
     CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
 };
 use crate::information_schema::Predicates;
-use crate::system_schema::utils::tables::string_column;
+use crate::system_schema::utils::tables::{string_column, u32_column};
 use crate::system_schema::SystemTable;
 use crate::CatalogManager;
@@ -48,21 +50,29 @@ pub(super) struct PGNamespace {
     schema: SchemaRef,
     catalog_name: String,
     catalog_manager: Weak<dyn CatalogManager>,
+
+    // Workaround to convert schema_name to a numeric id
+    oid_map: PGNamespaceOidMapRef,
 }

 impl PGNamespace {
-    pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
+    pub(super) fn new(
+        catalog_name: String,
+        catalog_manager: Weak<dyn CatalogManager>,
+        oid_map: PGNamespaceOidMapRef,
+    ) -> Self {
         Self {
             schema: Self::schema(),
             catalog_name,
             catalog_manager,
+            oid_map,
         }
     }

     fn schema() -> SchemaRef {
         Arc::new(Schema::new(vec![
-            // TODO(J0HN50N133): we do not have a numeric schema id, use schema name as a workaround. Use a proper schema id once we have it.
-            string_column(OID_COLUMN_NAME),
+            u32_column(OID_COLUMN_NAME),
             string_column(NSPNAME),
         ]))
     }
@@ -72,6 +82,7 @@ impl PGNamespace {
             self.schema.clone(),
             self.catalog_name.clone(),
             self.catalog_manager.clone(),
+            self.oid_map.clone(),
         )
     }
 }
@@ -138,8 +149,9 @@ struct PGNamespaceBuilder {
     schema: SchemaRef,
     catalog_name: String,
     catalog_manager: Weak<dyn CatalogManager>,
+    namespace_oid_map: PGNamespaceOidMapRef,

-    oid: StringVectorBuilder,
+    oid: UInt32VectorBuilder,
     nspname: StringVectorBuilder,
 }

@@ -148,12 +160,14 @@ impl PGNamespaceBuilder {
         schema: SchemaRef,
         catalog_name: String,
         catalog_manager: Weak<dyn CatalogManager>,
+        namespace_oid_map: PGNamespaceOidMapRef,
     ) -> Self {
         Self {
             schema,
             catalog_name,
             catalog_manager,
-            oid: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            namespace_oid_map,
+            oid: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
             nspname: StringVectorBuilder::with_capacity(INIT_CAPACITY),
         }
     }
@@ -178,14 +192,15 @@ impl PGNamespaceBuilder {
     }

     fn add_namespace(&mut self, predicates: &Predicates, schema_name: &str) {
+        let oid = self.namespace_oid_map.get_oid(schema_name);
         let row = [
-            (OID_COLUMN_NAME, &Value::from(schema_name)),
+            (OID_COLUMN_NAME, &Value::from(oid)),
             (NSPNAME, &Value::from(schema_name)),
         ];
         if !predicates.eval(&row) {
             return;
         }
-        self.oid.push(Some(schema_name));
+        self.oid.push(Some(oid));
         self.nspname.push(Some(schema_name));
     }
 }
src/catalog/src/system_schema/pg_catalog/pg_namespace/oid_map.rs (new file, 100 lines)
@@ -0,0 +1,100 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::hash::BuildHasher;
+use std::sync::Arc;
+
+use dashmap::DashMap;
+use rustc_hash::FxSeededState;
+
+pub type PGNamespaceOidMapRef = Arc<PGNamespaceOidMap>;
+// Workaround to convert schema_name to a numeric id,
+// remove this when we have numeric schema id in greptime
+pub struct PGNamespaceOidMap {
+    oid_map: DashMap<String, u32>,
+
+    // Rust use SipHasher by default, which provides resistance against DOS attacks.
+    // This will produce different hash value between each greptime instance. This will
+    // cause the sqlness test fail. We need a deterministic hash here to provide
+    // same oid for the same schema name with best effort and DOS attacks aren't concern here.
+    hasher: FxSeededState,
+}
+
+impl PGNamespaceOidMap {
+    pub fn new() -> Self {
+        Self {
+            oid_map: DashMap::new(),
+            hasher: FxSeededState::with_seed(0), // PLEASE DO NOT MODIFY THIS SEED VALUE!!!
+        }
+    }
+
+    fn oid_is_used(&self, oid: u32) -> bool {
+        self.oid_map.iter().any(|e| *e.value() == oid)
+    }
+
+    pub fn get_oid(&self, schema_name: &str) -> u32 {
+        if let Some(oid) = self.oid_map.get(schema_name) {
+            *oid
+        } else {
+            let mut oid = self.hasher.hash_one(schema_name) as u32;
+            while self.oid_is_used(oid) {
+                oid = self.hasher.hash_one(oid) as u32;
+            }
+            self.oid_map.insert(schema_name.to_string(), oid);
+            oid
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+
+    #[test]
+    fn oid_is_stable() {
+        let oid_map_1 = PGNamespaceOidMap::new();
+        let oid_map_2 = PGNamespaceOidMap::new();
+
+        let schema = "schema";
+        let oid = oid_map_1.get_oid(schema);
+
+        // oid keep stable in the same instance
+        assert_eq!(oid, oid_map_1.get_oid(schema));
+
+        // oid keep stable between different instances
+        assert_eq!(oid, oid_map_2.get_oid(schema));
+    }
+
+    #[test]
+    fn oid_collision() {
+        let oid_map = PGNamespaceOidMap::new();
+
+        let key1 = "3178510";
+        let key2 = "4215648";
+
+        // have collision
+        assert_eq!(
+            oid_map.hasher.hash_one(key1) as u32,
+            oid_map.hasher.hash_one(key2) as u32
+        );
+
+        // insert them into oid_map
+        let oid1 = oid_map.get_oid(key1);
+        let oid2 = oid_map.get_oid(key2);
+
+        // they should have different id
+        assert_ne!(oid1, oid2);
+    }
+}
@@ -16,7 +16,7 @@ use api::v1::flow::{FlowRequest, FlowResponse};
 use api::v1::region::InsertRequests;
 use common_error::ext::BoxedError;
 use common_meta::node_manager::Flownode;
-use snafu::{location, Location, ResultExt};
+use snafu::{location, ResultExt};

 use crate::error::Result;
 use crate::Client;
@@ -33,7 +33,7 @@ use common_telemetry::error;
 use common_telemetry::tracing_context::TracingContext;
 use prost::Message;
 use query::query_engine::DefaultSerializer;
-use snafu::{location, Location, OptionExt, ResultExt};
+use snafu::{location, OptionExt, ResultExt};
 use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
 use tokio_stream::StreamExt;
@@ -62,8 +62,37 @@ enum SubCommand {
 #[global_allocator]
 static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

+#[cfg(debug_assertions)]
+fn main() -> Result<()> {
+    use snafu::ResultExt;
+    // Set the stack size to 8MB for the thread so it wouldn't overflow on large stack usage in debug mode
+    // see https://github.com/GreptimeTeam/greptimedb/pull/4317
+    // and https://github.com/rust-lang/rust/issues/34283
+    std::thread::Builder::new()
+        .name("main_spawn".to_string())
+        .stack_size(8 * 1024 * 1024)
+        .spawn(|| {
+            {
+                tokio::runtime::Builder::new_multi_thread()
+                    .thread_stack_size(8 * 1024 * 1024)
+                    .enable_all()
+                    .build()
+                    .expect("Failed building the Runtime")
+                    .block_on(main_body())
+            }
+        })
+        .context(cmd::error::SpawnThreadSnafu)?
+        .join()
+        .expect("Couldn't join on the associated thread")
+}
+
+#[cfg(not(debug_assertions))]
 #[tokio::main]
 async fn main() -> Result<()> {
+    main_body().await
+}
+
+async fn main_body() -> Result<()> {
     setup_human_panic();
     start(Command::parse()).await
 }
@@ -305,6 +305,12 @@ pub enum Error {
         error: std::io::Error,
     },

+    #[snafu(display("Failed to spawn thread"))]
+    SpawnThread {
+        #[snafu(source)]
+        error: std::io::Error,
+    },
+
     #[snafu(display("Other error"))]
     Other {
         source: BoxedError,
@@ -395,7 +401,9 @@ impl ErrorExt for Error {
             Error::SubstraitEncodeLogicalPlan { source, .. } => source.status_code(),
             Error::StartCatalogManager { source, .. } => source.status_code(),

-            Error::SerdeJson { .. } | Error::FileIo { .. } => StatusCode::Unexpected,
+            Error::SerdeJson { .. } | Error::FileIo { .. } | Error::SpawnThread { .. } => {
+                StatusCode::Unexpected
+            }

             Error::Other { source, .. } => source.status_code(),
@@ -24,6 +24,7 @@ use common_grpc::channel_manager::ChannelConfig;
|
||||
use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
|
||||
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
|
||||
use common_meta::heartbeat::handler::HandlerGroupExecutor;
|
||||
use common_meta::key::flow::FlowMetadataManager;
|
||||
use common_meta::key::TableMetadataManager;
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::logging::TracingOptions;
|
||||
@@ -296,11 +297,13 @@ impl StartCommand {
|
||||
Arc::new(executor),
|
||||
);
|
||||
|
||||
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
|
||||
let flownode_builder = FlownodeBuilder::new(
|
||||
opts,
|
||||
Plugins::new(),
|
||||
table_metadata_manager,
|
||||
catalog_manager.clone(),
|
||||
flow_metadata_manager,
|
||||
)
|
||||
.with_heartbeat_task(heartbeat_task);
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ pub mod standalone;
|
||||
|
||||
lazy_static::lazy_static! {
|
||||
static ref APP_VERSION: prometheus::IntGaugeVec =
|
||||
prometheus::register_int_gauge_vec!("greptime_app_version", "app version", &["short_version", "version"]).unwrap();
|
||||
prometheus::register_int_gauge_vec!("greptime_app_version", "app version", &["version", "short_version"]).unwrap();
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -74,16 +74,16 @@ pub trait App: Send {
|
||||
}
|
||||
|
||||
/// Log the versions of the application, and the arguments passed to the cli.
|
||||
/// `version_string` should be the same as the output of cli "--version";
|
||||
/// and the `app_version` is the short version of the codes, often consist of git branch and commit.
|
||||
pub fn log_versions(version_string: &str, app_version: &str) {
|
||||
/// `version` should be the same as the output of cli "--version";
|
||||
/// and the `short_version` is the short version of the code, often consisting of the git branch and commit.
|
||||
pub fn log_versions(version: &str, short_version: &str) {
|
||||
// Report app version as gauge.
|
||||
APP_VERSION
|
||||
.with_label_values(&[env!("CARGO_PKG_VERSION"), app_version])
|
||||
.with_label_values(&[env!("CARGO_PKG_VERSION"), short_version])
|
||||
.inc();
|
||||
|
||||
// Log version and argument flags.
|
||||
info!("GreptimeDB version: {}", version_string);
|
||||
info!("GreptimeDB version: {}", version);
|
||||
|
||||
log_env_flags();
|
||||
}
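
The label reordering in this hunk matters because Prometheus matches the values passed to `with_label_values` positionally against the labels declared at registration time. A minimal sketch of the pattern (the metric name below is made up for illustration and is not part of the change):

    use prometheus::register_int_gauge_vec;

    fn report_version(version: &str, short_version: &str) {
        // Labels declared as ["version", "short_version"] ...
        let gauge = register_int_gauge_vec!(
            "demo_app_version",
            "demo app version gauge",
            &["version", "short_version"]
        )
        .unwrap();
        // ... so the values must be supplied in the same order.
        gauge.with_label_values(&[version, short_version]).inc();
    }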
|
||||
|
||||
@@ -22,6 +22,7 @@ use common_telemetry::info;
|
||||
use common_telemetry::logging::TracingOptions;
|
||||
use common_version::{short_version, version};
|
||||
use meta_srv::bootstrap::MetasrvInstance;
|
||||
use meta_srv::metasrv::BackendImpl;
|
||||
use snafu::ResultExt;
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
@@ -137,6 +138,9 @@ struct StartCommand {
|
||||
/// The max operations per txn
|
||||
#[clap(long)]
|
||||
max_txn_ops: Option<usize>,
|
||||
/// The database backend.
|
||||
#[clap(long, value_enum)]
|
||||
backend: Option<BackendImpl>,
|
||||
}
|
||||
|
||||
impl StartCommand {
|
||||
@@ -219,6 +223,12 @@ impl StartCommand {
|
||||
opts.max_txn_ops = max_txn_ops;
|
||||
}
|
||||
|
||||
if let Some(backend) = &self.backend {
|
||||
opts.backend.clone_from(backend);
|
||||
} else {
|
||||
opts.backend = BackendImpl::default()
|
||||
}
|
||||
|
||||
// Disable dashboard in metasrv.
|
||||
opts.http.disable_dashboard = true;
|
||||
|
||||
|
||||
@@ -476,11 +476,13 @@ impl StartCommand {
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
|
||||
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
|
||||
let flow_builder = FlownodeBuilder::new(
|
||||
Default::default(),
|
||||
plugins.clone(),
|
||||
table_metadata_manager.clone(),
|
||||
catalog_manager.clone(),
|
||||
flow_metadata_manager.clone(),
|
||||
);
|
||||
let flownode = Arc::new(
|
||||
flow_builder
|
||||
@@ -511,7 +513,6 @@ impl StartCommand {
|
||||
opts.wal.into(),
|
||||
kv_backend.clone(),
|
||||
));
|
||||
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
|
||||
let table_meta_allocator = Arc::new(TableMetadataAllocator::new(
|
||||
table_id_sequence,
|
||||
wal_options_allocator.clone(),
|
||||
|
||||
@@ -22,7 +22,7 @@ use common_grpc::channel_manager::{
|
||||
DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
|
||||
};
|
||||
use common_runtime::global::RuntimeOptions;
|
||||
use common_telemetry::logging::LoggingOptions;
|
||||
use common_telemetry::logging::{LoggingOptions, DEFAULT_OTLP_ENDPOINT};
|
||||
use common_wal::config::raft_engine::RaftEngineConfig;
|
||||
use common_wal::config::DatanodeWalConfig;
|
||||
use datanode::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
|
||||
@@ -88,7 +88,7 @@ fn test_load_datanode_example_config() {
|
||||
],
|
||||
logging: LoggingOptions {
|
||||
level: Some("info".to_string()),
|
||||
otlp_endpoint: Some("".to_string()),
|
||||
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
|
||||
tracing_sample_ratio: Some(Default::default()),
|
||||
..Default::default()
|
||||
},
|
||||
@@ -136,7 +136,7 @@ fn test_load_frontend_example_config() {
|
||||
}),
|
||||
logging: LoggingOptions {
|
||||
level: Some("info".to_string()),
|
||||
otlp_endpoint: Some("".to_string()),
|
||||
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
|
||||
tracing_sample_ratio: Some(Default::default()),
|
||||
..Default::default()
|
||||
},
|
||||
@@ -169,12 +169,12 @@ fn test_load_metasrv_example_config() {
|
||||
compact_rt_size: 4,
|
||||
},
|
||||
component: MetasrvOptions {
|
||||
selector: SelectorType::LeaseBased,
|
||||
selector: SelectorType::default(),
|
||||
data_home: "/tmp/metasrv/".to_string(),
|
||||
logging: LoggingOptions {
|
||||
dir: "/tmp/greptimedb/logs".to_string(),
|
||||
level: Some("info".to_string()),
|
||||
otlp_endpoint: Some("".to_string()),
|
||||
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
|
||||
tracing_sample_ratio: Some(Default::default()),
|
||||
..Default::default()
|
||||
},
|
||||
@@ -228,7 +228,7 @@ fn test_load_standalone_example_config() {
|
||||
},
|
||||
logging: LoggingOptions {
|
||||
level: Some("info".to_string()),
|
||||
otlp_endpoint: Some("".to_string()),
|
||||
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
|
||||
tracing_sample_ratio: Some(Default::default()),
|
||||
..Default::default()
|
||||
},
|
||||
|
||||
@@ -19,9 +19,8 @@ use snafu::ResultExt;
|
||||
use crate::error::{BuildBackendSnafu, Result};
|
||||
|
||||
pub fn build_fs_backend(root: &str) -> Result<ObjectStore> {
|
||||
let mut builder = Fs::default();
|
||||
let _ = builder.root(root);
|
||||
let object_store = ObjectStore::new(builder)
|
||||
let builder = Fs::default();
|
||||
let object_store = ObjectStore::new(builder.root(root))
|
||||
.context(BuildBackendSnafu)?
|
||||
.layer(
|
||||
object_store::layers::LoggingLayer::default()
|
||||
|
||||
@@ -44,28 +44,26 @@ pub fn build_s3_backend(
|
||||
path: &str,
|
||||
connection: &HashMap<String, String>,
|
||||
) -> Result<ObjectStore> {
|
||||
let mut builder = S3::default();
|
||||
|
||||
let _ = builder.root(path).bucket(host);
|
||||
let mut builder = S3::default().root(path).bucket(host);
|
||||
|
||||
if let Some(endpoint) = connection.get(ENDPOINT) {
|
||||
let _ = builder.endpoint(endpoint);
|
||||
builder = builder.endpoint(endpoint);
|
||||
}
|
||||
|
||||
if let Some(region) = connection.get(REGION) {
|
||||
let _ = builder.region(region);
|
||||
builder = builder.region(region);
|
||||
}
|
||||
|
||||
if let Some(key_id) = connection.get(ACCESS_KEY_ID) {
|
||||
let _ = builder.access_key_id(key_id);
|
||||
builder = builder.access_key_id(key_id);
|
||||
}
|
||||
|
||||
if let Some(key) = connection.get(SECRET_ACCESS_KEY) {
|
||||
let _ = builder.secret_access_key(key);
|
||||
builder = builder.secret_access_key(key);
|
||||
}
|
||||
|
||||
if let Some(session_token) = connection.get(SESSION_TOKEN) {
|
||||
let _ = builder.security_token(session_token);
|
||||
builder = builder.session_token(session_token);
|
||||
}
|
||||
|
||||
if let Some(enable_str) = connection.get(ENABLE_VIRTUAL_HOST_STYLE) {
|
||||
@@ -79,7 +77,7 @@ pub fn build_s3_backend(
|
||||
.build()
|
||||
})?;
|
||||
if enable {
|
||||
let _ = builder.enable_virtual_host_style();
|
||||
builder = builder.enable_virtual_host_style();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -47,19 +47,15 @@ pub fn format_schema(schema: Schema) -> Vec<String> {
|
||||
}
|
||||
|
||||
pub fn test_store(root: &str) -> ObjectStore {
|
||||
let mut builder = Fs::default();
|
||||
let _ = builder.root(root);
|
||||
|
||||
ObjectStore::new(builder).unwrap().finish()
|
||||
let builder = Fs::default();
|
||||
ObjectStore::new(builder.root(root)).unwrap().finish()
|
||||
}
|
||||
|
||||
pub fn test_tmp_store(root: &str) -> (ObjectStore, TempDir) {
|
||||
let dir = create_temp_dir(root);
|
||||
|
||||
let mut builder = Fs::default();
|
||||
let _ = builder.root("/");
|
||||
|
||||
(ObjectStore::new(builder).unwrap().finish(), dir)
|
||||
let builder = Fs::default();
|
||||
(ObjectStore::new(builder.root("/")).unwrap().finish(), dir)
|
||||
}
|
||||
|
||||
pub fn test_basic_schema() -> SchemaRef {
|
||||
|
||||
@@ -33,6 +33,8 @@ use crate::handlers::TableMutationHandlerRef;
|
||||
|
||||
/// Compact type: strict window.
|
||||
const COMPACT_TYPE_STRICT_WINDOW: &str = "strict_window";
|
||||
/// Compact type: strict window (short name).
|
||||
const COMPACT_TYPE_STRICT_WINDOW_SHORT: &str = "swcs";
|
||||
|
||||
#[admin_fn(
|
||||
name = FlushTableFunction,
|
||||
@@ -168,8 +170,12 @@ fn parse_compact_params(
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses the compaction strategy type. For `strict_window` or `swcs`, strict window compaction is chosen;
/// otherwise regular (TWCS) compaction is used.
|
||||
fn parse_compact_type(type_str: &str, option: Option<&str>) -> Result<compact_request::Options> {
|
||||
if type_str.eq_ignore_ascii_case(COMPACT_TYPE_STRICT_WINDOW) {
|
||||
if type_str.eq_ignore_ascii_case(COMPACT_TYPE_STRICT_WINDOW)
|
||||
| type_str.eq_ignore_ascii_case(COMPACT_TYPE_STRICT_WINDOW_SHORT)
|
||||
{
|
||||
let window_seconds = option
|
||||
.map(|v| {
|
||||
i64::from_str(v).map_err(|_| {
|
||||
@@ -350,6 +356,17 @@ mod tests {
|
||||
compact_options: Options::Regular(Default::default()),
|
||||
},
|
||||
),
|
||||
(
|
||||
&["table", "swcs", "120"],
|
||||
CompactTableRequest {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "table".to_string(),
|
||||
compact_options: Options::StrictWindow(StrictWindow {
|
||||
window_seconds: 120,
|
||||
}),
|
||||
},
|
||||
),
|
||||
]);
|
||||
|
||||
assert!(parse_compact_params(
|
||||
|
||||
@@ -6,6 +6,7 @@ license.workspace = true
|
||||
|
||||
[features]
|
||||
testing = []
|
||||
pg_kvbackend = ["dep:tokio-postgres"]
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
@@ -56,6 +57,7 @@ store-api.workspace = true
|
||||
strum.workspace = true
|
||||
table.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-postgres = { workspace = true, optional = true }
|
||||
tonic.workspace = true
|
||||
typetag = "0.2"
|
||||
|
||||
|
||||
@@ -227,7 +227,7 @@ impl Procedure for DropTableProcedure {
|
||||
}
|
||||
|
||||
fn rollback_supported(&self) -> bool {
|
||||
!matches!(self.data.state, DropTableState::Prepare)
|
||||
!matches!(self.data.state, DropTableState::Prepare) && self.data.allow_rollback
|
||||
}
|
||||
|
||||
async fn rollback(&mut self, _: &ProcedureContext) -> ProcedureResult<()> {
|
||||
@@ -256,6 +256,8 @@ pub struct DropTableData {
|
||||
pub task: DropTableTask,
|
||||
pub physical_region_routes: Vec<RegionRoute>,
|
||||
pub physical_table_id: Option<TableId>,
|
||||
#[serde(default)]
|
||||
pub allow_rollback: bool,
|
||||
}
|
||||
|
||||
impl DropTableData {
|
||||
@@ -266,6 +268,7 @@ impl DropTableData {
|
||||
task,
|
||||
physical_region_routes: vec![],
|
||||
physical_table_id: None,
|
||||
allow_rollback: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,8 +12,12 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_catalog::format_full_table_name;
|
||||
use snafu::OptionExt;
|
||||
use store_api::metric_engine_consts::METRIC_ENGINE_NAME;
|
||||
|
||||
use crate::ddl::drop_table::DropTableProcedure;
|
||||
use crate::error::Result;
|
||||
use crate::error::{self, Result};
|
||||
|
||||
impl DropTableProcedure {
|
||||
/// Fetches the table info and physical table route.
|
||||
@@ -29,6 +33,23 @@ impl DropTableProcedure {
|
||||
self.data.physical_region_routes = physical_table_route_value.region_routes;
|
||||
self.data.physical_table_id = Some(physical_table_id);
|
||||
|
||||
if physical_table_id == self.data.table_id() {
|
||||
let table_info_value = self
|
||||
.context
|
||||
.table_metadata_manager
|
||||
.table_info_manager()
|
||||
.get(task.table_id)
|
||||
.await?
|
||||
.with_context(|| error::TableInfoNotFoundSnafu {
|
||||
table: format_full_table_name(&task.catalog, &task.schema, &task.table),
|
||||
})?
|
||||
.into_inner();
|
||||
|
||||
let engine = table_info_value.table_info.meta.engine;
|
||||
// rollback only if dropping the metric physical table fails
|
||||
self.data.allow_rollback = engine.as_str() == METRIC_ENGINE_NAME
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ use std::collections::HashMap;
|
||||
use api::v1::meta::Partition;
|
||||
use api::v1::{ColumnDataType, SemanticType};
|
||||
use common_procedure::Status;
|
||||
use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME};
|
||||
use table::metadata::{RawTableInfo, TableId};
|
||||
|
||||
use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure;
|
||||
@@ -130,6 +131,11 @@ pub fn test_create_logical_table_task(name: &str) -> CreateTableTask {
|
||||
.time_index("ts")
|
||||
.primary_keys(["host".into()])
|
||||
.table_name(name)
|
||||
.engine(METRIC_ENGINE_NAME)
|
||||
.table_options(HashMap::from([(
|
||||
LOGICAL_TABLE_METADATA_KEY.to_string(),
|
||||
"phy".to_string(),
|
||||
)]))
|
||||
.build()
|
||||
.unwrap()
|
||||
.into();
|
||||
@@ -166,6 +172,7 @@ pub fn test_create_physical_table_task(name: &str) -> CreateTableTask {
|
||||
.time_index("ts")
|
||||
.primary_keys(["value".into()])
|
||||
.table_name(name)
|
||||
.engine(METRIC_ENGINE_NAME)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into();
|
||||
|
||||
@@ -127,7 +127,7 @@ pub fn build_raw_table_info_from_expr(expr: &CreateTableExpr) -> RawTableInfo {
|
||||
engine: expr.engine.to_string(),
|
||||
next_column_id: expr.column_defs.len() as u32,
|
||||
region_numbers: vec![],
|
||||
options: TableOptions::default(),
|
||||
options: TableOptions::try_from_iter(&expr.table_options).unwrap(),
|
||||
created_on: DateTime::default(),
|
||||
partition_key_indices: vec![],
|
||||
},
|
||||
|
||||
@@ -91,6 +91,7 @@ async fn test_on_prepare_table() {
|
||||
// Drop if exists
|
||||
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
|
||||
procedure.on_prepare().await.unwrap();
|
||||
assert!(!procedure.rollback_supported());
|
||||
|
||||
let task = new_drop_table_task(table_name, table_id, false);
|
||||
// Drop table
|
||||
@@ -224,9 +225,12 @@ async fn test_on_rollback() {
|
||||
let task = new_drop_table_task("phy_table", physical_table_id, false);
|
||||
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
|
||||
procedure.on_prepare().await.unwrap();
|
||||
assert!(procedure.rollback_supported());
|
||||
procedure.on_delete_metadata().await.unwrap();
|
||||
assert!(procedure.rollback_supported());
|
||||
procedure.rollback(&ctx).await.unwrap();
|
||||
// Rollback again
|
||||
assert!(procedure.rollback_supported());
|
||||
procedure.rollback(&ctx).await.unwrap();
|
||||
let kvs = kv_backend.dump();
|
||||
assert_eq!(kvs, expected_kvs);
|
||||
@@ -236,12 +240,7 @@ async fn test_on_rollback() {
|
||||
let task = new_drop_table_task("foo", table_ids[0], false);
|
||||
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.on_delete_metadata().await.unwrap();
|
||||
procedure.rollback(&ctx).await.unwrap();
|
||||
// Rollback again
|
||||
procedure.rollback(&ctx).await.unwrap();
|
||||
let kvs = kv_backend.dump();
|
||||
assert_eq!(kvs, expected_kvs);
|
||||
assert!(!procedure.rollback_supported());
|
||||
}
|
||||
|
||||
fn new_drop_table_task(table_name: &str, table_id: TableId, drop_if_exists: bool) -> DropTableTask {
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
use common_catalog::consts::METRIC_ENGINE;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_procedure::error::Error as ProcedureError;
|
||||
use snafu::{ensure, location, Location, OptionExt};
|
||||
use snafu::{ensure, location, OptionExt};
|
||||
use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
|
||||
use table::metadata::TableId;
|
||||
|
||||
|
||||
@@ -499,6 +499,13 @@ pub enum Error {
|
||||
error: rskafka::client::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to create TLS Config"))]
|
||||
TlsConfig {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_wal::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to resolve Kafka broker endpoint."))]
|
||||
ResolveKafkaEndpoint { source: common_wal::error::Error },
|
||||
|
||||
@@ -636,6 +643,15 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse {} from str to utf8", name))]
|
||||
StrFromUtf8 {
|
||||
name: String,
|
||||
#[snafu(source)]
|
||||
error: std::str::Utf8Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Value not exists"))]
|
||||
ValueNotExist {
|
||||
#[snafu(implicit)]
|
||||
@@ -644,6 +660,24 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("Failed to get cache"))]
|
||||
GetCache { source: Arc<Error> },
|
||||
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
#[snafu(display("Failed to execute via Postgres"))]
|
||||
PostgresExecution {
|
||||
#[snafu(source)]
|
||||
error: tokio_postgres::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
#[snafu(display("Failed to connect to Postgres"))]
|
||||
ConnectPostgres {
|
||||
#[snafu(source)]
|
||||
error: tokio_postgres::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -697,7 +731,8 @@ impl ErrorExt for Error {
|
||||
| UnexpectedLogicalRouteTable { .. }
|
||||
| ProcedureOutput { .. }
|
||||
| FromUtf8 { .. }
|
||||
| MetadataCorruption { .. } => StatusCode::Unexpected,
|
||||
| MetadataCorruption { .. }
|
||||
| StrFromUtf8 { .. } => StatusCode::Unexpected,
|
||||
|
||||
SendMessage { .. } | GetKvCache { .. } | CacheNotGet { .. } | RenameTable { .. } => {
|
||||
StatusCode::Internal
|
||||
@@ -714,7 +749,8 @@ impl ErrorExt for Error {
|
||||
| AlterLogicalTablesInvalidArguments { .. }
|
||||
| CreateLogicalTablesInvalidArguments { .. }
|
||||
| MismatchPrefix { .. }
|
||||
| DelimiterNotFound { .. } => StatusCode::InvalidArguments,
|
||||
| DelimiterNotFound { .. }
|
||||
| TlsConfig { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
FlowNotFound { .. } => StatusCode::FlowNotFound,
|
||||
FlowRouteNotFound { .. } => StatusCode::Unexpected,
|
||||
@@ -741,6 +777,11 @@ impl ErrorExt for Error {
|
||||
| ParseNum { .. }
|
||||
| InvalidRole { .. }
|
||||
| EmptyDdlTasks { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
PostgresExecution { .. } => StatusCode::Internal,
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
ConnectPostgres { .. } => StatusCode::Internal,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +31,8 @@ use crate::rpc::KeyValue;
|
||||
pub mod chroot;
|
||||
pub mod etcd;
|
||||
pub mod memory;
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
pub mod postgres;
|
||||
pub mod test;
|
||||
pub mod txn;
|
||||
|
||||
|
||||
src/common/meta/src/kv_backend/postgres.rs (new file, 626 lines)
@@ -0,0 +1,626 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::borrow::Cow;
|
||||
use std::sync::Arc;
|
||||
|
||||
use snafu::ResultExt;
|
||||
use tokio_postgres::types::ToSql;
|
||||
use tokio_postgres::{Client, NoTls};
|
||||
|
||||
use super::{KvBackend, TxnService};
|
||||
use crate::error::{ConnectPostgresSnafu, Error, PostgresExecutionSnafu, Result, StrFromUtf8Snafu};
|
||||
use crate::kv_backend::txn::{Txn as KvTxn, TxnResponse as KvTxnResponse};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::rpc::store::{
|
||||
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
|
||||
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
|
||||
DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
|
||||
};
|
||||
use crate::rpc::KeyValue;
|
||||
|
||||
/// Postgres backend store for metasrv
|
||||
pub struct PgStore {
|
||||
// TODO: Consider using sqlx crate.
|
||||
client: Client,
|
||||
}
|
||||
|
||||
const EMPTY: &[u8] = &[0];
|
||||
|
||||
// TODO: allow users to configure metadata table name.
|
||||
const METADKV_CREATION: &str =
|
||||
"CREATE TABLE IF NOT EXISTS greptime_metakv(k varchar PRIMARY KEY, v varchar)";
|
||||
|
||||
const FULL_TABLE_SCAN: &str = "SELECT k, v FROM greptime_metakv $1 ORDER BY K";
|
||||
|
||||
const POINT_GET: &str = "SELECT k, v FROM greptime_metakv WHERE k = $1";
|
||||
|
||||
const PREFIX_SCAN: &str = "SELECT k, v FROM greptime_metakv WHERE k LIKE $1 ORDER BY K";
|
||||
|
||||
const RANGE_SCAN_LEFT_BOUNDED: &str = "SELECT k, v FROM greptime_metakv WHERE k >= $1 ORDER BY K";
|
||||
|
||||
const RANGE_SCAN_FULL_RANGE: &str =
|
||||
"SELECT k, v FROM greptime_metakv WHERE k >= $1 AND K < $2 ORDER BY K";
|
||||
|
||||
const FULL_TABLE_DELETE: &str = "DELETE FROM greptime_metakv RETURNING k,v";
|
||||
|
||||
const POINT_DELETE: &str = "DELETE FROM greptime_metakv WHERE K = $1 RETURNING k,v;";
|
||||
|
||||
const PREFIX_DELETE: &str = "DELETE FROM greptime_metakv WHERE k LIKE $1 RETURNING k,v;";
|
||||
|
||||
const RANGE_DELETE_LEFT_BOUNDED: &str = "DELETE FROM greptime_metakv WHERE k >= $1 RETURNING k,v;";
|
||||
|
||||
const RANGE_DELETE_FULL_RANGE: &str =
|
||||
"DELETE FROM greptime_metakv WHERE k >= $1 AND K < $2 RETURNING k,v;";
|
||||
|
||||
const CAS: &str = r#"
|
||||
WITH prev AS (
|
||||
SELECT k,v FROM greptime_metakv WHERE k = $1 AND v = $2
|
||||
), update AS (
|
||||
UPDATE greptime_metakv
|
||||
SET k=$1,
|
||||
v=$2
|
||||
WHERE
|
||||
k=$1 AND v=$3
|
||||
)
|
||||
|
||||
SELECT k, v FROM prev;
|
||||
"#;
|
||||
|
||||
const PUT_IF_NOT_EXISTS: &str = r#"
|
||||
WITH prev AS (
|
||||
select k,v from greptime_metakv where k = $1
|
||||
), insert AS (
|
||||
INSERT INTO greptime_metakv
|
||||
VALUES ($1, $2)
|
||||
ON CONFLICT (k) DO NOTHING
|
||||
)
|
||||
|
||||
SELECT k, v FROM prev;"#;
|
||||
|
||||
impl PgStore {
|
||||
/// Creates a `PgStore`-backed `KvBackendRef` from a Postgres connection URL.
|
||||
pub async fn with_url(url: &str) -> Result<KvBackendRef> {
|
||||
// TODO: support tls.
|
||||
let (client, conn) = tokio_postgres::connect(url, NoTls)
|
||||
.await
|
||||
.context(ConnectPostgresSnafu)?;
|
||||
tokio::spawn(async move { conn.await.context(ConnectPostgresSnafu) });
|
||||
Self::with_pg_client(client).await
|
||||
}
|
||||
|
||||
/// Creates a `PgStore`-backed `KvBackendRef` from an existing tokio-postgres client.
|
||||
pub async fn with_pg_client(client: Client) -> Result<KvBackendRef> {
|
||||
// This step ensures the postgres metadata backend is ready to use.
|
||||
// We check if greptime_metakv table exists, and we will create a new table
|
||||
// if it does not exist.
|
||||
client
|
||||
.execute(METADKV_CREATION, &[])
|
||||
.await
|
||||
.context(PostgresExecutionSnafu)?;
|
||||
Ok(Arc::new(Self { client }))
|
||||
}
|
||||
|
||||
async fn put_if_not_exists(&self, key: &str, value: &str) -> Result<bool> {
|
||||
let res = self
|
||||
.client
|
||||
.query(PUT_IF_NOT_EXISTS, &[&key, &value])
|
||||
.await
|
||||
.context(PostgresExecutionSnafu)?;
|
||||
Ok(res.is_empty())
|
||||
}
|
||||
}
|
||||
|
||||
fn select_range_template(req: &RangeRequest) -> &str {
|
||||
if req.range_end.is_empty() {
|
||||
return POINT_GET;
|
||||
}
|
||||
if req.key == EMPTY && req.range_end == EMPTY {
|
||||
FULL_TABLE_SCAN
|
||||
} else if req.range_end == EMPTY {
|
||||
RANGE_SCAN_LEFT_BOUNDED
|
||||
} else if is_prefix_range(&req.key, &req.range_end) {
|
||||
PREFIX_SCAN
|
||||
} else {
|
||||
RANGE_SCAN_FULL_RANGE
|
||||
}
|
||||
}
|
||||
|
||||
fn select_range_delete_template(req: &DeleteRangeRequest) -> &str {
|
||||
if req.range_end.is_empty() {
|
||||
return POINT_DELETE;
|
||||
}
|
||||
if req.key == EMPTY && req.range_end == EMPTY {
|
||||
FULL_TABLE_DELETE
|
||||
} else if req.range_end == EMPTY {
|
||||
RANGE_DELETE_LEFT_BOUNDED
|
||||
} else if is_prefix_range(&req.key, &req.range_end) {
|
||||
PREFIX_DELETE
|
||||
} else {
|
||||
RANGE_DELETE_FULL_RANGE
|
||||
}
|
||||
}
|
||||
|
||||
// Generate dynamic parameterized sql for batch get.
|
||||
fn generate_batch_get_query(key_len: usize) -> String {
|
||||
let in_placeholders: Vec<String> = (1..=key_len).map(|i| format!("${}", i)).collect();
|
||||
let in_clause = in_placeholders.join(", ");
|
||||
format!(
|
||||
"SELECT k, v FROM greptime_metakv WHERE k in ({});",
|
||||
in_clause
|
||||
)
|
||||
}
|
||||
|
||||
// Generate dynamic parameterized sql for batch delete.
|
||||
fn generate_batch_delete_query(key_len: usize) -> String {
|
||||
let in_placeholders: Vec<String> = (1..=key_len).map(|i| format!("${}", i)).collect();
|
||||
let in_clause = in_placeholders.join(", ");
|
||||
format!(
|
||||
"DELETE FROM greptime_metakv WHERE k in ({}) RETURNING k, v;",
|
||||
in_clause
|
||||
)
|
||||
}
|
||||
|
||||
// Generate dynamic parameterized sql for batch upsert.
|
||||
fn generate_batch_upsert_query(kv_len: usize) -> String {
|
||||
let in_placeholders: Vec<String> = (1..=kv_len).map(|i| format!("${}", i)).collect();
|
||||
let in_clause = in_placeholders.join(", ");
|
||||
let mut param_index = kv_len + 1;
|
||||
let mut values_placeholders = Vec::new();
|
||||
for _ in 0..kv_len {
|
||||
values_placeholders.push(format!("(${0}, ${1})", param_index, param_index + 1));
|
||||
param_index += 2;
|
||||
}
|
||||
let values_clause = values_placeholders.join(", ");
|
||||
|
||||
format!(
|
||||
r#"
|
||||
WITH prev AS (
|
||||
SELECT k,v FROM greptime_metakv WHERE k IN ({in_clause})
|
||||
), update AS (
|
||||
INSERT INTO greptime_metakv (k, v) VALUES
|
||||
{values_clause}
|
||||
ON CONFLICT (
|
||||
k
|
||||
) DO UPDATE SET
|
||||
v = excluded.v
|
||||
)
|
||||
|
||||
SELECT k, v FROM prev;
|
||||
"#
|
||||
)
|
||||
}
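
To make the dynamically generated SQL above concrete, the sketch below spells out what the batch-get and batch-upsert generators produce for small inputs; the expected strings are derived from the format calls in the code and are illustrative, not an official test of the crate.

    #[test]
    fn generated_sql_shapes() {
        // Three keys bind $1..$3 inside a single IN clause.
        assert_eq!(
            generate_batch_get_query(3),
            "SELECT k, v FROM greptime_metakv WHERE k in ($1, $2, $3);"
        );

        // Two kvs reuse $1..$2 for the `prev` lookup and $3..$6 for the VALUES list.
        let sql = generate_batch_upsert_query(2);
        assert!(sql.contains("WHERE k IN ($1, $2)"));
        assert!(sql.contains("($3, $4), ($5, $6)"));
    }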
|
||||
|
||||
// Trim null byte at the end and convert bytes to string.
|
||||
fn process_bytes<'a>(data: &'a [u8], name: &str) -> Result<&'a str> {
|
||||
let mut len = data.len();
|
||||
// remove trailing null bytes to avoid error in postgres encoding.
|
||||
while len > 0 && data[len - 1] == 0 {
|
||||
len -= 1;
|
||||
}
|
||||
let res = std::str::from_utf8(&data[0..len]).context(StrFromUtf8Snafu { name })?;
|
||||
Ok(res)
|
||||
}
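
For intuition, the trailing-NUL trimming behaves like this (inputs chosen arbitrarily for illustration):

    // NUL padding is used as the "empty" sentinel above, so it is stripped
    // before the bytes are bound as Postgres varchars.
    assert_eq!(process_bytes(b"metakey\0\0", "demoKey").unwrap(), "metakey");
    assert_eq!(process_bytes(b"\0", "demoKey").unwrap(), "");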
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl KvBackend for PgStore {
|
||||
fn name(&self) -> &str {
|
||||
"Postgres"
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
|
||||
let mut params = vec![];
|
||||
let template = select_range_template(&req);
|
||||
if req.key != EMPTY {
|
||||
let key = process_bytes(&req.key, "rangeKey")?;
|
||||
if template == PREFIX_SCAN {
|
||||
let prefix = format!("{key}%");
|
||||
params.push(Cow::Owned(prefix))
|
||||
} else {
|
||||
params.push(Cow::Borrowed(key))
|
||||
}
|
||||
}
|
||||
if template == RANGE_SCAN_FULL_RANGE && req.range_end != EMPTY {
|
||||
let range_end = process_bytes(&req.range_end, "rangeEnd")?;
|
||||
params.push(Cow::Borrowed(range_end));
|
||||
}
|
||||
let limit = req.limit as usize;
|
||||
let limit_cause = match limit > 0 {
|
||||
true => format!(" LIMIT {};", limit + 1),
|
||||
false => ";".to_string(),
|
||||
};
|
||||
let template = format!("{}{}", template, limit_cause);
|
||||
let params: Vec<&(dyn ToSql + Sync)> = params
|
||||
.iter()
|
||||
.map(|x| match x {
|
||||
Cow::Borrowed(borrowed) => borrowed as &(dyn ToSql + Sync),
|
||||
Cow::Owned(owned) => owned as &(dyn ToSql + Sync),
|
||||
})
|
||||
.collect();
|
||||
let res = self
|
||||
.client
|
||||
.query(&template, ¶ms)
|
||||
.await
|
||||
.context(PostgresExecutionSnafu)?;
|
||||
let kvs: Vec<KeyValue> = res
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let key: String = r.get(0);
|
||||
if req.keys_only {
|
||||
return KeyValue {
|
||||
key: key.into_bytes(),
|
||||
value: vec![],
|
||||
};
|
||||
}
|
||||
let value: String = r.get(1);
|
||||
KeyValue {
|
||||
key: key.into_bytes(),
|
||||
value: value.into_bytes(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
if limit == 0 || limit > kvs.len() {
|
||||
return Ok(RangeResponse { kvs, more: false });
|
||||
}
|
||||
let (filtered_kvs, _) = kvs.split_at(limit);
|
||||
Ok(RangeResponse {
|
||||
kvs: filtered_kvs.to_vec(),
|
||||
more: kvs.len() > limit,
|
||||
})
|
||||
}
|
||||
|
||||
async fn put(&self, req: PutRequest) -> Result<PutResponse> {
|
||||
let kv = KeyValue {
|
||||
key: req.key,
|
||||
value: req.value,
|
||||
};
|
||||
let mut res = self
|
||||
.batch_put(BatchPutRequest {
|
||||
kvs: vec![kv],
|
||||
prev_kv: req.prev_kv,
|
||||
})
|
||||
.await?;
|
||||
|
||||
if !res.prev_kvs.is_empty() {
|
||||
return Ok(PutResponse {
|
||||
prev_kv: Some(res.prev_kvs.remove(0)),
|
||||
});
|
||||
}
|
||||
Ok(PutResponse { prev_kv: None })
|
||||
}
|
||||
|
||||
async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
|
||||
let mut in_params = Vec::with_capacity(req.kvs.len());
|
||||
let mut values_params = Vec::with_capacity(req.kvs.len() * 2);
|
||||
|
||||
for kv in &req.kvs {
|
||||
let processed_key = process_bytes(&kv.key, "BatchPutRequestKey")?;
|
||||
in_params.push(processed_key);
|
||||
|
||||
let processed_value = process_bytes(&kv.value, "BatchPutRequestValue")?;
|
||||
values_params.push(processed_key);
|
||||
values_params.push(processed_value);
|
||||
}
|
||||
in_params.extend(values_params);
|
||||
let params: Vec<&(dyn ToSql + Sync)> =
|
||||
in_params.iter().map(|x| x as &(dyn ToSql + Sync)).collect();
|
||||
|
||||
let query = generate_batch_upsert_query(req.kvs.len());
|
||||
let res = self
|
||||
.client
|
||||
.query(&query, ¶ms)
|
||||
.await
|
||||
.context(PostgresExecutionSnafu)?;
|
||||
if req.prev_kv {
|
||||
let kvs: Vec<KeyValue> = res
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let key: String = r.get(0);
|
||||
let value: String = r.get(1);
|
||||
KeyValue {
|
||||
key: key.into_bytes(),
|
||||
value: value.into_bytes(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
if !kvs.is_empty() {
|
||||
return Ok(BatchPutResponse { prev_kvs: kvs });
|
||||
}
|
||||
}
|
||||
Ok(BatchPutResponse { prev_kvs: vec![] })
|
||||
}
|
||||
|
||||
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
|
||||
if req.keys.is_empty() {
|
||||
return Ok(BatchGetResponse { kvs: vec![] });
|
||||
}
|
||||
let query = generate_batch_get_query(req.keys.len());
|
||||
let value_params = req
|
||||
.keys
|
||||
.iter()
|
||||
.map(|k| process_bytes(k, "BatchGetRequestKey"))
|
||||
.collect::<Result<Vec<&str>>>()?;
|
||||
let params: Vec<&(dyn ToSql + Sync)> = value_params
|
||||
.iter()
|
||||
.map(|x| x as &(dyn ToSql + Sync))
|
||||
.collect();
|
||||
let res = self
|
||||
.client
|
||||
.query(&query, ¶ms)
|
||||
.await
|
||||
.context(PostgresExecutionSnafu)?;
|
||||
let kvs: Vec<KeyValue> = res
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let key: String = r.get(0);
|
||||
let value: String = r.get(1);
|
||||
KeyValue {
|
||||
key: key.into_bytes(),
|
||||
value: value.into_bytes(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
Ok(BatchGetResponse { kvs })
|
||||
}
|
||||
|
||||
async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
|
||||
let mut params = vec![];
|
||||
let template = select_range_delete_template(&req);
|
||||
if req.key != EMPTY {
|
||||
let key = process_bytes(&req.key, "deleteRangeKey")?;
|
||||
if template == PREFIX_DELETE {
|
||||
let prefix = format!("{key}%");
|
||||
params.push(Cow::Owned(prefix));
|
||||
} else {
|
||||
params.push(Cow::Borrowed(key));
|
||||
}
|
||||
}
|
||||
if template == RANGE_DELETE_FULL_RANGE && req.range_end != EMPTY {
|
||||
let range_end = process_bytes(&req.range_end, "deleteRangeEnd")?;
|
||||
params.push(Cow::Borrowed(range_end));
|
||||
}
|
||||
let params: Vec<&(dyn ToSql + Sync)> = params
|
||||
.iter()
|
||||
.map(|x| match x {
|
||||
Cow::Borrowed(borrowed) => borrowed as &(dyn ToSql + Sync),
|
||||
Cow::Owned(owned) => owned as &(dyn ToSql + Sync),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let res = self
|
||||
.client
|
||||
.query(template, ¶ms)
|
||||
.await
|
||||
.context(PostgresExecutionSnafu)?;
|
||||
let deleted = res.len() as i64;
|
||||
if !req.prev_kv {
|
||||
return Ok({
|
||||
DeleteRangeResponse {
|
||||
deleted,
|
||||
prev_kvs: vec![],
|
||||
}
|
||||
});
|
||||
}
|
||||
let kvs: Vec<KeyValue> = res
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let key: String = r.get(0);
|
||||
let value: String = r.get(1);
|
||||
KeyValue {
|
||||
key: key.into_bytes(),
|
||||
value: value.into_bytes(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
Ok(DeleteRangeResponse {
|
||||
deleted,
|
||||
prev_kvs: kvs,
|
||||
})
|
||||
}
|
||||
|
||||
async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
|
||||
if req.keys.is_empty() {
|
||||
return Ok(BatchDeleteResponse { prev_kvs: vec![] });
|
||||
}
|
||||
let query = generate_batch_delete_query(req.keys.len());
|
||||
let value_params = req
|
||||
.keys
|
||||
.iter()
|
||||
.map(|k| process_bytes(k, "BatchDeleteRequestKey"))
|
||||
.collect::<Result<Vec<&str>>>()?;
|
||||
let params: Vec<&(dyn ToSql + Sync)> = value_params
|
||||
.iter()
|
||||
.map(|x| x as &(dyn ToSql + Sync))
|
||||
.collect();
|
||||
let res = self
|
||||
.client
|
||||
.query(&query, ¶ms)
|
||||
.await
|
||||
.context(PostgresExecutionSnafu)?;
|
||||
if !req.prev_kv {
|
||||
return Ok(BatchDeleteResponse { prev_kvs: vec![] });
|
||||
}
|
||||
let kvs: Vec<KeyValue> = res
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let key: String = r.get(0);
|
||||
let value: String = r.get(1);
|
||||
KeyValue {
|
||||
key: key.into_bytes(),
|
||||
value: value.into_bytes(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
Ok(BatchDeleteResponse { prev_kvs: kvs })
|
||||
}
|
||||
|
||||
async fn compare_and_put(&self, req: CompareAndPutRequest) -> Result<CompareAndPutResponse> {
|
||||
let key = process_bytes(&req.key, "CASKey")?;
|
||||
let value = process_bytes(&req.value, "CASValue")?;
|
||||
if req.expect.is_empty() {
|
||||
let put_res = self.put_if_not_exists(key, value).await?;
|
||||
return Ok(CompareAndPutResponse {
|
||||
success: put_res,
|
||||
prev_kv: None,
|
||||
});
|
||||
}
|
||||
let expect = process_bytes(&req.expect, "CASExpect")?;
|
||||
|
||||
let res = self
|
||||
.client
|
||||
.query(CAS, &[&key, &value, &expect])
|
||||
.await
|
||||
.context(PostgresExecutionSnafu)?;
|
||||
match res.is_empty() {
|
||||
true => Ok(CompareAndPutResponse {
|
||||
success: false,
|
||||
prev_kv: None,
|
||||
}),
|
||||
false => {
|
||||
let mut kvs: Vec<KeyValue> = res
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let key: String = r.get(0);
|
||||
let value: String = r.get(1);
|
||||
KeyValue {
|
||||
key: key.into_bytes(),
|
||||
value: value.into_bytes(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
Ok(CompareAndPutResponse {
|
||||
success: true,
|
||||
prev_kv: Some(kvs.remove(0)),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl TxnService for PgStore {
|
||||
type Error = Error;
|
||||
|
||||
async fn txn(&self, _txn: KvTxn) -> Result<KvTxnResponse> {
|
||||
// TODO: implement txn for pg kv backend.
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn max_txn_ops(&self) -> usize {
|
||||
unreachable!("postgres backend does not support max_txn_ops!")
|
||||
}
|
||||
}
|
||||
|
||||
fn is_prefix_range(start: &[u8], end: &[u8]) -> bool {
|
||||
if start.len() != end.len() {
|
||||
return false;
|
||||
}
|
||||
let l = start.len();
|
||||
let same_prefix = start[0..l - 1] == end[0..l - 1];
|
||||
if let (Some(rhs), Some(lhs)) = (start.last(), end.last()) {
|
||||
return same_prefix && (*rhs + 1) == *lhs;
|
||||
}
|
||||
false
|
||||
}
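
In this scheme a "prefix range" is a pair where the end key equals the start key with its last byte incremented, which is what lets the backend turn it into a LIKE-based prefix scan. A couple of illustrative cases (not part of the diff):

    assert!(is_prefix_range(b"abc", b"abd"));   // ["abc", "abd") covers every key starting with "abc"
    assert!(!is_prefix_range(b"abc", b"abz"));  // ordinary bounded range, not a prefix
    assert!(!is_prefix_range(b"abc", b"abcd")); // different lengths are never a prefix range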
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::kv_backend::test::{
|
||||
prepare_kv_with_prefix, test_kv_batch_delete_with_prefix, test_kv_batch_get_with_prefix,
|
||||
test_kv_compare_and_put_with_prefix, test_kv_delete_range_with_prefix,
|
||||
test_kv_put_with_prefix, test_kv_range_2_with_prefix, test_kv_range_with_prefix,
|
||||
unprepare_kv,
|
||||
};
|
||||
|
||||
async fn build_pg_kv_backend() -> Option<PgStore> {
|
||||
let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap_or_default();
|
||||
if endpoints.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let (client, connection) = tokio_postgres::connect(&endpoints, NoTls).await.unwrap();
|
||||
tokio::spawn(connection);
|
||||
let _ = client.execute(METADKV_CREATION, &[]).await;
|
||||
Some(PgStore { client })
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_put() {
|
||||
if let Some(kv_backend) = build_pg_kv_backend().await {
|
||||
let prefix = b"put/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_put_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
unprepare_kv(&kv_backend, prefix).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_range() {
|
||||
if let Some(kv_backend) = build_pg_kv_backend().await {
|
||||
let prefix = b"range/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_range_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
unprepare_kv(&kv_backend, prefix).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_range_2() {
|
||||
if let Some(kv_backend) = build_pg_kv_backend().await {
|
||||
test_kv_range_2_with_prefix(kv_backend, b"range2/".to_vec()).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_batch_get() {
|
||||
if let Some(kv_backend) = build_pg_kv_backend().await {
|
||||
let prefix = b"batchGet/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_batch_get_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
unprepare_kv(&kv_backend, prefix).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_compare_and_put() {
|
||||
if let Some(kv_backend) = build_pg_kv_backend().await {
|
||||
let kv_backend = Arc::new(kv_backend);
|
||||
test_kv_compare_and_put_with_prefix(kv_backend, b"compareAndPut/".to_vec()).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_delete_range() {
|
||||
if let Some(kv_backend) = build_pg_kv_backend().await {
|
||||
let prefix = b"deleteRange/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_delete_range_with_prefix(kv_backend, prefix.to_vec()).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_batch_delete() {
|
||||
if let Some(kv_backend) = build_pg_kv_backend().await {
|
||||
let prefix = b"batchDelete/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_batch_delete_with_prefix(kv_backend, prefix.to_vec()).await;
|
||||
}
|
||||
}
|
||||
}
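
End to end, wiring the new backend up looks roughly like the sketch below. It is not part of the diff: the connection string and keys are placeholders, the request structs are spelled out with only the fields referenced in the code above, and the crate paths assume the usual `common_meta` layout.

    use common_meta::kv_backend::postgres::PgStore;
    use common_meta::rpc::store::{PutRequest, RangeRequest};

    async fn demo() -> common_meta::error::Result<()> {
        // Placeholder DSN; PgStore::with_url creates the greptime_metakv table on first use.
        let kv = PgStore::with_url("postgresql://user:pass@127.0.0.1:5432/postgres").await?;

        kv.put(PutRequest {
            key: b"__demo/key".to_vec(),
            value: b"value".to_vec(),
            prev_kv: false,
        })
        .await?;

        // An empty range_end turns this into a point get, per select_range_template.
        let resp = kv.range(RangeRequest {
            key: b"__demo/key".to_vec(),
            range_end: vec![],
            limit: 0,
            keys_only: false,
        })
        .await?;
        assert_eq!(resp.kvs.len(), 1);
        Ok(())
    }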
|
||||
@@ -123,7 +123,7 @@ pub fn prepare_wal_options(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_wal::config::kafka::common::KafkaTopicConfig;
|
||||
use common_wal::config::kafka::common::{KafkaConnectionConfig, KafkaTopicConfig};
|
||||
use common_wal::config::kafka::MetasrvKafkaConfig;
|
||||
use common_wal::test_util::run_test_with_kafka_wal;
|
||||
|
||||
@@ -166,7 +166,10 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
let config = MetasrvKafkaConfig {
|
||||
broker_endpoints,
|
||||
connection: KafkaConnectionConfig {
|
||||
broker_endpoints,
|
||||
..Default::default()
|
||||
},
|
||||
kafka_topic,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -30,7 +30,7 @@ use snafu::{ensure, ResultExt};
|
||||
use crate::error::{
|
||||
BuildKafkaClientSnafu, BuildKafkaCtrlClientSnafu, BuildKafkaPartitionClientSnafu,
|
||||
CreateKafkaWalTopicSnafu, DecodeJsonSnafu, EncodeJsonSnafu, InvalidNumTopicsSnafu,
|
||||
ProduceRecordSnafu, ResolveKafkaEndpointSnafu, Result,
|
||||
ProduceRecordSnafu, ResolveKafkaEndpointSnafu, Result, TlsConfigSnafu,
|
||||
};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::rpc::store::PutRequest;
|
||||
@@ -117,15 +117,22 @@ impl TopicManager {
|
||||
base: self.config.backoff.base as f64,
|
||||
deadline: self.config.backoff.deadline,
|
||||
};
|
||||
let broker_endpoints = common_wal::resolve_to_ipv4(&self.config.broker_endpoints)
|
||||
.await
|
||||
.context(ResolveKafkaEndpointSnafu)?;
|
||||
let client = ClientBuilder::new(broker_endpoints)
|
||||
.backoff_config(backoff_config)
|
||||
let broker_endpoints =
|
||||
common_wal::resolve_to_ipv4(&self.config.connection.broker_endpoints)
|
||||
.await
|
||||
.context(ResolveKafkaEndpointSnafu)?;
|
||||
let mut builder = ClientBuilder::new(broker_endpoints).backoff_config(backoff_config);
|
||||
if let Some(sasl) = &self.config.connection.sasl {
|
||||
builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
|
||||
};
|
||||
if let Some(tls) = &self.config.connection.tls {
|
||||
builder = builder.tls_config(tls.to_tls_config().await.context(TlsConfigSnafu)?)
|
||||
};
|
||||
let client = builder
|
||||
.build()
|
||||
.await
|
||||
.with_context(|_| BuildKafkaClientSnafu {
|
||||
broker_endpoints: self.config.broker_endpoints.clone(),
|
||||
broker_endpoints: self.config.connection.broker_endpoints.clone(),
|
||||
})?;
|
||||
|
||||
let control_client = client
|
||||
@@ -242,7 +249,7 @@ impl TopicManager {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_wal::config::kafka::common::KafkaTopicConfig;
|
||||
use common_wal::config::kafka::common::{KafkaConnectionConfig, KafkaTopicConfig};
|
||||
use common_wal::test_util::run_test_with_kafka_wal;
|
||||
|
||||
use super::*;
|
||||
@@ -289,7 +296,10 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
let config = MetasrvKafkaConfig {
|
||||
broker_endpoints,
|
||||
connection: KafkaConnectionConfig {
|
||||
broker_endpoints,
|
||||
..Default::default()
|
||||
},
|
||||
kafka_topic,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -680,9 +680,8 @@ pub(crate) mod test_util {
|
||||
|
||||
pub(crate) fn new_object_store(dir: &TempDir) -> ObjectStore {
|
||||
let store_dir = dir.path().to_str().unwrap();
|
||||
let mut builder = Builder::default();
|
||||
let _ = builder.root(store_dir);
|
||||
ObjectStore::new(builder).unwrap().finish()
|
||||
let builder = Builder::default();
|
||||
ObjectStore::new(builder.root(store_dir)).unwrap().finish()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -361,8 +361,7 @@ mod tests {
|
||||
|
||||
fn procedure_store_for_test(dir: &TempDir) -> ProcedureStore {
|
||||
let store_dir = dir.path().to_str().unwrap();
|
||||
let mut builder = Builder::default();
|
||||
let _ = builder.root(store_dir);
|
||||
let builder = Builder::default().root(store_dir);
|
||||
let object_store = ObjectStore::new(builder).unwrap().finish();
|
||||
|
||||
ProcedureStore::from_object_store(object_store)
|
||||
|
||||
@@ -220,8 +220,7 @@ mod tests {
|
||||
async fn test_object_state_store() {
|
||||
let dir = create_temp_dir("state_store");
|
||||
let store_dir = dir.path().to_str().unwrap();
|
||||
let mut builder = Builder::default();
|
||||
let _ = builder.root(store_dir);
|
||||
let builder = Builder::default().root(store_dir);
|
||||
|
||||
let object_store = ObjectStore::new(builder).unwrap().finish();
|
||||
let state_store = ObjectStateStore::new(object_store);
|
||||
@@ -291,8 +290,7 @@ mod tests {
|
||||
async fn test_object_state_store_delete() {
|
||||
let dir = create_temp_dir("state_store_list");
|
||||
let store_dir = dir.path().to_str().unwrap();
|
||||
let mut builder = Builder::default();
|
||||
let _ = builder.root(store_dir);
|
||||
let builder = Builder::default().root(store_dir);
|
||||
|
||||
let object_store = ObjectStore::new(builder).unwrap().finish();
|
||||
let state_store = ObjectStateStore::new(object_store);
|
||||
|
||||
@@ -31,4 +31,5 @@ store-api.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
common-base.workspace = true
|
||||
futures-util.workspace = true
|
||||
tokio.workspace = true
|
||||
|
||||
@@ -155,13 +155,6 @@ pub enum Error {
|
||||
source: DataTypeError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to execute physical plan"))]
|
||||
ExecutePhysicalPlan {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to cast array to {:?}", typ))]
|
||||
TypeCast {
|
||||
#[snafu(source)]
|
||||
@@ -308,7 +301,6 @@ impl ErrorExt for Error {
|
||||
|
||||
Error::DecodePlan { source, .. }
|
||||
| Error::Execute { source, .. }
|
||||
| Error::ExecutePhysicalPlan { source, .. }
|
||||
| Error::ProcedureService { source, .. }
|
||||
| Error::TableMutation { source, .. } => source.status_code(),
|
||||
|
||||
|
||||
@@ -19,8 +19,10 @@ pub mod logical_plan;
|
||||
pub mod prelude;
|
||||
pub mod request;
|
||||
mod signature;
|
||||
pub mod stream;
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub mod test_util;
|
||||
|
||||
use std::fmt::{Debug, Display, Formatter};
|
||||
use std::sync::Arc;
|
||||
|
||||
|
||||
src/common/query/src/stream.rs (new file, 175 lines)
@@ -0,0 +1,175 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use common_recordbatch::adapter::DfRecordBatchStreamAdapter;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use datafusion::execution::context::TaskContext;
|
||||
use datafusion::execution::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datafusion::physical_expr::{EquivalenceProperties, Partitioning, PhysicalSortExpr};
|
||||
use datafusion::physical_plan::{
|
||||
DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties,
|
||||
};
|
||||
use datafusion_common::DataFusionError;
|
||||
use datatypes::arrow::datatypes::SchemaRef as ArrowSchemaRef;
|
||||
use datatypes::schema::SchemaRef;
|
||||
|
||||
/// Adapts greptime's [SendableRecordBatchStream] to DataFusion's [ExecutionPlan].
|
||||
pub struct StreamScanAdapter {
|
||||
stream: Mutex<Option<SendableRecordBatchStream>>,
|
||||
schema: SchemaRef,
|
||||
arrow_schema: ArrowSchemaRef,
|
||||
properties: PlanProperties,
|
||||
output_ordering: Option<Vec<PhysicalSortExpr>>,
|
||||
}
|
||||
|
||||
impl Debug for StreamScanAdapter {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("StreamScanAdapter")
|
||||
.field("stream", &"<SendableRecordBatchStream>")
|
||||
.field("schema", &self.schema)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl StreamScanAdapter {
|
||||
pub fn new(stream: SendableRecordBatchStream) -> Self {
|
||||
let schema = stream.schema();
|
||||
let arrow_schema = schema.arrow_schema().clone();
|
||||
let properties = PlanProperties::new(
|
||||
EquivalenceProperties::new(arrow_schema.clone()),
|
||||
Partitioning::UnknownPartitioning(1),
|
||||
ExecutionMode::Bounded,
|
||||
);
|
||||
|
||||
Self {
|
||||
stream: Mutex::new(Some(stream)),
|
||||
schema,
|
||||
arrow_schema,
|
||||
properties,
|
||||
output_ordering: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_output_ordering(mut self, output_ordering: Option<Vec<PhysicalSortExpr>>) -> Self {
|
||||
self.output_ordering = output_ordering;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for StreamScanAdapter {
|
||||
fn fmt_as(&self, _t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"StreamScanAdapter: [<SendableRecordBatchStream>], schema: ["
|
||||
)?;
|
||||
write!(f, "{:?}", &self.arrow_schema)?;
|
||||
write!(f, "]")
|
||||
}
|
||||
}
|
||||
|
||||
impl ExecutionPlan for StreamScanAdapter {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema(&self) -> ArrowSchemaRef {
|
||||
self.arrow_schema.clone()
|
||||
}
|
||||
|
||||
fn properties(&self) -> &PlanProperties {
|
||||
&self.properties
|
||||
}
|
||||
|
||||
fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
|
||||
vec![]
|
||||
}
|
||||
|
||||
// DataFusion will swap children unconditionally.
|
||||
// But since this node is leaf node, it's safe to just return self.
|
||||
fn with_new_children(
|
||||
self: Arc<Self>,
|
||||
_children: Vec<Arc<dyn ExecutionPlan>>,
|
||||
) -> datafusion_common::Result<Arc<dyn ExecutionPlan>> {
|
||||
Ok(self.clone())
|
||||
}
|
||||
|
||||
fn execute(
|
||||
&self,
|
||||
_partition: usize,
|
||||
_context: Arc<TaskContext>,
|
||||
) -> datafusion_common::Result<DfSendableRecordBatchStream> {
|
||||
let mut stream = self.stream.lock().unwrap();
|
||||
let stream = stream
|
||||
.take()
|
||||
.ok_or_else(|| DataFusionError::Execution("Stream already exhausted".to_string()))?;
|
||||
Ok(Box::pin(DfRecordBatchStreamAdapter::new(stream)))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use common_recordbatch::{RecordBatch, RecordBatches};
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::vectors::Int32Vector;
|
||||
use futures_util::TryStreamExt;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_simple_table_scan() {
|
||||
let ctx = SessionContext::new();
|
||||
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
|
||||
"a",
|
||||
ConcreteDataType::int32_datatype(),
|
||||
false,
|
||||
)]));
|
||||
|
||||
let batch1 = RecordBatch::new(
|
||||
schema.clone(),
|
||||
vec![Arc::new(Int32Vector::from_slice([1, 2])) as _],
|
||||
)
|
||||
.unwrap();
|
||||
let batch2 = RecordBatch::new(
|
||||
schema.clone(),
|
||||
vec![Arc::new(Int32Vector::from_slice([3, 4, 5])) as _],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let recordbatches =
|
||||
RecordBatches::try_new(schema.clone(), vec![batch1.clone(), batch2.clone()]).unwrap();
|
||||
let stream = recordbatches.as_stream();
|
||||
|
||||
let scan = StreamScanAdapter::new(stream);
|
||||
|
||||
assert_eq!(scan.schema(), schema.arrow_schema().clone());
|
||||
|
||||
let stream = scan.execute(0, ctx.task_ctx()).unwrap();
|
||||
let recordbatches = stream.try_collect::<Vec<_>>().await.unwrap();
|
||||
assert_eq!(recordbatches[0], batch1.into_df_record_batch());
|
||||
assert_eq!(recordbatches[1], batch2.into_df_record_batch());
|
||||
|
||||
let result = scan.execute(0, ctx.task_ctx());
|
||||
assert!(result.is_err());
|
||||
match result {
|
||||
Err(e) => assert!(e.to_string().contains("Stream already exhausted")),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -33,7 +33,7 @@ use tracing_subscriber::{filter, EnvFilter, Registry};
|
||||
|
||||
use crate::tracing_sampler::{create_sampler, TracingSampleOptions};
|
||||
|
||||
const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317";
|
||||
pub const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317";
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
|
||||
@@ -14,8 +14,8 @@ codec = ["dep:serde", "dep:schemars"]
|
||||
const_format = "0.2"
|
||||
schemars = { workspace = true, optional = true }
|
||||
serde = { workspace = true, optional = true }
|
||||
shadow-rs = "0.29"
|
||||
shadow-rs.workspace = true
|
||||
|
||||
[build-dependencies]
|
||||
build-data = "0.2"
|
||||
shadow-rs = "0.29"
|
||||
shadow-rs.workspace = true
|
||||
|
||||
@@ -12,7 +12,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
use std::env;
|
||||
|
||||
use build_data::{format_timestamp, get_source_time};
|
||||
use shadow_rs::{CARGO_METADATA, CARGO_TREE};
|
||||
|
||||
fn main() -> shadow_rs::SdResult<()> {
|
||||
println!("cargo:rerun-if-changed=.git/refs/heads");
|
||||
@@ -25,5 +29,16 @@ fn main() -> shadow_rs::SdResult<()> {
|
||||
}
|
||||
);
|
||||
build_data::set_BUILD_TIMESTAMP();
|
||||
shadow_rs::new()
|
||||
|
||||
// The "CARGO_WORKSPACE_DIR" is set manually (not by Rust itself) in Cargo config file, to
|
||||
// solve the problem where the "CARGO_MANIFEST_DIR" is not what we want when this repo is
|
||||
// made as a submodule in another repo.
|
||||
let src_path = env::var("CARGO_WORKSPACE_DIR").or_else(|_| env::var("CARGO_MANIFEST_DIR"))?;
|
||||
let out_path = env::var("OUT_DIR")?;
|
||||
let _ = shadow_rs::Shadow::build_with(
|
||||
src_path,
|
||||
out_path,
|
||||
BTreeSet::from([CARGO_METADATA, CARGO_TREE]),
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -125,5 +125,14 @@ pub const fn version() -> &'static str {
|
||||
}
|
||||
|
||||
pub const fn short_version() -> &'static str {
|
||||
const_format::formatcp!("{}-{}", BUILD_INFO.branch, BUILD_INFO.commit_short,)
|
||||
const BRANCH: &str = BUILD_INFO.branch;
|
||||
const COMMIT_ID: &str = BUILD_INFO.commit_short;
|
||||
|
||||
// When git checkout to a commit, the branch is empty.
|
||||
#[allow(clippy::const_is_empty)]
|
||||
if !BRANCH.is_empty() {
|
||||
const_format::formatcp!("{}-{}", BRANCH, COMMIT_ID)
|
||||
} else {
|
||||
COMMIT_ID
|
||||
}
|
||||
}
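
In practice the change only affects what the short version string looks like; the values below are illustrative placeholders, not real build output.

    // Branch build:           short_version() == "main-abc1234"
    // Detached HEAD checkout: short_version() == "abc1234"
    println!("GreptimeDB {} ({})", version(), short_version());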
|
||||
|
||||
@@ -18,6 +18,9 @@ common-telemetry.workspace = true
futures-util.workspace = true
humantime-serde.workspace = true
rskafka.workspace = true
rustls = { version = "0.23", default-features = false, features = ["ring", "logging", "std", "tls12"] }
rustls-native-certs = "0.7"
rustls-pemfile = "2.1"
serde.workspace = true
serde_with.workspace = true
snafu.workspace = true
@@ -23,12 +23,14 @@ use crate::config::raft_engine::RaftEngineConfig;
/// Wal configurations for metasrv.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
#[serde(tag = "provider", rename_all = "snake_case")]
#[allow(clippy::large_enum_variant)]
pub enum MetasrvWalConfig {
#[default]
RaftEngine,
Kafka(MetasrvKafkaConfig),
}

#[allow(clippy::large_enum_variant)]
/// Wal configurations for datanode.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "provider", rename_all = "snake_case")]
@@ -48,7 +50,7 @@ impl From<DatanodeWalConfig> for MetasrvWalConfig {
match config {
DatanodeWalConfig::RaftEngine(_) => Self::RaftEngine,
DatanodeWalConfig::Kafka(config) => Self::Kafka(MetasrvKafkaConfig {
broker_endpoints: config.broker_endpoints,
connection: config.connection,
backoff: config.backoff,
kafka_topic: config.kafka_topic,
}),
@@ -61,7 +63,7 @@ impl From<MetasrvWalConfig> for DatanodeWalConfig {
match config {
MetasrvWalConfig::RaftEngine => Self::RaftEngine(RaftEngineConfig::default()),
MetasrvWalConfig::Kafka(config) => Self::Kafka(DatanodeKafkaConfig {
broker_endpoints: config.broker_endpoints,
connection: config.connection,
backoff: config.backoff,
kafka_topic: config.kafka_topic,
..Default::default()
@@ -75,6 +77,9 @@ mod tests {
use std::time::Duration;

use common_base::readable_size::ReadableSize;
use kafka::common::{
KafkaClientSasl, KafkaClientSaslConfig, KafkaClientTls, KafkaConnectionConfig,
};
use tests::kafka::common::KafkaTopicConfig;

use super::*;
@@ -144,12 +149,31 @@ mod tests {
replication_factor = 1
create_topic_timeout = "30s"
topic_name_prefix = "greptimedb_wal_topic"
[tls]
server_ca_cert_path = "/path/to/server.pem"
[sasl]
type = "SCRAM-SHA-512"
username = "hi"
password = "test"
"#;

// Deserialized to MetasrvWalConfig.
let metasrv_wal_config: MetasrvWalConfig = toml::from_str(toml_str).unwrap();
let expected = MetasrvKafkaConfig {
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
connection: KafkaConnectionConfig {
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
sasl: Some(KafkaClientSasl {
config: KafkaClientSaslConfig::ScramSha512 {
username: "hi".to_string(),
password: "test".to_string(),
},
}),
tls: Some(KafkaClientTls {
server_ca_cert_path: Some("/path/to/server.pem".to_string()),
client_cert_path: None,
client_key_path: None,
}),
},
backoff: BackoffConfig {
init: Duration::from_millis(500),
max: Duration::from_secs(10),
@@ -170,7 +194,20 @@
// Deserialized to DatanodeWalConfig.
let datanode_wal_config: DatanodeWalConfig = toml::from_str(toml_str).unwrap();
let expected = DatanodeKafkaConfig {
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
connection: KafkaConnectionConfig {
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
sasl: Some(KafkaClientSasl {
config: KafkaClientSaslConfig::ScramSha512 {
username: "hi".to_string(),
password: "test".to_string(),
},
}),
tls: Some(KafkaClientTls {
server_ca_cert_path: Some("/path/to/server.pem".to_string()),
client_cert_path: None,
client_key_path: None,
}),
},
max_batch_bytes: ReadableSize::mb(1),
consumer_wait_timeout: Duration::from_millis(100),
backoff: BackoffConfig {
@@ -187,6 +224,7 @@ mod tests {
replication_factor: 1,
create_topic_timeout: Duration::from_secs(30),
},
..Default::default()
};
assert_eq!(datanode_wal_config, DatanodeWalConfig::Kafka(expected));
}
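Note (added for illustration, not part of the diff): the two `From` impls above keep the metasrv and datanode views of the WAL provider in sync. A minimal sketch of exercising them, assuming the `toml_str` fixture from the test above:

    // Hedged sketch: converting between the two config views via the From impls shown above.
    let datanode_cfg: DatanodeWalConfig = toml::from_str(toml_str).unwrap();
    let metasrv_cfg = MetasrvWalConfig::from(datanode_cfg.clone());
    // Going back fills the datanode-only fields (max_batch_bytes, timeouts, ...) with defaults.
    let datanode_again = DatanodeWalConfig::from(metasrv_cfg);
    assert!(matches!(datanode_again, DatanodeWalConfig::Kafka(_)));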
@@ -12,16 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::io::Cursor;
use std::sync::Arc;
use std::time::Duration;

use rskafka::client::{Credentials, SaslConfig};
use rustls::{ClientConfig, RootCertStore};
use serde::{Deserialize, Serialize};
use serde_with::with_prefix;
use snafu::{OptionExt, ResultExt};

use crate::{TopicSelectorType, TOPIC_NAME_PREFIX};
use crate::error::{self, Result};
use crate::{TopicSelectorType, BROKER_ENDPOINT, TOPIC_NAME_PREFIX};

with_prefix!(pub backoff_prefix "backoff_");

/// Backoff configurations for kafka clients.
/// Backoff configurations for kafka client.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BackoffConfig {
@@ -49,6 +55,134 @@ impl Default for BackoffConfig {
}
}

/// The SASL configurations for kafka client.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct KafkaClientSasl {
#[serde(flatten)]
pub config: KafkaClientSaslConfig,
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type", rename_all = "SCREAMING-KEBAB-CASE")]
pub enum KafkaClientSaslConfig {
Plain {
username: String,
password: String,
},
#[serde(rename = "SCRAM-SHA-256")]
ScramSha256 {
username: String,
password: String,
},
#[serde(rename = "SCRAM-SHA-512")]
ScramSha512 {
username: String,
password: String,
},
}

impl KafkaClientSaslConfig {
/// Converts to [`SaslConfig`].
pub fn into_sasl_config(self) -> SaslConfig {
match self {
KafkaClientSaslConfig::Plain { username, password } => {
SaslConfig::Plain(Credentials::new(username, password))
}
KafkaClientSaslConfig::ScramSha256 { username, password } => {
SaslConfig::ScramSha256(Credentials::new(username, password))
}
KafkaClientSaslConfig::ScramSha512 { username, password } => {
SaslConfig::ScramSha512(Credentials::new(username, password))
}
}
}
}

/// The TLS configurations for kafka client.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct KafkaClientTls {
pub server_ca_cert_path: Option<String>,
pub client_cert_path: Option<String>,
pub client_key_path: Option<String>,
}

impl KafkaClientTls {
/// Builds the [`ClientConfig`].
pub async fn to_tls_config(&self) -> Result<Arc<ClientConfig>> {
let builder = ClientConfig::builder();
let mut roots = RootCertStore::empty();

if let Some(server_ca_cert_path) = &self.server_ca_cert_path {
let root_cert_bytes =
tokio::fs::read(&server_ca_cert_path)
.await
.context(error::ReadFileSnafu {
path: server_ca_cert_path,
})?;
let mut cursor = Cursor::new(root_cert_bytes);
for cert in rustls_pemfile::certs(&mut cursor)
.collect::<std::result::Result<Vec<_>, _>>()
.context(error::ReadCertsSnafu {
path: server_ca_cert_path,
})?
{
roots.add(cert).context(error::AddCertSnafu)?;
}
};
roots.add_parsable_certificates(
rustls_native_certs::load_native_certs().context(error::LoadSystemCertsSnafu)?,
);

let builder = builder.with_root_certificates(roots);
let config = if let (Some(cert_path), Some(key_path)) =
(&self.client_cert_path, &self.client_key_path)
{
let cert_bytes = tokio::fs::read(cert_path)
.await
.context(error::ReadFileSnafu { path: cert_path })?;
let client_certs = rustls_pemfile::certs(&mut Cursor::new(cert_bytes))
.collect::<std::result::Result<Vec<_>, _>>()
.context(error::ReadCertsSnafu { path: cert_path })?;
let key_bytes = tokio::fs::read(key_path)
.await
.context(error::ReadFileSnafu { path: key_path })?;
let client_key = rustls_pemfile::private_key(&mut Cursor::new(key_bytes))
.context(error::ReadKeySnafu { path: key_path })?
.context(error::KeyNotFoundSnafu { path: key_path })?;

builder
.with_client_auth_cert(client_certs, client_key)
.context(error::SetClientAuthCertSnafu)?
} else {
builder.with_no_client_auth()
};

Ok(Arc::new(config))
}
}

/// The connection configurations for kafka clients.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct KafkaConnectionConfig {
/// The broker endpoints of the Kafka cluster.
pub broker_endpoints: Vec<String>,
/// Client SASL.
pub sasl: Option<KafkaClientSasl>,
/// Client TLS config
pub tls: Option<KafkaClientTls>,
}

impl Default for KafkaConnectionConfig {
fn default() -> Self {
Self {
broker_endpoints: vec![BROKER_ENDPOINT.to_string()],
sasl: None,
tls: None,
}
}
}

/// Topic configurations for kafka clients.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
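Note (illustrative only): a minimal sketch of building the new connection settings in code, mirroring what the `[sasl]`/`[tls]` TOML in the tests above deserializes into; all field and type names are exactly those defined above.

    let connection = KafkaConnectionConfig {
        broker_endpoints: vec!["127.0.0.1:9092".to_string()],
        sasl: Some(KafkaClientSasl {
            config: KafkaClientSaslConfig::ScramSha512 {
                username: "hi".to_string(),
                password: "test".to_string(),
            },
        }),
        tls: Some(KafkaClientTls {
            server_ca_cert_path: Some("/path/to/server.pem".to_string()),
            client_cert_path: None,
            client_key_path: None,
        }),
    };
    // SASL settings translate directly to rskafka's SaslConfig:
    let _sasl = connection.sasl.clone().map(|s| s.config.into_sasl_config());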
@@ -17,15 +17,16 @@ use std::time::Duration;
use common_base::readable_size::ReadableSize;
use serde::{Deserialize, Serialize};

use super::common::KafkaConnectionConfig;
use crate::config::kafka::common::{backoff_prefix, BackoffConfig, KafkaTopicConfig};
use crate::BROKER_ENDPOINT;

/// Kafka wal configurations for datanode.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct DatanodeKafkaConfig {
/// The broker endpoints of the Kafka cluster.
pub broker_endpoints: Vec<String>,
/// The kafka connection config.
#[serde(flatten)]
pub connection: KafkaConnectionConfig,
/// TODO(weny): Remove the alias once we release v0.9.
/// The max size of a single producer batch.
#[serde(alias = "max_batch_size")]
@@ -39,17 +40,22 @@ pub struct DatanodeKafkaConfig {
/// The kafka topic config.
#[serde(flatten)]
pub kafka_topic: KafkaTopicConfig,
pub create_index: bool,
#[serde(with = "humantime_serde")]
pub dump_index_interval: Duration,
}

impl Default for DatanodeKafkaConfig {
fn default() -> Self {
Self {
broker_endpoints: vec![BROKER_ENDPOINT.to_string()],
connection: KafkaConnectionConfig::default(),
// Warning: Kafka has a default limit of 1MB per message in a topic.
max_batch_bytes: ReadableSize::mb(1),
consumer_wait_timeout: Duration::from_millis(100),
backoff: BackoffConfig::default(),
kafka_topic: KafkaTopicConfig::default(),
create_index: true,
dump_index_interval: Duration::from_secs(60),
}
}
}
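Note (illustrative only): the two new fields default as shown in the `Default` impl above; a quick sketch of what that means for callers:

    let cfg = DatanodeKafkaConfig::default();
    assert!(cfg.create_index); // WAL index dumping is on by default
    assert_eq!(cfg.dump_index_interval, Duration::from_secs(60));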
@@ -14,15 +14,16 @@

use serde::{Deserialize, Serialize};

use super::common::KafkaConnectionConfig;
use crate::config::kafka::common::{backoff_prefix, BackoffConfig, KafkaTopicConfig};
use crate::BROKER_ENDPOINT;

/// Kafka wal configurations for metasrv.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[serde(default)]
pub struct MetasrvKafkaConfig {
/// The broker endpoints of the Kafka cluster.
pub broker_endpoints: Vec<String>,
/// The kafka connection config.
#[serde(flatten)]
pub connection: KafkaConnectionConfig,
/// The backoff config.
#[serde(flatten, with = "backoff_prefix")]
pub backoff: BackoffConfig,
@@ -30,14 +31,3 @@ pub struct MetasrvKafkaConfig {
#[serde(flatten)]
pub kafka_topic: KafkaTopicConfig,
}

impl Default for MetasrvKafkaConfig {
fn default() -> Self {
let broker_endpoints = vec![BROKER_ENDPOINT.to_string()];
Self {
broker_endpoints,
backoff: BackoffConfig::default(),
kafka_topic: KafkaTopicConfig::default(),
}
}
}
@@ -1,72 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::config::kafka::common::{backoff_prefix, BackoffConfig};
|
||||
use crate::{TopicSelectorType, BROKER_ENDPOINT, TOPIC_NAME_PREFIX};
|
||||
|
||||
/// Kafka wal configurations for standalone.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(default)]
|
||||
pub struct StandaloneKafkaConfig {
|
||||
/// The broker endpoints of the Kafka cluster.
|
||||
pub broker_endpoints: Vec<String>,
|
||||
/// Number of topics to be created upon start.
|
||||
pub num_topics: usize,
|
||||
/// The type of the topic selector with which to select a topic for a region.
|
||||
pub selector_type: TopicSelectorType,
|
||||
/// Topic name prefix.
|
||||
pub topic_name_prefix: String,
|
||||
/// Number of partitions per topic.
|
||||
pub num_partitions: i32,
|
||||
/// The replication factor of each topic.
|
||||
pub replication_factor: i16,
|
||||
/// The timeout of topic creation.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub create_topic_timeout: Duration,
|
||||
/// TODO(weny): Remove the alias once we release v0.9.
|
||||
/// The max size of a single producer batch.
|
||||
#[serde(alias = "max_batch_size")]
|
||||
pub max_batch_bytes: ReadableSize,
|
||||
/// The consumer wait timeout.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub consumer_wait_timeout: Duration,
|
||||
/// The backoff config.
|
||||
#[serde(flatten, with = "backoff_prefix")]
|
||||
pub backoff: BackoffConfig,
|
||||
}
|
||||
|
||||
impl Default for StandaloneKafkaConfig {
|
||||
fn default() -> Self {
|
||||
let broker_endpoints = vec![BROKER_ENDPOINT.to_string()];
|
||||
let replication_factor = broker_endpoints.len() as i16;
|
||||
Self {
|
||||
broker_endpoints,
|
||||
num_topics: 64,
|
||||
selector_type: TopicSelectorType::RoundRobin,
|
||||
topic_name_prefix: TOPIC_NAME_PREFIX.to_string(),
|
||||
num_partitions: 1,
|
||||
replication_factor,
|
||||
create_topic_timeout: Duration::from_secs(30),
|
||||
// Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
max_batch_bytes: ReadableSize::mb(1),
|
||||
consumer_wait_timeout: Duration::from_millis(100),
|
||||
backoff: BackoffConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -13,7 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use common_macro::stack_trace_debug;
|
||||
use snafu::Snafu;
|
||||
use snafu::{Location, Snafu};
|
||||
|
||||
#[derive(Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
@@ -24,10 +24,74 @@ pub enum Error {
|
||||
broker_endpoint: String,
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to find ipv4 endpoint: {:?}", broker_endpoint))]
|
||||
EndpointIPV4NotFound { broker_endpoint: String },
|
||||
EndpointIPV4NotFound {
|
||||
broker_endpoint: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to read file, path: {}", path))]
|
||||
ReadFile {
|
||||
path: String,
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to add root cert"))]
|
||||
AddCert {
|
||||
#[snafu(source)]
|
||||
error: rustls::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to read cert, path: {}", path))]
|
||||
ReadCerts {
|
||||
path: String,
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to read key, path: {}", path))]
|
||||
ReadKey {
|
||||
path: String,
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse key, path: {}", path))]
|
||||
KeyNotFound {
|
||||
path: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to set client auth cert"))]
|
||||
SetClientAuthCert {
|
||||
#[snafu(source)]
|
||||
error: rustls::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to load ca certs from system"))]
|
||||
LoadSystemCerts {
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
@@ -61,6 +61,9 @@ async fn resolve_to_ipv4_one<T: AsRef<str>>(endpoint: T) -> Result<String> {
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use common_telemetry::warn;
|
||||
use rskafka::client::{Credentials, SaslConfig};
|
||||
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
|
||||
@@ -86,4 +89,44 @@ mod tests {
|
||||
let got = resolve_to_ipv4_one(host).await;
|
||||
assert_matches!(got.unwrap_err(), Error::ResolveEndpoint { .. });
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_sasl() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let Ok(broker_endpoints) = std::env::var("GT_KAFKA_SASL_ENDPOINTS") else {
|
||||
warn!("The endpoints is empty, skipping the test 'test_sasl'");
|
||||
return;
|
||||
};
|
||||
let broker_endpoints = broker_endpoints
|
||||
.split(',')
|
||||
.map(|s| s.trim().to_string())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let username = "user_kafka";
|
||||
let password = "secret";
|
||||
let _ = rskafka::client::ClientBuilder::new(broker_endpoints.clone())
|
||||
.sasl_config(SaslConfig::Plain(Credentials::new(
|
||||
username.to_string(),
|
||||
password.to_string(),
|
||||
)))
|
||||
.build()
|
||||
.await
|
||||
.unwrap();
|
||||
let _ = rskafka::client::ClientBuilder::new(broker_endpoints.clone())
|
||||
.sasl_config(SaslConfig::ScramSha256(Credentials::new(
|
||||
username.to_string(),
|
||||
password.to_string(),
|
||||
)))
|
||||
.build()
|
||||
.await
|
||||
.unwrap();
|
||||
let _ = rskafka::client::ClientBuilder::new(broker_endpoints)
|
||||
.sasl_config(SaslConfig::ScramSha512(Credentials::new(
|
||||
username.to_string(),
|
||||
password.to_string(),
|
||||
)))
|
||||
.build()
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -179,6 +179,8 @@ pub struct GcsConfig {
pub scope: String,
#[serde(skip_serializing)]
pub credential_path: SecretString,
#[serde(skip_serializing)]
pub credential: SecretString,
pub endpoint: String,
#[serde(flatten)]
pub cache: ObjectStorageCacheConfig,
@@ -190,6 +192,7 @@ impl PartialEq for GcsConfig {
&& self.bucket == other.bucket
&& self.scope == other.scope
&& self.credential_path.expose_secret() == other.credential_path.expose_secret()
&& self.credential.expose_secret() == other.credential.expose_secret()
&& self.endpoint == other.endpoint
&& self.cache == other.cache
}
@@ -243,6 +246,7 @@ impl Default for GcsConfig {
bucket: String::default(),
scope: String::default(),
credential_path: SecretString::from(String::default()),
credential: SecretString::from(String::default()),
endpoint: String::default(),
cache: ObjectStorageCacheConfig::default(),
}
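Note (illustrative only): `GcsConfig` now carries an inline `credential` next to `credential_path` (matching the new GT_GCS_CREDENTIAL test setting); both default to an empty secret. A small sketch, assuming `ExposeSecret` is in scope as in the `PartialEq` impl above:

    let cfg = GcsConfig::default();
    assert!(cfg.credential.expose_secret().is_empty());
    assert!(cfg.credential_path.expose_secret().is_empty());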
@@ -16,6 +16,7 @@
|
||||
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use catalog::memory::MemoryCatalogManager;
|
||||
use common_base::Plugins;
|
||||
@@ -32,6 +33,7 @@ use common_wal::config::DatanodeWalConfig;
|
||||
use file_engine::engine::FileRegionEngine;
|
||||
use futures_util::TryStreamExt;
|
||||
use log_store::kafka::log_store::KafkaLogStore;
|
||||
use log_store::kafka::{default_index_file, GlobalIndexCollector};
|
||||
use log_store::raft_engine::log_store::RaftEngineLogStore;
|
||||
use meta_client::MetaClientRef;
|
||||
use metric_engine::engine::MetricEngine;
|
||||
@@ -64,7 +66,7 @@ use crate::event_listener::{
|
||||
use crate::greptimedb_telemetry::get_greptimedb_telemetry_task;
|
||||
use crate::heartbeat::HeartbeatTask;
|
||||
use crate::region_server::{DummyTableProviderFactory, RegionServer};
|
||||
use crate::store;
|
||||
use crate::store::{self, new_object_store_without_cache};
|
||||
|
||||
/// Datanode service.
|
||||
pub struct Datanode {
|
||||
@@ -398,15 +400,37 @@ impl DatanodeBuilder {
|
||||
)
|
||||
.await
|
||||
.context(BuildMitoEngineSnafu)?,
|
||||
DatanodeWalConfig::Kafka(kafka_config) => MitoEngine::new(
|
||||
&opts.storage.data_home,
|
||||
config,
|
||||
Self::build_kafka_log_store(kafka_config).await?,
|
||||
object_store_manager,
|
||||
plugins,
|
||||
)
|
||||
.await
|
||||
.context(BuildMitoEngineSnafu)?,
|
||||
DatanodeWalConfig::Kafka(kafka_config) => {
|
||||
if kafka_config.create_index && opts.node_id.is_none() {
|
||||
warn!("The WAL index creation only available in distributed mode.")
|
||||
}
|
||||
let global_index_collector = if kafka_config.create_index && opts.node_id.is_some()
|
||||
{
|
||||
let operator = new_object_store_without_cache(
|
||||
&opts.storage.store,
|
||||
&opts.storage.data_home,
|
||||
)
|
||||
.await?;
|
||||
let path = default_index_file(opts.node_id.unwrap());
|
||||
Some(Self::build_global_index_collector(
|
||||
kafka_config.dump_index_interval,
|
||||
operator,
|
||||
path,
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
MitoEngine::new(
|
||||
&opts.storage.data_home,
|
||||
config,
|
||||
Self::build_kafka_log_store(kafka_config, global_index_collector).await?,
|
||||
object_store_manager,
|
||||
plugins,
|
||||
)
|
||||
.await
|
||||
.context(BuildMitoEngineSnafu)?
|
||||
}
|
||||
};
|
||||
Ok(mito_engine)
|
||||
}
|
||||
@@ -438,14 +462,26 @@ impl DatanodeBuilder {
|
||||
Ok(Arc::new(logstore))
|
||||
}
|
||||
|
||||
/// Builds [KafkaLogStore].
|
||||
async fn build_kafka_log_store(config: &DatanodeKafkaConfig) -> Result<Arc<KafkaLogStore>> {
|
||||
KafkaLogStore::try_new(config)
|
||||
/// Builds [`KafkaLogStore`].
|
||||
async fn build_kafka_log_store(
|
||||
config: &DatanodeKafkaConfig,
|
||||
global_index_collector: Option<GlobalIndexCollector>,
|
||||
) -> Result<Arc<KafkaLogStore>> {
|
||||
KafkaLogStore::try_new(config, global_index_collector)
|
||||
.await
|
||||
.map_err(Box::new)
|
||||
.context(OpenLogStoreSnafu)
|
||||
.map(Arc::new)
|
||||
}
|
||||
|
||||
/// Builds [`GlobalIndexCollector`]
|
||||
fn build_global_index_collector(
|
||||
dump_index_interval: Duration,
|
||||
operator: object_store::ObjectStore,
|
||||
path: String,
|
||||
) -> GlobalIndexCollector {
|
||||
GlobalIndexCollector::new(dump_index_interval, operator, path)
|
||||
}
|
||||
}
|
||||
|
||||
/// Open all regions belong to this datanode.
|
||||
|
||||
@@ -395,6 +395,20 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to setup plugin"))]
|
||||
SetupPlugin {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to start plugin"))]
|
||||
StartPlugin {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -442,9 +456,12 @@ impl ErrorExt for Error {
|
||||
|
||||
AsyncTaskExecute { source, .. } => source.status_code(),
|
||||
|
||||
CreateDir { .. } | RemoveDir { .. } | ShutdownInstance { .. } | DataFusion { .. } => {
|
||||
StatusCode::Internal
|
||||
}
|
||||
CreateDir { .. }
|
||||
| RemoveDir { .. }
|
||||
| ShutdownInstance { .. }
|
||||
| DataFusion { .. }
|
||||
| SetupPlugin { .. }
|
||||
| StartPlugin { .. } => StatusCode::Internal,
|
||||
|
||||
RegionNotFound { .. } => StatusCode::RegionNotFound,
|
||||
RegionNotReady { .. } => StatusCode::RegionNotReady,
|
||||
|
||||
@@ -29,18 +29,18 @@ use common_telemetry::{info, warn};
|
||||
use object_store::layers::{LruCacheLayer, RetryInterceptor, RetryLayer};
|
||||
use object_store::services::Fs;
|
||||
use object_store::util::{join_dir, normalize_dir, with_instrument_layers};
|
||||
use object_store::{Error, HttpClient, ObjectStore, ObjectStoreBuilder};
|
||||
use object_store::{Access, Error, HttpClient, ObjectStore, ObjectStoreBuilder};
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::config::{ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE};
|
||||
use crate::error::{self, Result};
|
||||
|
||||
pub(crate) async fn new_object_store(
|
||||
store: ObjectStoreConfig,
|
||||
pub(crate) async fn new_raw_object_store(
|
||||
store: &ObjectStoreConfig,
|
||||
data_home: &str,
|
||||
) -> Result<ObjectStore> {
|
||||
let data_home = normalize_dir(data_home);
|
||||
let object_store = match &store {
|
||||
let object_store = match store {
|
||||
ObjectStoreConfig::File(file_config) => {
|
||||
fs::new_fs_object_store(&data_home, file_config).await
|
||||
}
|
||||
@@ -51,27 +51,61 @@ pub(crate) async fn new_object_store(
|
||||
}
|
||||
ObjectStoreConfig::Gcs(gcs_config) => gcs::new_gcs_object_store(gcs_config).await,
|
||||
}?;
|
||||
Ok(object_store)
|
||||
}
|
||||
|
||||
fn with_retry_layers(object_store: ObjectStore) -> ObjectStore {
|
||||
object_store.layer(
|
||||
RetryLayer::new()
|
||||
.with_jitter()
|
||||
.with_notify(PrintDetailedError),
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) async fn new_object_store_without_cache(
|
||||
store: &ObjectStoreConfig,
|
||||
data_home: &str,
|
||||
) -> Result<ObjectStore> {
|
||||
let object_store = new_raw_object_store(store, data_home).await?;
|
||||
// Enable retry layer and cache layer for non-fs object storages
|
||||
let object_store = if !matches!(store, ObjectStoreConfig::File(..)) {
|
||||
let object_store = create_object_store_with_cache(object_store, &store).await?;
|
||||
object_store.layer(
|
||||
RetryLayer::new()
|
||||
.with_jitter()
|
||||
.with_notify(PrintDetailedError),
|
||||
)
|
||||
// Adds retry layer
|
||||
with_retry_layers(object_store)
|
||||
} else {
|
||||
object_store
|
||||
};
|
||||
|
||||
let store = with_instrument_layers(object_store, true);
|
||||
Ok(store)
|
||||
let object_store = with_instrument_layers(object_store, true);
|
||||
Ok(object_store)
|
||||
}
|
||||
|
||||
async fn create_object_store_with_cache(
|
||||
object_store: ObjectStore,
|
||||
store_config: &ObjectStoreConfig,
|
||||
pub(crate) async fn new_object_store(
|
||||
store: ObjectStoreConfig,
|
||||
data_home: &str,
|
||||
) -> Result<ObjectStore> {
|
||||
let object_store = new_raw_object_store(&store, data_home).await?;
|
||||
// Enable retry layer and cache layer for non-fs object storages
|
||||
let object_store = if !matches!(store, ObjectStoreConfig::File(..)) {
|
||||
let object_store = if let Some(cache_layer) = build_cache_layer(&store).await? {
|
||||
// Adds cache layer
|
||||
object_store.layer(cache_layer)
|
||||
} else {
|
||||
object_store
|
||||
};
|
||||
|
||||
// Adds retry layer
|
||||
with_retry_layers(object_store)
|
||||
} else {
|
||||
object_store
|
||||
};
|
||||
|
||||
let object_store = with_instrument_layers(object_store, true);
|
||||
Ok(object_store)
|
||||
}
|
||||
|
||||
async fn build_cache_layer(
|
||||
store_config: &ObjectStoreConfig,
|
||||
) -> Result<Option<LruCacheLayer<impl Access>>> {
|
||||
let (cache_path, cache_capacity) = match store_config {
|
||||
ObjectStoreConfig::S3(s3_config) => {
|
||||
let path = s3_config.cache.cache_path.as_ref();
|
||||
@@ -112,11 +146,11 @@ async fn create_object_store_with_cache(
|
||||
let atomic_temp_dir = join_dir(path, ".tmp/");
|
||||
clean_temp_dir(&atomic_temp_dir)?;
|
||||
|
||||
let cache_store = {
|
||||
let mut builder = Fs::default();
|
||||
builder.root(path).atomic_write_dir(&atomic_temp_dir);
|
||||
builder.build().context(error::InitBackendSnafu)?
|
||||
};
|
||||
let cache_store = Fs::default()
|
||||
.root(path)
|
||||
.atomic_write_dir(&atomic_temp_dir)
|
||||
.build()
|
||||
.context(error::InitBackendSnafu)?;
|
||||
|
||||
let cache_layer = LruCacheLayer::new(Arc::new(cache_store), cache_capacity.0 as usize)
|
||||
.await
|
||||
@@ -127,9 +161,9 @@ async fn create_object_store_with_cache(
path, cache_capacity
);

Ok(object_store.layer(cache_layer))
Ok(Some(cache_layer))
} else {
Ok(object_store)
Ok(None)
}
}

@@ -175,7 +209,6 @@ pub(crate) fn build_http_client() -> Result<HttpClient> {

HttpClient::build(http_builder).context(error::InitBackendSnafu)
}

struct PrintDetailedError;

// PrintDetailedError is a retry interceptor that prints error in Debug format in retrying.
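Note (illustrative only): after this refactor, the layering applied by `new_object_store` for non-fs backends reads roughly as below; this is a sketch of the flow in the code above, not a verbatim excerpt.

    let object_store = new_raw_object_store(&store, data_home).await?;
    let object_store = match build_cache_layer(&store).await? {
        Some(cache_layer) => object_store.layer(cache_layer), // optional LRU read cache
        None => object_store,
    };
    let object_store = with_retry_layers(object_store); // retry with jitter + PrintDetailedError
    let object_store = with_instrument_layers(object_store, true); // metrics/log layers last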
@@ -30,8 +30,7 @@ pub(crate) async fn new_azblob_object_store(azblob_config: &AzblobConfig) -> Res
|
||||
azblob_config.container, &root
|
||||
);
|
||||
|
||||
let mut builder = Azblob::default();
|
||||
let _ = builder
|
||||
let mut builder = Azblob::default()
|
||||
.root(&root)
|
||||
.container(&azblob_config.container)
|
||||
.endpoint(&azblob_config.endpoint)
|
||||
@@ -40,8 +39,8 @@ pub(crate) async fn new_azblob_object_store(azblob_config: &AzblobConfig) -> Res
|
||||
.http_client(build_http_client()?);
|
||||
|
||||
if let Some(token) = &azblob_config.sas_token {
|
||||
let _ = builder.sas_token(token);
|
||||
}
|
||||
builder = builder.sas_token(token);
|
||||
};
|
||||
|
||||
Ok(ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
|
||||
@@ -35,8 +35,9 @@ pub(crate) async fn new_fs_object_store(
|
||||
let atomic_write_dir = join_dir(data_home, ".tmp/");
|
||||
store::clean_temp_dir(&atomic_write_dir)?;
|
||||
|
||||
let mut builder = Fs::default();
|
||||
let _ = builder.root(data_home).atomic_write_dir(&atomic_write_dir);
|
||||
let builder = Fs::default()
|
||||
.root(data_home)
|
||||
.atomic_write_dir(&atomic_write_dir);
|
||||
|
||||
let object_store = ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
|
||||
@@ -29,12 +29,12 @@ pub(crate) async fn new_gcs_object_store(gcs_config: &GcsConfig) -> Result<Objec
|
||||
gcs_config.bucket, &root
|
||||
);
|
||||
|
||||
let mut builder = Gcs::default();
|
||||
builder
|
||||
let builder = Gcs::default()
|
||||
.root(&root)
|
||||
.bucket(&gcs_config.bucket)
|
||||
.scope(&gcs_config.scope)
|
||||
.credential_path(gcs_config.credential_path.expose_secret())
|
||||
.credential(gcs_config.credential.expose_secret())
|
||||
.endpoint(&gcs_config.endpoint)
|
||||
.http_client(build_http_client()?);
|
||||
|
||||
|
||||
@@ -29,8 +29,7 @@ pub(crate) async fn new_oss_object_store(oss_config: &OssConfig) -> Result<Objec
|
||||
oss_config.bucket, &root
|
||||
);
|
||||
|
||||
let mut builder = Oss::default();
|
||||
let _ = builder
|
||||
let builder = Oss::default()
|
||||
.root(&root)
|
||||
.bucket(&oss_config.bucket)
|
||||
.endpoint(&oss_config.endpoint)
|
||||
|
||||
@@ -30,8 +30,7 @@ pub(crate) async fn new_s3_object_store(s3_config: &S3Config) -> Result<ObjectSt
|
||||
s3_config.bucket, &root
|
||||
);
|
||||
|
||||
let mut builder = S3::default();
|
||||
let _ = builder
|
||||
let mut builder = S3::default()
|
||||
.root(&root)
|
||||
.bucket(&s3_config.bucket)
|
||||
.access_key_id(s3_config.access_key_id.expose_secret())
|
||||
@@ -39,11 +38,11 @@ pub(crate) async fn new_s3_object_store(s3_config: &S3Config) -> Result<ObjectSt
|
||||
.http_client(build_http_client()?);
|
||||
|
||||
if s3_config.endpoint.is_some() {
|
||||
let _ = builder.endpoint(s3_config.endpoint.as_ref().unwrap());
|
||||
}
|
||||
builder = builder.endpoint(s3_config.endpoint.as_ref().unwrap());
|
||||
};
|
||||
if s3_config.region.is_some() {
|
||||
let _ = builder.region(s3_config.region.as_ref().unwrap());
|
||||
}
|
||||
builder = builder.region(s3_config.region.as_ref().unwrap());
|
||||
};
|
||||
|
||||
Ok(ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
|
||||
@@ -111,6 +111,24 @@ macro_rules! define_duration_with_unit {
|
||||
val.0.value()
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Value> for Option<[<Duration $unit>]> {
|
||||
type Error = $crate::error::Error;
|
||||
|
||||
#[inline]
|
||||
fn try_from(from: Value) -> std::result::Result<Self, Self::Error> {
|
||||
match from {
|
||||
Value::Duration(v) if v.unit() == TimeUnit::$unit => {
|
||||
Ok(Some([<Duration $unit>](v)))
|
||||
},
|
||||
Value::Null => Ok(None),
|
||||
_ => $crate::error::TryFromValueSnafu {
|
||||
reason: format!("{:?} is not a {}", from, stringify!([<Duration $unit>])),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -106,6 +106,24 @@ macro_rules! define_interval_with_unit {
|
||||
val.0.[<to_ $native_ty>]()
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Value> for Option<[<Interval $unit>]> {
|
||||
type Error = $crate::error::Error;
|
||||
|
||||
#[inline]
|
||||
fn try_from(from: Value) -> std::result::Result<Self, Self::Error> {
|
||||
match from {
|
||||
Value::Interval(v) if v.unit() == common_time::interval::IntervalUnit::$unit => {
|
||||
Ok(Some([<Interval $unit>](v)))
|
||||
},
|
||||
Value::Null => Ok(None),
|
||||
_ => $crate::error::TryFromValueSnafu {
|
||||
reason: format!("{:?} is not a {}", from, stringify!([<Interval $unit>])),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -109,6 +109,24 @@ macro_rules! define_time_with_unit {
|
||||
val.0.value()
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Value> for Option<[<Time $unit>]> {
|
||||
type Error = $crate::error::Error;
|
||||
|
||||
#[inline]
|
||||
fn try_from(from: Value) -> std::result::Result<Self, Self::Error> {
|
||||
match from {
|
||||
Value::Time(v) if *v.unit() == TimeUnit::$unit => {
|
||||
Ok(Some([<Time $unit>](v)))
|
||||
},
|
||||
Value::Null => Ok(None),
|
||||
_ => $crate::error::TryFromValueSnafu {
|
||||
reason: format!("{:?} is not a {}", from, stringify!([<Time $unit>])),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -111,6 +111,24 @@ macro_rules! define_timestamp_with_unit {
val.0.value()
}
}

impl TryFrom<Value> for Option<[<Timestamp $unit>]> {
type Error = $crate::error::Error;

#[inline]
fn try_from(from: Value) -> std::result::Result<Self, Self::Error> {
match from {
Value::Timestamp(v) if v.unit() == TimeUnit::$unit => {
Ok(Some([<Timestamp $unit>](v)))
},
Value::Null => Ok(None),
_ => $crate::error::TryFromValueSnafu {
reason: format!("{:?} is not a {}", from, stringify!([<Timestamp $unit>])),
}
.fail(),
}
}
}
}
};
}
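Note (illustrative only): each of the four macros above now also emits a `TryFrom<Value>` impl for the `Option<...>` wrapper, so nullable values convert without a special case. A hypothetical use, taking the timestamp variant as an example:

    let none: Option<TimestampSecond> = Value::Null.try_into().unwrap(); // Null maps to None
    assert!(none.is_none());
    // A Value with a mismatched unit (or a non-timestamp Value) hits the TryFromValue error arm.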
@@ -27,6 +27,7 @@ use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, ConvertArrowArrayToScalarsSnafu, Result};
|
||||
use crate::prelude::DataType;
|
||||
use crate::scalars::{Scalar, ScalarVectorBuilder};
|
||||
use crate::value::{ListValue, ListValueRef, Value};
|
||||
use crate::vectors::{
|
||||
@@ -367,6 +368,16 @@ impl Helper {
})
}

/// Try to cast an vec of values into vector, fail if type is not the same across all values.
pub fn try_from_row_into_vector(row: &[Value], dt: &ConcreteDataType) -> Result<VectorRef> {
let mut builder = dt.create_mutable_vector(row.len());
for val in row {
builder.try_push_value_ref(val.as_value_ref())?;
}
let vector = builder.to_vector();
Ok(vector)
}

/// Try to cast slice of `arrays` to vectors.
pub fn try_into_vectors(arrays: &[ArrayRef]) -> Result<Vec<VectorRef>> {
arrays.iter().map(Self::try_into_vector).collect()
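Note (illustrative only): a minimal sketch of the new helper, assuming the usual `ConcreteDataType::int32_datatype()` constructor; the row must be homogeneous with the target type or `try_push_value_ref` errors out.

    let row = vec![Value::Int32(1), Value::Int32(2), Value::Int32(3)];
    let vector = Helper::try_from_row_into_vector(&row, &ConcreteDataType::int32_datatype()).unwrap();
    assert_eq!(vector.len(), 3);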
@@ -681,4 +692,48 @@ mod tests {
|
||||
assert_eq!(Value::Interval(Interval::from_i128(2000)), vector.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
fn check_try_from_row_to_vector(row: Vec<Value>, dt: &ConcreteDataType) {
|
||||
let vector = Helper::try_from_row_into_vector(&row, dt).unwrap();
|
||||
for (i, item) in row.iter().enumerate().take(vector.len()) {
|
||||
assert_eq!(*item, vector.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
fn check_into_and_from(array: impl Array + 'static) {
|
||||
let array: ArrayRef = Arc::new(array);
|
||||
let vector = Helper::try_into_vector(array.clone()).unwrap();
|
||||
assert_eq!(&array, &vector.to_arrow_array());
|
||||
let row: Vec<Value> = (0..array.len()).map(|i| vector.get(i)).collect();
|
||||
let dt = vector.data_type();
|
||||
check_try_from_row_to_vector(row, &dt);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_from_row_to_vector() {
|
||||
check_into_and_from(NullArray::new(2));
|
||||
check_into_and_from(BooleanArray::from(vec![true, false]));
|
||||
check_into_and_from(Int8Array::from(vec![1, 2, 3]));
|
||||
check_into_and_from(Int16Array::from(vec![1, 2, 3]));
|
||||
check_into_and_from(Int32Array::from(vec![1, 2, 3]));
|
||||
check_into_and_from(Int64Array::from(vec![1, 2, 3]));
|
||||
check_into_and_from(UInt8Array::from(vec![1, 2, 3]));
|
||||
check_into_and_from(UInt16Array::from(vec![1, 2, 3]));
|
||||
check_into_and_from(UInt32Array::from(vec![1, 2, 3]));
|
||||
check_into_and_from(UInt64Array::from(vec![1, 2, 3]));
|
||||
check_into_and_from(Float32Array::from(vec![1.0, 2.0, 3.0]));
|
||||
check_into_and_from(Float64Array::from(vec![1.0, 2.0, 3.0]));
|
||||
check_into_and_from(StringArray::from(vec!["hello", "world"]));
|
||||
check_into_and_from(Date32Array::from(vec![1, 2, 3]));
|
||||
check_into_and_from(Date64Array::from(vec![1, 2, 3]));
|
||||
|
||||
check_into_and_from(TimestampSecondArray::from(vec![1, 2, 3]));
|
||||
check_into_and_from(TimestampMillisecondArray::from(vec![1, 2, 3]));
|
||||
check_into_and_from(TimestampMicrosecondArray::from(vec![1, 2, 3]));
|
||||
check_into_and_from(TimestampNanosecondArray::from(vec![1, 2, 3]));
|
||||
check_into_and_from(Time32SecondArray::from(vec![1, 2, 3]));
|
||||
check_into_and_from(Time32MillisecondArray::from(vec![1, 2, 3]));
|
||||
check_into_and_from(Time64MicrosecondArray::from(vec![1, 2, 3]));
|
||||
check_into_and_from(Time64NanosecondArray::from(vec![1, 2, 3]));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -90,7 +90,8 @@ impl RegionEngine for FileRegionEngine {
|
||||
request: ScanRequest,
|
||||
) -> Result<RegionScannerRef, BoxedError> {
|
||||
let stream = self.handle_query(region_id, request).await?;
|
||||
let scanner = Box::new(SinglePartitionScanner::new(stream));
|
||||
// We don't support enabling append mode for file engine.
|
||||
let scanner = Box::new(SinglePartitionScanner::new(stream, false));
|
||||
Ok(scanner)
|
||||
}
|
||||
|
||||
|
||||
@@ -26,8 +26,7 @@ use store_api::metadata::ColumnMetadata;
|
||||
pub fn new_test_object_store(prefix: &str) -> (TempDir, ObjectStore) {
|
||||
let dir = create_temp_dir(prefix);
|
||||
let store_dir = dir.path().to_string_lossy();
|
||||
let mut builder = Fs::default();
|
||||
let _ = builder.root(&store_dir);
|
||||
let builder = Fs::default().root(&store_dir);
|
||||
(dir, ObjectStore::new(builder).unwrap().finish())
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ workspace = true
|
||||
|
||||
[dependencies]
|
||||
api.workspace = true
|
||||
arrow.workspace = true
|
||||
arrow-schema.workspace = true
|
||||
async-recursion = "1.0"
|
||||
async-trait.workspace = true
|
||||
@@ -44,12 +45,14 @@ greptime-proto.workspace = true
|
||||
# otherwise it is the same with upstream repo
|
||||
hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "main" }
|
||||
itertools.workspace = true
|
||||
lazy_static.workspace = true
|
||||
meta-client.workspace = true
|
||||
minstant = "0.1.7"
|
||||
nom = "7.1.3"
|
||||
num-traits = "0.2"
|
||||
operator.workspace = true
|
||||
partition.workspace = true
|
||||
prometheus.workspace = true
|
||||
prost.workspace = true
|
||||
query.workspace = true
|
||||
serde.workspace = true
|
||||
|
||||
@@ -51,6 +51,9 @@ use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
|
||||
use crate::compute::ErrCollector;
|
||||
use crate::error::{ExternalSnafu, InternalSnafu, TableNotFoundSnafu, UnexpectedSnafu};
|
||||
use crate::expr::GlobalId;
|
||||
use crate::metrics::{
|
||||
METRIC_FLOW_INPUT_BUF_SIZE, METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_RUN_INTERVAL_MS,
|
||||
};
|
||||
use crate::repr::{self, DiffRow, Row, BATCH_SIZE};
|
||||
use crate::transform::sql_to_flow_plan;
|
||||
|
||||
@@ -193,6 +196,15 @@ pub enum DiffRequest {
|
||||
Delete(Vec<(Row, repr::Timestamp)>),
|
||||
}
|
||||
|
||||
impl DiffRequest {
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
Self::Insert(v) => v.len(),
|
||||
Self::Delete(v) => v.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// iterate through the diff row and form continuous diff row with same diff type
|
||||
pub fn diff_row_to_request(rows: Vec<DiffRow>) -> Vec<DiffRequest> {
|
||||
let mut reqs = Vec::new();
|
||||
@@ -544,6 +556,7 @@ impl FlowWorkerManager {
|
||||
let new_wait = BATCH_SIZE * 1000 / avg_spd.max(1); //in ms
|
||||
let new_wait = Duration::from_millis(new_wait as u64).min(default_interval);
|
||||
trace!("Wait for {} ms, row_cnt={}", new_wait.as_millis(), row_cnt);
|
||||
METRIC_FLOW_RUN_INTERVAL_MS.set(new_wait.as_millis() as i64);
|
||||
since_last_run = tokio::time::Instant::now();
|
||||
tokio::time::sleep(new_wait).await;
|
||||
}
|
||||
@@ -575,7 +588,7 @@ impl FlowWorkerManager {
|
||||
}
|
||||
}
|
||||
// check row send and rows remain in send buf
|
||||
let (flush_res, buf_len) = if blocking {
|
||||
let (flush_res, _buf_len) = if blocking {
|
||||
let ctx = self.node_context.read().await;
|
||||
(ctx.flush_all_sender().await, ctx.get_send_buf_size().await)
|
||||
} else {
|
||||
@@ -585,16 +598,19 @@ impl FlowWorkerManager {
|
||||
}
|
||||
};
|
||||
match flush_res {
|
||||
Ok(r) => row_cnt += r,
|
||||
Ok(r) => {
|
||||
common_telemetry::trace!("Flushed {} rows", r);
|
||||
row_cnt += r;
|
||||
// send buf is likely to be somewhere empty now, wait
|
||||
if r < BATCH_SIZE / 2 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
common_telemetry::error!("Flush send buf errors: {:?}", err);
|
||||
break;
|
||||
}
|
||||
};
|
||||
// if not enough rows, break
|
||||
if buf_len < BATCH_SIZE {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(row_cnt)
|
||||
@@ -606,13 +622,17 @@ impl FlowWorkerManager {
|
||||
region_id: RegionId,
|
||||
rows: Vec<DiffRow>,
|
||||
) -> Result<(), Error> {
|
||||
debug!(
|
||||
"Handling write request for region_id={:?} with {} rows",
|
||||
region_id,
|
||||
rows.len()
|
||||
);
|
||||
let rows_len = rows.len();
|
||||
let table_id = region_id.table_id();
|
||||
METRIC_FLOW_INPUT_BUF_SIZE.add(rows_len as _);
|
||||
let _timer = METRIC_FLOW_INSERT_ELAPSED
|
||||
.with_label_values(&[table_id.to_string().as_str()])
|
||||
.start_timer();
|
||||
self.node_context.read().await.send(table_id, rows).await?;
|
||||
debug!(
|
||||
"Handling write request for table_id={} with {} rows",
|
||||
table_id, rows_len
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ use store_api::storage::RegionId;
|
||||
|
||||
use crate::adapter::FlowWorkerManager;
|
||||
use crate::error::InternalSnafu;
|
||||
use crate::metrics::METRIC_FLOW_TASK_COUNT;
|
||||
use crate::repr::{self, DiffRow};
|
||||
|
||||
fn to_meta_err(err: crate::error::Error) -> common_meta::error::Error {
|
||||
@@ -78,6 +79,7 @@ impl Flownode for FlowWorkerManager {
|
||||
)
|
||||
.await
|
||||
.map_err(to_meta_err)?;
|
||||
METRIC_FLOW_TASK_COUNT.inc();
|
||||
Ok(FlowResponse {
|
||||
affected_flows: ret
|
||||
.map(|id| greptime_proto::v1::FlowId { id: id as u32 })
|
||||
@@ -92,6 +94,7 @@ impl Flownode for FlowWorkerManager {
|
||||
self.remove_flow(flow_id.id as u64)
|
||||
.await
|
||||
.map_err(to_meta_err)?;
|
||||
METRIC_FLOW_TASK_COUNT.dec();
|
||||
Ok(Default::default())
|
||||
}
|
||||
Some(flow_request::Body::Flush(FlushFlow {
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
//! Node context, prone to change with every incoming requests
|
||||
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap};
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::debug;
|
||||
@@ -27,7 +28,8 @@ use crate::adapter::{FlowId, TableName, TableSource};
|
||||
use crate::error::{Error, EvalSnafu, TableNotFoundSnafu};
|
||||
use crate::expr::error::InternalSnafu;
|
||||
use crate::expr::GlobalId;
|
||||
use crate::repr::{DiffRow, RelationDesc, BROADCAST_CAP};
|
||||
use crate::metrics::METRIC_FLOW_INPUT_BUF_SIZE;
|
||||
use crate::repr::{DiffRow, RelationDesc, BROADCAST_CAP, SEND_BUF_CAP};
|
||||
|
||||
/// A context that holds the information of the dataflow
|
||||
#[derive(Default, Debug)]
|
||||
@@ -67,18 +69,20 @@ pub struct FlownodeContext {
|
||||
pub struct SourceSender {
|
||||
// TODO(discord9): make it all Vec<DiffRow>?
|
||||
sender: broadcast::Sender<DiffRow>,
|
||||
send_buf_tx: mpsc::UnboundedSender<Vec<DiffRow>>,
|
||||
send_buf_rx: RwLock<mpsc::UnboundedReceiver<Vec<DiffRow>>>,
|
||||
send_buf_tx: mpsc::Sender<Vec<DiffRow>>,
|
||||
send_buf_rx: RwLock<mpsc::Receiver<Vec<DiffRow>>>,
|
||||
send_buf_row_cnt: AtomicUsize,
|
||||
}
|
||||
|
||||
impl Default for SourceSender {
|
||||
fn default() -> Self {
|
||||
let (send_buf_tx, send_buf_rx) = mpsc::unbounded_channel();
|
||||
let (send_buf_tx, send_buf_rx) = mpsc::channel(SEND_BUF_CAP);
|
||||
Self {
|
||||
// TODO(discord9): found a better way then increase this to prevent lagging and hence missing input data
|
||||
sender: broadcast::Sender::new(BROADCAST_CAP * 2),
|
||||
send_buf_tx,
|
||||
send_buf_rx: RwLock::new(send_buf_rx),
|
||||
send_buf_row_cnt: AtomicUsize::new(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -94,15 +98,18 @@ impl SourceSender {
|
||||
/// until send buf is empty or broadchannel is full
|
||||
pub async fn try_flush(&self) -> Result<usize, Error> {
|
||||
let mut row_cnt = 0;
|
||||
let mut iterations = 0;
|
||||
while iterations < Self::MAX_ITERATIONS {
|
||||
loop {
|
||||
let mut send_buf = self.send_buf_rx.write().await;
|
||||
// if inner sender channel is empty or send buf is empty, there
|
||||
// is nothing to do for now, just break
|
||||
if self.sender.len() >= BROADCAST_CAP || send_buf.is_empty() {
|
||||
break;
|
||||
}
|
||||
// TODO(discord9): send rows instead so it's just moving a point
|
||||
if let Some(rows) = send_buf.recv().await {
|
||||
let len = rows.len();
|
||||
self.send_buf_row_cnt
|
||||
.fetch_sub(len, std::sync::atomic::Ordering::SeqCst);
|
||||
for row in rows {
|
||||
self.sender
|
||||
.send(row)
|
||||
@@ -116,10 +123,10 @@ impl SourceSender {
|
||||
row_cnt += 1;
|
||||
}
|
||||
}
|
||||
iterations += 1;
|
||||
}
|
||||
if row_cnt > 0 {
|
||||
debug!("Send {} rows", row_cnt);
|
||||
METRIC_FLOW_INPUT_BUF_SIZE.sub(row_cnt as _);
|
||||
debug!(
|
||||
"Remaining Send buf.len() = {}",
|
||||
self.send_buf_rx.read().await.len()
|
||||
@@ -131,13 +138,12 @@ impl SourceSender {
|
||||
|
||||
/// return number of rows it actual send(including what's in the buffer)
|
||||
pub async fn send_rows(&self, rows: Vec<DiffRow>) -> Result<usize, Error> {
|
||||
self.send_buf_tx.send(rows).map_err(|e| {
|
||||
self.send_buf_tx.send(rows).await.map_err(|e| {
|
||||
crate::error::InternalSnafu {
|
||||
reason: format!("Failed to send row, error = {:?}", e),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
|
||||
Ok(0)
|
||||
}
|
||||
}
|
||||
@@ -153,7 +159,8 @@ impl FlownodeContext {
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
name: table_id.to_string(),
|
||||
})?;
|
||||
// debug!("FlownodeContext::send: trying to send {} rows", rows.len());
|
||||
|
||||
debug!("FlownodeContext::send: trying to send {} rows", rows.len());
|
||||
sender.send_rows(rows).await
|
||||
}
|
||||
|
||||
@@ -169,6 +176,7 @@ impl FlownodeContext {
|
||||
}
|
||||
|
||||
/// Return the sum number of rows in all send buf
|
||||
/// TODO(discord9): remove this since we can't get correct row cnt anyway
|
||||
pub async fn get_send_buf_size(&self) -> usize {
|
||||
let mut sum = 0;
|
||||
for sender in self.source_sender.values() {
|
||||
|
||||
@@ -16,32 +16,21 @@
|
||||
//!
|
||||
//! And the [`Context`] is the environment for the render process, it contains all the necessary information for the render process
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
use std::ops::Range;
|
||||
use std::rc::Rc;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::value::{ListValue, Value};
|
||||
use hydroflow::futures::SinkExt;
|
||||
use hydroflow::lattices::cc_traits::Get;
|
||||
use hydroflow::scheduled::graph::Hydroflow;
|
||||
use hydroflow::scheduled::graph_ext::GraphExt;
|
||||
use hydroflow::scheduled::port::{PortCtx, SEND};
|
||||
use itertools::Itertools;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use super::state::Scheduler;
|
||||
use crate::compute::state::DataflowState;
|
||||
use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff};
|
||||
use crate::error::{Error, EvalSnafu, InvalidQuerySnafu, NotImplementedSnafu, PlanSnafu};
|
||||
use crate::expr::error::{DataTypeSnafu, InternalSnafu};
|
||||
use crate::expr::{
|
||||
self, EvalError, GlobalId, LocalId, MapFilterProject, MfpPlan, SafeMfpPlan, ScalarExpr,
|
||||
};
|
||||
use crate::plan::{AccumulablePlan, KeyValPlan, Plan, ReducePlan, TypedPlan};
|
||||
use crate::repr::{self, DiffRow, KeyValDiffRow, Row};
|
||||
use crate::utils::{ArrangeHandler, ArrangeReader, ArrangeWriter, Arrangement};
|
||||
use crate::compute::types::{Collection, CollectionBundle, ErrCollector, Toff};
|
||||
use crate::error::{Error, InvalidQuerySnafu, NotImplementedSnafu};
|
||||
use crate::expr::{self, GlobalId, LocalId};
|
||||
use crate::plan::{Plan, TypedPlan};
|
||||
use crate::repr::{self, DiffRow};
|
||||
|
||||
mod map;
|
||||
mod reduce;
|
||||
@@ -218,20 +207,17 @@ mod test {
|
||||
use std::cell::RefCell;
|
||||
use std::rc::Rc;
|
||||
|
||||
use common_time::DateTime;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use hydroflow::scheduled::graph::Hydroflow;
|
||||
use hydroflow::scheduled::graph_ext::GraphExt;
|
||||
use hydroflow::scheduled::handoff::VecHandoff;
|
||||
use pretty_assertions::{assert_eq, assert_ne};
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::*;
|
||||
use crate::expr::BinaryFunc;
|
||||
use crate::repr::Row;
|
||||
pub fn run_and_check(
|
||||
state: &mut DataflowState,
|
||||
df: &mut Hydroflow,
|
||||
time_range: Range<i64>,
|
||||
time_range: std::ops::Range<i64>,
|
||||
expected: BTreeMap<i64, Vec<DiffRow>>,
|
||||
output: Rc<RefCell<Vec<DiffRow>>>,
|
||||
) {
|
||||
|
||||
@@ -24,7 +24,7 @@ use crate::compute::state::Scheduler;
|
||||
use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff};
|
||||
use crate::error::{Error, PlanSnafu};
|
||||
use crate::expr::{EvalError, MapFilterProject, MfpPlan, ScalarExpr};
|
||||
use crate::plan::{Plan, TypedPlan};
|
||||
use crate::plan::TypedPlan;
|
||||
use crate::repr::{self, DiffRow, KeyValDiffRow, Row};
|
||||
use crate::utils::ArrangeHandler;
|
||||
|
||||
@@ -206,8 +206,6 @@ fn eval_mfp_core(
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::cell::RefCell;
|
||||
use std::rc::Rc;
|
||||
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use hydroflow::scheduled::graph::Hydroflow;
|
||||
@@ -216,6 +214,7 @@ mod test {
|
||||
use crate::compute::render::test::{get_output_handle, harness_test_ctx, run_and_check};
|
||||
use crate::compute::state::DataflowState;
|
||||
use crate::expr::{self, BinaryFunc, GlobalId};
|
||||
use crate::plan::Plan;
|
||||
use crate::repr::{ColumnType, RelationType};
|
||||
|
||||
/// test if temporal filter works properly
|
||||
|
||||
@@ -18,17 +18,15 @@ use std::ops::Range;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::value::{ListValue, Value};
|
||||
use hydroflow::scheduled::graph_ext::GraphExt;
|
||||
use hydroflow::scheduled::port::{PortCtx, SEND};
|
||||
use itertools::Itertools;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
use crate::compute::render::{Context, SubgraphArg};
|
||||
use crate::compute::state::Scheduler;
|
||||
use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff};
|
||||
use crate::error::{Error, PlanSnafu};
|
||||
use crate::expr::error::{DataAlreadyExpiredSnafu, DataTypeSnafu, InternalSnafu};
|
||||
use crate::expr::{AggregateExpr, EvalError, ScalarExpr};
|
||||
use crate::plan::{AccumulablePlan, AggrWithIndex, KeyValPlan, Plan, ReducePlan, TypedPlan};
|
||||
use crate::expr::{EvalError, ScalarExpr};
|
||||
use crate::plan::{AccumulablePlan, AggrWithIndex, KeyValPlan, ReducePlan, TypedPlan};
|
||||
use crate::repr::{self, DiffRow, KeyValDiffRow, RelationType, Row};
|
||||
use crate::utils::{ArrangeHandler, ArrangeReader, ArrangeWriter, KeyExpiryManager};
|
||||
|
||||
@@ -790,8 +788,6 @@ fn from_val_to_slice_idx(
|
||||
// TODO(discord9): add tests for accum ser/de
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::cell::RefCell;
|
||||
use std::rc::Rc;
|
||||
|
||||
use common_time::{DateTime, Interval, Timestamp};
|
||||
use datatypes::data_type::{ConcreteDataType, ConcreteDataType as CDT};
|
||||
@@ -800,7 +796,10 @@ mod test {
|
||||
use super::*;
|
||||
use crate::compute::render::test::{get_output_handle, harness_test_ctx, run_and_check};
|
||||
use crate::compute::state::DataflowState;
|
||||
use crate::expr::{self, AggregateFunc, BinaryFunc, GlobalId, MapFilterProject, UnaryFunc};
|
||||
use crate::expr::{
|
||||
self, AggregateExpr, AggregateFunc, BinaryFunc, GlobalId, MapFilterProject, UnaryFunc,
|
||||
};
|
||||
use crate::plan::Plan;
|
||||
use crate::repr::{ColumnType, RelationType};
|
||||
|
||||
/// SELECT sum(number) FROM numbers_with_ts GROUP BY tumble(ts, '1 second', '2021-07-01 00:00:00')
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
|
||||
use common_telemetry::{debug, info};
|
||||
use common_telemetry::debug;
|
||||
use hydroflow::scheduled::graph_ext::GraphExt;
|
||||
use itertools::Itertools;
|
||||
use snafu::OptionExt;
|
||||
@@ -27,7 +27,7 @@ use crate::compute::render::Context;
|
||||
use crate::compute::types::{Arranged, Collection, CollectionBundle, Toff};
|
||||
use crate::error::{Error, PlanSnafu};
|
||||
use crate::expr::error::InternalSnafu;
|
||||
use crate::expr::{EvalError, GlobalId};
|
||||
use crate::expr::EvalError;
|
||||
use crate::repr::{DiffRow, Row, BROADCAST_CAP};
|
||||
|
||||
#[allow(clippy::mutable_key_type)]
|
||||
|
||||
@@ -13,7 +13,7 @@
// limitations under the License.

use std::cell::RefCell;
use std::collections::{BTreeMap, BTreeSet, VecDeque};
use std::collections::{BTreeMap, VecDeque};
use std::rc::Rc;

use hydroflow::scheduled::graph::Hydroflow;

@@ -22,12 +22,11 @@ use hydroflow::scheduled::handoff::TeeingHandoff;
use hydroflow::scheduled::port::RecvPort;
use hydroflow::scheduled::SubgraphId;
use itertools::Itertools;
use tokio::sync::{Mutex, RwLock};
use tokio::sync::Mutex;

use crate::compute::render::Context;
use crate::expr::{EvalError, ScalarExpr};
use crate::repr::DiffRow;
use crate::utils::{ArrangeHandler, Arrangement};
use crate::utils::ArrangeHandler;

pub type Toff<T = DiffRow> = TeeingHandoff<T>;

@@ -83,6 +83,14 @@ pub enum Error {
        location: Location,
    },

    #[snafu(display("Failed to list flows in flownode={id:?}"))]
    ListFlows {
        id: Option<common_meta::FlownodeId>,
        source: common_meta::error::Error,
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display("Flow already exist, id={id}"))]
    FlowAlreadyExist {
        id: FlowId,
@@ -214,7 +222,8 @@ impl ErrorExt for Error {
            }
            Self::TableNotFound { .. }
            | Self::TableNotFoundMeta { .. }
            | Self::FlowNotFound { .. } => StatusCode::TableNotFound,
            | Self::FlowNotFound { .. }
            | Self::ListFlows { .. } => StatusCode::TableNotFound,
            Self::InvalidQueryProst { .. }
            | &Self::InvalidQuery { .. }
            | &Self::Plan { .. }

@@ -14,6 +14,7 @@

//! for declare Expression in dataflow, including map, reduce, id and join(TODO!) etc.

mod df_func;
pub(crate) mod error;
mod func;
mod id;
@@ -22,9 +23,92 @@ mod relation;
mod scalar;
mod signature;

pub(crate) use error::{EvalError, InvalidArgumentSnafu, OptimizeSnafu};
use datatypes::prelude::DataType;
use datatypes::vectors::VectorRef;
pub(crate) use df_func::{DfScalarFunction, RawDfScalarFn};
pub(crate) use error::{EvalError, InvalidArgumentSnafu};
pub(crate) use func::{BinaryFunc, UnaryFunc, UnmaterializableFunc, VariadicFunc};
pub(crate) use id::{GlobalId, Id, LocalId};
use itertools::Itertools;
pub(crate) use linear::{MapFilterProject, MfpPlan, SafeMfpPlan};
pub(crate) use relation::{AggregateExpr, AggregateFunc};
pub(crate) use scalar::{DfScalarFunction, RawDfScalarFn, ScalarExpr, TypedExpr};
pub(crate) use scalar::{ScalarExpr, TypedExpr};
use snafu::{ensure, ResultExt};

use crate::expr::error::DataTypeSnafu;

/// A batch of vectors with the same length but without schema, only useful in dataflow
pub struct Batch {
    batch: Vec<VectorRef>,
    row_count: usize,
}

impl Batch {
    pub fn new(batch: Vec<VectorRef>, row_count: usize) -> Self {
        Self { batch, row_count }
    }

    pub fn batch(&self) -> &[VectorRef] {
        &self.batch
    }

    pub fn row_count(&self) -> usize {
        self.row_count
    }

    /// Slices the `Batch`, returning a new `Batch`.
    ///
    /// # Panics
    /// This function panics if `offset + length > self.row_count()`.
    pub fn slice(&self, offset: usize, length: usize) -> Batch {
        let batch = self
            .batch()
            .iter()
            .map(|v| v.slice(offset, length))
            .collect_vec();
        Batch::new(batch, length)
    }

    /// append another batch to self
    pub fn append_batch(&mut self, other: Batch) -> Result<(), EvalError> {
        ensure!(
            self.batch.len() == other.batch.len(),
            InvalidArgumentSnafu {
                reason: format!(
                    "Expect two batch to have same numbers of column, found {} and {} columns",
                    self.batch.len(),
                    other.batch.len()
                )
            }
        );

        let batch_builders = self
            .batch
            .iter()
            .map(|v| {
                v.data_type()
                    .create_mutable_vector(self.row_count() + other.row_count())
            })
            .collect_vec();

        let mut result = vec![];
        let zelf_row_count = self.row_count();
        let other_row_count = other.row_count();
        for (idx, mut builder) in batch_builders.into_iter().enumerate() {
            builder
                .extend_slice_of(self.batch()[idx].as_ref(), 0, zelf_row_count)
                .context(DataTypeSnafu {
                    msg: "Failed to extend vector",
                })?;
            builder
                .extend_slice_of(other.batch()[idx].as_ref(), 0, other_row_count)
                .context(DataTypeSnafu {
                    msg: "Failed to extend vector",
                })?;
            result.push(builder.to_vector());
        }
        self.batch = result;
        self.row_count = zelf_row_count + other_row_count;
        Ok(())
    }
}

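The `Batch` added above is schema-less: it just pairs a set of equally sized column vectors with an explicit row count. The following is a minimal usage sketch, not part of this diff; it assumes it lives next to `Batch` in `src/flow/src/expr` (so `Batch` and `EvalError` are in scope) and that `datatypes::vectors::Int64Vector::from_slice` is available, as used elsewhere in the codebase.

use std::sync::Arc;

use datatypes::vectors::{Int64Vector, VectorRef};

// Illustrative sketch: build two single-column batches and append one onto the other.
fn batch_append_sketch() -> Result<(), EvalError> {
    let col_a: VectorRef = Arc::new(Int64Vector::from_slice([1, 2, 3]));
    let col_b: VectorRef = Arc::new(Int64Vector::from_slice([4, 5]));

    let mut first = Batch::new(vec![col_a], 3);
    let second = Batch::new(vec![col_b], 2);

    // Columns are rebuilt through mutable vector builders, so the row count grows to 5.
    first.append_batch(second)?;
    assert_eq!(first.row_count(), 5);

    // `slice` delegates to `Vector::slice` per column and panics when
    // `offset + length` exceeds `row_count`, as the doc comment above notes.
    let tail = first.slice(3, 2);
    assert_eq!(tail.row_count(), 2);
    Ok(())
}
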
293 src/flow/src/expr/df_func.rs Normal file
@@ -0,0 +1,293 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Porting Datafusion scalar function to our scalar function to be used in dataflow

use std::sync::Arc;

use arrow::array::RecordBatchOptions;
use bytes::BytesMut;
use common_error::ext::BoxedError;
use common_recordbatch::DfRecordBatch;
use common_telemetry::debug;
use datafusion_physical_expr::PhysicalExpr;
use datatypes::data_type::DataType;
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use prost::Message;
use snafu::{IntoError, ResultExt};
use substrait::error::{DecodeRelSnafu, EncodeRelSnafu};
use substrait::substrait_proto_df::proto::expression::ScalarFunction;

use crate::error::Error;
use crate::expr::error::{
    ArrowSnafu, DatafusionSnafu as EvalDatafusionSnafu, EvalError, ExternalSnafu,
    InvalidArgumentSnafu,
};
use crate::expr::{Batch, ScalarExpr};
use crate::repr::RelationDesc;
use crate::transform::{from_scalar_fn_to_df_fn_impl, FunctionExtensions};

/// A way to represent a scalar function that is implemented in Datafusion
#[derive(Debug, Clone)]
pub struct DfScalarFunction {
    /// The raw bytes encoded datafusion scalar function
    pub(crate) raw_fn: RawDfScalarFn,
    // TODO(discord9): directly from datafusion expr
    /// The implementation of the function
    pub(crate) fn_impl: Arc<dyn PhysicalExpr>,
    /// The input schema of the function
    pub(crate) df_schema: Arc<datafusion_common::DFSchema>,
}

impl DfScalarFunction {
    pub fn new(raw_fn: RawDfScalarFn, fn_impl: Arc<dyn PhysicalExpr>) -> Result<Self, Error> {
        Ok(Self {
            df_schema: Arc::new(raw_fn.input_schema.to_df_schema()?),
            raw_fn,
            fn_impl,
        })
    }

    pub async fn try_from_raw_fn(raw_fn: RawDfScalarFn) -> Result<Self, Error> {
        Ok(Self {
            fn_impl: raw_fn.get_fn_impl().await?,
            df_schema: Arc::new(raw_fn.input_schema.to_df_schema()?),
            raw_fn,
        })
    }

    /// Evaluate a batch of expressions using input values
    pub fn eval_batch(&self, batch: &Batch, exprs: &[ScalarExpr]) -> Result<VectorRef, EvalError> {
        let row_count = batch.row_count();
        let batch: Vec<_> = exprs
            .iter()
            .map(|expr| expr.eval_batch(batch))
            .collect::<Result<_, _>>()?;

        let schema = self.df_schema.inner().clone();

        let arrays = batch
            .iter()
            .map(|array| array.to_arrow_array())
            .collect::<Vec<_>>();
        let rb = DfRecordBatch::try_new_with_options(schema, arrays, &RecordBatchOptions::new().with_row_count(Some(row_count))).map_err(|err| {
            ArrowSnafu {
                context:
                    "Failed to create RecordBatch from values when eval_batch datafusion scalar function",
            }
            .into_error(err)
        })?;

        let len = rb.num_rows();

        let res = self.fn_impl.evaluate(&rb).map_err(|err| {
            EvalDatafusionSnafu {
                raw: err,
                context: "Failed to evaluate datafusion scalar function",
            }
            .build()
        })?;
        let res = common_query::columnar_value::ColumnarValue::try_from(&res)
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;
        let res_vec = res
            .try_into_vector(len)
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;

        Ok(res_vec)
    }

    /// eval a list of expressions using input values
    fn eval_args(values: &[Value], exprs: &[ScalarExpr]) -> Result<Vec<Value>, EvalError> {
        exprs
            .iter()
            .map(|expr| expr.eval(values))
            .collect::<Result<_, _>>()
    }

    // TODO(discord9): add RecordBatch support
    pub fn eval(&self, values: &[Value], exprs: &[ScalarExpr]) -> Result<Value, EvalError> {
        // first eval exprs to construct values to feed to datafusion
        let values: Vec<_> = Self::eval_args(values, exprs)?;
        if values.is_empty() {
            return InvalidArgumentSnafu {
                reason: "values is empty".to_string(),
            }
            .fail();
        }
        // TODO(discord9): make cols all array length of one
        let mut cols = vec![];
        for (idx, typ) in self
            .raw_fn
            .input_schema
            .typ()
            .column_types
            .iter()
            .enumerate()
        {
            let typ = typ.scalar_type();
            let mut array = typ.create_mutable_vector(1);
            array.push_value_ref(values[idx].as_value_ref());
            cols.push(array.to_vector().to_arrow_array());
        }
        let schema = self.df_schema.inner().clone();
        let rb = DfRecordBatch::try_new_with_options(
            schema,
            cols,
            &RecordBatchOptions::new().with_row_count(Some(1)),
        )
        .map_err(|err| {
            ArrowSnafu {
                context:
                    "Failed to create RecordBatch from values when eval datafusion scalar function",
            }
            .into_error(err)
        })?;

        let res = self.fn_impl.evaluate(&rb).map_err(|err| {
            EvalDatafusionSnafu {
                raw: err,
                context: "Failed to evaluate datafusion scalar function",
            }
            .build()
        })?;
        let res = common_query::columnar_value::ColumnarValue::try_from(&res)
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;
        let res_vec = res
            .try_into_vector(1)
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;
        let res_val = res_vec
            .try_get(0)
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;
        Ok(res_val)
    }
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RawDfScalarFn {
    /// The raw bytes encoded datafusion scalar function
    pub(crate) f: bytes::BytesMut,
    /// The input schema of the function
    pub(crate) input_schema: RelationDesc,
    /// Extension contains mapping from function reference to function name
    pub(crate) extensions: FunctionExtensions,
}

impl RawDfScalarFn {
    pub fn from_proto(
        f: &substrait::substrait_proto_df::proto::expression::ScalarFunction,
        input_schema: RelationDesc,
        extensions: FunctionExtensions,
    ) -> Result<Self, Error> {
        let mut buf = BytesMut::new();
        f.encode(&mut buf)
            .context(EncodeRelSnafu)
            .map_err(BoxedError::new)
            .context(crate::error::ExternalSnafu)?;
        Ok(Self {
            f: buf,
            input_schema,
            extensions,
        })
    }
    async fn get_fn_impl(&self) -> Result<Arc<dyn PhysicalExpr>, Error> {
        let f = ScalarFunction::decode(&mut self.f.as_ref())
            .context(DecodeRelSnafu)
            .map_err(BoxedError::new)
            .context(crate::error::ExternalSnafu)?;
        debug!("Decoded scalar function: {:?}", f);

        let input_schema = &self.input_schema;
        let extensions = &self.extensions;

        from_scalar_fn_to_df_fn_impl(&f, input_schema, extensions).await
    }
}

impl std::cmp::PartialEq for DfScalarFunction {
    fn eq(&self, other: &Self) -> bool {
        self.raw_fn.eq(&other.raw_fn)
    }
}

// can't derive Eq because of Arc<dyn PhysicalExpr> not eq, so implement it manually
impl std::cmp::Eq for DfScalarFunction {}

impl std::cmp::PartialOrd for DfScalarFunction {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl std::cmp::Ord for DfScalarFunction {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.raw_fn.cmp(&other.raw_fn)
    }
}
impl std::hash::Hash for DfScalarFunction {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.raw_fn.hash(state);
    }
}

#[cfg(test)]
mod test {

    use datatypes::prelude::ConcreteDataType;
    use substrait::substrait_proto_df::proto::expression::literal::LiteralType;
    use substrait::substrait_proto_df::proto::expression::{Literal, RexType};
    use substrait::substrait_proto_df::proto::function_argument::ArgType;
    use substrait::substrait_proto_df::proto::{Expression, FunctionArgument};

    use super::*;
    use crate::repr::{ColumnType, RelationType};

    #[tokio::test]
    async fn test_df_scalar_function() {
        let raw_scalar_func = ScalarFunction {
            function_reference: 0,
            arguments: vec![FunctionArgument {
                arg_type: Some(ArgType::Value(Expression {
                    rex_type: Some(RexType::Literal(Literal {
                        nullable: false,
                        type_variation_reference: 0,
                        literal_type: Some(LiteralType::I64(-1)),
                    })),
                })),
            }],
            output_type: None,
            ..Default::default()
        };
        let input_schema = RelationDesc::try_new(
            RelationType::new(vec![ColumnType::new_nullable(
                ConcreteDataType::null_datatype(),
            )]),
            vec!["null_column".to_string()],
        )
        .unwrap();
        let extensions = FunctionExtensions::from_iter(vec![(0, "abs")]);
        let raw_fn = RawDfScalarFn::from_proto(&raw_scalar_func, input_schema, extensions).unwrap();
        let df_func = DfScalarFunction::try_from_raw_fn(raw_fn).await.unwrap();
        assert_eq!(
            df_func
                .eval(&[Value::Null], &[ScalarExpr::Column(0)])
                .unwrap(),
            Value::Int64(1)
        );
    }
}
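For comparison with the row-by-row `eval` exercised in the test above, here is a hedged sketch of the columnar path through `eval_batch`. It is illustrative only and not part of this diff: `df_func` is assumed to be constructed exactly as in `test_df_scalar_function`, `datatypes::vectors::NullVector` is assumed available for the null-typed input column declared in `input_schema`, and the sketch is assumed to live in the same crate so `Batch`, `ScalarExpr`, and `EvalError` are in scope.

use std::sync::Arc;

use datatypes::vectors::{NullVector, VectorRef};

// Illustrative sketch: evaluate the encoded `abs(-1)` function over a 3-row batch.
fn eval_batch_sketch(df_func: &DfScalarFunction) -> Result<(), EvalError> {
    let rows = 3;
    // One column matching the single nullable null-typed column of `input_schema`.
    let col: VectorRef = Arc::new(NullVector::new(rows));
    let batch = Batch::new(vec![col], rows);

    // `ScalarExpr::Column(0)` forwards the input column; the `-1` literal argument is
    // already embedded in the encoded ScalarFunction, so the output should be a
    // constant column of `abs(-1)` with one value per input row.
    let out = df_func.eval_batch(&batch, &[ScalarExpr::Column(0)])?;
    assert_eq!(out.len(), rows);
    Ok(())
}
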
Some files were not shown because too many files have changed in this diff.