mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-25 15:40:02 +00:00
Compare commits
54 Commits
v0.10.0-ni
...
cache-logi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1b7ab2957b | ||
|
|
aaa9b32908 | ||
|
|
4bb1f4f184 | ||
|
|
0f907ef99e | ||
|
|
a61c0bd1d8 | ||
|
|
7dd0e3ab37 | ||
|
|
d168bde226 | ||
|
|
4b34f610aa | ||
|
|
695ff1e037 | ||
|
|
288fdc3145 | ||
|
|
a8ed3db0aa | ||
|
|
0dd11f53f5 | ||
|
|
19918928c5 | ||
|
|
5f0a83b2b1 | ||
|
|
71a66d15f7 | ||
|
|
2cdd103874 | ||
|
|
4dea4cac47 | ||
|
|
a283e13da7 | ||
|
|
47a3277d12 | ||
|
|
caf5f2c7a5 | ||
|
|
c1e8084af6 | ||
|
|
6e776d5f98 | ||
|
|
e39a9e6feb | ||
|
|
77af4fd981 | ||
|
|
cd55202136 | ||
|
|
50cb59587d | ||
|
|
0a82b12d08 | ||
|
|
d9f2f0ccf0 | ||
|
|
cedbbcf2b8 | ||
|
|
d6be44bc7f | ||
|
|
3a46c1b235 | ||
|
|
934bc13967 | ||
|
|
4045298cb2 | ||
|
|
cc4106cbd2 | ||
|
|
627a326273 | ||
|
|
0274e752ae | ||
|
|
cd4bf239d0 | ||
|
|
e3c0b5482f | ||
|
|
d1b252736d | ||
|
|
54f6e13d13 | ||
|
|
5c64f0ce09 | ||
|
|
2feddca1cb | ||
|
|
0f99218386 | ||
|
|
163cea81c2 | ||
|
|
0c9b8eb0d2 | ||
|
|
75c6fad1a3 | ||
|
|
e12ffbeb2f | ||
|
|
c4e52ebf91 | ||
|
|
f02410c39b | ||
|
|
f5cf25b0db | ||
|
|
1acda74c26 | ||
|
|
95787825f1 | ||
|
|
49004391d3 | ||
|
|
d0f5b2ad7d |
@@ -50,7 +50,7 @@ runs:
|
||||
BUILDX_MULTI_PLATFORM_BUILD=all \
|
||||
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
|
||||
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
|
||||
IMAGE_TAG=${{ inputs.version }}
|
||||
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
|
||||
|
||||
- name: Build and push dev-builder-centos image
|
||||
shell: bash
|
||||
@@ -61,7 +61,7 @@ runs:
|
||||
BUILDX_MULTI_PLATFORM_BUILD=amd64 \
|
||||
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
|
||||
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
|
||||
IMAGE_TAG=${{ inputs.version }}
|
||||
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
|
||||
|
||||
- name: Build and push dev-builder-android image # Only build image for amd64 platform.
|
||||
shell: bash
|
||||
@@ -71,6 +71,6 @@ runs:
|
||||
BASE_IMAGE=android \
|
||||
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
|
||||
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
|
||||
IMAGE_TAG=${{ inputs.version }} && \
|
||||
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }} && \
|
||||
|
||||
docker push ${{ inputs.dockerhub-image-registry }}/${{ inputs.dockerhub-image-namespace }}/dev-builder-android:${{ inputs.version }}
|
||||
|
||||
16
.github/workflows/develop.yml
vendored
16
.github/workflows/develop.yml
vendored
@@ -269,6 +269,13 @@ jobs:
|
||||
- name: Install cargo-gc-bin
|
||||
shell: bash
|
||||
run: cargo install cargo-gc-bin
|
||||
- name: Check aws-lc-sys will not build
|
||||
shell: bash
|
||||
run: |
|
||||
if cargo tree -i aws-lc-sys -e features | grep -q aws-lc-sys; then
|
||||
echo "Found aws-lc-sys, which has compilation problems on older gcc versions. Please replace it with ring until its building experience improves."
|
||||
exit 1
|
||||
fi
|
||||
- name: Build greptime bianry
|
||||
shell: bash
|
||||
# `cargo gc` will invoke `cargo build` with specified args
|
||||
@@ -435,6 +442,13 @@ jobs:
|
||||
minio: true
|
||||
kafka: true
|
||||
values: "with-remote-wal.yaml"
|
||||
include:
|
||||
- target: "fuzz_migrate_mito_regions"
|
||||
mode:
|
||||
name: "Local WAL"
|
||||
minio: true
|
||||
kafka: false
|
||||
values: "with-minio.yaml"
|
||||
steps:
|
||||
- name: Remove unused software
|
||||
run: |
|
||||
@@ -523,7 +537,7 @@ jobs:
|
||||
with:
|
||||
image-registry: localhost:5001
|
||||
values-filename: ${{ matrix.mode.values }}
|
||||
enable-region-failover: true
|
||||
enable-region-failover: ${{ matrix.mode.kafka }}
|
||||
- name: Port forward (mysql)
|
||||
run: |
|
||||
kubectl port-forward service/my-greptimedb-frontend 4002:4002 -n my-greptimedb&
|
||||
|
||||
2380
Cargo.lock
generated
2380
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
13
Cargo.toml
13
Cargo.toml
@@ -120,11 +120,11 @@ etcd-client = { version = "0.13" }
|
||||
fst = "0.4.7"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "973f49cde88a582fb65755cc572ebcf6fb93ccf7" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "0b4f7c8ab06399f6b90e1626e8d5b9697cb33bb9" }
|
||||
humantime = "2.1"
|
||||
humantime-serde = "1.1"
|
||||
itertools = "0.10"
|
||||
jsonb = { git = "https://github.com/CookiePieWw/jsonb.git", rev = "d0166c130fce903bf6c58643417a3173a6172d31", default-features = false }
|
||||
jsonb = { git = "https://github.com/datafuselabs/jsonb.git", rev = "46ad50fc71cf75afbf98eec455f7892a6387c1fc", default-features = false }
|
||||
lazy_static = "1.4"
|
||||
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd" }
|
||||
mockall = "0.11.4"
|
||||
@@ -245,6 +245,15 @@ store-api = { path = "src/store-api" }
|
||||
substrait = { path = "src/common/substrait" }
|
||||
table = { path = "src/table" }
|
||||
|
||||
[patch.crates-io]
|
||||
# change all rustls dependencies to use our fork to default to `ring` to make it "just work"
|
||||
hyper-rustls = { git = "https://github.com/GreptimeTeam/hyper-rustls" }
|
||||
rustls = { git = "https://github.com/GreptimeTeam/rustls" }
|
||||
tokio-rustls = { git = "https://github.com/GreptimeTeam/tokio-rustls" }
|
||||
# This is commented out because we are not using aws-lc-sys. If we ever need it, uncomment this line or use a release that includes this commit; otherwise it won't compile with gcc < 8.1.
|
||||
# see https://github.com/aws/aws-lc-rs/pull/526
|
||||
# aws-lc-sys = { git ="https://github.com/aws/aws-lc-rs", rev = "556558441e3494af4b156ae95ebc07ebc2fd38aa" }
|
||||
|
||||
[workspace.dependencies.meter-macros]
|
||||
git = "https://github.com/GreptimeTeam/greptime-meter.git"
|
||||
rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd"
|
||||
|
||||
2
Makefile
2
Makefile
@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
|
||||
IMAGE_REGISTRY ?= docker.io
|
||||
IMAGE_NAMESPACE ?= greptime
|
||||
IMAGE_TAG ?= latest
|
||||
DEV_BUILDER_IMAGE_TAG ?= 2024-06-06-b4b105ad-20240827021230
|
||||
DEV_BUILDER_IMAGE_TAG ?= 2024-06-06-5674c14f-20240920110415
|
||||
BUILDX_MULTI_PLATFORM_BUILD ?= false
|
||||
BUILDX_BUILDER_NAME ?= gtbuilder
|
||||
BASE_IMAGE ?= ubuntu
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
| `default_timezone` | String | Unset | The default timezone of the server. |
|
||||
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
|
||||
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
|
||||
| `max_concurrent_queries` | Integer | `0` | The maximum number of concurrent queries allowed to be executed. Zero means unlimited. |
|
||||
| `runtime` | -- | -- | The runtime options. |
|
||||
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
|
||||
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
|
||||
@@ -160,8 +161,13 @@
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
|
||||
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `logging.slow_query` | -- | -- | The slow query log options. |
|
||||
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
|
||||
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
|
||||
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
|
||||
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
|
||||
| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
|
||||
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
|
||||
@@ -246,8 +252,13 @@
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
|
||||
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `logging.slow_query` | -- | -- | The slow query log options. |
|
||||
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
|
||||
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
|
||||
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
|
||||
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
|
||||
| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
|
||||
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
|
||||
@@ -311,8 +322,13 @@
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
|
||||
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `logging.slow_query` | -- | -- | The slow query log options. |
|
||||
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
|
||||
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
|
||||
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
|
||||
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
|
||||
| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
|
||||
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
|
||||
@@ -335,6 +351,7 @@
|
||||
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
|
||||
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
|
||||
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
|
||||
| `max_concurrent_queries` | Integer | `0` | The maximum number of concurrent queries allowed to be executed. Zero means unlimited. |
|
||||
| `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. |
|
||||
| `rpc_hostname` | String | Unset | Deprecated, use `grpc.hostname` instead. |
|
||||
| `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. |
|
||||
@@ -462,8 +479,13 @@
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
|
||||
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `logging.slow_query` | -- | -- | The slow query log options. |
|
||||
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
|
||||
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
|
||||
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
|
||||
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
|
||||
| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
|
||||
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
|
||||
@@ -508,7 +530,12 @@
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
|
||||
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `logging.slow_query` | -- | -- | The slow query log options. |
|
||||
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
|
||||
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
|
||||
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
|
||||
| `tracing` | -- | -- | The tracing options. Only takes effect when compiled with the `tokio-console` feature. |
|
||||
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
|
||||
|
||||
@@ -19,6 +19,9 @@ enable_telemetry = true
|
||||
## Parallelism of initializing regions.
|
||||
init_regions_parallelism = 16
|
||||
|
||||
## The maximum number of concurrent queries allowed to be executed. Zero means unlimited.
|
||||
max_concurrent_queries = 0
|
||||
|
||||
## Deprecated, use `grpc.addr` instead.
|
||||
## @toml2docs:none-default
|
||||
rpc_addr = "127.0.0.1:3001"
|
||||
@@ -577,12 +580,28 @@ append_stdout = true
|
||||
## The log format. Can be `text`/`json`.
|
||||
log_format = "text"
|
||||
|
||||
## The maximum amount of log files.
|
||||
max_log_files = 720
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The slow query log options.
|
||||
[logging.slow_query]
|
||||
## Whether to enable slow query log.
|
||||
enable = false
|
||||
|
||||
## The threshold of slow query.
|
||||
## @toml2docs:none-default
|
||||
threshold = "10s"
|
||||
|
||||
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
|
||||
## @toml2docs:none-default
|
||||
sample_ratio = 1.0
|
||||
|
||||
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
@@ -78,12 +78,28 @@ append_stdout = true
|
||||
## The log format. Can be `text`/`json`.
|
||||
log_format = "text"
|
||||
|
||||
## The maximum amount of log files.
|
||||
max_log_files = 720
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The slow query log options.
|
||||
[logging.slow_query]
|
||||
## Whether to enable slow query log.
|
||||
enable = false
|
||||
|
||||
## The threshold of slow query.
|
||||
## @toml2docs:none-default
|
||||
threshold = "10s"
|
||||
|
||||
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
|
||||
## @toml2docs:none-default
|
||||
sample_ratio = 1.0
|
||||
|
||||
## The tracing options. Only takes effect when compiled with the `tokio-console` feature.
|
||||
[tracing]
|
||||
## The tokio console address.
|
||||
|
||||
@@ -185,12 +185,28 @@ append_stdout = true
|
||||
## The log format. Can be `text`/`json`.
|
||||
log_format = "text"
|
||||
|
||||
## The maximum amount of log files.
|
||||
max_log_files = 720
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The slow query log options.
|
||||
[logging.slow_query]
|
||||
## Whether to enable slow query log.
|
||||
enable = false
|
||||
|
||||
## The threshold of slow query.
|
||||
## @toml2docs:none-default
|
||||
threshold = "10s"
|
||||
|
||||
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
|
||||
## @toml2docs:none-default
|
||||
sample_ratio = 1.0
|
||||
|
||||
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
@@ -172,12 +172,28 @@ append_stdout = true
|
||||
## The log format. Can be `text`/`json`.
|
||||
log_format = "text"
|
||||
|
||||
## The maximum amount of log files.
|
||||
max_log_files = 720
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The slow query log options.
|
||||
[logging.slow_query]
|
||||
## Whether to enable slow query log.
|
||||
enable = false
|
||||
|
||||
## The threshold of slow query.
|
||||
## @toml2docs:none-default
|
||||
threshold = "10s"
|
||||
|
||||
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
|
||||
## @toml2docs:none-default
|
||||
sample_ratio = 1.0
|
||||
|
||||
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
@@ -15,6 +15,9 @@ init_regions_in_background = false
|
||||
## Parallelism of initializing regions.
|
||||
init_regions_parallelism = 16
|
||||
|
||||
## The maximum number of concurrent queries allowed to be executed. Zero means unlimited.
|
||||
max_concurrent_queries = 0
|
||||
|
||||
## The runtime options.
|
||||
#+ [runtime]
|
||||
## The number of threads to execute the runtime for global read operations.
|
||||
@@ -621,12 +624,28 @@ append_stdout = true
|
||||
## The log format. Can be `text`/`json`.
|
||||
log_format = "text"
|
||||
|
||||
## The maximum amount of log files.
|
||||
max_log_files = 720
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The slow query log options.
|
||||
[logging.slow_query]
|
||||
## Whether to enable slow query log.
|
||||
enable = false
|
||||
|
||||
## The threshold of slow query.
|
||||
## @toml2docs:none-default
|
||||
threshold = "10s"
|
||||
|
||||
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
|
||||
## @toml2docs:none-default
|
||||
sample_ratio = 1.0
|
||||
|
||||
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
50
docker/dev-builder/binstall/pull_binstall.sh
Executable file
50
docker/dev-builder/binstall/pull_binstall.sh
Executable file
@@ -0,0 +1,50 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -euxo pipefail
|
||||
|
||||
cd "$(mktemp -d)"
|
||||
# Pin the version to v1.6.6; this differs from the latest version used by the original install script at
|
||||
# https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh
|
||||
base_url="https://github.com/cargo-bins/cargo-binstall/releases/download/v1.6.6/cargo-binstall-"
|
||||
|
||||
os="$(uname -s)"
|
||||
if [ "$os" == "Darwin" ]; then
|
||||
url="${base_url}universal-apple-darwin.zip"
|
||||
curl -LO --proto '=https' --tlsv1.2 -sSf "$url"
|
||||
unzip cargo-binstall-universal-apple-darwin.zip
|
||||
elif [ "$os" == "Linux" ]; then
|
||||
machine="$(uname -m)"
|
||||
if [ "$machine" == "armv7l" ]; then
|
||||
machine="armv7"
|
||||
fi
|
||||
target="${machine}-unknown-linux-musl"
|
||||
if [ "$machine" == "armv7" ]; then
|
||||
target="${target}eabihf"
|
||||
fi
|
||||
|
||||
url="${base_url}${target}.tgz"
|
||||
curl -L --proto '=https' --tlsv1.2 -sSf "$url" | tar -xvzf -
|
||||
elif [ "${OS-}" = "Windows_NT" ]; then
|
||||
machine="$(uname -m)"
|
||||
target="${machine}-pc-windows-msvc"
|
||||
url="${base_url}${target}.zip"
|
||||
curl -LO --proto '=https' --tlsv1.2 -sSf "$url"
|
||||
unzip "cargo-binstall-${target}.zip"
|
||||
else
|
||||
echo "Unsupported OS ${os}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
./cargo-binstall -y --force cargo-binstall
|
||||
|
||||
CARGO_HOME="${CARGO_HOME:-$HOME/.cargo}"
|
||||
|
||||
if ! [[ ":$PATH:" == *":$CARGO_HOME/bin:"* ]]; then
|
||||
if [ -n "${CI:-}" ] && [ -n "${GITHUB_PATH:-}" ]; then
|
||||
echo "$CARGO_HOME/bin" >> "$GITHUB_PATH"
|
||||
else
|
||||
echo
|
||||
printf "\033[0;31mYour path is missing %s, you might want to add it.\033[0m\n" "$CARGO_HOME/bin"
|
||||
echo
|
||||
fi
|
||||
fi
|
||||
@@ -32,7 +32,9 @@ RUN rustup toolchain install ${RUST_TOOLCHAIN}
|
||||
|
||||
# Install cargo-binstall with a specific version to adapt the current rust toolchain.
|
||||
# Note: if we use the latest version, we may encounter the following `use of unstable library feature 'io_error_downcast'` error.
|
||||
RUN cargo install cargo-binstall --version 1.6.6 --locked
|
||||
# Compiling from source takes too long, so we use the precompiled binary instead.
|
||||
COPY $DOCKER_BUILD_ROOT/docker/dev-builder/binstall/pull_binstall.sh /usr/local/bin/pull_binstall.sh
|
||||
RUN chmod +x /usr/local/bin/pull_binstall.sh && /usr/local/bin/pull_binstall.sh
|
||||
|
||||
# Install nextest.
|
||||
RUN cargo binstall cargo-nextest --no-confirm
|
||||
|
||||
@@ -24,6 +24,15 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
python3.10 \
|
||||
python3.10-dev
|
||||
|
||||
# https://github.com/GreptimeTeam/greptimedb/actions/runs/10935485852/job/30357457188#step:3:7106
|
||||
# `aws-lc-sys` requires gcc >= 10.3.0 to work, so alias the default compilers to gcc-10
|
||||
RUN apt-get remove -y gcc-9 g++-9 cpp-9 && \
|
||||
apt-get install -y gcc-10 g++-10 cpp-10 make cmake && \
|
||||
ln -sf /usr/bin/gcc-10 /usr/bin/gcc && ln -sf /usr/bin/g++-10 /usr/bin/g++ && \
|
||||
ln -sf /usr/bin/gcc-10 /usr/bin/cc && \
|
||||
ln -sf /usr/bin/g++-10 /usr/bin/cpp && ln -sf /usr/bin/g++-10 /usr/bin/c++ && \
|
||||
cc --version && gcc --version && g++ --version && cpp --version && c++ --version
|
||||
|
||||
# Remove Python 3.8 and install pip.
|
||||
RUN apt-get -y purge python3.8 && \
|
||||
apt-get -y autoremove && \
|
||||
@@ -57,7 +66,9 @@ RUN rustup toolchain install ${RUST_TOOLCHAIN}
|
||||
|
||||
# Install cargo-binstall with a specific version to adapt the current rust toolchain.
|
||||
# Note: if we use the latest version, we may encounter the following `use of unstable library feature 'io_error_downcast'` error.
|
||||
RUN cargo install cargo-binstall --version 1.6.6 --locked
|
||||
# Compiling from source takes too long, so we use the precompiled binary instead.
|
||||
COPY $DOCKER_BUILD_ROOT/docker/dev-builder/binstall/pull_binstall.sh /usr/local/bin/pull_binstall.sh
|
||||
RUN chmod +x /usr/local/bin/pull_binstall.sh && /usr/local/bin/pull_binstall.sh
|
||||
|
||||
# Install nextest.
|
||||
RUN cargo binstall cargo-nextest --no-confirm
|
||||
|
||||
@@ -9,7 +9,7 @@ cargo build --features=pprof
|
||||
## HTTP API
|
||||
Sample at 99 Hertz, for 5 seconds, output report in [protobuf format](https://github.com/google/pprof/blob/master/proto/profile.proto).
|
||||
```bash
|
||||
curl -s '0:4000/v1/prof/cpu' > /tmp/pprof.out
|
||||
curl -s '0:4000/debug/prof/cpu' > /tmp/pprof.out
|
||||
```
|
||||
|
||||
Then you can use `pprof` command with the protobuf file.
|
||||
@@ -19,10 +19,10 @@ go tool pprof -top /tmp/pprof.out
|
||||
|
||||
Sample at 99 Hertz, for 60 seconds, output report in flamegraph format.
|
||||
```bash
|
||||
curl -s '0:4000/v1/prof/cpu?seconds=60&output=flamegraph' > /tmp/pprof.svg
|
||||
curl -s '0:4000/debug/prof/cpu?seconds=60&output=flamegraph' > /tmp/pprof.svg
|
||||
```
|
||||
|
||||
Sample at 49 Hertz, for 10 seconds, output report in text format.
|
||||
```bash
|
||||
curl -s '0:4000/v1/prof/cpu?seconds=10&frequency=49&output=text' > /tmp/pprof.txt
|
||||
curl -s '0:4000/debug/prof/cpu?seconds=10&frequency=49&output=text' > /tmp/pprof.txt
|
||||
```
|
||||
@@ -12,10 +12,10 @@ brew install jemalloc
|
||||
sudo apt install libjemalloc-dev
|
||||
```
|
||||
|
||||
### [flamegraph](https://github.com/brendangregg/FlameGraph)
|
||||
### [flamegraph](https://github.com/brendangregg/FlameGraph)
|
||||
|
||||
```bash
|
||||
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
|
||||
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
|
||||
```
|
||||
|
||||
### Build GreptimeDB with `mem-prof` feature.
|
||||
@@ -35,7 +35,7 @@ MALLOC_CONF=prof:true,lg_prof_interval:28 ./target/debug/greptime standalone sta
|
||||
Dump memory profiling data through HTTP API:
|
||||
|
||||
```bash
|
||||
curl localhost:4000/v1/prof/mem > greptime.hprof
|
||||
curl localhost:4000/debug/prof/mem > greptime.hprof
|
||||
```
|
||||
|
||||
You can periodically dump profiling data and compare them to find the delta memory usage.
|
||||
@@ -45,6 +45,9 @@ You can periodically dump profiling data and compare them to find the delta memo
|
||||
To create a flamegraph from the dumped profiling data:
|
||||
|
||||
```bash
|
||||
jeprof --svg <path_to_greptimedb_binary> --base=<baseline_prof> <profile_data> > output.svg
|
||||
```
|
||||
sudo apt install -y libjemalloc-dev
|
||||
|
||||
jeprof <path_to_greptime_binary> <profile_data> --collapse | ./flamegraph.pl > mem-prof.svg
|
||||
|
||||
jeprof <path_to_greptime_binary> --base <baseline_prof> <profile_data> --collapse | ./flamegraph.pl > output.svg
|
||||
```
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 36 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 25 KiB |
@@ -75,6 +75,16 @@ pub enum Password<'a> {
|
||||
PgMD5(HashedPassword<'a>, Salt<'a>),
|
||||
}
|
||||
|
||||
impl Password<'_> {
|
||||
pub fn r#type(&self) -> &str {
|
||||
match self {
|
||||
Password::PlainText(_) => "plain_text",
|
||||
Password::MysqlNativePassword(_, _) => "mysql_native_password",
|
||||
Password::PgMD5(_, _) => "pg_md5",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn auth_mysql(
|
||||
auth_data: HashedPassword,
|
||||
salt: Salt,
|
||||
|
||||
@@ -89,7 +89,7 @@ impl ErrorExt for Error {
|
||||
Error::FileWatch { .. } => StatusCode::InvalidArguments,
|
||||
Error::InternalState { .. } => StatusCode::Unexpected,
|
||||
Error::Io { .. } => StatusCode::StorageUnavailable,
|
||||
Error::AuthBackend { .. } => StatusCode::Internal,
|
||||
Error::AuthBackend { source, .. } => source.status_code(),
|
||||
|
||||
Error::UserNotFound { .. } => StatusCode::UserNotFound,
|
||||
Error::UnsupportedPasswordType { .. } => StatusCode::UnsupportedPasswordType,
|
||||
|
||||
@@ -57,6 +57,11 @@ pub trait UserProvider: Send + Sync {
|
||||
self.authorize(catalog, schema, &user_info).await?;
|
||||
Ok(user_info)
|
||||
}
|
||||
|
||||
/// Returns whether this user provider implementation is backed by an external system.
|
||||
fn external(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn load_credential_from_file(filepath: &str) -> Result<Option<HashMap<String, Vec<u8>>>> {
|
||||
|
||||
@@ -22,6 +22,7 @@ common-config.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-procedure.workspace = true
|
||||
common-query.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
common-runtime.workspace = true
|
||||
|
||||
@@ -50,13 +50,20 @@ pub enum Error {
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to list nodes in cluster: {source}"))]
|
||||
#[snafu(display("Failed to list nodes in cluster"))]
|
||||
ListNodes {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to region stats in cluster"))]
|
||||
ListRegionStats {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to list flows in catalog {catalog}"))]
|
||||
ListFlows {
|
||||
#[snafu(implicit)]
|
||||
@@ -82,6 +89,32 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to get information extension client"))]
|
||||
GetInformationExtension {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to list procedures"))]
|
||||
ListProcedures {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Procedure id not found"))]
|
||||
ProcedureIdNotFound {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("convert proto data error"))]
|
||||
ConvertProtoData {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to re-compile script due to internal error"))]
|
||||
CompileScriptInternal {
|
||||
#[snafu(implicit)]
|
||||
@@ -266,7 +299,9 @@ impl ErrorExt for Error {
|
||||
| Error::FindRegionRoutes { .. }
|
||||
| Error::CacheNotFound { .. }
|
||||
| Error::CastManager { .. }
|
||||
| Error::Json { .. } => StatusCode::Unexpected,
|
||||
| Error::Json { .. }
|
||||
| Error::GetInformationExtension { .. }
|
||||
| Error::ProcedureIdNotFound { .. } => StatusCode::Unexpected,
|
||||
|
||||
Error::ViewPlanColumnsChanged { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
@@ -283,7 +318,10 @@ impl ErrorExt for Error {
|
||||
| Error::ListNodes { source, .. }
|
||||
| Error::ListSchemas { source, .. }
|
||||
| Error::ListTables { source, .. }
|
||||
| Error::ListFlows { source, .. } => source.status_code(),
|
||||
| Error::ListFlows { source, .. }
|
||||
| Error::ListProcedures { source, .. }
|
||||
| Error::ListRegionStats { source, .. }
|
||||
| Error::ConvertProtoData { source, .. } => source.status_code(),
|
||||
|
||||
Error::CreateTable { source, .. } => source.status_code(),
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ use common_catalog::consts::{
|
||||
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID,
|
||||
PG_CATALOG_NAME,
|
||||
};
|
||||
use common_config::Mode;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cache::{LayeredCacheRegistryRef, ViewInfoCacheRef};
|
||||
use common_meta::key::catalog_name::CatalogNameKey;
|
||||
@@ -31,9 +30,9 @@ use common_meta::key::table_info::TableInfoValue;
|
||||
use common_meta::key::table_name::TableNameKey;
|
||||
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_procedure::ProcedureManagerRef;
|
||||
use futures_util::stream::BoxStream;
|
||||
use futures_util::{StreamExt, TryStreamExt};
|
||||
use meta_client::client::MetaClient;
|
||||
use moka::sync::Cache;
|
||||
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
|
||||
use session::context::{Channel, QueryContext};
|
||||
@@ -49,7 +48,7 @@ use crate::error::{
|
||||
CacheNotFoundSnafu, GetTableCacheSnafu, InvalidTableInfoInCatalogSnafu, ListCatalogsSnafu,
|
||||
ListSchemasSnafu, ListTablesSnafu, Result, TableMetadataManagerSnafu,
|
||||
};
|
||||
use crate::information_schema::InformationSchemaProvider;
|
||||
use crate::information_schema::{InformationExtensionRef, InformationSchemaProvider};
|
||||
use crate::kvbackend::TableCacheRef;
|
||||
use crate::system_schema::pg_catalog::PGCatalogProvider;
|
||||
use crate::system_schema::SystemSchemaProvider;
|
||||
@@ -62,27 +61,31 @@ use crate::CatalogManager;
|
||||
/// comes from `SystemCatalog`, which is static and read-only.
|
||||
#[derive(Clone)]
|
||||
pub struct KvBackendCatalogManager {
|
||||
mode: Mode,
|
||||
meta_client: Option<Arc<MetaClient>>,
|
||||
/// Provides the extension methods for the `information_schema` tables
|
||||
information_extension: InformationExtensionRef,
|
||||
/// Manages partition rules.
|
||||
partition_manager: PartitionRuleManagerRef,
|
||||
/// Manages table metadata.
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
/// A sub-CatalogManager that handles system tables
|
||||
system_catalog: SystemCatalog,
|
||||
/// Cache registry for all caches.
|
||||
cache_registry: LayeredCacheRegistryRef,
|
||||
/// Only available in `Standalone` mode.
|
||||
procedure_manager: Option<ProcedureManagerRef>,
|
||||
}
|
||||
|
||||
const CATALOG_CACHE_MAX_CAPACITY: u64 = 128;
|
||||
|
||||
impl KvBackendCatalogManager {
|
||||
pub fn new(
|
||||
mode: Mode,
|
||||
meta_client: Option<Arc<MetaClient>>,
|
||||
information_extension: InformationExtensionRef,
|
||||
backend: KvBackendRef,
|
||||
cache_registry: LayeredCacheRegistryRef,
|
||||
procedure_manager: Option<ProcedureManagerRef>,
|
||||
) -> Arc<Self> {
|
||||
Arc::new_cyclic(|me| Self {
|
||||
mode,
|
||||
meta_client,
|
||||
information_extension,
|
||||
partition_manager: Arc::new(PartitionRuleManager::new(
|
||||
backend.clone(),
|
||||
cache_registry
|
||||
@@ -106,23 +109,19 @@ impl KvBackendCatalogManager {
|
||||
backend,
|
||||
},
|
||||
cache_registry,
|
||||
procedure_manager,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the server running mode.
|
||||
pub fn running_mode(&self) -> &Mode {
|
||||
&self.mode
|
||||
}
|
||||
|
||||
pub fn view_info_cache(&self) -> Result<ViewInfoCacheRef> {
|
||||
self.cache_registry.get().context(CacheNotFoundSnafu {
|
||||
name: "view_info_cache",
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the `[MetaClient]`.
|
||||
pub fn meta_client(&self) -> Option<Arc<MetaClient>> {
|
||||
self.meta_client.clone()
|
||||
/// Returns the [`InformationExtension`].
|
||||
pub fn information_extension(&self) -> InformationExtensionRef {
|
||||
self.information_extension.clone()
|
||||
}
|
||||
|
||||
pub fn partition_manager(&self) -> PartitionRuleManagerRef {
|
||||
@@ -132,6 +131,10 @@ impl KvBackendCatalogManager {
|
||||
pub fn table_metadata_manager_ref(&self) -> &TableMetadataManagerRef {
|
||||
&self.table_metadata_manager
|
||||
}
|
||||
|
||||
pub fn procedure_manager(&self) -> Option<ProcedureManagerRef> {
|
||||
self.procedure_manager.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
|
||||
@@ -18,7 +18,9 @@ pub mod flows;
|
||||
mod information_memory_table;
|
||||
pub mod key_column_usage;
|
||||
mod partitions;
|
||||
mod procedure_info;
|
||||
mod region_peers;
|
||||
mod region_statistics;
|
||||
mod runtime_metrics;
|
||||
pub mod schemata;
|
||||
mod table_constraints;
|
||||
@@ -30,7 +32,11 @@ use std::collections::HashMap;
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, INFORMATION_SCHEMA_NAME};
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_meta::cluster::NodeInfo;
|
||||
use common_meta::datanode::RegionStat;
|
||||
use common_meta::key::flow::FlowMetadataManager;
|
||||
use common_procedure::ProcedureInfo;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use lazy_static::lazy_static;
|
||||
@@ -43,7 +49,7 @@ use views::InformationSchemaViews;
|
||||
|
||||
use self::columns::InformationSchemaColumns;
|
||||
use super::{SystemSchemaProviderInner, SystemTable, SystemTableRef};
|
||||
use crate::error::Result;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::system_schema::information_schema::cluster_info::InformationSchemaClusterInfo;
|
||||
use crate::system_schema::information_schema::flows::InformationSchemaFlows;
|
||||
use crate::system_schema::information_schema::information_memory_table::get_schema_columns;
|
||||
@@ -188,6 +194,16 @@ impl SystemSchemaProviderInner for InformationSchemaProvider {
|
||||
self.catalog_name.clone(),
|
||||
self.flow_metadata_manager.clone(),
|
||||
)) as _),
|
||||
PROCEDURE_INFO => Some(
|
||||
Arc::new(procedure_info::InformationSchemaProcedureInfo::new(
|
||||
self.catalog_manager.clone(),
|
||||
)) as _,
|
||||
),
|
||||
REGION_STATISTICS => Some(Arc::new(
|
||||
region_statistics::InformationSchemaRegionStatistics::new(
|
||||
self.catalog_manager.clone(),
|
||||
),
|
||||
) as _),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -235,6 +251,14 @@ impl InformationSchemaProvider {
|
||||
CLUSTER_INFO.to_string(),
|
||||
self.build_table(CLUSTER_INFO).unwrap(),
|
||||
);
|
||||
tables.insert(
|
||||
PROCEDURE_INFO.to_string(),
|
||||
self.build_table(PROCEDURE_INFO).unwrap(),
|
||||
);
|
||||
tables.insert(
|
||||
REGION_STATISTICS.to_string(),
|
||||
self.build_table(REGION_STATISTICS).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap());
|
||||
@@ -250,7 +274,6 @@ impl InformationSchemaProvider {
|
||||
self.build_table(TABLE_CONSTRAINTS).unwrap(),
|
||||
);
|
||||
tables.insert(FLOWS.to_string(), self.build_table(FLOWS).unwrap());
|
||||
|
||||
// Add memory tables
|
||||
for name in MEMORY_TABLES.iter() {
|
||||
tables.insert((*name).to_string(), self.build_table(name).expect(name));
|
||||
@@ -299,3 +322,39 @@ where
|
||||
InformationTable::to_stream(self, request)
|
||||
}
|
||||
}
|
||||
|
||||
/// Shared, `Send + Sync` handle to an [`InformationExtension`] whose associated error
/// type is the [`Error`] imported from `crate::error`.
pub type InformationExtensionRef = Arc<dyn InformationExtension<Error = Error> + Send + Sync>;
|
||||
|
||||
/// The `InformationExtension` trait provides the extension methods for the `information_schema` tables.
///
/// Each method is asynchronous and returns either the requested list or the
/// implementation's associated [`InformationExtension::Error`].
|
||||
#[async_trait::async_trait]
|
||||
pub trait InformationExtension {
|
||||
// Error type returned by every method of the implementation.
type Error: ErrorExt;
|
||||
|
||||
/// Gets the nodes information.
|
||||
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error>;
|
||||
|
||||
/// Gets the procedures information.
///
/// Each entry pairs a `String` with the [`ProcedureInfo`]; the `procedure_info`
/// table consumes that string as the procedure's status.
|
||||
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error>;
|
||||
|
||||
/// Gets the region statistics.
|
||||
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error>;
|
||||
}
|
||||
|
||||
pub struct NoopInformationExtension;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl InformationExtension for NoopInformationExtension {
|
||||
type Error = Error;
|
||||
|
||||
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
|
||||
Ok(vec![])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,13 +17,10 @@ use std::time::Duration;
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_CLUSTER_INFO_TABLE_ID;
|
||||
use common_config::Mode;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cluster::{ClusterInfo, NodeInfo, NodeStatus};
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::cluster::NodeInfo;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use common_telemetry::warn;
|
||||
use common_time::timestamp::Timestamp;
|
||||
use datafusion::execution::TaskContext;
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
@@ -40,7 +37,7 @@ use snafu::ResultExt;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::CLUSTER_INFO;
|
||||
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, ListNodesSnafu, Result};
|
||||
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates};
|
||||
use crate::system_schema::utils;
|
||||
use crate::CatalogManager;
|
||||
@@ -70,7 +67,6 @@ const INIT_CAPACITY: usize = 42;
|
||||
pub(super) struct InformationSchemaClusterInfo {
|
||||
schema: SchemaRef,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
start_time_ms: u64,
|
||||
}
|
||||
|
||||
impl InformationSchemaClusterInfo {
|
||||
@@ -78,7 +74,6 @@ impl InformationSchemaClusterInfo {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
catalog_manager,
|
||||
start_time_ms: common_time::util::current_time_millis() as u64,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,11 +95,7 @@ impl InformationSchemaClusterInfo {
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaClusterInfoBuilder {
|
||||
InformationSchemaClusterInfoBuilder::new(
|
||||
self.schema.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
self.start_time_ms,
|
||||
)
|
||||
InformationSchemaClusterInfoBuilder::new(self.schema.clone(), self.catalog_manager.clone())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -144,7 +135,6 @@ impl InformationTable for InformationSchemaClusterInfo {
|
||||
|
||||
struct InformationSchemaClusterInfoBuilder {
|
||||
schema: SchemaRef,
|
||||
start_time_ms: u64,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
|
||||
peer_ids: Int64VectorBuilder,
|
||||
@@ -158,11 +148,7 @@ struct InformationSchemaClusterInfoBuilder {
|
||||
}
|
||||
|
||||
impl InformationSchemaClusterInfoBuilder {
|
||||
fn new(
|
||||
schema: SchemaRef,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
start_time_ms: u64,
|
||||
) -> Self {
|
||||
fn new(schema: SchemaRef, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
catalog_manager,
|
||||
@@ -174,56 +160,17 @@ impl InformationSchemaClusterInfoBuilder {
|
||||
start_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
uptimes: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
active_times: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
start_time_ms,
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.cluster_info` virtual table
|
||||
async fn make_cluster_info(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
let mode = utils::running_mode(&self.catalog_manager)?.unwrap_or(Mode::Standalone);
|
||||
|
||||
match mode {
|
||||
Mode::Standalone => {
|
||||
let build_info = common_version::build_info();
|
||||
|
||||
self.add_node_info(
|
||||
&predicates,
|
||||
NodeInfo {
|
||||
// For the standalone:
|
||||
// - id always 0
|
||||
// - empty string for peer_addr
|
||||
peer: Peer {
|
||||
id: 0,
|
||||
addr: "".to_string(),
|
||||
},
|
||||
last_activity_ts: -1,
|
||||
status: NodeStatus::Standalone,
|
||||
version: build_info.version.to_string(),
|
||||
git_commit: build_info.commit_short.to_string(),
|
||||
// Use `self.start_time_ms` instead.
|
||||
// It's not precise but enough.
|
||||
start_time_ms: self.start_time_ms,
|
||||
},
|
||||
);
|
||||
}
|
||||
Mode::Distributed => {
|
||||
if let Some(meta_client) = utils::meta_client(&self.catalog_manager)? {
|
||||
let node_infos = meta_client
|
||||
.list_nodes(None)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ListNodesSnafu)?;
|
||||
|
||||
for node_info in node_infos {
|
||||
self.add_node_info(&predicates, node_info);
|
||||
}
|
||||
} else {
|
||||
warn!("Could not find meta client in distributed mode.");
|
||||
}
|
||||
}
|
||||
let information_extension = utils::information_extension(&self.catalog_manager)?;
|
||||
let node_infos = information_extension.nodes().await?;
|
||||
for node_info in node_infos {
|
||||
self.add_node_info(&predicates, node_info);
|
||||
}
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,241 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_procedure::ProcedureInfo;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use common_time::timestamp::Timestamp;
|
||||
use datafusion::execution::TaskContext;
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::timestamp::TimestampMillisecond;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{StringVectorBuilder, TimestampMillisecondVectorBuilder};
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::PROCEDURE_INFO;
|
||||
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates};
|
||||
use crate::system_schema::utils;
|
||||
use crate::CatalogManager;
|
||||
|
||||
const PROCEDURE_ID: &str = "procedure_id";
|
||||
const PROCEDURE_TYPE: &str = "procedure_type";
|
||||
const START_TIME: &str = "start_time";
|
||||
const END_TIME: &str = "end_time";
|
||||
const STATUS: &str = "status";
|
||||
const LOCK_KEYS: &str = "lock_keys";
|
||||
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
/// The `PROCEDURE_INFO` table provides information about the current procedure information of the cluster.
|
||||
///
|
||||
/// - `procedure_id`: the unique identifier of the procedure.
|
||||
/// - `procedure_name`: the name of the procedure.
|
||||
/// - `start_time`: the starting execution time of the procedure.
|
||||
/// - `end_time`: the ending execution time of the procedure.
|
||||
/// - `status`: the status of the procedure.
|
||||
/// - `lock_keys`: the lock keys of the procedure.
|
||||
///
|
||||
pub(super) struct InformationSchemaProcedureInfo {
|
||||
schema: SchemaRef,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
}
|
||||
|
||||
impl InformationSchemaProcedureInfo {
|
||||
pub(super) fn new(catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
catalog_manager,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(PROCEDURE_ID, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(PROCEDURE_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(
|
||||
START_TIME,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
END_TIME,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(STATUS, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(LOCK_KEYS, ConcreteDataType::string_datatype(), true),
|
||||
]))
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaProcedureInfoBuilder {
|
||||
InformationSchemaProcedureInfoBuilder::new(
|
||||
self.schema.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for InformationSchemaProcedureInfo {
|
||||
fn table_id(&self) -> TableId {
|
||||
INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
PROCEDURE_INFO
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_procedure_info(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
struct InformationSchemaProcedureInfoBuilder {
|
||||
schema: SchemaRef,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
|
||||
procedure_ids: StringVectorBuilder,
|
||||
procedure_types: StringVectorBuilder,
|
||||
start_times: TimestampMillisecondVectorBuilder,
|
||||
end_times: TimestampMillisecondVectorBuilder,
|
||||
statuses: StringVectorBuilder,
|
||||
lock_keys: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaProcedureInfoBuilder {
|
||||
fn new(schema: SchemaRef, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
catalog_manager,
|
||||
procedure_ids: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
procedure_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
start_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
end_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
statuses: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
lock_keys: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.procedure_info` virtual table
|
||||
async fn make_procedure_info(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
let information_extension = utils::information_extension(&self.catalog_manager)?;
|
||||
let procedures = information_extension.procedures().await?;
|
||||
for (status, procedure_info) in procedures {
|
||||
self.add_procedure(&predicates, status, procedure_info);
|
||||
}
|
||||
self.finish()
|
||||
}
|
||||
|
||||
fn add_procedure(
|
||||
&mut self,
|
||||
predicates: &Predicates,
|
||||
status: String,
|
||||
procedure_info: ProcedureInfo,
|
||||
) {
|
||||
let ProcedureInfo {
|
||||
id,
|
||||
type_name,
|
||||
start_time_ms,
|
||||
end_time_ms,
|
||||
lock_keys,
|
||||
..
|
||||
} = procedure_info;
|
||||
let pid = id.to_string();
|
||||
let start_time = TimestampMillisecond(Timestamp::new_millisecond(start_time_ms));
|
||||
let end_time = TimestampMillisecond(Timestamp::new_millisecond(end_time_ms));
|
||||
let lock_keys = lock_keys.join(",");
|
||||
|
||||
let row = [
|
||||
(PROCEDURE_ID, &Value::from(pid.clone())),
|
||||
(PROCEDURE_TYPE, &Value::from(type_name.clone())),
|
||||
(START_TIME, &Value::from(start_time)),
|
||||
(END_TIME, &Value::from(end_time)),
|
||||
(STATUS, &Value::from(status.clone())),
|
||||
(LOCK_KEYS, &Value::from(lock_keys.clone())),
|
||||
];
|
||||
if !predicates.eval(&row) {
|
||||
return;
|
||||
}
|
||||
self.procedure_ids.push(Some(&pid));
|
||||
self.procedure_types.push(Some(&type_name));
|
||||
self.start_times.push(Some(start_time));
|
||||
self.end_times.push(Some(end_time));
|
||||
self.statuses.push(Some(&status));
|
||||
self.lock_keys.push(Some(&lock_keys));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
Arc::new(self.procedure_ids.finish()),
|
||||
Arc::new(self.procedure_types.finish()),
|
||||
Arc::new(self.start_times.finish()),
|
||||
Arc::new(self.end_times.finish()),
|
||||
Arc::new(self.statuses.finish()),
|
||||
Arc::new(self.lock_keys.finish()),
|
||||
];
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl DfPartitionStream for InformationSchemaProcedureInfo {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_procedure_info(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
@@ -224,8 +224,8 @@ impl InformationSchemaRegionPeersBuilder {
|
||||
let region_id = RegionId::new(table_id, route.region.id.region_number()).as_u64();
|
||||
let peer_id = route.leader_peer.clone().map(|p| p.id);
|
||||
let peer_addr = route.leader_peer.clone().map(|p| p.addr);
|
||||
let status = if let Some(status) = route.leader_status {
|
||||
Some(status.as_ref().to_string())
|
||||
let state = if let Some(state) = route.leader_state {
|
||||
Some(state.as_ref().to_string())
|
||||
} else {
|
||||
// Alive by default
|
||||
Some("ALIVE".to_string())
|
||||
@@ -242,7 +242,7 @@ impl InformationSchemaRegionPeersBuilder {
|
||||
self.peer_ids.push(peer_id);
|
||||
self.peer_addrs.push(peer_addr.as_deref());
|
||||
self.is_leaders.push(Some("Yes"));
|
||||
self.statuses.push(status.as_deref());
|
||||
self.statuses.push(state.as_deref());
|
||||
self.down_seconds
|
||||
.push(route.leader_down_millis().map(|m| m / 1000));
|
||||
}
|
||||
|
||||
@@ -0,0 +1,237 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::datanode::RegionStat;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{DfSendableRecordBatchStream, RecordBatch, SendableRecordBatchStream};
|
||||
use datafusion::execution::TaskContext;
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder};
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::{InformationTable, REGION_STATISTICS};
|
||||
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
|
||||
use crate::information_schema::Predicates;
|
||||
use crate::system_schema::utils;
|
||||
use crate::CatalogManager;
|
||||
|
||||
const REGION_ID: &str = "region_id";
|
||||
const TABLE_ID: &str = "table_id";
|
||||
const REGION_NUMBER: &str = "region_number";
|
||||
const MEMTABLE_SIZE: &str = "memtable_size";
|
||||
const MANIFEST_SIZE: &str = "manifest_size";
|
||||
const SST_SIZE: &str = "sst_size";
|
||||
const ENGINE: &str = "engine";
|
||||
const REGION_ROLE: &str = "region_role";
|
||||
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
/// The `REGION_STATISTICS` table provides information about the region statistics. Including fields:
|
||||
///
|
||||
/// - `region_id`: The region id.
|
||||
/// - `table_id`: The table id.
|
||||
/// - `region_number`: The region number.
|
||||
/// - `memtable_size`: The memtable size in bytes.
|
||||
/// - `manifest_size`: The manifest size in bytes.
|
||||
/// - `sst_size`: The sst size in bytes.
|
||||
/// - `engine`: The engine type.
|
||||
/// - `region_role`: The region role.
|
||||
///
|
||||
pub(super) struct InformationSchemaRegionStatistics {
|
||||
schema: SchemaRef,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
}
|
||||
|
||||
impl InformationSchemaRegionStatistics {
|
||||
pub(super) fn new(catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
catalog_manager,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(REGION_ID, ConcreteDataType::uint64_datatype(), false),
|
||||
ColumnSchema::new(TABLE_ID, ConcreteDataType::uint32_datatype(), false),
|
||||
ColumnSchema::new(REGION_NUMBER, ConcreteDataType::uint32_datatype(), false),
|
||||
ColumnSchema::new(MEMTABLE_SIZE, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(MANIFEST_SIZE, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(SST_SIZE, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(REGION_ROLE, ConcreteDataType::string_datatype(), true),
|
||||
]))
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaRegionStatisticsBuilder {
|
||||
InformationSchemaRegionStatisticsBuilder::new(
|
||||
self.schema.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for InformationSchemaRegionStatistics {
|
||||
fn table_id(&self) -> TableId {
|
||||
INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
REGION_STATISTICS
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_region_statistics(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Accumulates the rows of the `region_statistics` table.
///
/// Holds the table schema, a weak handle to the catalog manager, and one vector
/// builder per column of the table.
struct InformationSchemaRegionStatisticsBuilder {
|
||||
schema: SchemaRef,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
|
||||
// Column builders, one per table column.
region_ids: UInt64VectorBuilder,
|
||||
table_ids: UInt32VectorBuilder,
|
||||
region_numbers: UInt32VectorBuilder,
|
||||
memtable_sizes: UInt64VectorBuilder,
|
||||
manifest_sizes: UInt64VectorBuilder,
|
||||
sst_sizes: UInt64VectorBuilder,
|
||||
engines: StringVectorBuilder,
|
||||
region_roles: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaRegionStatisticsBuilder {
|
||||
fn new(schema: SchemaRef, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
catalog_manager,
|
||||
region_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_ids: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
region_numbers: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
memtable_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
manifest_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
sst_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
engines: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
region_roles: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct a new `InformationSchemaRegionStatistics` from the collected data.
|
||||
async fn make_region_statistics(
|
||||
&mut self,
|
||||
request: Option<ScanRequest>,
|
||||
) -> Result<RecordBatch> {
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
let information_extension = utils::information_extension(&self.catalog_manager)?;
|
||||
let region_stats = information_extension.region_stats().await?;
|
||||
for region_stat in region_stats {
|
||||
self.add_region_statistic(&predicates, region_stat);
|
||||
}
|
||||
self.finish()
|
||||
}
|
||||
|
||||
fn add_region_statistic(&mut self, predicate: &Predicates, region_stat: RegionStat) {
|
||||
let row = [
|
||||
(REGION_ID, &Value::from(region_stat.id.as_u64())),
|
||||
(TABLE_ID, &Value::from(region_stat.id.table_id())),
|
||||
(REGION_NUMBER, &Value::from(region_stat.id.region_number())),
|
||||
(MEMTABLE_SIZE, &Value::from(region_stat.memtable_size)),
|
||||
(MANIFEST_SIZE, &Value::from(region_stat.manifest_size)),
|
||||
(SST_SIZE, &Value::from(region_stat.sst_size)),
|
||||
(ENGINE, &Value::from(region_stat.engine.as_str())),
|
||||
(REGION_ROLE, &Value::from(region_stat.role.to_string())),
|
||||
];
|
||||
|
||||
if !predicate.eval(&row) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.region_ids.push(Some(region_stat.id.as_u64()));
|
||||
self.table_ids.push(Some(region_stat.id.table_id()));
|
||||
self.region_numbers
|
||||
.push(Some(region_stat.id.region_number()));
|
||||
self.memtable_sizes.push(Some(region_stat.memtable_size));
|
||||
self.manifest_sizes.push(Some(region_stat.manifest_size));
|
||||
self.sst_sizes.push(Some(region_stat.sst_size));
|
||||
self.engines.push(Some(®ion_stat.engine));
|
||||
self.region_roles.push(Some(®ion_stat.role.to_string()));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
Arc::new(self.region_ids.finish()),
|
||||
Arc::new(self.table_ids.finish()),
|
||||
Arc::new(self.region_numbers.finish()),
|
||||
Arc::new(self.memtable_sizes.finish()),
|
||||
Arc::new(self.manifest_sizes.finish()),
|
||||
Arc::new(self.sst_sizes.finish()),
|
||||
Arc::new(self.engines.finish()),
|
||||
Arc::new(self.region_roles.finish()),
|
||||
];
|
||||
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl DfPartitionStream for InformationSchemaRegionStatistics {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_region_statistics(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
@@ -45,3 +45,5 @@ pub const TABLE_CONSTRAINTS: &str = "table_constraints";
|
||||
pub const CLUSTER_INFO: &str = "cluster_info";
|
||||
pub const VIEWS: &str = "views";
|
||||
pub const FLOWS: &str = "flows";
|
||||
pub const PROCEDURE_INFO: &str = "procedure_info";
|
||||
pub const REGION_STATISTICS: &str = "region_statistics";
|
||||
|
||||
@@ -12,47 +12,33 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod tables;
|
||||
use std::sync::Weak;
|
||||
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use common_config::Mode;
|
||||
use common_meta::key::TableMetadataManagerRef;
|
||||
use meta_client::client::MetaClient;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::error::{Result, UpgradeWeakCatalogManagerRefSnafu};
|
||||
use crate::error::{GetInformationExtensionSnafu, Result, UpgradeWeakCatalogManagerRefSnafu};
|
||||
use crate::information_schema::InformationExtensionRef;
|
||||
use crate::kvbackend::KvBackendCatalogManager;
|
||||
use crate::CatalogManager;
|
||||
|
||||
/// Try to get the server running mode from `[CatalogManager]` weak reference.
|
||||
pub fn running_mode(catalog_manager: &Weak<dyn CatalogManager>) -> Result<Option<Mode>> {
|
||||
pub mod tables;
|
||||
|
||||
/// Try to get the `[InformationExtension]` from `[CatalogManager]` weak reference.
|
||||
pub fn information_extension(
|
||||
catalog_manager: &Weak<dyn CatalogManager>,
|
||||
) -> Result<InformationExtensionRef> {
|
||||
let catalog_manager = catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
|
||||
Ok(catalog_manager
|
||||
let information_extension = catalog_manager
|
||||
.as_any()
|
||||
.downcast_ref::<KvBackendCatalogManager>()
|
||||
.map(|manager| manager.running_mode())
|
||||
.copied())
|
||||
}
|
||||
.map(|manager| manager.information_extension())
|
||||
.context(GetInformationExtensionSnafu)?;
|
||||
|
||||
/// Try to get the `[MetaClient]` from `[CatalogManager]` weak reference.
|
||||
pub fn meta_client(catalog_manager: &Weak<dyn CatalogManager>) -> Result<Option<Arc<MetaClient>>> {
|
||||
let catalog_manager = catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
|
||||
let meta_client = match catalog_manager
|
||||
.as_any()
|
||||
.downcast_ref::<KvBackendCatalogManager>()
|
||||
{
|
||||
None => None,
|
||||
Some(manager) => manager.meta_client(),
|
||||
};
|
||||
|
||||
Ok(meta_client)
|
||||
Ok(information_extension)
|
||||
}
|
||||
|
||||
/// Try to get the `[TableMetadataManagerRef]` from `[CatalogManager]` weak reference.
|
||||
|
||||
@@ -259,7 +259,6 @@ mod tests {
|
||||
|
||||
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
|
||||
use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
|
||||
use common_config::Mode;
|
||||
use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
|
||||
use common_meta::key::TableMetadataManager;
|
||||
use common_meta::kv_backend::memory::MemoryKvBackend;
|
||||
@@ -269,6 +268,8 @@ mod tests {
|
||||
use datafusion::logical_expr::builder::LogicalTableSource;
|
||||
use datafusion::logical_expr::{col, lit, LogicalPlan, LogicalPlanBuilder};
|
||||
|
||||
use crate::information_schema::NoopInformationExtension;
|
||||
|
||||
struct MockDecoder;
|
||||
impl MockDecoder {
|
||||
pub fn arc() -> Arc<Self> {
|
||||
@@ -323,10 +324,10 @@ mod tests {
|
||||
);
|
||||
|
||||
let catalog_manager = KvBackendCatalogManager::new(
|
||||
Mode::Standalone,
|
||||
None,
|
||||
Arc::new(NoopInformationExtension),
|
||||
backend.clone(),
|
||||
layered_cache_registry,
|
||||
None,
|
||||
);
|
||||
let table_metadata_manager = TableMetadataManager::new(backend);
|
||||
let mut view_info = common_meta::key::test_utils::new_test_table_info(1024, vec![]);
|
||||
|
||||
@@ -158,7 +158,7 @@ fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
|
||||
addr: String::new(),
|
||||
}),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -35,7 +35,6 @@ use either::Either;
|
||||
use meta_client::client::MetaClientBuilder;
|
||||
use query::datafusion::DatafusionQueryEngine;
|
||||
use query::parser::QueryLanguageParser;
|
||||
use query::plan::LogicalPlan;
|
||||
use query::query_engine::{DefaultSerializer, QueryEngineState};
|
||||
use query::QueryEngine;
|
||||
use rustyline::error::ReadlineError;
|
||||
@@ -47,12 +46,12 @@ use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
|
||||
use crate::cli::cmd::ReplCommand;
|
||||
use crate::cli::helper::RustylineHelper;
|
||||
use crate::cli::AttachCommand;
|
||||
use crate::error;
|
||||
use crate::error::{
|
||||
CollectRecordBatchesSnafu, ParseSqlSnafu, PlanStatementSnafu, PrettyPrintRecordBatchesSnafu,
|
||||
ReadlineSnafu, ReplCreationSnafu, RequestDatabaseSnafu, Result, StartMetaClientSnafu,
|
||||
SubstraitEncodeLogicalPlanSnafu,
|
||||
};
|
||||
use crate::{error, DistributedInformationExtension};
|
||||
|
||||
/// Captures the state of the repl, gathers commands and executes them one by one
|
||||
pub struct Repl {
|
||||
@@ -179,7 +178,7 @@ impl Repl {
|
||||
.await
|
||||
.context(PlanStatementSnafu)?;
|
||||
|
||||
let LogicalPlan::DfPlan(plan) = query_engine
|
||||
let plan = query_engine
|
||||
.optimize(&query_engine.engine_context(query_ctx), &plan)
|
||||
.context(PlanStatementSnafu)?;
|
||||
|
||||
@@ -276,11 +275,12 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
|
||||
.build(),
|
||||
);
|
||||
|
||||
let information_extension = Arc::new(DistributedInformationExtension::new(meta_client.clone()));
|
||||
let catalog_manager = KvBackendCatalogManager::new(
|
||||
Mode::Distributed,
|
||||
Some(meta_client.clone()),
|
||||
information_extension,
|
||||
cached_meta_backend.clone(),
|
||||
layered_cache_registry,
|
||||
None,
|
||||
);
|
||||
let plugins: Plugins = Default::default();
|
||||
let state = Arc::new(QueryEngineState::new(
|
||||
|
||||
@@ -41,7 +41,7 @@ use crate::error::{
|
||||
MissingConfigSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
|
||||
};
|
||||
use crate::options::{GlobalOptions, GreptimeOptions};
|
||||
use crate::{log_versions, App};
|
||||
use crate::{log_versions, App, DistributedInformationExtension};
|
||||
|
||||
pub const APP_NAME: &str = "greptime-flownode";
|
||||
|
||||
@@ -269,11 +269,13 @@ impl StartCommand {
|
||||
.build(),
|
||||
);
|
||||
|
||||
let information_extension =
|
||||
Arc::new(DistributedInformationExtension::new(meta_client.clone()));
|
||||
let catalog_manager = KvBackendCatalogManager::new(
|
||||
opts.mode,
|
||||
Some(meta_client.clone()),
|
||||
information_extension,
|
||||
cached_meta_backend.clone(),
|
||||
layered_cache_registry.clone(),
|
||||
None,
|
||||
);
|
||||
|
||||
let table_metadata_manager =
|
||||
|
||||
@@ -36,8 +36,8 @@ use frontend::instance::builder::FrontendBuilder;
|
||||
use frontend::instance::{FrontendInstance, Instance as FeInstance};
|
||||
use frontend::server::Services;
|
||||
use meta_client::{MetaClientOptions, MetaClientType};
|
||||
use query::stats::StatementStatistics;
|
||||
use servers::tls::{TlsMode, TlsOption};
|
||||
use servers::Mode;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
@@ -46,7 +46,7 @@ use crate::error::{
|
||||
Result, StartFrontendSnafu,
|
||||
};
|
||||
use crate::options::{GlobalOptions, GreptimeOptions};
|
||||
use crate::{log_versions, App};
|
||||
use crate::{log_versions, App, DistributedInformationExtension};
|
||||
|
||||
type FrontendOptions = GreptimeOptions<frontend::frontend::FrontendOptions>;
|
||||
|
||||
@@ -315,11 +315,13 @@ impl StartCommand {
|
||||
.build(),
|
||||
);
|
||||
|
||||
let information_extension =
|
||||
Arc::new(DistributedInformationExtension::new(meta_client.clone()));
|
||||
let catalog_manager = KvBackendCatalogManager::new(
|
||||
Mode::Distributed,
|
||||
Some(meta_client.clone()),
|
||||
information_extension,
|
||||
cached_meta_backend.clone(),
|
||||
layered_cache_registry.clone(),
|
||||
None,
|
||||
);
|
||||
|
||||
let executor = HandlerGroupExecutor::new(vec![
|
||||
@@ -351,6 +353,7 @@ impl StartCommand {
|
||||
catalog_manager,
|
||||
Arc::new(client),
|
||||
meta_client,
|
||||
StatementStatistics::new(opts.logging.slow_query.clone()),
|
||||
)
|
||||
.with_plugin(plugins.clone())
|
||||
.with_local_cache_invalidator(layered_cache_registry)
|
||||
|
||||
@@ -15,7 +15,17 @@
|
||||
#![feature(assert_matches, let_chains)]
|
||||
|
||||
use async_trait::async_trait;
|
||||
use catalog::information_schema::InformationExtension;
|
||||
use client::api::v1::meta::ProcedureStatus;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cluster::{ClusterInfo, NodeInfo};
|
||||
use common_meta::datanode::RegionStat;
|
||||
use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
|
||||
use common_meta::rpc::procedure;
|
||||
use common_procedure::{ProcedureInfo, ProcedureState};
|
||||
use common_telemetry::{error, info};
|
||||
use meta_client::MetaClientRef;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::Result;
|
||||
|
||||
@@ -94,3 +104,69 @@ fn log_env_flags() {
|
||||
info!("argument: {}", argument);
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DistributedInformationExtension {
|
||||
meta_client: MetaClientRef,
|
||||
}
|
||||
|
||||
impl DistributedInformationExtension {
|
||||
pub fn new(meta_client: MetaClientRef) -> Self {
|
||||
Self { meta_client }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl InformationExtension for DistributedInformationExtension {
|
||||
type Error = catalog::error::Error;
|
||||
|
||||
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
|
||||
self.meta_client
|
||||
.list_nodes(None)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(catalog::error::ListNodesSnafu)
|
||||
}
|
||||
|
||||
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
|
||||
let procedures = self
|
||||
.meta_client
|
||||
.list_procedures(&ExecutorContext::default())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(catalog::error::ListProceduresSnafu)?
|
||||
.procedures;
|
||||
let mut result = Vec::with_capacity(procedures.len());
|
||||
for procedure in procedures {
|
||||
let pid = match procedure.id {
|
||||
Some(pid) => pid,
|
||||
None => return catalog::error::ProcedureIdNotFoundSnafu {}.fail(),
|
||||
};
|
||||
let pid = procedure::pb_pid_to_pid(&pid)
|
||||
.map_err(BoxedError::new)
|
||||
.context(catalog::error::ConvertProtoDataSnafu)?;
|
||||
let status = ProcedureStatus::try_from(procedure.status)
|
||||
.map(|v| v.as_str_name())
|
||||
.unwrap_or("Unknown")
|
||||
.to_string();
|
||||
let procedure_info = ProcedureInfo {
|
||||
id: pid,
|
||||
type_name: procedure.type_name,
|
||||
start_time_ms: procedure.start_time_ms,
|
||||
end_time_ms: procedure.end_time_ms,
|
||||
state: ProcedureState::Running,
|
||||
lock_keys: procedure.lock_keys,
|
||||
};
|
||||
result.push((status, procedure_info));
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
|
||||
self.meta_client
|
||||
.list_region_stats()
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(catalog::error::ListRegionStatsSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,14 +17,18 @@ use std::{fs, path};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
|
||||
use catalog::information_schema::InformationExtension;
|
||||
use catalog::kvbackend::KvBackendCatalogManager;
|
||||
use clap::Parser;
|
||||
use client::api::v1::meta::RegionRole;
|
||||
use common_base::Plugins;
|
||||
use common_catalog::consts::{MIN_USER_FLOW_ID, MIN_USER_TABLE_ID};
|
||||
use common_config::{metadata_store_dir, Configurable, KvBackendConfig};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cache::LayeredCacheRegistryBuilder;
|
||||
use common_meta::cache_invalidator::CacheInvalidatorRef;
|
||||
use common_meta::cluster::{NodeInfo, NodeStatus};
|
||||
use common_meta::datanode::RegionStat;
|
||||
use common_meta::ddl::flow_meta::{FlowMetadataAllocator, FlowMetadataAllocatorRef};
|
||||
use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef};
|
||||
use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl, ProcedureExecutorRef};
|
||||
@@ -33,10 +37,11 @@ use common_meta::key::flow::{FlowMetadataManager, FlowMetadataManagerRef};
|
||||
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::node_manager::NodeManagerRef;
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::region_keeper::MemoryRegionKeeper;
|
||||
use common_meta::sequence::SequenceBuilder;
|
||||
use common_meta::wal_options_allocator::{WalOptionsAllocator, WalOptionsAllocatorRef};
|
||||
use common_procedure::ProcedureManagerRef;
|
||||
use common_procedure::{ProcedureInfo, ProcedureManagerRef};
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::logging::{LoggingOptions, TracingOptions};
|
||||
use common_time::timezone::set_default_timezone;
|
||||
@@ -44,6 +49,7 @@ use common_version::{short_version, version};
|
||||
use common_wal::config::DatanodeWalConfig;
|
||||
use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, StorageConfig};
|
||||
use datanode::datanode::{Datanode, DatanodeBuilder};
|
||||
use datanode::region_server::RegionServer;
|
||||
use file_engine::config::EngineConfig as FileEngineConfig;
|
||||
use flow::{FlowWorkerManager, FlownodeBuilder, FrontendInvoker};
|
||||
use frontend::frontend::FrontendOptions;
|
||||
@@ -55,6 +61,7 @@ use frontend::service_config::{
|
||||
};
|
||||
use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
|
||||
use mito2::config::MitoConfig;
|
||||
use query::stats::StatementStatistics;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use servers::export_metrics::ExportMetricsOption;
|
||||
use servers::grpc::GrpcOptions;
|
||||
@@ -477,22 +484,26 @@ impl StartCommand {
|
||||
.build(),
|
||||
);
|
||||
|
||||
let catalog_manager = KvBackendCatalogManager::new(
|
||||
dn_opts.mode,
|
||||
None,
|
||||
kv_backend.clone(),
|
||||
layered_cache_registry.clone(),
|
||||
);
|
||||
|
||||
let table_metadata_manager =
|
||||
Self::create_table_metadata_manager(kv_backend.clone()).await?;
|
||||
|
||||
let datanode = DatanodeBuilder::new(dn_opts, plugins.clone())
|
||||
.with_kv_backend(kv_backend.clone())
|
||||
.build()
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
|
||||
let information_extension = Arc::new(StandaloneInformationExtension::new(
|
||||
datanode.region_server(),
|
||||
procedure_manager.clone(),
|
||||
));
|
||||
let catalog_manager = KvBackendCatalogManager::new(
|
||||
information_extension,
|
||||
kv_backend.clone(),
|
||||
layered_cache_registry.clone(),
|
||||
Some(procedure_manager.clone()),
|
||||
);
|
||||
|
||||
let table_metadata_manager =
|
||||
Self::create_table_metadata_manager(kv_backend.clone()).await?;
|
||||
|
||||
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
|
||||
let flow_builder = FlownodeBuilder::new(
|
||||
Default::default(),
|
||||
@@ -556,6 +567,7 @@ impl StartCommand {
|
||||
catalog_manager.clone(),
|
||||
node_manager.clone(),
|
||||
ddl_task_executor.clone(),
|
||||
StatementStatistics::new(opts.logging.slow_query.clone()),
|
||||
)
|
||||
.with_plugin(plugins.clone())
|
||||
.try_build()
|
||||
@@ -641,6 +653,91 @@ impl StartCommand {
|
||||
}
|
||||
}
|
||||
|
||||
struct StandaloneInformationExtension {
|
||||
region_server: RegionServer,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
start_time_ms: u64,
|
||||
}
|
||||
|
||||
impl StandaloneInformationExtension {
|
||||
pub fn new(region_server: RegionServer, procedure_manager: ProcedureManagerRef) -> Self {
|
||||
Self {
|
||||
region_server,
|
||||
procedure_manager,
|
||||
start_time_ms: common_time::util::current_time_millis() as u64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl InformationExtension for StandaloneInformationExtension {
|
||||
type Error = catalog::error::Error;
|
||||
|
||||
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
|
||||
let build_info = common_version::build_info();
|
||||
let node_info = NodeInfo {
|
||||
// For the standalone:
|
||||
// - id always 0
|
||||
// - empty string for peer_addr
|
||||
peer: Peer {
|
||||
id: 0,
|
||||
addr: "".to_string(),
|
||||
},
|
||||
last_activity_ts: -1,
|
||||
status: NodeStatus::Standalone,
|
||||
version: build_info.version.to_string(),
|
||||
git_commit: build_info.commit_short.to_string(),
|
||||
// Use `self.start_time_ms` instead.
|
||||
// It's not precise but enough.
|
||||
start_time_ms: self.start_time_ms,
|
||||
};
|
||||
Ok(vec![node_info])
|
||||
}
|
||||
|
||||
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
|
||||
self.procedure_manager
|
||||
.list_procedures()
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.map(|procedures| {
|
||||
procedures
|
||||
.into_iter()
|
||||
.map(|procedure| {
|
||||
let status = procedure.state.as_str_name().to_string();
|
||||
(status, procedure)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.context(catalog::error::ListProceduresSnafu)
|
||||
}
|
||||
|
||||
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
|
||||
let stats = self
|
||||
.region_server
|
||||
.reportable_regions()
|
||||
.into_iter()
|
||||
.map(|stat| {
|
||||
let region_stat = self
|
||||
.region_server
|
||||
.region_statistic(stat.region_id)
|
||||
.unwrap_or_default();
|
||||
RegionStat {
|
||||
id: stat.region_id,
|
||||
rcus: 0,
|
||||
wcus: 0,
|
||||
approximate_bytes: region_stat.estimated_disk_size() as i64,
|
||||
engine: stat.engine,
|
||||
role: RegionRole::from(stat.role).into(),
|
||||
memtable_size: region_stat.memtable_size,
|
||||
manifest_size: region_stat.manifest_size,
|
||||
sst_size: region_stat.sst_size,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok(stats)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::default::Default;
|
||||
|
||||
@@ -8,7 +8,7 @@ license.workspace = true
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
anymap = "1.0.0-beta.2"
|
||||
anymap2 = "0.13"
|
||||
async-trait.workspace = true
|
||||
bitvec = "1.0"
|
||||
bytes.workspace = true
|
||||
|
||||
@@ -12,20 +12,21 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
|
||||
|
||||
/// [`Plugins`] is a wrapper of [AnyMap](https://github.com/chris-morgan/anymap) and provides a thread-safe way to store and retrieve plugins.
|
||||
use anymap2::SendSyncAnyMap;
|
||||
|
||||
/// [`Plugins`] is a wrapper of [anymap2](https://github.com/azriel91/anymap2) and provides a thread-safe way to store and retrieve plugins.
|
||||
/// Make it Cloneable and we can treat it like an Arc struct.
|
||||
#[derive(Default, Clone)]
|
||||
pub struct Plugins {
|
||||
inner: Arc<RwLock<anymap::Map<dyn Any + Send + Sync>>>,
|
||||
inner: Arc<RwLock<SendSyncAnyMap>>,
|
||||
}
|
||||
|
||||
impl Plugins {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
inner: Arc::new(RwLock::new(anymap::Map::new())),
|
||||
inner: Arc::new(RwLock::new(SendSyncAnyMap::new())),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,11 +62,11 @@ impl Plugins {
|
||||
self.read().is_empty()
|
||||
}
|
||||
|
||||
fn read(&self) -> RwLockReadGuard<anymap::Map<dyn Any + Send + Sync>> {
|
||||
fn read(&self) -> RwLockReadGuard<SendSyncAnyMap> {
|
||||
self.inner.read().unwrap()
|
||||
}
|
||||
|
||||
fn write(&self) -> RwLockWriteGuard<anymap::Map<dyn Any + Send + Sync>> {
|
||||
fn write(&self) -> RwLockWriteGuard<SendSyncAnyMap> {
|
||||
self.inner.write().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,6 +98,11 @@ pub const INFORMATION_SCHEMA_CLUSTER_INFO_TABLE_ID: u32 = 31;
|
||||
pub const INFORMATION_SCHEMA_VIEW_TABLE_ID: u32 = 32;
|
||||
/// id for information_schema.FLOWS
|
||||
pub const INFORMATION_SCHEMA_FLOW_TABLE_ID: u32 = 33;
|
||||
/// id for information_schema.procedure_info
|
||||
pub const INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID: u32 = 34;
|
||||
/// id for information_schema.region_statistics
|
||||
pub const INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID: u32 = 35;
|
||||
|
||||
/// ----- End of information_schema tables -----
|
||||
|
||||
/// ----- Begin of pg_catalog tables -----
|
||||
|
||||
@@ -38,6 +38,8 @@ pub enum StatusCode {
|
||||
Cancelled = 1005,
|
||||
/// Illegal state, can be exposed to users.
|
||||
IllegalState = 1006,
|
||||
/// Caused by some error originated from external system.
|
||||
External = 1007,
|
||||
// ====== End of common status code ================
|
||||
|
||||
// ====== Begin of SQL related status code =========
|
||||
@@ -162,7 +164,8 @@ impl StatusCode {
|
||||
| StatusCode::InvalidAuthHeader
|
||||
| StatusCode::AccessDenied
|
||||
| StatusCode::PermissionDenied
|
||||
| StatusCode::RequestOutdated => false,
|
||||
| StatusCode::RequestOutdated
|
||||
| StatusCode::External => false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -177,7 +180,9 @@ impl StatusCode {
|
||||
| StatusCode::IllegalState
|
||||
| StatusCode::EngineExecuteQuery
|
||||
| StatusCode::StorageUnavailable
|
||||
| StatusCode::RuntimeResourcesExhausted => true,
|
||||
| StatusCode::RuntimeResourcesExhausted
|
||||
| StatusCode::External => true,
|
||||
|
||||
StatusCode::Success
|
||||
| StatusCode::Unsupported
|
||||
| StatusCode::InvalidArguments
|
||||
@@ -256,7 +261,7 @@ macro_rules! define_into_tonic_status {
|
||||
pub fn status_to_tonic_code(status_code: StatusCode) -> Code {
|
||||
match status_code {
|
||||
StatusCode::Success => Code::Ok,
|
||||
StatusCode::Unknown => Code::Unknown,
|
||||
StatusCode::Unknown | StatusCode::External => Code::Unknown,
|
||||
StatusCode::Unsupported => Code::Unimplemented,
|
||||
StatusCode::Unexpected
|
||||
| StatusCode::IllegalState
|
||||
|
||||
@@ -27,6 +27,7 @@ common-time.workspace = true
|
||||
common-version.workspace = true
|
||||
datafusion.workspace = true
|
||||
datatypes.workspace = true
|
||||
derive_more = { version = "1", default-features = false, features = ["display"] }
|
||||
geohash = { version = "0.13", optional = true }
|
||||
h3o = { version = "0.6", optional = true }
|
||||
jsonb.workspace = true
|
||||
|
||||
@@ -16,7 +16,6 @@ mod argmax;
|
||||
mod argmin;
|
||||
mod diff;
|
||||
mod mean;
|
||||
mod percentile;
|
||||
mod polyval;
|
||||
mod scipy_stats_norm_cdf;
|
||||
mod scipy_stats_norm_pdf;
|
||||
@@ -28,11 +27,11 @@ pub use argmin::ArgminAccumulatorCreator;
|
||||
use common_query::logical_plan::AggregateFunctionCreatorRef;
|
||||
pub use diff::DiffAccumulatorCreator;
|
||||
pub use mean::MeanAccumulatorCreator;
|
||||
pub use percentile::PercentileAccumulatorCreator;
|
||||
pub use polyval::PolyvalAccumulatorCreator;
|
||||
pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
|
||||
pub use scipy_stats_norm_pdf::ScipyStatsNormPdfAccumulatorCreator;
|
||||
|
||||
use super::geo::encoding::JsonPathEncodeFunctionCreator;
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
/// A function creates `AggregateFunctionCreator`.
|
||||
@@ -91,8 +90,9 @@ impl AggregateFunctions {
|
||||
register_aggr_func!("polyval", 2, PolyvalAccumulatorCreator);
|
||||
register_aggr_func!("argmax", 1, ArgmaxAccumulatorCreator);
|
||||
register_aggr_func!("argmin", 1, ArgminAccumulatorCreator);
|
||||
register_aggr_func!("percentile", 2, PercentileAccumulatorCreator);
|
||||
register_aggr_func!("scipystatsnormcdf", 2, ScipyStatsNormCdfAccumulatorCreator);
|
||||
register_aggr_func!("scipystatsnormpdf", 2, ScipyStatsNormPdfAccumulatorCreator);
|
||||
|
||||
register_aggr_func!("json_encode_path", 3, JsonPathEncodeFunctionCreator);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,10 @@ use std::cmp::Ordering;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Result};
|
||||
use common_query::error::{
|
||||
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
|
||||
@@ -16,7 +16,10 @@ use std::cmp::Ordering;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Result};
|
||||
use common_query::error::{
|
||||
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
|
||||
@@ -17,8 +17,10 @@ use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, Result,
|
||||
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, InvalidInputStateSnafu,
|
||||
Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
|
||||
@@ -17,8 +17,10 @@ use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu, Result,
|
||||
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu, InvalidInputStateSnafu,
|
||||
Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
|
||||
@@ -1,436 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::cmp::Reverse;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
|
||||
FromScalarValueSnafu, InvalidInputColSnafu, Result,
|
||||
};
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::OrdPrimitive;
|
||||
use datatypes::value::{ListValue, OrderedFloat};
|
||||
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num::NumCast;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
// https://numpy.org/doc/stable/reference/generated/numpy.percentile.html?highlight=percentile#numpy.percentile
|
||||
// if p is 50, then the percentile becomes the median
|
||||
// we use two heaps: greater and not_greater
|
||||
// the not_greater heap holds the values that are not greater than the P-value
|
||||
// the greater heap holds the values that are bigger than the P-value
|
||||
// just like the percentile in numpy:
|
||||
// Given a vector V of length N, the q-th percentile of V is the value q/100 of the way from the minimum to the maximum in a sorted copy of V.
|
||||
// The values and distances of the two nearest neighbors as well as the method parameter will determine the percentile
|
||||
// if the normalized ranking does not match the location of q exactly.
|
||||
// This function is the same as the median if q=50, the same as the minimum if q=0 and the same as the maximum if q=100.
|
||||
// This optional method parameter specifies the method to use when the desired quantile lies between two data points i < j.
|
||||
// If g is the fractional part of the index surrounded by i and alpha and beta are correction constants modifying i and j.
|
||||
// i+g = (q-alpha)/(n-alpha-beta+1)
|
||||
// Below, 'q' is the quantile value, 'n' is the sample size and alpha and beta are constants. The following formula gives an interpolation "i + g" of where the quantile would be in the sorted sample.
|
||||
// With 'i' being the floor and 'g' the fractional part of the result.
|
||||
// the default method is linear where
|
||||
// alpha = 1
|
||||
// beta = 1
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Percentile<T>
|
||||
where
|
||||
T: WrapperType,
|
||||
{
|
||||
greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
|
||||
not_greater: BinaryHeap<OrdPrimitive<T>>,
|
||||
n: u64,
|
||||
p: Option<f64>,
|
||||
}
|
||||
|
||||
impl<T> Percentile<T>
|
||||
where
|
||||
T: WrapperType,
|
||||
{
|
||||
fn push(&mut self, value: T) {
|
||||
let value = OrdPrimitive::<T>(value);
|
||||
|
||||
self.n += 1;
|
||||
if self.not_greater.is_empty() {
|
||||
self.not_greater.push(value);
|
||||
return;
|
||||
}
|
||||
// to keep the not_greater length == floor+1
|
||||
// so to ensure the peek of the not_greater is array[floor]
|
||||
// and the peek of the greater is array[floor+1]
|
||||
let p = self.p.unwrap_or(0.0_f64);
|
||||
let floor = (((self.n - 1) as f64) * p / (100_f64)).floor();
|
||||
if value <= *self.not_greater.peek().unwrap() {
|
||||
self.not_greater.push(value);
|
||||
if self.not_greater.len() > (floor + 1.0) as usize {
|
||||
self.greater.push(Reverse(self.not_greater.pop().unwrap()));
|
||||
}
|
||||
} else {
|
||||
self.greater.push(Reverse(value));
|
||||
if self.not_greater.len() < (floor + 1.0) as usize {
|
||||
self.not_greater.push(self.greater.pop().unwrap().0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Accumulator for Percentile<T>
|
||||
where
|
||||
T: WrapperType,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
let nums = self
|
||||
.greater
|
||||
.iter()
|
||||
.map(|x| &x.0)
|
||||
.chain(self.not_greater.iter())
|
||||
.map(|&n| n.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
|
||||
self.p.into(),
|
||||
])
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
ensure!(values.len() == 2, InvalidInputStateSnafu);
|
||||
ensure!(values[0].len() == values[1].len(), InvalidInputStateSnafu);
|
||||
|
||||
if values[0].len() == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Two columns are provided: `values[0]` holds the values to aggregate and `values[1]` holds the percentile `p`.
|
||||
let column = &values[0];
|
||||
let mut len = 1;
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
len = column.len();
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
|
||||
let x = &values[1];
|
||||
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
|
||||
err_msg: "expecting \"POLYVAL\" function's second argument to be float64",
|
||||
})?;
|
||||
// `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
|
||||
let first = x.get(0);
|
||||
ensure!(!first.is_null(), InvalidInputColSnafu);
|
||||
|
||||
for i in 1..x.len() {
|
||||
ensure!(first == x.get(i), InvalidInputColSnafu);
|
||||
}
|
||||
|
||||
let first = match first {
|
||||
Value::Float64(OrderedFloat(v)) => v,
|
||||
// unreachable because we have checked `first` is not null and is f64 above
|
||||
_ => unreachable!(),
|
||||
};
|
||||
if let Some(p) = self.p {
|
||||
ensure!(p == first, InvalidInputColSnafu);
|
||||
} else {
|
||||
self.p = Some(first);
|
||||
};
|
||||
|
||||
(0..len).for_each(|_| {
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
|
||||
if states.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(
|
||||
states.len() == 2,
|
||||
BadAccumulatorImplSnafu {
|
||||
err_msg: "expect 2 states in `merge_batch`"
|
||||
}
|
||||
);
|
||||
|
||||
let p = &states[1];
|
||||
let p = p
|
||||
.as_any()
|
||||
.downcast_ref::<Float64Vector>()
|
||||
.with_context(|| DowncastVectorSnafu {
|
||||
err_msg: format!(
|
||||
"expect float64vector, got vector type {}",
|
||||
p.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
let p = p.get(0);
|
||||
if p.is_null() {
|
||||
return Ok(());
|
||||
}
|
||||
let p = match p {
|
||||
Value::Float64(OrderedFloat(p)) => p,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
self.p = Some(p);
|
||||
|
||||
let values = &states[0];
|
||||
let values = values
|
||||
.as_any()
|
||||
.downcast_ref::<ListVector>()
|
||||
.with_context(|| DowncastVectorSnafu {
|
||||
err_msg: format!(
|
||||
"expect ListVector, got vector type {}",
|
||||
values.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
for value in values.values_iter() {
|
||||
if let Some(value) = value.context(FromScalarValueSnafu)? {
|
||||
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<Value> {
|
||||
if self.not_greater.is_empty() {
|
||||
assert!(
|
||||
self.greater.is_empty(),
|
||||
"not expected in two-heap percentile algorithm, there must be a bug when implementing it"
|
||||
);
|
||||
}
|
||||
let not_greater = self.not_greater.peek();
|
||||
if not_greater.is_none() {
|
||||
return Ok(Value::Null);
|
||||
}
|
||||
let not_greater = (*self.not_greater.peek().unwrap()).as_primitive();
|
||||
let percentile = if self.greater.is_empty() {
|
||||
NumCast::from(not_greater).unwrap()
|
||||
} else {
|
||||
let greater = self.greater.peek().unwrap();
|
||||
let p = if let Some(p) = self.p {
|
||||
p
|
||||
} else {
|
||||
return Ok(Value::Null);
|
||||
};
|
||||
let fract = (((self.n - 1) as f64) * p / 100_f64).fract();
|
||||
let not_greater_v: f64 = NumCast::from(not_greater).unwrap();
|
||||
let greater_v: f64 = NumCast::from(greater.0.as_primitive()).unwrap();
|
||||
not_greater_v * (1.0 - fract) + greater_v * fract
|
||||
};
|
||||
Ok(Value::from(percentile))
|
||||
}
|
||||
}
|
||||
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, Default, AggrFuncTypeStore)]
|
||||
pub struct PercentileAccumulatorCreator {}
|
||||
|
||||
impl AggregateFunctionCreator for PercentileAccumulatorCreator {
|
||||
fn creator(&self) -> AccumulatorCreatorFunction {
|
||||
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
|
||||
let input_type = &types[0];
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(Percentile::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
"\"PERCENTILE\" aggregate function not support data type {:?}",
|
||||
input_type.logical_type_id(),
|
||||
);
|
||||
CreateAccumulatorSnafu { err_msg }.fail()?
|
||||
}
|
||||
)
|
||||
});
|
||||
creator
|
||||
}
|
||||
|
||||
fn output_type(&self) -> Result<ConcreteDataType> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
|
||||
// the output type is always float64; `input_types` is only checked to contain exactly 2 entries
|
||||
Ok(ConcreteDataType::float64_datatype())
|
||||
}
|
||||
|
||||
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
|
||||
Ok(vec![
|
||||
ConcreteDataType::list_datatype(input_types.into_iter().next().unwrap()),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::{Float64Vector, Int32Vector};
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_update_batch() {
|
||||
// test update empty batch, expect not updating anything
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
percentile.update_batch(&[]).unwrap();
|
||||
assert!(percentile.not_greater.is_empty());
|
||||
assert!(percentile.greater.is_empty());
|
||||
assert_eq!(Value::Null, percentile.evaluate().unwrap());
|
||||
|
||||
// test update one not-null value
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(42)])),
|
||||
Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
|
||||
];
|
||||
percentile.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(42.0_f64), percentile.evaluate().unwrap());
|
||||
|
||||
// test update one null value
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
|
||||
Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
|
||||
];
|
||||
percentile.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Null, percentile.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
])),
|
||||
];
|
||||
percentile.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(2_f64), percentile.evaluate().unwrap());
|
||||
|
||||
// test update null-value batch
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
])),
|
||||
];
|
||||
percentile.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(4_f64), percentile.evaluate().unwrap());
|
||||
|
||||
// test update with constant vector
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_vec(vec![4])),
|
||||
2,
|
||||
)),
|
||||
Arc::new(Float64Vector::from(vec![Some(100.0_f64), Some(100.0_f64)])),
|
||||
];
|
||||
percentile.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(4_f64), percentile.evaluate().unwrap());
|
||||
|
||||
// test left border
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(0.0_f64),
|
||||
Some(0.0_f64),
|
||||
Some(0.0_f64),
|
||||
])),
|
||||
];
|
||||
percentile.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(-1.0_f64), percentile.evaluate().unwrap());
|
||||
|
||||
// test median
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(50.0_f64),
|
||||
Some(50.0_f64),
|
||||
Some(50.0_f64),
|
||||
])),
|
||||
];
|
||||
percentile.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(1.0_f64), percentile.evaluate().unwrap());
|
||||
|
||||
// test right border
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
])),
|
||||
];
|
||||
percentile.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(2.0_f64), percentile.evaluate().unwrap());
|
||||
|
||||
// the following is the result of numpy.percentile
|
||||
// numpy.percentile
|
||||
// a = np.array([[10,7,4]])
|
||||
// np.percentile(a,40)
|
||||
// >> 6.400000000000
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(40.0_f64),
|
||||
Some(40.0_f64),
|
||||
Some(40.0_f64),
|
||||
])),
|
||||
];
|
||||
percentile.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(6.400000000_f64), percentile.evaluate().unwrap());
|
||||
|
||||
// the following is the result of numpy.percentile
|
||||
// a = np.array([[10,7,4]])
|
||||
// np.percentile(a,95)
|
||||
// >> 9.7000000000000011
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(95.0_f64),
|
||||
Some(95.0_f64),
|
||||
Some(95.0_f64),
|
||||
])),
|
||||
];
|
||||
percentile.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::from(9.700_000_000_000_001_f64),
|
||||
percentile.evaluate().unwrap()
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -18,8 +18,9 @@ use std::sync::Arc;
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
|
||||
FromScalarValueSnafu, InvalidInputColSnafu, Result,
|
||||
FromScalarValueSnafu, InvalidInputColSnafu, InvalidInputStateSnafu, Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
|
||||
@@ -17,8 +17,10 @@ use std::sync::Arc;
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
|
||||
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, Result,
|
||||
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
|
||||
Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
|
||||
@@ -17,8 +17,10 @@ use std::sync::Arc;
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
|
||||
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, Result,
|
||||
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
|
||||
Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
|
||||
@@ -13,11 +13,12 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
pub(crate) mod encoding;
|
||||
mod geohash;
|
||||
mod h3;
|
||||
mod helpers;
|
||||
|
||||
use geohash::GeohashFunction;
|
||||
use h3::H3Function;
|
||||
use geohash::{GeohashFunction, GeohashNeighboursFunction};
|
||||
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
@@ -25,7 +26,34 @@ pub(crate) struct GeoFunctions;
|
||||
|
||||
impl GeoFunctions {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
// geohash
|
||||
registry.register(Arc::new(GeohashFunction));
|
||||
registry.register(Arc::new(H3Function));
|
||||
registry.register(Arc::new(GeohashNeighboursFunction));
|
||||
|
||||
// h3 index
|
||||
registry.register(Arc::new(h3::H3LatLngToCell));
|
||||
registry.register(Arc::new(h3::H3LatLngToCellString));
|
||||
|
||||
// h3 index inspection
|
||||
registry.register(Arc::new(h3::H3CellBase));
|
||||
registry.register(Arc::new(h3::H3CellIsPentagon));
|
||||
registry.register(Arc::new(h3::H3StringToCell));
|
||||
registry.register(Arc::new(h3::H3CellToString));
|
||||
registry.register(Arc::new(h3::H3CellCenterLatLng));
|
||||
registry.register(Arc::new(h3::H3CellResolution));
|
||||
|
||||
// h3 hierarchical grid
|
||||
registry.register(Arc::new(h3::H3CellCenterChild));
|
||||
registry.register(Arc::new(h3::H3CellParent));
|
||||
registry.register(Arc::new(h3::H3CellToChildren));
|
||||
registry.register(Arc::new(h3::H3CellToChildrenSize));
|
||||
registry.register(Arc::new(h3::H3CellToChildPos));
|
||||
registry.register(Arc::new(h3::H3ChildPosToCell));
|
||||
|
||||
// h3 grid traversal
|
||||
registry.register(Arc::new(h3::H3GridDisk));
|
||||
registry.register(Arc::new(h3::H3GridDiskDistances));
|
||||
registry.register(Arc::new(h3::H3GridDistance));
|
||||
registry.register(Arc::new(h3::H3GridPathCells));
|
||||
}
|
||||
}
|
||||
|
||||
223
src/common/function/src/scalars/geo/encoding.rs
Normal file
223
src/common/function/src/scalars/geo/encoding.rs
Normal file
@@ -0,0 +1,223 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_error::ext::{BoxedError, PlainError};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{self, InvalidFuncArgsSnafu, InvalidInputStateSnafu, Result};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::AccumulatorCreatorFunction;
|
||||
use common_time::Timestamp;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::value::{ListValue, Value};
|
||||
use datatypes::vectors::VectorRef;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use super::helpers::{ensure_columns_len, ensure_columns_n};
|
||||
|
||||
/// Accumulator of lat, lng, timestamp tuples
|
||||
#[derive(Debug)]
|
||||
pub struct JsonPathAccumulator {
|
||||
timestamp_type: ConcreteDataType,
|
||||
lat: Vec<Option<f64>>,
|
||||
lng: Vec<Option<f64>>,
|
||||
timestamp: Vec<Option<Timestamp>>,
|
||||
}
|
||||
|
||||
impl JsonPathAccumulator {
|
||||
fn new(timestamp_type: ConcreteDataType) -> Self {
|
||||
Self {
|
||||
lat: Vec::default(),
|
||||
lng: Vec::default(),
|
||||
timestamp: Vec::default(),
|
||||
timestamp_type,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Accumulator for JsonPathAccumulator {
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
self.lat.iter().map(|i| Value::from(*i)).collect(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
)),
|
||||
Value::List(ListValue::new(
|
||||
self.lng.iter().map(|i| Value::from(*i)).collect(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
)),
|
||||
Value::List(ListValue::new(
|
||||
self.timestamp.iter().map(|i| Value::from(*i)).collect(),
|
||||
self.timestamp_type.clone(),
|
||||
)),
|
||||
])
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, columns: &[VectorRef]) -> Result<()> {
|
||||
// update batch as in datafusion just provides the accumulator original
|
||||
// input.
|
||||
//
|
||||
// columns is vec of [`lat`, `lng`, `timestamp`]
|
||||
// where
|
||||
// - `lat` is a vector of `Value::Float64` or similar type. Each item in
|
||||
// the vector is a row in given dataset.
|
||||
// - so on so forth for `lng` and `timestamp`
|
||||
ensure_columns_n!(columns, 3);
|
||||
|
||||
let lat = &columns[0];
|
||||
let lng = &columns[1];
|
||||
let ts = &columns[2];
|
||||
|
||||
let size = lat.len();
|
||||
|
||||
for idx in 0..size {
|
||||
self.lat.push(lat.get(idx).as_f64_lossy());
|
||||
self.lng.push(lng.get(idx).as_f64_lossy());
|
||||
self.timestamp.push(ts.get(idx).as_timestamp());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
|
||||
// merge batch as in datafusion gives state accumulated from the data
|
||||
// returned from child accumulators' state() call
|
||||
// In our particular implementation, the data structure is like
|
||||
//
|
||||
// states is vec of [`lat`, `lng`, `timestamp`]
|
||||
// where
|
||||
// - `lat` is a vector of `Value::List`. Each item in the list is all
|
||||
// coordinates from a child accumulator.
|
||||
// - so on so forth for `lng` and `timestamp`
|
||||
|
||||
ensure_columns_n!(states, 3);
|
||||
|
||||
let lat_lists = &states[0];
|
||||
let lng_lists = &states[1];
|
||||
let ts_lists = &states[2];
|
||||
|
||||
let len = lat_lists.len();
|
||||
|
||||
for idx in 0..len {
|
||||
if let Some(lat_list) = lat_lists
|
||||
.get(idx)
|
||||
.as_list()
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ExecuteSnafu)?
|
||||
{
|
||||
for v in lat_list.items() {
|
||||
self.lat.push(v.as_f64_lossy());
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(lng_list) = lng_lists
|
||||
.get(idx)
|
||||
.as_list()
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ExecuteSnafu)?
|
||||
{
|
||||
for v in lng_list.items() {
|
||||
self.lng.push(v.as_f64_lossy());
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ts_list) = ts_lists
|
||||
.get(idx)
|
||||
.as_list()
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ExecuteSnafu)?
|
||||
{
|
||||
for v in ts_list.items() {
|
||||
self.timestamp.push(v.as_timestamp());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<Value> {
|
||||
let mut work_vec: Vec<(&Option<f64>, &Option<f64>, &Option<Timestamp>)> = self
|
||||
.lat
|
||||
.iter()
|
||||
.zip(self.lng.iter())
|
||||
.zip(self.timestamp.iter())
|
||||
.map(|((a, b), c)| (a, b, c))
|
||||
.collect();
|
||||
|
||||
// sort by timestamp, we treat null timestamp as 0
|
||||
work_vec.sort_unstable_by_key(|tuple| tuple.2.unwrap_or_else(|| Timestamp::new_second(0)));
|
||||
|
||||
let result = serde_json::to_string(
|
||||
&work_vec
|
||||
.into_iter()
|
||||
// note that we transform to lng,lat for geojson compatibility
|
||||
.map(|(lat, lng, _)| vec![lng, lat])
|
||||
.collect::<Vec<Vec<&Option<f64>>>>(),
|
||||
)
|
||||
.map_err(|e| {
|
||||
BoxedError::new(PlainError::new(
|
||||
format!("Serialization failure: {}", e),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
))
|
||||
})
|
||||
.context(error::ExecuteSnafu)?;
|
||||
|
||||
Ok(Value::String(result.into()))
|
||||
}
|
||||
}
|
||||
|
||||
/// This function accepts rows of lat, lng and timestamp, sorts them by timestamp and
|
||||
/// encodes them into a geojson-like path.
|
||||
///
|
||||
/// Example:
|
||||
///
|
||||
/// ```sql
|
||||
/// SELECT json_encode_path(lat, lon, timestamp) FROM table [group by ...];
|
||||
/// ```
|
||||
///
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, Default, AggrFuncTypeStore)]
|
||||
pub struct JsonPathEncodeFunctionCreator {}
|
||||
|
||||
impl AggregateFunctionCreator for JsonPathEncodeFunctionCreator {
|
||||
fn creator(&self) -> AccumulatorCreatorFunction {
|
||||
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
|
||||
let ts_type = types[2].clone();
|
||||
Ok(Box::new(JsonPathAccumulator::new(ts_type)))
|
||||
});
|
||||
|
||||
creator
|
||||
}
|
||||
|
||||
fn output_type(&self) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::string_datatype())
|
||||
}
|
||||
|
||||
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 3, InvalidInputStateSnafu);
|
||||
|
||||
let timestamp_type = input_types[2].clone();
|
||||
|
||||
Ok(vec![
|
||||
ConcreteDataType::list_datatype(ConcreteDataType::float64_datatype()),
|
||||
ConcreteDataType::list_datatype(ConcreteDataType::float64_datatype()),
|
||||
ConcreteDataType::list_datatype(timestamp_type),
|
||||
])
|
||||
}
|
||||
}
|
||||
@@ -20,23 +20,69 @@ use common_query::error::{self, InvalidFuncArgsSnafu, Result};
|
||||
use common_query::prelude::{Signature, TypeSignature};
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{MutableVector, StringVectorBuilder, VectorRef};
|
||||
use datatypes::scalars::{Scalar, ScalarVectorBuilder};
|
||||
use datatypes::value::{ListValue, Value};
|
||||
use datatypes::vectors::{ListVectorBuilder, MutableVector, StringVectorBuilder, VectorRef};
|
||||
use geohash::Coord;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
macro_rules! ensure_resolution_usize {
|
||||
($v: ident) => {
|
||||
if !($v > 0 && $v <= 12) {
|
||||
Err(BoxedError::new(PlainError::new(
|
||||
format!("Invalid geohash resolution {}, expect value: [1, 12]", $v),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
)))
|
||||
.context(error::ExecuteSnafu)
|
||||
} else {
|
||||
Ok($v as usize)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
fn try_into_resolution(v: Value) -> Result<usize> {
|
||||
match v {
|
||||
Value::Int8(v) => {
|
||||
ensure_resolution_usize!(v)
|
||||
}
|
||||
Value::Int16(v) => {
|
||||
ensure_resolution_usize!(v)
|
||||
}
|
||||
Value::Int32(v) => {
|
||||
ensure_resolution_usize!(v)
|
||||
}
|
||||
Value::Int64(v) => {
|
||||
ensure_resolution_usize!(v)
|
||||
}
|
||||
Value::UInt8(v) => {
|
||||
ensure_resolution_usize!(v)
|
||||
}
|
||||
Value::UInt16(v) => {
|
||||
ensure_resolution_usize!(v)
|
||||
}
|
||||
Value::UInt32(v) => {
|
||||
ensure_resolution_usize!(v)
|
||||
}
|
||||
Value::UInt64(v) => {
|
||||
ensure_resolution_usize!(v)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Function that returns a geohash string for a given geospatial coordinate.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct GeohashFunction;
|
||||
|
||||
const NAME: &str = "geohash";
|
||||
impl GeohashFunction {
|
||||
const NAME: &'static str = "geohash";
|
||||
}
|
||||
|
||||
impl Function for GeohashFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
Self::NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
@@ -93,17 +139,7 @@ impl Function for GeohashFunction {
|
||||
for i in 0..size {
|
||||
let lat = lat_vec.get(i).as_f64_lossy();
|
||||
let lon = lon_vec.get(i).as_f64_lossy();
|
||||
let r = match resolution_vec.get(i) {
|
||||
Value::Int8(v) => v as usize,
|
||||
Value::Int16(v) => v as usize,
|
||||
Value::Int32(v) => v as usize,
|
||||
Value::Int64(v) => v as usize,
|
||||
Value::UInt8(v) => v as usize,
|
||||
Value::UInt16(v) => v as usize,
|
||||
Value::UInt32(v) => v as usize,
|
||||
Value::UInt64(v) => v as usize,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let r = try_into_resolution(resolution_vec.get(i))?;
|
||||
|
||||
let result = match (lat, lon) {
|
||||
(Some(lat), Some(lon)) => {
|
||||
@@ -130,6 +166,134 @@ impl Function for GeohashFunction {
|
||||
|
||||
impl fmt::Display for GeohashFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", NAME)
|
||||
write!(f, "{}", Self::NAME)
|
||||
}
|
||||
}
|
||||
|
||||
/// Function that returns the geohash strings of the eight neighbouring cells of a given geospatial coordinate.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct GeohashNeighboursFunction;
|
||||
|
||||
impl GeohashNeighboursFunction {
|
||||
const NAME: &'static str = "geohash_neighbours";
|
||||
}
|
||||
|
||||
impl Function for GeohashNeighboursFunction {
|
||||
fn name(&self) -> &str {
|
||||
GeohashNeighboursFunction::NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::list_datatype(
|
||||
ConcreteDataType::string_datatype(),
|
||||
))
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
let mut signatures = Vec::new();
|
||||
for coord_type in &[
|
||||
ConcreteDataType::float32_datatype(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
] {
|
||||
for resolution_type in &[
|
||||
ConcreteDataType::int8_datatype(),
|
||||
ConcreteDataType::int16_datatype(),
|
||||
ConcreteDataType::int32_datatype(),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ConcreteDataType::uint8_datatype(),
|
||||
ConcreteDataType::uint16_datatype(),
|
||||
ConcreteDataType::uint32_datatype(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
] {
|
||||
signatures.push(TypeSignature::Exact(vec![
|
||||
// latitude
|
||||
coord_type.clone(),
|
||||
// longitude
|
||||
coord_type.clone(),
|
||||
// resolution
|
||||
resolution_type.clone(),
|
||||
]));
|
||||
}
|
||||
}
|
||||
Signature::one_of(signatures, Volatility::Stable)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 3,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect 3, provided : {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
let lat_vec = &columns[0];
|
||||
let lon_vec = &columns[1];
|
||||
let resolution_vec = &columns[2];
|
||||
|
||||
let size = lat_vec.len();
|
||||
let mut results =
|
||||
ListVectorBuilder::with_type_capacity(ConcreteDataType::string_datatype(), size);
|
||||
|
||||
for i in 0..size {
|
||||
let lat = lat_vec.get(i).as_f64_lossy();
|
||||
let lon = lon_vec.get(i).as_f64_lossy();
|
||||
let r = try_into_resolution(resolution_vec.get(i))?;
|
||||
|
||||
let result = match (lat, lon) {
|
||||
(Some(lat), Some(lon)) => {
|
||||
let coord = Coord { x: lon, y: lat };
|
||||
let encoded = geohash::encode(coord, r)
|
||||
.map_err(|e| {
|
||||
BoxedError::new(PlainError::new(
|
||||
format!("Geohash error: {}", e),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
))
|
||||
})
|
||||
.context(error::ExecuteSnafu)?;
|
||||
let neighbours = geohash::neighbors(&encoded)
|
||||
.map_err(|e| {
|
||||
BoxedError::new(PlainError::new(
|
||||
format!("Geohash error: {}", e),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
))
|
||||
})
|
||||
.context(error::ExecuteSnafu)?;
|
||||
Some(ListValue::new(
|
||||
vec![
|
||||
neighbours.n,
|
||||
neighbours.nw,
|
||||
neighbours.w,
|
||||
neighbours.sw,
|
||||
neighbours.s,
|
||||
neighbours.se,
|
||||
neighbours.e,
|
||||
neighbours.ne,
|
||||
]
|
||||
.into_iter()
|
||||
.map(Value::from)
|
||||
.collect(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
))
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
if let Some(list_value) = result {
|
||||
results.push(Some(list_value.as_scalar_ref()));
|
||||
} else {
|
||||
results.push(None);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for GeohashNeighboursFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", GeohashNeighboursFunction::NAME)
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
61
src/common/function/src/scalars/geo/helpers.rs
Normal file
61
src/common/function/src/scalars/geo/helpers.rs
Normal file
@@ -0,0 +1,61 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/// Ensures that all given columns have the same length.
///
/// Two forms are accepted:
/// - `ensure_columns_len!(columns)`: `columns` is a slice of vectors; every
///   adjacent pair is compared.
/// - `ensure_columns_len!(a, b, ...)`: two or more individual vectors, each
///   compared against the first one. A trailing comma is allowed.
///
/// On mismatch the enclosing function returns early through `ensure!` with an
/// `InvalidFuncArgsSnafu` context, so both names must be in scope at the call
/// site.
macro_rules! ensure_columns_len {
    ($columns:ident) => {
        ensure!(
            $columns.windows(2).all(|c| c[0].len() == c[1].len()),
            InvalidFuncArgsSnafu {
                err_msg: "The length of input columns are in different size"
            }
        )
    };
    // The separator comma lives inside the repetition so that the two-column
    // form `(a, b)` matches as well.
    ($column_a:ident, $column_b:ident $(, $column_n:ident)* $(,)?) => {
        ensure!(
            $column_a.len() == $column_b.len()
                $(&& $column_a.len() == $column_n.len())*,
            InvalidFuncArgsSnafu {
                err_msg: "The length of input columns are in different size"
            }
        )
    };
}
|
||||
|
||||
pub(super) use ensure_columns_len;
|
||||
|
||||
/// Ensures that exactly `$expected` columns were passed and, when more than
/// one column is expected, that they all share the same length.
///
/// Expands to `ensure!` checks, so the enclosing function returns early with
/// an `InvalidFuncArgsSnafu` context on failure. `ensure!`,
/// `InvalidFuncArgsSnafu` and `ensure_columns_len!` must be in scope at the
/// call site.
macro_rules! ensure_columns_n {
    ($cols:ident, $expected:literal) => {
        ensure!(
            $cols.len() == $expected,
            InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The length of arguments is not correct, expect {}, provided : {}",
                    stringify!($expected),
                    $cols.len()
                ),
            }
        );

        // A single column has nothing to be compared against.
        if 1 < $expected {
            ensure_columns_len!($cols);
        }
    };
}
|
||||
|
||||
pub(super) use ensure_columns_n;
|
||||
@@ -15,6 +15,7 @@
|
||||
use std::sync::Arc;
|
||||
mod json_get;
|
||||
mod json_is;
|
||||
mod json_path_exists;
|
||||
mod json_to_string;
|
||||
mod parse_json;
|
||||
|
||||
@@ -46,5 +47,7 @@ impl JsonFunction {
|
||||
registry.register(Arc::new(JsonIsBool));
|
||||
registry.register(Arc::new(JsonIsArray));
|
||||
registry.register(Arc::new(JsonIsObject));
|
||||
|
||||
registry.register(Arc::new(json_path_exists::JsonPathExistsFunction));
|
||||
}
|
||||
}
|
||||
|
||||
172
src/common/function/src/scalars/json/json_path_exists.rs
Normal file
172
src/common/function/src/scalars/json/json_path_exists.rs
Normal file
@@ -0,0 +1,172 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::Signature;
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
/// Check if the given JSON data contains the given JSON path.
///
/// Takes a JSON column and a string column of JSON paths and yields one
/// nullable boolean per row. A row is null when either input is null, when the
/// path fails to parse, or when the existence check itself reports an error.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct JsonPathExistsFunction;
|
||||
|
||||
const NAME: &str = "json_path_exists";
|
||||
|
||||
impl Function for JsonPathExistsFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::boolean_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 2,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly two, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
let jsons = &columns[0];
|
||||
let paths = &columns[1];
|
||||
|
||||
let size = jsons.len();
|
||||
let datatype = jsons.data_type();
|
||||
let mut results = BooleanVectorBuilder::with_capacity(size);
|
||||
|
||||
match datatype {
|
||||
// JSON data type uses binary vector
|
||||
ConcreteDataType::Binary(_) => {
|
||||
for i in 0..size {
|
||||
let json = jsons.get_ref(i);
|
||||
let path = paths.get_ref(i);
|
||||
|
||||
let json = json.as_binary();
|
||||
let path = path.as_string();
|
||||
let result = match (json, path) {
|
||||
(Ok(Some(json)), Ok(Some(path))) => {
|
||||
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
|
||||
match json_path {
|
||||
Ok(json_path) => jsonb::path_exists(json, json_path).ok(),
|
||||
Err(_) => None,
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for JsonPathExistsFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "JSON_PATH_EXISTS")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_query::prelude::TypeSignature;
    use datatypes::scalars::ScalarVector;
    use datatypes::vectors::{BinaryVector, StringVector};

    use super::*;

    /// Checks the metadata of the function and evaluates it on a small table
    /// of (json, path, expected) rows.
    #[test]
    fn test_json_path_exists_function() {
        let func = JsonPathExistsFunction;

        assert_eq!("json_path_exists", func.name());

        let return_type = func
            .return_type(&[ConcreteDataType::json_datatype()])
            .unwrap();
        assert_eq!(ConcreteDataType::boolean_datatype(), return_type);

        assert!(matches!(func.signature(),
                         Signature {
                             type_signature: TypeSignature::Exact(valid_types),
                             volatility: Volatility::Immutable
                         } if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()]
        ));

        // One row per case: JSON document, JSON path, expected result.
        let cases = [
            (r#"{"a": {"b": 2}, "b": 2, "c": 3}"#, "$.a.b.c", false),
            (r#"{"a": 4, "b": {"c": 6}, "c": 6}"#, "$.b", true),
            (r#"{"a": 7, "b": 8, "c": {"a": 7}}"#, "$.c.a", true),
            (r#"{"a": 7, "b": 8, "c": {"a": 7}}"#, ".d", false),
        ];

        let jsonbs = cases
            .iter()
            .map(|(json, _, _)| jsonb::parse_value(json.as_bytes()).unwrap().to_vec())
            .collect::<Vec<_>>();
        let paths = cases.iter().map(|(_, path, _)| *path).collect::<Vec<_>>();

        let args: Vec<VectorRef> = vec![
            Arc::new(BinaryVector::from_vec(jsonbs)),
            Arc::new(StringVector::from_vec(paths)),
        ];
        let vector = func.eval(FunctionContext::default(), &args).unwrap();

        assert_eq!(4, vector.len());
        for (row, (_, _, expected)) in cases.iter().enumerate() {
            let actual = vector.get_ref(row).as_boolean().unwrap().unwrap();
            assert_eq!(*expected, actual);
        }
    }
}
|
||||
@@ -25,13 +25,13 @@ use session::context::QueryContextRef;
|
||||
use crate::handlers::ProcedureServiceHandlerRef;
|
||||
use crate::helper::cast_u64;
|
||||
|
||||
const DEFAULT_REPLAY_TIMEOUT_SECS: u64 = 10;
|
||||
const DEFAULT_TIMEOUT_SECS: u64 = 30;
|
||||
|
||||
/// A function to migrate a region from source peer to target peer.
|
||||
/// Returns the submitted procedure id if success. Only available in cluster mode.
|
||||
///
|
||||
/// - `migrate_region(region_id, from_peer, to_peer)`, with default replay WAL timeout(10 seconds).
|
||||
/// - `migrate_region(region_id, from_peer, to_peer, timeout(secs))`
|
||||
/// - `migrate_region(region_id, from_peer, to_peer)`, with timeout(30 seconds).
|
||||
/// - `migrate_region(region_id, from_peer, to_peer, timeout(secs))`.
|
||||
///
|
||||
/// The parameters:
|
||||
/// - `region_id`: the region id
|
||||
@@ -48,18 +48,13 @@ pub(crate) async fn migrate_region(
|
||||
_ctx: &QueryContextRef,
|
||||
params: &[ValueRef<'_>],
|
||||
) -> Result<Value> {
|
||||
let (region_id, from_peer, to_peer, replay_timeout) = match params.len() {
|
||||
let (region_id, from_peer, to_peer, timeout) = match params.len() {
|
||||
3 => {
|
||||
let region_id = cast_u64(¶ms[0])?;
|
||||
let from_peer = cast_u64(¶ms[1])?;
|
||||
let to_peer = cast_u64(¶ms[2])?;
|
||||
|
||||
(
|
||||
region_id,
|
||||
from_peer,
|
||||
to_peer,
|
||||
Some(DEFAULT_REPLAY_TIMEOUT_SECS),
|
||||
)
|
||||
(region_id, from_peer, to_peer, Some(DEFAULT_TIMEOUT_SECS))
|
||||
}
|
||||
|
||||
4 => {
|
||||
@@ -82,14 +77,14 @@ pub(crate) async fn migrate_region(
|
||||
}
|
||||
};
|
||||
|
||||
match (region_id, from_peer, to_peer, replay_timeout) {
|
||||
(Some(region_id), Some(from_peer), Some(to_peer), Some(replay_timeout)) => {
|
||||
match (region_id, from_peer, to_peer, timeout) {
|
||||
(Some(region_id), Some(from_peer), Some(to_peer), Some(timeout)) => {
|
||||
let pid = procedure_service_handler
|
||||
.migrate_region(MigrateRegionRequest {
|
||||
region_id,
|
||||
from_peer,
|
||||
to_peer,
|
||||
replay_timeout: Duration::from_secs(replay_timeout),
|
||||
timeout: Duration::from_secs(timeout),
|
||||
})
|
||||
.await?;
|
||||
|
||||
|
||||
@@ -21,23 +21,19 @@ use syn::{parse_macro_input, DeriveInput, ItemStruct};
|
||||
pub(crate) fn impl_aggr_func_type_store(ast: &DeriveInput) -> TokenStream {
|
||||
let name = &ast.ident;
|
||||
let gen = quote! {
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::error::{InvalidInputStateSnafu, Error as QueryError};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
|
||||
impl AggrFuncTypeStore for #name {
|
||||
fn input_types(&self) -> std::result::Result<Vec<ConcreteDataType>, QueryError> {
|
||||
impl common_query::logical_plan::accumulator::AggrFuncTypeStore for #name {
|
||||
fn input_types(&self) -> std::result::Result<Vec<datatypes::prelude::ConcreteDataType>, common_query::error::Error> {
|
||||
let input_types = self.input_types.load();
|
||||
snafu::ensure!(input_types.is_some(), InvalidInputStateSnafu);
|
||||
snafu::ensure!(input_types.is_some(), common_query::error::InvalidInputStateSnafu);
|
||||
Ok(input_types.as_ref().unwrap().as_ref().clone())
|
||||
}
|
||||
|
||||
fn set_input_types(&self, input_types: Vec<ConcreteDataType>) -> std::result::Result<(), QueryError> {
|
||||
fn set_input_types(&self, input_types: Vec<datatypes::prelude::ConcreteDataType>) -> std::result::Result<(), common_query::error::Error> {
|
||||
let old = self.input_types.swap(Some(std::sync::Arc::new(input_types.clone())));
|
||||
if let Some(old) = old {
|
||||
snafu::ensure!(old.len() == input_types.len(), InvalidInputStateSnafu);
|
||||
snafu::ensure!(old.len() == input_types.len(), common_query::error::InvalidInputStateSnafu);
|
||||
for (x, y) in old.iter().zip(input_types.iter()) {
|
||||
snafu::ensure!(x == y, InvalidInputStateSnafu);
|
||||
snafu::ensure!(x == y, common_query::error::InvalidInputStateSnafu);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@@ -51,7 +47,7 @@ pub(crate) fn impl_as_aggr_func_creator(_args: TokenStream, input: TokenStream)
|
||||
let mut item_struct = parse_macro_input!(input as ItemStruct);
|
||||
if let syn::Fields::Named(ref mut fields) = item_struct.fields {
|
||||
let result = syn::Field::parse_named.parse2(quote! {
|
||||
input_types: arc_swap::ArcSwapOption<Vec<ConcreteDataType>>
|
||||
input_types: arc_swap::ArcSwapOption<Vec<datatypes::prelude::ConcreteDataType>>
|
||||
});
|
||||
match result {
|
||||
Ok(field) => fields.named.push(field),
|
||||
|
||||
@@ -24,5 +24,5 @@ struct Foo {}
|
||||
fn test_derive() {
|
||||
let _ = Foo::default();
|
||||
assert_fields!(Foo: input_types);
|
||||
assert_impl_all!(Foo: std::fmt::Debug, Default, AggrFuncTypeStore);
|
||||
assert_impl_all!(Foo: std::fmt::Debug, Default, common_query::logical_plan::accumulator::AggrFuncTypeStore);
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
use crate::datanode::RegionStat;
|
||||
use crate::error::{
|
||||
DecodeJsonSnafu, EncodeJsonSnafu, Error, FromUtf8Snafu, InvalidNodeInfoKeySnafu,
|
||||
InvalidRoleSnafu, ParseNumSnafu, Result,
|
||||
@@ -47,6 +48,9 @@ pub trait ClusterInfo {
|
||||
role: Option<Role>,
|
||||
) -> std::result::Result<Vec<NodeInfo>, Self::Error>;
|
||||
|
||||
/// List all region stats in the cluster.
|
||||
async fn list_region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error>;
|
||||
|
||||
// TODO(jeremy): Other info, like region status, etc.
|
||||
}
|
||||
|
||||
|
||||
413
src/common/meta/src/datanode.rs
Normal file
413
src/common/meta/src/datanode.rs
Normal file
@@ -0,0 +1,413 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::str::FromStr;
|
||||
|
||||
use api::v1::meta::{HeartbeatRequest, RequestHeader};
|
||||
use common_time::util as time_util;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::region_engine::{RegionRole, RegionStatistic};
|
||||
use store_api::storage::RegionId;
|
||||
use table::metadata::TableId;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::{error, ClusterId};
|
||||
|
||||
pub(crate) const DATANODE_LEASE_PREFIX: &str = "__meta_datanode_lease";
|
||||
const INACTIVE_REGION_PREFIX: &str = "__meta_inactive_region";
|
||||
|
||||
const DATANODE_STAT_PREFIX: &str = "__meta_datanode_stat";
|
||||
|
||||
pub const REGION_STATISTIC_KEY: &str = "__region_statistic";
|
||||
|
||||
lazy_static! {
|
||||
// Matches a full datanode lease key: the lease prefix followed by two
// dash-separated decimal groups (presumably cluster id and node id — confirm
// against the lease key type).
pub(crate) static ref DATANODE_LEASE_KEY_PATTERN: Regex =
|
||||
Regex::new(&format!("^{DATANODE_LEASE_PREFIX}-([0-9]+)-([0-9]+)$")).unwrap();
|
||||
// Matches a full datanode stat key `__meta_datanode_stat-{cluster_id}-{node_id}`;
// group 1 is parsed as the cluster id and group 2 as the node id.
static ref DATANODE_STAT_KEY_PATTERN: Regex =
|
||||
Regex::new(&format!("^{DATANODE_STAT_PREFIX}-([0-9]+)-([0-9]+)$")).unwrap();
|
||||
// Matches a full inactive-region key: the inactive-region prefix followed by
// three dash-separated decimal groups.
static ref INACTIVE_REGION_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{INACTIVE_REGION_PREFIX}-([0-9]+)-([0-9]+)-([0-9]+)$"
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// The statistics of a datanode, built from one of its heartbeat requests.
|
||||
///
|
||||
/// It is addressed by a [`DatanodeStatKey`] (see [`Stat::stat_key`]), whose byte form is `__meta_datanode_stat-{cluster_id}-{node_id}`.
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct Stat {
|
||||
/// When this stat was created, in milliseconds.
pub timestamp_millis: i64,
|
||||
/// The id of the cluster the datanode belongs to.
pub cluster_id: ClusterId,
|
||||
/// The datanode id.
|
||||
pub id: u64,
|
||||
/// The datanode address.
|
||||
pub addr: String,
|
||||
/// The read capacity units during this period
|
||||
pub rcus: i64,
|
||||
/// The write capacity units during this period
|
||||
pub wcus: i64,
|
||||
/// How many regions on this node
|
||||
pub region_num: u64,
|
||||
/// The per-region statistics; `rcus`, `wcus` and `region_num` are derived from them.
pub region_stats: Vec<RegionStat>,
|
||||
/// The node epoch is used to check whether the node has restarted or redeployed.
|
||||
pub node_epoch: u64,
|
||||
}
|
||||
|
||||
/// The statistics of a region.
///
/// When converted from `api::v1::meta::RegionStat`, the three size fields are
/// read from the `RegionStatistic` stored under [`REGION_STATISTIC_KEY`] in the
/// request extensions, falling back to `RegionStatistic::default()` when the
/// entry is missing or cannot be deserialized.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RegionStat {
|
||||
/// The region_id.
|
||||
pub id: RegionId,
|
||||
/// The read capacity units during this period
|
||||
pub rcus: i64,
|
||||
/// The write capacity units during this period
|
||||
pub wcus: i64,
|
||||
/// Approximate bytes of this region
|
||||
pub approximate_bytes: i64,
|
||||
/// The engine name.
|
||||
pub engine: String,
|
||||
/// The region role.
|
||||
pub role: RegionRole,
|
||||
/// The size of the memtable in bytes.
|
||||
pub memtable_size: u64,
|
||||
/// The size of the manifest in bytes.
|
||||
pub manifest_size: u64,
|
||||
/// The size of the SST files in bytes.
|
||||
pub sst_size: u64,
|
||||
}
|
||||
|
||||
impl Stat {
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.region_stats.is_empty()
|
||||
}
|
||||
|
||||
pub fn stat_key(&self) -> DatanodeStatKey {
|
||||
DatanodeStatKey {
|
||||
cluster_id: self.cluster_id,
|
||||
node_id: self.id,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a tuple array containing [RegionId] and [RegionRole].
|
||||
pub fn regions(&self) -> Vec<(RegionId, RegionRole)> {
|
||||
self.region_stats.iter().map(|s| (s.id, s.role)).collect()
|
||||
}
|
||||
|
||||
/// Returns all table ids in the region stats.
|
||||
pub fn table_ids(&self) -> HashSet<TableId> {
|
||||
self.region_stats.iter().map(|s| s.id.table_id()).collect()
|
||||
}
|
||||
|
||||
/// Retains the active region stats and updates the rcus, wcus, and region_num.
|
||||
pub fn retain_active_region_stats(&mut self, inactive_region_ids: &HashSet<RegionId>) {
|
||||
if inactive_region_ids.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
self.region_stats
|
||||
.retain(|r| !inactive_region_ids.contains(&r.id));
|
||||
self.rcus = self.region_stats.iter().map(|s| s.rcus).sum();
|
||||
self.wcus = self.region_stats.iter().map(|s| s.wcus).sum();
|
||||
self.region_num = self.region_stats.len() as u64;
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&HeartbeatRequest> for Stat {
|
||||
type Error = Option<RequestHeader>;
|
||||
|
||||
fn try_from(value: &HeartbeatRequest) -> std::result::Result<Self, Self::Error> {
|
||||
let HeartbeatRequest {
|
||||
header,
|
||||
peer,
|
||||
region_stats,
|
||||
node_epoch,
|
||||
..
|
||||
} = value;
|
||||
|
||||
match (header, peer) {
|
||||
(Some(header), Some(peer)) => {
|
||||
let region_stats = region_stats
|
||||
.iter()
|
||||
.map(RegionStat::from)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(Self {
|
||||
timestamp_millis: time_util::current_time_millis(),
|
||||
cluster_id: header.cluster_id,
|
||||
// datanode id
|
||||
id: peer.id,
|
||||
// datanode address
|
||||
addr: peer.addr.clone(),
|
||||
rcus: region_stats.iter().map(|s| s.rcus).sum(),
|
||||
wcus: region_stats.iter().map(|s| s.wcus).sum(),
|
||||
region_num: region_stats.len() as u64,
|
||||
region_stats,
|
||||
node_epoch: *node_epoch,
|
||||
})
|
||||
}
|
||||
(header, _) => Err(header.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&api::v1::meta::RegionStat> for RegionStat {
|
||||
fn from(value: &api::v1::meta::RegionStat) -> Self {
|
||||
let region_stat = value
|
||||
.extensions
|
||||
.get(REGION_STATISTIC_KEY)
|
||||
.and_then(|value| RegionStatistic::deserialize_from_slice(value))
|
||||
.unwrap_or_default();
|
||||
|
||||
Self {
|
||||
id: RegionId::from_u64(value.region_id),
|
||||
rcus: value.rcus,
|
||||
wcus: value.wcus,
|
||||
approximate_bytes: value.approximate_bytes,
|
||||
engine: value.engine.to_string(),
|
||||
role: RegionRole::from(value.role()),
|
||||
memtable_size: region_stat.memtable_size,
|
||||
manifest_size: region_stat.manifest_size,
|
||||
sst_size: region_stat.sst_size,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The key of the datanode stat in the memory store.
|
||||
///
|
||||
/// The format is `__meta_datanode_stat-{cluster_id}-{node_id}`.
///
/// The key converts to bytes via `From<DatanodeStatKey> for Vec<u8>` and is
/// parsed back via `FromStr` / `TryFrom<Vec<u8>>`.
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
|
||||
pub struct DatanodeStatKey {
|
||||
/// The id of the cluster the datanode belongs to.
pub cluster_id: ClusterId,
|
||||
/// The id of the datanode.
pub node_id: u64,
|
||||
}
|
||||
|
||||
impl DatanodeStatKey {
|
||||
/// The key prefix.
|
||||
pub fn prefix_key() -> Vec<u8> {
|
||||
format!("{DATANODE_STAT_PREFIX}-").into_bytes()
|
||||
}
|
||||
|
||||
/// The key prefix with the cluster id.
|
||||
pub fn key_prefix_with_cluster_id(cluster_id: ClusterId) -> String {
|
||||
format!("{DATANODE_STAT_PREFIX}-{cluster_id}-")
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DatanodeStatKey> for Vec<u8> {
|
||||
fn from(value: DatanodeStatKey) -> Self {
|
||||
format!(
|
||||
"{}-{}-{}",
|
||||
DATANODE_STAT_PREFIX, value.cluster_id, value.node_id
|
||||
)
|
||||
.into_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for DatanodeStatKey {
|
||||
type Err = error::Error;
|
||||
|
||||
fn from_str(key: &str) -> Result<Self> {
|
||||
let caps = DATANODE_STAT_KEY_PATTERN
|
||||
.captures(key)
|
||||
.context(error::InvalidStatKeySnafu { key })?;
|
||||
|
||||
ensure!(caps.len() == 3, error::InvalidStatKeySnafu { key });
|
||||
|
||||
let cluster_id = caps[1].to_string();
|
||||
let node_id = caps[2].to_string();
|
||||
let cluster_id: u64 = cluster_id.parse().context(error::ParseNumSnafu {
|
||||
err_msg: format!("invalid cluster_id: {cluster_id}"),
|
||||
})?;
|
||||
let node_id: u64 = node_id.parse().context(error::ParseNumSnafu {
|
||||
err_msg: format!("invalid node_id: {node_id}"),
|
||||
})?;
|
||||
|
||||
Ok(Self {
|
||||
cluster_id,
|
||||
node_id,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Vec<u8>> for DatanodeStatKey {
|
||||
type Error = error::Error;
|
||||
|
||||
fn try_from(bytes: Vec<u8>) -> Result<Self> {
|
||||
String::from_utf8(bytes)
|
||||
.context(error::FromUtf8Snafu {
|
||||
name: "DatanodeStatKey",
|
||||
})
|
||||
.map(|x| x.parse())?
|
||||
}
|
||||
}
|
||||
|
||||
/// The value of the datanode stat in the memory store.
///
/// Because of `#[serde(transparent)]` it (de)serializes as the bare list of
/// stats. The accessors treat the last element as the latest stat.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(transparent)]
|
||||
pub struct DatanodeStatValue {
|
||||
/// The collected stats of one datanode.
pub stats: Vec<Stat>,
|
||||
}
|
||||
|
||||
impl DatanodeStatValue {
|
||||
/// Get the latest number of regions.
|
||||
pub fn region_num(&self) -> Option<u64> {
|
||||
self.stats.last().map(|x| x.region_num)
|
||||
}
|
||||
|
||||
/// Get the latest node addr.
|
||||
pub fn node_addr(&self) -> Option<String> {
|
||||
self.stats.last().map(|x| x.addr.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<DatanodeStatValue> for Vec<u8> {
|
||||
type Error = error::Error;
|
||||
|
||||
fn try_from(stats: DatanodeStatValue) -> Result<Self> {
|
||||
Ok(serde_json::to_string(&stats)
|
||||
.context(error::SerializeToJsonSnafu {
|
||||
input: format!("{stats:?}"),
|
||||
})?
|
||||
.into_bytes())
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for DatanodeStatValue {
|
||||
type Err = error::Error;
|
||||
|
||||
fn from_str(value: &str) -> Result<Self> {
|
||||
serde_json::from_str(value).context(error::DeserializeFromJsonSnafu { input: value })
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Vec<u8>> for DatanodeStatValue {
|
||||
type Error = error::Error;
|
||||
|
||||
fn try_from(value: Vec<u8>) -> Result<Self> {
|
||||
String::from_utf8(value)
|
||||
.context(error::FromUtf8Snafu {
|
||||
name: "DatanodeStatValue",
|
||||
})
|
||||
.map(|x| x.parse())?
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A default stat with only the address set.
    fn stat_with_addr(addr: &str) -> Stat {
        Stat {
            addr: addr.to_string(),
            ..Default::default()
        }
    }

    /// A default stat with only the region count set.
    fn stat_with_region_num(region_num: u64) -> Stat {
        Stat {
            region_num,
            ..Default::default()
        }
    }

    #[test]
    fn test_stat_key() {
        let stat_key = Stat {
            cluster_id: 3,
            id: 101,
            region_num: 10,
            ..Default::default()
        }
        .stat_key();

        assert_eq!(3, stat_key.cluster_id);
        assert_eq!(101, stat_key.node_id);
    }

    #[test]
    fn test_stat_val_round_trip() {
        let original = DatanodeStatValue {
            stats: vec![Stat {
                cluster_id: 0,
                id: 101,
                region_num: 100,
                ..Default::default()
            }],
        };

        // Encode to bytes and decode again.
        let encoded: Vec<u8> = original.try_into().unwrap();
        let decoded: DatanodeStatValue = encoded.try_into().unwrap();

        assert_eq!(1, decoded.stats.len());

        let stat = decoded.stats.first().unwrap();
        assert_eq!(0, stat.cluster_id);
        assert_eq!(101, stat.id);
        assert_eq!(100, stat.region_num);
    }

    #[test]
    fn test_get_addr_from_stat_val() {
        let empty = DatanodeStatValue { stats: vec![] };
        assert!(empty.node_addr().is_none());

        let stat_val = DatanodeStatValue {
            stats: vec![
                stat_with_addr("1"),
                stat_with_addr("2"),
                stat_with_addr("3"),
            ],
        };
        assert_eq!("3", stat_val.node_addr().unwrap());
    }

    #[test]
    fn test_get_region_num_from_stat_val() {
        let empty = DatanodeStatValue { stats: vec![] };
        assert!(empty.region_num().is_none());

        let single = DatanodeStatValue {
            stats: vec![stat_with_region_num(0)],
        };
        assert_eq!(Some(0), single.region_num());

        let stat_val = DatanodeStatValue {
            stats: vec![
                stat_with_region_num(1),
                stat_with_region_num(0),
                stat_with_region_num(2),
            ],
        };
        assert_eq!(2, stat_val.region_num().unwrap());
    }
}
|
||||
@@ -15,6 +15,7 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::meta::ProcedureDetailResponse;
|
||||
use common_telemetry::tracing_context::W3cTrace;
|
||||
use store_api::storage::{RegionId, RegionNumber, TableId};
|
||||
|
||||
@@ -82,6 +83,8 @@ pub trait ProcedureExecutor: Send + Sync {
|
||||
ctx: &ExecutorContext,
|
||||
pid: &str,
|
||||
) -> Result<ProcedureStateResponse>;
|
||||
|
||||
async fn list_procedures(&self, ctx: &ExecutorContext) -> Result<ProcedureDetailResponse>;
|
||||
}
|
||||
|
||||
pub type ProcedureExecutorRef = Arc<dyn ProcedureExecutor>;
|
||||
|
||||
@@ -187,7 +187,7 @@ mod tests {
|
||||
region: Region::new_test(region_id),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
}]),
|
||||
HashMap::new(),
|
||||
|
||||
@@ -107,21 +107,21 @@ async fn test_on_submit_alter_request() {
|
||||
region: Region::new_test(RegionId::new(table_id, 1)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![Peer::empty(5)],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 2)),
|
||||
leader_peer: Some(Peer::empty(2)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 3)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
@@ -193,21 +193,21 @@ async fn test_on_submit_alter_request_with_outdated_request() {
|
||||
region: Region::new_test(RegionId::new(table_id, 1)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![Peer::empty(5)],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 2)),
|
||||
leader_peer: Some(Peer::empty(2)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 3)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
|
||||
@@ -119,21 +119,21 @@ async fn test_on_datanode_drop_regions() {
|
||||
region: Region::new_test(RegionId::new(table_id, 1)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![Peer::empty(5)],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 2)),
|
||||
leader_peer: Some(Peer::empty(2)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 3)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
|
||||
@@ -18,6 +18,7 @@ use common_procedure::error::Error as ProcedureError;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
|
||||
use table::metadata::TableId;
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use crate::ddl::DetectingRegion;
|
||||
use crate::error::{Error, OperateDatanodeSnafu, Result, TableNotFoundSnafu, UnsupportedSnafu};
|
||||
@@ -109,8 +110,8 @@ pub async fn check_and_get_physical_table_id(
|
||||
.table_name_manager()
|
||||
.get(physical_table_name)
|
||||
.await?
|
||||
.context(TableNotFoundSnafu {
|
||||
table_name: physical_table_name.to_string(),
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
table_name: TableReference::from(physical_table_name).to_string(),
|
||||
})
|
||||
.map(|table| table.table_id())
|
||||
}
|
||||
@@ -123,8 +124,8 @@ pub async fn get_physical_table_id(
|
||||
.table_name_manager()
|
||||
.get(logical_table_name)
|
||||
.await?
|
||||
.context(TableNotFoundSnafu {
|
||||
table_name: logical_table_name.to_string(),
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
table_name: TableReference::from(logical_table_name).to_string(),
|
||||
})
|
||||
.map(|table| table.table_id())?;
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::meta::ProcedureDetailResponse;
|
||||
use common_procedure::{
|
||||
watcher, BoxedProcedureLoader, Output, ProcedureId, ProcedureManagerRef, ProcedureWithId,
|
||||
};
|
||||
@@ -825,6 +826,15 @@ impl ProcedureExecutor for DdlManager {
|
||||
|
||||
Ok(procedure::procedure_state_to_pb_response(&state))
|
||||
}
|
||||
|
||||
async fn list_procedures(&self, _ctx: &ExecutorContext) -> Result<ProcedureDetailResponse> {
|
||||
let metas = self
|
||||
.procedure_manager
|
||||
.list_procedures()
|
||||
.await
|
||||
.context(QueryProcedureSnafu)?;
|
||||
Ok(procedure::procedure_details_to_pb_response(metas))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -147,6 +147,20 @@ pub enum Error {
|
||||
source: common_procedure::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to start procedure manager"))]
|
||||
StartProcedureManager {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_procedure::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to stop procedure manager"))]
|
||||
StopProcedureManager {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_procedure::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Failed to get procedure output, procedure id: {procedure_id}, error: {err_msg}"
|
||||
))]
|
||||
@@ -218,6 +232,24 @@ pub enum Error {
|
||||
error: JsonError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to serialize to json: {}", input))]
|
||||
SerializeToJson {
|
||||
input: String,
|
||||
#[snafu(source)]
|
||||
error: serde_json::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to deserialize from json: {}", input))]
|
||||
DeserializeFromJson {
|
||||
input: String,
|
||||
#[snafu(source)]
|
||||
error: serde_json::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Payload not exist"))]
|
||||
PayloadNotExist {
|
||||
#[snafu(implicit)]
|
||||
@@ -531,13 +563,20 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid node info key: {}", key))]
|
||||
#[snafu(display("Invalid node info key: {}", key))]
|
||||
InvalidNodeInfoKey {
|
||||
key: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid node stat key: {}", key))]
|
||||
InvalidStatKey {
|
||||
key: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse number: {}", err_msg))]
|
||||
ParseNum {
|
||||
err_msg: String,
|
||||
@@ -627,7 +666,9 @@ impl ErrorExt for Error {
|
||||
| EtcdTxnFailed { .. }
|
||||
| ConnectEtcd { .. }
|
||||
| MoveValues { .. }
|
||||
| GetCache { .. } => StatusCode::Internal,
|
||||
| GetCache { .. }
|
||||
| SerializeToJson { .. }
|
||||
| DeserializeFromJson { .. } => StatusCode::Internal,
|
||||
|
||||
ValueNotExist { .. } => StatusCode::Unexpected,
|
||||
|
||||
@@ -688,7 +729,9 @@ impl ErrorExt for Error {
|
||||
|
||||
SubmitProcedure { source, .. }
|
||||
| QueryProcedure { source, .. }
|
||||
| WaitProcedure { source, .. } => source.status_code(),
|
||||
| WaitProcedure { source, .. }
|
||||
| StartProcedureManager { source, .. }
|
||||
| StopProcedureManager { source, .. } => source.status_code(),
|
||||
RegisterProcedureLoader { source, .. } => source.status_code(),
|
||||
External { source, .. } => source.status_code(),
|
||||
OperateDatanode { source, .. } => source.status_code(),
|
||||
@@ -700,6 +743,7 @@ impl ErrorExt for Error {
|
||||
| InvalidNumTopics { .. }
|
||||
| SchemaNotFound { .. }
|
||||
| InvalidNodeInfoKey { .. }
|
||||
| InvalidStatKey { .. }
|
||||
| ParseNum { .. }
|
||||
| InvalidRole { .. }
|
||||
| EmptyDdlTasks { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
@@ -132,11 +132,22 @@ impl OpenRegion {
|
||||
pub struct DowngradeRegion {
|
||||
/// The [RegionId].
|
||||
pub region_id: RegionId,
|
||||
/// The timeout for waiting for the region to be flushed.
|
||||
///
|
||||
/// `None` stands for don't flush before downgrading the region.
|
||||
#[serde(default)]
|
||||
pub flush_timeout: Option<Duration>,
|
||||
/// Rejects all write requests after flushing.
|
||||
pub reject_write: bool,
|
||||
}
|
||||
|
||||
impl Display for DowngradeRegion {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "DowngradeRegion(region_id={})", self.region_id)
|
||||
write!(
|
||||
f,
|
||||
"DowngradeRegion(region_id={}, flush_timeout={:?}, rejct_write={})",
|
||||
self.region_id, self.flush_timeout, self.reject_write
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -152,7 +163,7 @@ pub struct UpgradeRegion {
|
||||
/// `None` stands for no wait,
|
||||
/// it's helpful to verify whether the leader region is ready.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub wait_for_replay_timeout: Option<Duration>,
|
||||
pub replay_timeout: Option<Duration>,
|
||||
/// The hint for replaying memtable.
|
||||
#[serde(default)]
|
||||
pub location_id: Option<u64>,
|
||||
|
||||
@@ -140,11 +140,11 @@ use crate::key::table_route::TableRouteKey;
|
||||
use crate::key::txn_helper::TxnOpGetResponseSet;
|
||||
use crate::kv_backend::txn::{Txn, TxnOp};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::rpc::router::{region_distribution, RegionRoute, RegionStatus};
|
||||
use crate::rpc::router::{region_distribution, LeaderState, RegionRoute};
|
||||
use crate::rpc::store::BatchDeleteRequest;
|
||||
use crate::DatanodeId;
|
||||
|
||||
pub const NAME_PATTERN: &str = r"[a-zA-Z_:-][a-zA-Z0-9_:\-\.]*";
|
||||
pub const NAME_PATTERN: &str = r"[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*";
|
||||
pub const MAINTENANCE_KEY: &str = "__maintenance";
|
||||
|
||||
const DATANODE_TABLE_KEY_PREFIX: &str = "__dn_table";
|
||||
@@ -1126,14 +1126,14 @@ impl TableMetadataManager {
|
||||
next_region_route_status: F,
|
||||
) -> Result<()>
|
||||
where
|
||||
F: Fn(&RegionRoute) -> Option<Option<RegionStatus>>,
|
||||
F: Fn(&RegionRoute) -> Option<Option<LeaderState>>,
|
||||
{
|
||||
let mut new_region_routes = current_table_route_value.region_routes()?.clone();
|
||||
|
||||
let mut updated = 0;
|
||||
for route in &mut new_region_routes {
|
||||
if let Some(status) = next_region_route_status(route) {
|
||||
if route.set_leader_status(status) {
|
||||
if let Some(state) = next_region_route_status(route) {
|
||||
if route.set_leader_state(state) {
|
||||
updated += 1;
|
||||
}
|
||||
}
|
||||
@@ -1280,7 +1280,7 @@ mod tests {
|
||||
use crate::key::{DeserializedValueWithBytes, TableMetadataManager, ViewInfoValue};
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::router::{region_distribution, Region, RegionRoute, RegionStatus};
|
||||
use crate::rpc::router::{region_distribution, LeaderState, Region, RegionRoute};
|
||||
|
||||
#[test]
|
||||
fn test_deserialized_value_with_bytes() {
|
||||
@@ -1324,7 +1324,7 @@ mod tests {
|
||||
},
|
||||
leader_peer: Some(Peer::new(datanode, "a2")),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
}
|
||||
}
|
||||
@@ -1715,7 +1715,7 @@ mod tests {
|
||||
attrs: BTreeMap::new(),
|
||||
},
|
||||
leader_peer: Some(Peer::new(datanode, "a2")),
|
||||
leader_status: Some(RegionStatus::Downgraded),
|
||||
leader_state: Some(LeaderState::Downgrading),
|
||||
follower_peers: vec![],
|
||||
leader_down_since: Some(current_time_millis()),
|
||||
},
|
||||
@@ -1727,7 +1727,7 @@ mod tests {
|
||||
attrs: BTreeMap::new(),
|
||||
},
|
||||
leader_peer: Some(Peer::new(datanode, "a1")),
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
follower_peers: vec![],
|
||||
leader_down_since: None,
|
||||
},
|
||||
@@ -1750,10 +1750,10 @@ mod tests {
|
||||
|
||||
table_metadata_manager
|
||||
.update_leader_region_status(table_id, ¤t_table_route_value, |region_route| {
|
||||
if region_route.leader_status.is_some() {
|
||||
if region_route.leader_state.is_some() {
|
||||
None
|
||||
} else {
|
||||
Some(Some(RegionStatus::Downgraded))
|
||||
Some(Some(LeaderState::Downgrading))
|
||||
}
|
||||
})
|
||||
.await
|
||||
@@ -1768,8 +1768,8 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
updated_route_value.region_routes().unwrap()[0].leader_status,
|
||||
Some(RegionStatus::Downgraded)
|
||||
updated_route_value.region_routes().unwrap()[0].leader_state,
|
||||
Some(LeaderState::Downgrading)
|
||||
);
|
||||
|
||||
assert!(updated_route_value.region_routes().unwrap()[0]
|
||||
@@ -1777,8 +1777,8 @@ mod tests {
|
||||
.is_some());
|
||||
|
||||
assert_eq!(
|
||||
updated_route_value.region_routes().unwrap()[1].leader_status,
|
||||
Some(RegionStatus::Downgraded)
|
||||
updated_route_value.region_routes().unwrap()[1].leader_state,
|
||||
Some(LeaderState::Downgrading)
|
||||
);
|
||||
assert!(updated_route_value.region_routes().unwrap()[1]
|
||||
.leader_down_since
|
||||
@@ -1943,21 +1943,21 @@ mod tests {
|
||||
region: Region::new_test(RegionId::new(table_id, 1)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![Peer::empty(5)],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 2)),
|
||||
leader_peer: Some(Peer::empty(2)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 3)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
@@ -1996,21 +1996,21 @@ mod tests {
|
||||
region: Region::new_test(RegionId::new(table_id, 1)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![Peer::empty(5)],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 2)),
|
||||
leader_peer: Some(Peer::empty(2)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 3)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
|
||||
@@ -21,6 +21,7 @@ use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
use table::metadata::TableId;
|
||||
use table::table_name::TableName;
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use super::{MetadataKey, MetadataValue, TABLE_NAME_KEY_PATTERN, TABLE_NAME_KEY_PREFIX};
|
||||
use crate::error::{Error, InvalidMetadataSnafu, Result};
|
||||
@@ -122,6 +123,16 @@ impl From<TableNameKey<'_>> for TableName {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<TableNameKey<'a>> for TableReference<'a> {
|
||||
fn from(value: TableNameKey<'a>) -> Self {
|
||||
Self {
|
||||
catalog: value.catalog,
|
||||
schema: value.schema,
|
||||
table: value.table,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TryFrom<&'a str> for TableNameKey<'a> {
|
||||
type Error = Error;
|
||||
|
||||
|
||||
@@ -744,6 +744,7 @@ mod tests {
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::kv_backend::{KvBackend, TxnService};
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::router::Region;
|
||||
use crate::rpc::store::PutRequest;
|
||||
|
||||
#[test]
|
||||
@@ -751,11 +752,43 @@ mod tests {
|
||||
let old_raw_v = r#"{"region_routes":[{"region":{"id":1,"name":"r1","partition":null,"attrs":{}},"leader_peer":{"id":2,"addr":"a2"},"follower_peers":[]},{"region":{"id":1,"name":"r1","partition":null,"attrs":{}},"leader_peer":{"id":2,"addr":"a2"},"follower_peers":[]}],"version":0}"#;
|
||||
let v = TableRouteValue::try_from_raw_value(old_raw_v.as_bytes()).unwrap();
|
||||
|
||||
let new_raw_v = format!("{:?}", v);
|
||||
assert_eq!(
|
||||
new_raw_v,
|
||||
r#"Physical(PhysicalTableRouteValue { region_routes: [RegionRoute { region: Region { id: 1(0, 1), name: "r1", partition: None, attrs: {} }, leader_peer: Some(Peer { id: 2, addr: "a2" }), follower_peers: [], leader_status: None, leader_down_since: None }, RegionRoute { region: Region { id: 1(0, 1), name: "r1", partition: None, attrs: {} }, leader_peer: Some(Peer { id: 2, addr: "a2" }), follower_peers: [], leader_status: None, leader_down_since: None }], version: 0 })"#
|
||||
);
|
||||
let expected_table_route = TableRouteValue::Physical(PhysicalTableRouteValue {
|
||||
region_routes: vec![
|
||||
RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(0, 1),
|
||||
name: "r1".to_string(),
|
||||
partition: None,
|
||||
attrs: Default::default(),
|
||||
},
|
||||
leader_peer: Some(Peer {
|
||||
id: 2,
|
||||
addr: "a2".to_string(),
|
||||
}),
|
||||
follower_peers: vec![],
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(0, 1),
|
||||
name: "r1".to_string(),
|
||||
partition: None,
|
||||
attrs: Default::default(),
|
||||
},
|
||||
leader_peer: Some(Peer {
|
||||
id: 2,
|
||||
addr: "a2".to_string(),
|
||||
}),
|
||||
follower_peers: vec![],
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
],
|
||||
version: 0,
|
||||
});
|
||||
|
||||
assert_eq!(v, expected_table_route);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
156
src/common/meta/src/leadership_notifier.rs
Normal file
156
src/common/meta/src/leadership_notifier.rs
Normal file
@@ -0,0 +1,156 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_telemetry::error;
|
||||
|
||||
use crate::error::Result;
|
||||
|
||||
pub type LeadershipChangeNotifierCustomizerRef = Arc<dyn LeadershipChangeNotifierCustomizer>;
|
||||
|
||||
/// A trait for customizing the leadership change notifier.
|
||||
pub trait LeadershipChangeNotifierCustomizer: Send + Sync {
|
||||
fn customize(&self, notifier: &mut LeadershipChangeNotifier);
|
||||
}
|
||||
|
||||
/// A trait for handling leadership change events in a distributed system.
|
||||
#[async_trait]
|
||||
pub trait LeadershipChangeListener: Send + Sync {
|
||||
/// Returns the listener name.
|
||||
fn name(&self) -> &str;
|
||||
|
||||
/// Called when the node transitions to the leader role.
|
||||
async fn on_leader_start(&self) -> Result<()>;
|
||||
|
||||
/// Called when the node transitions to the follower role.
|
||||
async fn on_leader_stop(&self) -> Result<()>;
|
||||
}
|
||||
|
||||
/// A notifier for leadership change events.
|
||||
#[derive(Default)]
|
||||
pub struct LeadershipChangeNotifier {
|
||||
listeners: Vec<Arc<dyn LeadershipChangeListener>>,
|
||||
}
|
||||
|
||||
impl LeadershipChangeNotifier {
|
||||
/// Adds a listener to the notifier.
|
||||
pub fn add_listener(&mut self, listener: Arc<dyn LeadershipChangeListener>) {
|
||||
self.listeners.push(listener);
|
||||
}
|
||||
|
||||
/// Notify all listeners that the node has become a leader.
|
||||
pub async fn notify_on_leader_start(&self) {
|
||||
for listener in &self.listeners {
|
||||
if let Err(err) = listener.on_leader_start().await {
|
||||
error!(
|
||||
err;
|
||||
"Failed to notify listener: {}, event 'on_leader_start'",
|
||||
listener.name()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Notify all listeners that the node has become a follower.
|
||||
pub async fn notify_on_leader_stop(&self) {
|
||||
for listener in &self.listeners {
|
||||
if let Err(err) = listener.on_leader_stop().await {
|
||||
error!(
|
||||
err;
|
||||
"Failed to notify listener: {}, event: 'on_follower_start'",
|
||||
listener.name()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::*;
|
||||
|
||||
struct MockListener {
|
||||
name: String,
|
||||
on_leader_start_fn: Option<Box<dyn Fn() -> Result<()> + Send + Sync>>,
|
||||
on_follower_start_fn: Option<Box<dyn Fn() -> Result<()> + Send + Sync>>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl LeadershipChangeListener for MockListener {
|
||||
fn name(&self) -> &str {
|
||||
&self.name
|
||||
}
|
||||
|
||||
async fn on_leader_start(&self) -> Result<()> {
|
||||
if let Some(f) = &self.on_leader_start_fn {
|
||||
return f();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn on_leader_stop(&self) -> Result<()> {
|
||||
if let Some(f) = &self.on_follower_start_fn {
|
||||
return f();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_leadership_change_notifier() {
|
||||
let mut notifier = LeadershipChangeNotifier::default();
|
||||
let listener1 = Arc::new(MockListener {
|
||||
name: "listener1".to_string(),
|
||||
on_leader_start_fn: None,
|
||||
on_follower_start_fn: None,
|
||||
});
|
||||
let called_on_leader_start = Arc::new(AtomicBool::new(false));
|
||||
let called_on_follower_start = Arc::new(AtomicBool::new(false));
|
||||
let called_on_leader_start_moved = called_on_leader_start.clone();
|
||||
let called_on_follower_start_moved = called_on_follower_start.clone();
|
||||
let listener2 = Arc::new(MockListener {
|
||||
name: "listener2".to_string(),
|
||||
on_leader_start_fn: Some(Box::new(move || {
|
||||
called_on_leader_start_moved.store(true, Ordering::Relaxed);
|
||||
Ok(())
|
||||
})),
|
||||
on_follower_start_fn: Some(Box::new(move || {
|
||||
called_on_follower_start_moved.store(true, Ordering::Relaxed);
|
||||
Ok(())
|
||||
})),
|
||||
});
|
||||
|
||||
notifier.add_listener(listener1);
|
||||
notifier.add_listener(listener2);
|
||||
|
||||
let listener1 = notifier.listeners.first().unwrap();
|
||||
let listener2 = notifier.listeners.get(1).unwrap();
|
||||
|
||||
assert_eq!(listener1.name(), "listener1");
|
||||
assert_eq!(listener2.name(), "listener2");
|
||||
|
||||
notifier.notify_on_leader_start().await;
|
||||
assert!(!called_on_follower_start.load(Ordering::Relaxed));
|
||||
assert!(called_on_leader_start.load(Ordering::Relaxed));
|
||||
|
||||
notifier.notify_on_leader_stop().await;
|
||||
assert!(called_on_follower_start.load(Ordering::Relaxed));
|
||||
assert!(called_on_leader_start.load(Ordering::Relaxed));
|
||||
}
|
||||
}
|
||||
@@ -22,6 +22,7 @@
|
||||
pub mod cache;
|
||||
pub mod cache_invalidator;
|
||||
pub mod cluster;
|
||||
pub mod datanode;
|
||||
pub mod ddl;
|
||||
pub mod ddl_manager;
|
||||
pub mod distributed_time_constants;
|
||||
@@ -31,6 +32,7 @@ pub mod heartbeat;
|
||||
pub mod instruction;
|
||||
pub mod key;
|
||||
pub mod kv_backend;
|
||||
pub mod leadership_notifier;
|
||||
pub mod lock_key;
|
||||
pub mod metrics;
|
||||
pub mod node_manager;
|
||||
|
||||
@@ -58,7 +58,7 @@ impl MemoryRegionKeeper {
|
||||
Default::default()
|
||||
}
|
||||
|
||||
/// Returns [OpeningRegionGuard] if Region(`region_id`) on Peer(`datanode_id`) does not exist.
|
||||
/// Returns [OperatingRegionGuard] if Region(`region_id`) on Peer(`datanode_id`) does not exist.
|
||||
pub fn register(
|
||||
&self,
|
||||
datanode_id: DatanodeId,
|
||||
|
||||
@@ -16,10 +16,11 @@ use std::time::Duration;
|
||||
|
||||
pub use api::v1::meta::{MigrateRegionResponse, ProcedureStateResponse};
|
||||
use api::v1::meta::{
|
||||
ProcedureId as PbProcedureId, ProcedureStateResponse as PbProcedureStateResponse,
|
||||
ProcedureDetailResponse as PbProcedureDetailResponse, ProcedureId as PbProcedureId,
|
||||
ProcedureMeta as PbProcedureMeta, ProcedureStateResponse as PbProcedureStateResponse,
|
||||
ProcedureStatus as PbProcedureStatus,
|
||||
};
|
||||
use common_procedure::{ProcedureId, ProcedureState};
|
||||
use common_procedure::{ProcedureId, ProcedureInfo, ProcedureState};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{ParseProcedureIdSnafu, Result};
|
||||
@@ -30,7 +31,7 @@ pub struct MigrateRegionRequest {
|
||||
pub region_id: u64,
|
||||
pub from_peer: u64,
|
||||
pub to_peer: u64,
|
||||
pub replay_timeout: Duration,
|
||||
pub timeout: Duration,
|
||||
}
|
||||
|
||||
/// Cast the protobuf [`ProcedureId`] to common [`ProcedureId`].
|
||||
@@ -49,9 +50,9 @@ pub fn pid_to_pb_pid(pid: ProcedureId) -> PbProcedureId {
|
||||
}
|
||||
}
|
||||
|
||||
/// Cast the common [`ProcedureState`] to pb [`ProcedureStateResponse`].
|
||||
pub fn procedure_state_to_pb_response(state: &ProcedureState) -> PbProcedureStateResponse {
|
||||
let (status, error) = match state {
|
||||
/// Cast the [`ProcedureState`] to protobuf [`PbProcedureStatus`].
|
||||
pub fn procedure_state_to_pb_state(state: &ProcedureState) -> (PbProcedureStatus, String) {
|
||||
match state {
|
||||
ProcedureState::Running => (PbProcedureStatus::Running, String::default()),
|
||||
ProcedureState::Done { .. } => (PbProcedureStatus::Done, String::default()),
|
||||
ProcedureState::Retrying { error } => (PbProcedureStatus::Retrying, error.to_string()),
|
||||
@@ -62,8 +63,12 @@ pub fn procedure_state_to_pb_response(state: &ProcedureState) -> PbProcedureStat
|
||||
ProcedureState::RollingBack { error } => {
|
||||
(PbProcedureStatus::RollingBack, error.to_string())
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// Cast the common [`ProcedureState`] to pb [`ProcedureStateResponse`].
|
||||
pub fn procedure_state_to_pb_response(state: &ProcedureState) -> PbProcedureStateResponse {
|
||||
let (status, error) = procedure_state_to_pb_state(state);
|
||||
PbProcedureStateResponse {
|
||||
status: status.into(),
|
||||
error,
|
||||
@@ -71,6 +76,28 @@ pub fn procedure_state_to_pb_response(state: &ProcedureState) -> PbProcedureStat
|
||||
}
|
||||
}
|
||||
|
||||
pub fn procedure_details_to_pb_response(metas: Vec<ProcedureInfo>) -> PbProcedureDetailResponse {
|
||||
let procedures = metas
|
||||
.into_iter()
|
||||
.map(|meta| {
|
||||
let (status, error) = procedure_state_to_pb_state(&meta.state);
|
||||
PbProcedureMeta {
|
||||
id: Some(pid_to_pb_pid(meta.id)),
|
||||
type_name: meta.type_name.to_string(),
|
||||
status: status.into(),
|
||||
start_time_ms: meta.start_time_ms,
|
||||
end_time_ms: meta.end_time_ms,
|
||||
lock_keys: meta.lock_keys,
|
||||
error,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
PbProcedureDetailResponse {
|
||||
procedures,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -108,16 +108,16 @@ pub fn convert_to_region_peer_map(
|
||||
.collect::<HashMap<_, _>>()
|
||||
}
|
||||
|
||||
/// Returns the HashMap<[RegionNumber], [RegionStatus]>;
|
||||
pub fn convert_to_region_leader_status_map(
|
||||
/// Returns the HashMap<[RegionNumber], [LeaderState]>;
|
||||
pub fn convert_to_region_leader_state_map(
|
||||
region_routes: &[RegionRoute],
|
||||
) -> HashMap<RegionNumber, RegionStatus> {
|
||||
) -> HashMap<RegionNumber, LeaderState> {
|
||||
region_routes
|
||||
.iter()
|
||||
.filter_map(|x| {
|
||||
x.leader_status
|
||||
x.leader_state
|
||||
.as_ref()
|
||||
.map(|status| (x.region.id.region_number(), *status))
|
||||
.map(|state| (x.region.id.region_number(), *state))
|
||||
})
|
||||
.collect::<HashMap<_, _>>()
|
||||
}
|
||||
@@ -205,7 +205,7 @@ impl TableRoute {
|
||||
region,
|
||||
leader_peer,
|
||||
follower_peers,
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
});
|
||||
}
|
||||
@@ -259,9 +259,13 @@ pub struct RegionRoute {
|
||||
pub follower_peers: Vec<Peer>,
|
||||
/// `None` by default.
|
||||
#[builder(setter(into, strip_option), default)]
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub leader_status: Option<RegionStatus>,
|
||||
/// The start time when the leader is in `Downgraded` status.
|
||||
#[serde(
|
||||
default,
|
||||
alias = "leader_status",
|
||||
skip_serializing_if = "Option::is_none"
|
||||
)]
|
||||
pub leader_state: Option<LeaderState>,
|
||||
/// The start time when the leader is in `Downgrading` state.
|
||||
#[serde(default)]
|
||||
#[builder(default = "self.default_leader_down_since()")]
|
||||
pub leader_down_since: Option<i64>,
|
||||
@@ -269,76 +273,78 @@ pub struct RegionRoute {
|
||||
|
||||
impl RegionRouteBuilder {
|
||||
fn default_leader_down_since(&self) -> Option<i64> {
|
||||
match self.leader_status {
|
||||
Some(Some(RegionStatus::Downgraded)) => Some(current_time_millis()),
|
||||
match self.leader_state {
|
||||
Some(Some(LeaderState::Downgrading)) => Some(current_time_millis()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The Status of the [Region].
|
||||
/// The State of the [`Region`] Leader.
|
||||
/// TODO(dennis): It's better to add more fine-grained statuses such as `PENDING` etc.
|
||||
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, AsRefStr)]
|
||||
#[strum(serialize_all = "UPPERCASE")]
|
||||
pub enum RegionStatus {
|
||||
/// The following cases in which the [Region] will be downgraded.
|
||||
pub enum LeaderState {
|
||||
/// The following cases in which the [`Region`] will be downgraded.
|
||||
///
|
||||
/// - The [Region] is unavailable(e.g., Crashed, Network disconnected).
|
||||
/// - The [Region] was planned to migrate to another [Peer].
|
||||
Downgraded,
|
||||
/// - The [`Region`] may be unavailable (e.g., Crashed, Network disconnected).
|
||||
/// - The [`Region`] was planned to migrate to another [`Peer`].
|
||||
Downgrading,
|
||||
}
|
||||
|
||||
impl RegionRoute {
|
||||
/// Returns true if the Leader [Region] is downgraded.
|
||||
/// Returns true if the Leader [`Region`] is downgrading.
|
||||
///
|
||||
/// The following cases in which the [Region] will be downgraded.
|
||||
/// The following cases in which the [`Region`] will be downgraded.
|
||||
///
|
||||
/// - The [Region] is unavailable(e.g., Crashed, Network disconnected).
|
||||
/// - The [Region] was planned to migrate to another [Peer].
|
||||
/// - The [`Region`] is unavailable(e.g., Crashed, Network disconnected).
|
||||
/// - The [`Region`] was planned to migrate to another [`Peer`].
|
||||
///
|
||||
pub fn is_leader_downgraded(&self) -> bool {
|
||||
matches!(self.leader_status, Some(RegionStatus::Downgraded))
|
||||
pub fn is_leader_downgrading(&self) -> bool {
|
||||
matches!(self.leader_state, Some(LeaderState::Downgrading))
|
||||
}
|
||||
|
||||
/// Marks the Leader [Region] as downgraded.
|
||||
/// Marks the Leader [`Region`] as [`LeaderState::Downgrading`].
|
||||
///
|
||||
/// We should downgrade a [Region] before deactivating it:
|
||||
/// We should downgrade a [`Region`] before deactivating it:
|
||||
///
|
||||
/// - During the [Region] Failover Procedure.
|
||||
/// - Migrating a [Region].
|
||||
/// - During the [`Region`] Failover Procedure.
|
||||
/// - Migrating a [`Region`].
|
||||
///
|
||||
/// **Notes:** Meta Server will stop renewing the lease for the downgraded [Region].
|
||||
/// **Notes:** Meta Server will renew a special lease (`Downgrading`) for the downgrading [`Region`].
|
||||
///
|
||||
/// A downgrading region will reject any write requests, and only allow the memtable to be flushed to object storage.
|
||||
///
|
||||
pub fn downgrade_leader(&mut self) {
|
||||
self.leader_down_since = Some(current_time_millis());
|
||||
self.leader_status = Some(RegionStatus::Downgraded)
|
||||
self.leader_state = Some(LeaderState::Downgrading)
|
||||
}
|
||||
|
||||
/// Returns how long since the leader is in `Downgraded` status.
|
||||
/// Returns how long the leader has been in the `Downgrading` state.
|
||||
pub fn leader_down_millis(&self) -> Option<i64> {
|
||||
self.leader_down_since
|
||||
.map(|start| current_time_millis() - start)
|
||||
}
|
||||
|
||||
/// Sets the leader status.
|
||||
/// Sets the leader state.
|
||||
///
|
||||
/// Returns true if updated.
|
||||
pub fn set_leader_status(&mut self, status: Option<RegionStatus>) -> bool {
|
||||
let updated = self.leader_status != status;
|
||||
pub fn set_leader_state(&mut self, state: Option<LeaderState>) -> bool {
|
||||
let updated = self.leader_state != state;
|
||||
|
||||
match (status, updated) {
|
||||
(Some(RegionStatus::Downgraded), true) => {
|
||||
match (state, updated) {
|
||||
(Some(LeaderState::Downgrading), true) => {
|
||||
self.leader_down_since = Some(current_time_millis());
|
||||
}
|
||||
(Some(RegionStatus::Downgraded), false) => {
|
||||
// Do nothing if leader is still in `Downgraded` status.
|
||||
(Some(LeaderState::Downgrading), false) => {
|
||||
// Do nothing if leader is still in `Downgrading` state.
|
||||
}
|
||||
_ => {
|
||||
self.leader_down_since = None;
|
||||
}
|
||||
}
|
||||
|
||||
self.leader_status = status;
|
||||
self.leader_state = state;
|
||||
updated
|
||||
}
|
||||
}
|
||||
@@ -477,15 +483,15 @@ mod tests {
|
||||
},
|
||||
leader_peer: Some(Peer::new(1, "a1")),
|
||||
follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
};
|
||||
|
||||
assert!(!region_route.is_leader_downgraded());
|
||||
assert!(!region_route.is_leader_downgrading());
|
||||
|
||||
region_route.downgrade_leader();
|
||||
|
||||
assert!(region_route.is_leader_downgraded());
|
||||
assert!(region_route.is_leader_downgrading());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -499,7 +505,7 @@ mod tests {
|
||||
},
|
||||
leader_peer: Some(Peer::new(1, "a1")),
|
||||
follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
|
||||
leader_status: None,
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
};
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ pub mod kafka;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_wal::config::MetasrvWalConfig;
|
||||
use common_wal::options::{KafkaWalOptions, WalOptions, WAL_OPTIONS_KEY};
|
||||
use snafu::ResultExt;
|
||||
@@ -24,6 +25,7 @@ use store_api::storage::{RegionId, RegionNumber};
|
||||
|
||||
use crate::error::{EncodeWalOptionsSnafu, Result};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::leadership_notifier::LeadershipChangeListener;
|
||||
use crate::wal_options_allocator::kafka::topic_manager::TopicManager as KafkaTopicManager;
|
||||
|
||||
/// Allocates wal options in region granularity.
|
||||
@@ -94,6 +96,21 @@ impl WalOptionsAllocator {
|
||||
}
|
||||
}
|
||||
|
||||
/// Lets [WalOptionsAllocator] react to leadership changes: it calls `start()` when
/// leadership starts and does nothing when leadership stops.
#[async_trait]
|
||||
impl LeadershipChangeListener for WalOptionsAllocator {
|
||||
/// Returns the fixed listener name `"WalOptionsAllocator"`.
fn name(&self) -> &str {
|
||||
"WalOptionsAllocator"
|
||||
}
|
||||
|
||||
/// Runs `self.start()` and returns its result unchanged.
async fn on_leader_start(&self) -> Result<()> {
|
||||
self.start().await
|
||||
}
|
||||
|
||||
/// No work is done on leader stop; always returns `Ok(())`.
async fn on_leader_stop(&self) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocates a wal options for each region. The allocated wal options is encoded immediately.
|
||||
pub fn allocate_region_wal_options(
|
||||
regions: Vec<RegionNumber>,
|
||||
|
||||
@@ -19,6 +19,7 @@ common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-runtime.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-time.workspace = true
|
||||
futures.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
object-store.workspace = true
|
||||
|
||||
@@ -26,7 +26,7 @@ pub mod watcher;
|
||||
pub use crate::error::{Error, Result};
|
||||
pub use crate::procedure::{
|
||||
BoxedProcedure, BoxedProcedureLoader, Context, ContextProvider, LockKey, Output, ParseIdError,
|
||||
Procedure, ProcedureId, ProcedureManager, ProcedureManagerRef, ProcedureState, ProcedureWithId,
|
||||
Status, StringKey,
|
||||
Procedure, ProcedureId, ProcedureInfo, ProcedureManager, ProcedureManagerRef, ProcedureState,
|
||||
ProcedureWithId, Status, StringKey,
|
||||
};
|
||||
pub use crate::watcher::Watcher;
|
||||
|
||||
@@ -16,7 +16,7 @@ mod runner;
|
||||
mod rwlock;
|
||||
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::atomic::{AtomicBool, AtomicI64, Ordering};
|
||||
use std::sync::{Arc, Mutex, RwLock};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
@@ -35,7 +35,7 @@ use crate::error::{
|
||||
StartRemoveOutdatedMetaTaskSnafu, StopRemoveOutdatedMetaTaskSnafu,
|
||||
};
|
||||
use crate::local::runner::Runner;
|
||||
use crate::procedure::{BoxedProcedureLoader, InitProcedureState};
|
||||
use crate::procedure::{BoxedProcedureLoader, InitProcedureState, ProcedureInfo};
|
||||
use crate::store::{ProcedureMessage, ProcedureMessages, ProcedureStore, StateStoreRef};
|
||||
use crate::{
|
||||
BoxedProcedure, ContextProvider, LockKey, ProcedureId, ProcedureManager, ProcedureState,
|
||||
@@ -57,6 +57,8 @@ const META_TTL: Duration = Duration::from_secs(60 * 10);
|
||||
pub(crate) struct ProcedureMeta {
|
||||
/// Id of this procedure.
|
||||
id: ProcedureId,
|
||||
/// Type name of this procedure.
|
||||
type_name: String,
|
||||
/// Parent procedure id.
|
||||
parent_id: Option<ProcedureId>,
|
||||
/// Notify to wait for subprocedures.
|
||||
@@ -69,6 +71,10 @@ pub(crate) struct ProcedureMeta {
|
||||
state_receiver: Receiver<ProcedureState>,
|
||||
/// Id of child procedures.
|
||||
children: Mutex<Vec<ProcedureId>>,
|
||||
/// Start execution time of this procedure.
|
||||
start_time_ms: AtomicI64,
|
||||
/// End execution time of this procedure.
|
||||
end_time_ms: AtomicI64,
|
||||
}
|
||||
|
||||
impl ProcedureMeta {
|
||||
@@ -77,6 +83,7 @@ impl ProcedureMeta {
|
||||
procedure_state: ProcedureState,
|
||||
parent_id: Option<ProcedureId>,
|
||||
lock_key: LockKey,
|
||||
type_name: &str,
|
||||
) -> ProcedureMeta {
|
||||
let (state_sender, state_receiver) = watch::channel(procedure_state);
|
||||
ProcedureMeta {
|
||||
@@ -87,6 +94,9 @@ impl ProcedureMeta {
|
||||
state_sender,
|
||||
state_receiver,
|
||||
children: Mutex::new(Vec::new()),
|
||||
start_time_ms: AtomicI64::new(0),
|
||||
end_time_ms: AtomicI64::new(0),
|
||||
type_name: type_name.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -117,6 +127,18 @@ impl ProcedureMeta {
|
||||
fn num_children(&self) -> usize {
|
||||
self.children.lock().unwrap().len()
|
||||
}
|
||||
|
||||
/// Stores the value of `common_time::util::current_time_millis()` as the start time
/// of the procedure, using a relaxed atomic store.
|
||||
fn set_start_time_ms(&self) {
|
||||
self.start_time_ms
|
||||
.store(common_time::util::current_time_millis(), Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Stores the value of `common_time::util::current_time_millis()` as the end time
/// of the procedure, using a relaxed atomic store.
|
||||
fn set_end_time_ms(&self) {
|
||||
self.end_time_ms
|
||||
.store(common_time::util::current_time_millis(), Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
/// Reference counted pointer to [ProcedureMeta].
|
||||
@@ -210,6 +232,22 @@ impl ManagerContext {
|
||||
procedures.get(&procedure_id).map(|meta| meta.state())
|
||||
}
|
||||
|
||||
/// Returns a [ProcedureInfo] for every procedure currently held in `self.procedures`.
///
/// Each entry is built from the procedure's metadata: id, type name, the start/end
/// times read with relaxed atomic loads, the current state and the lock keys rendered
/// as strings. The read lock on `self.procedures` is held while the list is built.
///
/// # Panics
///
/// Panics if the `procedures` lock is poisoned (`read().unwrap()`).
|
||||
fn list_procedure(&self) -> Vec<ProcedureInfo> {
|
||||
let procedures = self.procedures.read().unwrap();
|
||||
procedures
|
||||
.values()
|
||||
.map(|meta| ProcedureInfo {
|
||||
id: meta.id,
|
||||
type_name: meta.type_name.clone(),
|
||||
start_time_ms: meta.start_time_ms.load(Ordering::Relaxed),
|
||||
end_time_ms: meta.end_time_ms.load(Ordering::Relaxed),
|
||||
state: meta.state(),
|
||||
lock_keys: meta.lock_key.get_keys(),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Returns the [Watcher] of specific `procedure_id`.
|
||||
fn watcher(&self, procedure_id: ProcedureId) -> Option<Watcher> {
|
||||
let procedures = self.procedures.read().unwrap();
|
||||
@@ -438,6 +476,7 @@ impl LocalManager {
|
||||
procedure_state,
|
||||
None,
|
||||
procedure.lock_key(),
|
||||
procedure.type_name(),
|
||||
));
|
||||
let runner = Runner {
|
||||
meta: meta.clone(),
|
||||
@@ -641,6 +680,10 @@ impl ProcedureManager for LocalManager {
|
||||
fn procedure_watcher(&self, procedure_id: ProcedureId) -> Option<Watcher> {
|
||||
self.manager_ctx.watcher(procedure_id)
|
||||
}
|
||||
|
||||
/// Returns the details of all procedures by delegating to
/// `self.manager_ctx.list_procedure()`. This implementation always returns `Ok`.
async fn list_procedures(&self) -> Result<Vec<ProcedureInfo>> {
|
||||
Ok(self.manager_ctx.list_procedure())
|
||||
}
|
||||
}
|
||||
|
||||
struct RemoveOutdatedMetaFunction {
|
||||
@@ -675,6 +718,7 @@ pub(crate) mod test_util {
|
||||
ProcedureState::Running,
|
||||
None,
|
||||
LockKey::default(),
|
||||
"ProcedureAdapter",
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -27,7 +27,9 @@ use crate::error::{self, ProcedurePanicSnafu, Result, RollbackTimesExceededSnafu
|
||||
use crate::local::{ManagerContext, ProcedureMeta, ProcedureMetaRef};
|
||||
use crate::procedure::{Output, StringKey};
|
||||
use crate::store::{ProcedureMessage, ProcedureStore};
|
||||
use crate::{BoxedProcedure, Context, Error, ProcedureId, ProcedureState, ProcedureWithId, Status};
|
||||
use crate::{
|
||||
BoxedProcedure, Context, Error, Procedure, ProcedureId, ProcedureState, ProcedureWithId, Status,
|
||||
};
|
||||
|
||||
/// A guard to cleanup procedure state.
|
||||
struct ProcedureGuard {
|
||||
@@ -129,7 +131,9 @@ impl Runner {
|
||||
|
||||
// Execute the procedure. We need to release the lock whenever the execution
|
||||
// succeeds or fails.
|
||||
self.meta.set_start_time_ms();
|
||||
self.execute_procedure_in_loop().await;
|
||||
self.meta.set_end_time_ms();
|
||||
|
||||
// We can't remove the metadata of the procedure now as users and its parent might
|
||||
// need to query its state.
|
||||
@@ -368,6 +372,7 @@ impl Runner {
|
||||
procedure_state,
|
||||
Some(self.meta.id),
|
||||
procedure.lock_key(),
|
||||
procedure.type_name(),
|
||||
));
|
||||
let runner = Runner {
|
||||
meta: meta.clone(),
|
||||
|
||||
@@ -159,6 +159,14 @@ impl<T: Procedure + ?Sized> Procedure for Box<T> {
|
||||
(**self).execute(ctx).await
|
||||
}
|
||||
|
||||
async fn rollback(&mut self, ctx: &Context) -> Result<()> {
|
||||
(**self).rollback(ctx).await
|
||||
}
|
||||
|
||||
fn rollback_supported(&self) -> bool {
|
||||
(**self).rollback_supported()
|
||||
}
|
||||
|
||||
fn dump(&self) -> Result<String> {
|
||||
(**self).dump()
|
||||
}
|
||||
@@ -227,6 +235,11 @@ impl LockKey {
|
||||
pub fn keys_to_lock(&self) -> impl Iterator<Item = &StringKey> {
|
||||
self.0.iter()
|
||||
}
|
||||
|
||||
/// Returns every key of this [LockKey] as a `String`, formatted with the key's
/// `Debug` representation (`{:?}`), in iteration order.
|
||||
pub fn get_keys(&self) -> Vec<String> {
|
||||
self.0.iter().map(|key| format!("{:?}", key)).collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Boxed [Procedure].
|
||||
@@ -374,6 +387,18 @@ impl ProcedureState {
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the name of the current [ProcedureState] variant as a string.
/// Any data carried by the variant is ignored.
|
||||
pub fn as_str_name(&self) -> &str {
|
||||
match self {
|
||||
ProcedureState::Running => "Running",
|
||||
ProcedureState::Done { .. } => "Done",
|
||||
ProcedureState::Retrying { .. } => "Retrying",
|
||||
ProcedureState::Failed { .. } => "Failed",
|
||||
ProcedureState::PrepareRollback { .. } => "PrepareRollback",
|
||||
ProcedureState::RollingBack { .. } => "RollingBack",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The initial procedure state.
|
||||
@@ -412,11 +437,30 @@ pub trait ProcedureManager: Send + Sync + 'static {
|
||||
|
||||
/// Returns a [Watcher] to watch [ProcedureState] of specific procedure.
|
||||
fn procedure_watcher(&self, procedure_id: ProcedureId) -> Option<Watcher>;
|
||||
|
||||
/// Returns the details of the procedure.
|
||||
async fn list_procedures(&self) -> Result<Vec<ProcedureInfo>>;
|
||||
}
|
||||
|
||||
/// Ref-counted pointer to the [ProcedureManager].
|
||||
pub type ProcedureManagerRef = Arc<dyn ProcedureManager>;
|
||||
|
||||
/// Details of a single procedure, as returned by [ProcedureManager::list_procedures].
#[derive(Debug, Clone)]
|
||||
pub struct ProcedureInfo {
|
||||
/// Id of this procedure.
|
||||
pub id: ProcedureId,
|
||||
/// Type name of this procedure.
|
||||
pub type_name: String,
|
||||
/// Start execution time of this procedure (milliseconds, per the field name).
|
||||
pub start_time_ms: i64,
|
||||
/// End execution time of this procedure (milliseconds, per the field name).
|
||||
pub end_time_ms: i64,
|
||||
/// Current state of this procedure.
|
||||
pub state: ProcedureState,
|
||||
/// Lock keys of this procedure, rendered as strings.
|
||||
pub lock_keys: Vec<String>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_error::mock::MockError;
|
||||
|
||||
@@ -329,6 +329,7 @@ impl ExecutionPlanVisitor for MetricCollector {
|
||||
level: self.current_level,
|
||||
metrics: vec![],
|
||||
});
|
||||
self.current_level += 1;
|
||||
return Ok(true);
|
||||
};
|
||||
|
||||
@@ -365,8 +366,7 @@ impl ExecutionPlanVisitor for MetricCollector {
|
||||
}
|
||||
|
||||
fn post_visit(&mut self, _plan: &dyn ExecutionPlan) -> std::result::Result<bool, Self::Error> {
|
||||
// the last minus will underflow
|
||||
self.current_level = self.current_level.wrapping_sub(1);
|
||||
self.current_level -= 1;
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ backtrace = "0.3"
|
||||
common-error.workspace = true
|
||||
console-subscriber = { version = "0.1", optional = true }
|
||||
greptime-proto.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
lazy_static.workspace = true
|
||||
once_cell.workspace = true
|
||||
opentelemetry = { version = "0.21.0", default-features = false, features = [
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
//! Logging utilities, inspired by Databend.
|
||||
use std::env;
|
||||
use std::sync::{Arc, Mutex, Once};
|
||||
use std::time::Duration;
|
||||
|
||||
use once_cell::sync::{Lazy, OnceCell};
|
||||
use opentelemetry::{global, KeyValue};
|
||||
@@ -26,7 +27,7 @@ use serde::{Deserialize, Serialize};
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
use tracing_appender::rolling::{RollingFileAppender, Rotation};
|
||||
use tracing_log::LogTracer;
|
||||
use tracing_subscriber::filter::Targets;
|
||||
use tracing_subscriber::filter::{FilterFn, Targets};
|
||||
use tracing_subscriber::fmt::Layer;
|
||||
use tracing_subscriber::layer::SubscriberExt;
|
||||
use tracing_subscriber::prelude::*;
|
||||
@@ -53,6 +54,9 @@ pub struct LoggingOptions {
|
||||
/// The log format that can be one of "json" or "text". Default is "text".
|
||||
pub log_format: LogFormat,
|
||||
|
||||
/// The maximum number of log files to keep. Default is 720.
|
||||
pub max_log_files: usize,
|
||||
|
||||
/// Whether to append logs to stdout. Default is true.
|
||||
pub append_stdout: bool,
|
||||
|
||||
@@ -64,6 +68,24 @@ pub struct LoggingOptions {
|
||||
|
||||
/// The tracing sample ratio.
|
||||
pub tracing_sample_ratio: Option<TracingSampleOptions>,
|
||||
|
||||
/// The logging options of slow query.
|
||||
pub slow_query: SlowQueryOptions,
|
||||
}
|
||||
|
||||
/// The options of slow query logging.
///
/// `Default` is derived, so by default `enable` is `false` and both `threshold` and
/// `sample_ratio` are `None`. With `#[serde(default)]`, fields missing from the input
/// fall back to those defaults.
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, Default)]
|
||||
#[serde(default)]
|
||||
pub struct SlowQueryOptions {
|
||||
/// Whether to enable slow query log.
|
||||
pub enable: bool,
|
||||
|
||||
/// The threshold of slow queries, (de)serialized through `humantime_serde`.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub threshold: Option<Duration>,
|
||||
|
||||
/// The sample ratio of slow queries.
// NOTE(review): no range check is visible here — presumably expected to be within [0, 1]; confirm against the consumer.
|
||||
pub sample_ratio: Option<f64>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
@@ -96,6 +118,9 @@ impl Default for LoggingOptions {
|
||||
otlp_endpoint: None,
|
||||
tracing_sample_ratio: None,
|
||||
append_stdout: true,
|
||||
slow_query: SlowQueryOptions::default(),
|
||||
// Rotation hourly, 24 files per day, keeps info log files of 30 days
|
||||
max_log_files: 720,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -186,8 +211,17 @@ pub fn init_global_logging(
|
||||
|
||||
// Configure the file logging layer with rolling policy.
|
||||
let file_logging_layer = if !opts.dir.is_empty() {
|
||||
let rolling_appender =
|
||||
RollingFileAppender::new(Rotation::HOURLY, &opts.dir, "greptimedb");
|
||||
let rolling_appender = RollingFileAppender::builder()
|
||||
.rotation(Rotation::HOURLY)
|
||||
.filename_prefix("greptimedb")
|
||||
.max_log_files(opts.max_log_files)
|
||||
.build(&opts.dir)
|
||||
.unwrap_or_else(|e| {
|
||||
panic!(
|
||||
"initializing rolling file appender at {} failed: {}",
|
||||
&opts.dir, e
|
||||
)
|
||||
});
|
||||
let (writer, guard) = tracing_appender::non_blocking(rolling_appender);
|
||||
guards.push(guard);
|
||||
|
||||
@@ -208,8 +242,17 @@ pub fn init_global_logging(
|
||||
|
||||
// Configure the error file logging layer with rolling policy.
|
||||
let err_file_logging_layer = if !opts.dir.is_empty() {
|
||||
let rolling_appender =
|
||||
RollingFileAppender::new(Rotation::HOURLY, &opts.dir, "greptimedb-err");
|
||||
let rolling_appender = RollingFileAppender::builder()
|
||||
.rotation(Rotation::HOURLY)
|
||||
.filename_prefix("greptimedb-err")
|
||||
.max_log_files(opts.max_log_files)
|
||||
.build(&opts.dir)
|
||||
.unwrap_or_else(|e| {
|
||||
panic!(
|
||||
"initializing rolling file appender at {} failed: {}",
|
||||
&opts.dir, e
|
||||
)
|
||||
});
|
||||
let (writer, guard) = tracing_appender::non_blocking(rolling_appender);
|
||||
guards.push(guard);
|
||||
|
||||
@@ -235,6 +278,51 @@ pub fn init_global_logging(
|
||||
None
|
||||
};
|
||||
|
||||
let slow_query_logging_layer = if !opts.dir.is_empty() && opts.slow_query.enable {
|
||||
let rolling_appender = RollingFileAppender::builder()
|
||||
.rotation(Rotation::HOURLY)
|
||||
.filename_prefix("greptimedb-slow-queries")
|
||||
.max_log_files(opts.max_log_files)
|
||||
.build(&opts.dir)
|
||||
.unwrap_or_else(|e| {
|
||||
panic!(
|
||||
"initializing rolling file appender at {} failed: {}",
|
||||
&opts.dir, e
|
||||
)
|
||||
});
|
||||
let (writer, guard) = tracing_appender::non_blocking(rolling_appender);
|
||||
guards.push(guard);
|
||||
|
||||
// Only logs if the field contains "slow".
|
||||
let slow_query_filter = FilterFn::new(|metadata| {
|
||||
metadata
|
||||
.fields()
|
||||
.iter()
|
||||
.any(|field| field.name().contains("slow"))
|
||||
});
|
||||
|
||||
if opts.log_format == LogFormat::Json {
|
||||
Some(
|
||||
Layer::new()
|
||||
.json()
|
||||
.with_writer(writer)
|
||||
.with_ansi(false)
|
||||
.with_filter(slow_query_filter)
|
||||
.boxed(),
|
||||
)
|
||||
} else {
|
||||
Some(
|
||||
Layer::new()
|
||||
.with_writer(writer)
|
||||
.with_ansi(false)
|
||||
.with_filter(slow_query_filter)
|
||||
.boxed(),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// resolve log level settings from:
|
||||
// - options from command line or config files
|
||||
// - environment variable: RUST_LOG
|
||||
@@ -279,6 +367,7 @@ pub fn init_global_logging(
|
||||
.with(stdout_logging_layer)
|
||||
.with(file_logging_layer)
|
||||
.with(err_file_logging_layer)
|
||||
.with(slow_query_logging_layer)
|
||||
};
|
||||
|
||||
// consume the `tracing_opts` to avoid "unused" warnings.
|
||||
@@ -289,7 +378,8 @@ pub fn init_global_logging(
|
||||
.with(dyn_filter)
|
||||
.with(stdout_logging_layer)
|
||||
.with(file_logging_layer)
|
||||
.with(err_file_logging_layer);
|
||||
.with(err_file_logging_layer)
|
||||
.with(slow_query_logging_layer);
|
||||
|
||||
if opts.enable_otlp_tracing {
|
||||
global::set_text_map_propagator(TraceContextPropagator::new());
|
||||
|
||||
@@ -152,6 +152,17 @@ macro_rules! trace {
|
||||
};
|
||||
}
|
||||
|
||||
/// Logs an event at `INFO` level carrying the field `slow = true`.
///
/// Accepts either `slow!(target: <expr>, <args>...)` or `slow!(<args>...)`; both forms
/// forward to `$crate::log!`.
#[macro_export]
|
||||
macro_rules! slow {
|
||||
// NOTE(review): this arm passes `slow = true` *before* the level, while the arm below
// passes the level first — confirm `log!` accepts this ordering when `target:` is used.
(target: $target:expr, $($arg:tt)+) => {
|
||||
$crate::log!(target: $target, slow = true, $crate::tracing::Level::INFO, $($arg)+)
|
||||
};
|
||||
|
||||
// Without an explicit target: level first, then the `slow = true` field, then the arguments.
($($arg:tt)+) => {
|
||||
$crate::log!($crate::tracing::Level::INFO, slow = true, $($arg)+)
|
||||
};
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_error::mock::MockError;
|
||||
|
||||
@@ -53,6 +53,7 @@ prost.workspace = true
|
||||
query.workspace = true
|
||||
reqwest.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
servers.workspace = true
|
||||
session.workspace = true
|
||||
snafu.workspace = true
|
||||
|
||||
@@ -129,8 +129,10 @@ impl RegionAliveKeeper {
|
||||
let request = RegionRequest::Close(RegionCloseRequest {});
|
||||
if let Err(e) = self.region_server.handle_request(region_id, request).await {
|
||||
if e.status_code() != StatusCode::RegionNotFound {
|
||||
let _ = self.region_server.set_writable(region_id, false);
|
||||
error!(e; "Failed to close staled region {}, set region to readonly.",region_id);
|
||||
let _ = self
|
||||
.region_server
|
||||
.set_region_role(region_id, RegionRole::Follower);
|
||||
error!(e; "Failed to close staled region {}, convert region to follower.", region_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -378,7 +380,7 @@ impl CountdownTask {
|
||||
}
|
||||
},
|
||||
Some(CountdownCommand::Reset((role, deadline))) => {
|
||||
let _ = self.region_server.set_writable(self.region_id, role.writable());
|
||||
let _ = self.region_server.set_region_role(self.region_id, role);
|
||||
trace!(
|
||||
"Reset deadline of region {region_id} to approximately {} seconds later.",
|
||||
(deadline - Instant::now()).as_secs_f32(),
|
||||
@@ -399,8 +401,8 @@ impl CountdownTask {
|
||||
}
|
||||
}
|
||||
() = &mut countdown => {
|
||||
warn!("The region {region_id} lease is expired, set region to readonly.");
|
||||
let _ = self.region_server.set_writable(self.region_id, false);
|
||||
warn!("The region {region_id} lease is expired, convert region to follower.");
|
||||
let _ = self.region_server.set_region_role(self.region_id, RegionRole::Follower);
|
||||
// resets the countdown.
|
||||
let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 30);
|
||||
countdown.as_mut().reset(far_future);
|
||||
@@ -436,7 +438,9 @@ mod test {
|
||||
.handle_request(region_id, RegionRequest::Create(builder.build()))
|
||||
.await
|
||||
.unwrap();
|
||||
region_server.set_writable(region_id, true).unwrap();
|
||||
region_server
|
||||
.set_region_role(region_id, RegionRole::Leader)
|
||||
.unwrap();
|
||||
|
||||
// Register a region before starting.
|
||||
alive_keeper.register_region(region_id).await;
|
||||
|
||||
@@ -305,6 +305,7 @@ pub struct DatanodeOptions {
|
||||
pub meta_client: Option<MetaClientOptions>,
|
||||
pub wal: DatanodeWalConfig,
|
||||
pub storage: StorageConfig,
|
||||
pub max_concurrent_queries: usize,
|
||||
/// Options for different store engines.
|
||||
pub region_engine: Vec<RegionEngineConfig>,
|
||||
pub logging: LoggingOptions,
|
||||
@@ -339,6 +340,7 @@ impl Default for DatanodeOptions {
|
||||
meta_client: None,
|
||||
wal: DatanodeWalConfig::default(),
|
||||
storage: StorageConfig::default(),
|
||||
max_concurrent_queries: 0,
|
||||
region_engine: vec![
|
||||
RegionEngineConfig::Mito(MitoConfig::default()),
|
||||
RegionEngineConfig::File(FileEngineConfig::default()),
|
||||
|
||||
@@ -47,7 +47,7 @@ use servers::server::ServerHandlers;
|
||||
use servers::Mode;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::path_utils::{region_dir, WAL_DIR};
|
||||
use store_api::region_engine::RegionEngineRef;
|
||||
use store_api::region_engine::{RegionEngineRef, RegionRole};
|
||||
use store_api::region_request::RegionOpenRequest;
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::fs;
|
||||
@@ -314,7 +314,7 @@ impl DatanodeBuilder {
|
||||
&self,
|
||||
event_listener: RegionServerEventListenerRef,
|
||||
) -> Result<RegionServer> {
|
||||
let opts = &self.opts;
|
||||
let opts: &DatanodeOptions = &self.opts;
|
||||
|
||||
let query_engine_factory = QueryEngineFactory::new_with_plugins(
|
||||
// query engine in datanode only executes plan with resolved table source.
|
||||
@@ -334,6 +334,9 @@ impl DatanodeBuilder {
|
||||
common_runtime::global_runtime(),
|
||||
event_listener,
|
||||
table_provider_factory,
|
||||
opts.max_concurrent_queries,
|
||||
// TODO: re-evaluate the hardcoded timeout in the next version of the datanode concurrency limiter.
|
||||
Duration::from_millis(100),
|
||||
);
|
||||
|
||||
let object_store_manager = Self::build_object_store_manager(&opts.storage).await?;
|
||||
@@ -543,9 +546,9 @@ async fn open_all_regions(
|
||||
|
||||
for region_id in open_regions {
|
||||
if open_with_writable {
|
||||
if let Err(e) = region_server.set_writable(region_id, true) {
|
||||
if let Err(e) = region_server.set_region_role(region_id, RegionRole::Leader) {
|
||||
error!(
|
||||
e; "failed to set writable for region {region_id}"
|
||||
e; "failed to convert region {region_id} to leader"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ use common_macro::stack_trace_debug;
|
||||
use snafu::{Location, Snafu};
|
||||
use store_api::storage::RegionId;
|
||||
use table::error::Error as TableError;
|
||||
use tokio::time::error::Elapsed;
|
||||
|
||||
/// Business error of datanode.
|
||||
#[derive(Snafu)]
|
||||
@@ -347,6 +348,22 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to acquire permit, source closed"))]
|
||||
ConcurrentQueryLimiterClosed {
|
||||
#[snafu(source)]
|
||||
error: tokio::sync::AcquireError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to acquire permit under timeouts"))]
|
||||
ConcurrentQueryLimiterTimeout {
|
||||
#[snafu(source)]
|
||||
error: Elapsed,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -411,6 +428,9 @@ impl ErrorExt for Error {
|
||||
|
||||
FindLogicalRegions { source, .. } => source.status_code(),
|
||||
BuildMitoEngine { source, .. } => source.status_code(),
|
||||
ConcurrentQueryLimiterClosed { .. } | ConcurrentQueryLimiterTimeout { .. } => {
|
||||
StatusCode::RegionBusy
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,11 +12,13 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::meta::{HeartbeatRequest, NodeInfo, Peer, RegionRole, RegionStat};
|
||||
use common_meta::datanode::REGION_STATISTIC_KEY;
|
||||
use common_meta::distributed_time_constants::META_KEEP_ALIVE_INTERVAL_SECS;
|
||||
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
|
||||
use common_meta::heartbeat::handler::{
|
||||
@@ -124,7 +126,9 @@ impl HeartbeatTask {
|
||||
let mut follower_region_lease_count = 0;
|
||||
for lease in &lease.regions {
|
||||
match lease.role() {
|
||||
RegionRole::Leader => leader_region_lease_count += 1,
|
||||
RegionRole::Leader | RegionRole::DowngradingLeader => {
|
||||
leader_region_lease_count += 1
|
||||
}
|
||||
RegionRole::Follower => follower_region_lease_count += 1,
|
||||
}
|
||||
}
|
||||
@@ -320,16 +324,25 @@ impl HeartbeatTask {
|
||||
region_server
|
||||
.reportable_regions()
|
||||
.into_iter()
|
||||
.map(|stat| RegionStat {
|
||||
region_id: stat.region_id.as_u64(),
|
||||
engine: stat.engine,
|
||||
role: RegionRole::from(stat.role).into(),
|
||||
// TODO(weny): w/rcus
|
||||
rcus: 0,
|
||||
wcus: 0,
|
||||
approximate_bytes: region_server.region_disk_usage(stat.region_id).unwrap_or(0),
|
||||
// TODO(weny): add extensions
|
||||
extensions: Default::default(),
|
||||
.map(|stat| {
|
||||
let region_stat = region_server
|
||||
.region_statistic(stat.region_id)
|
||||
.unwrap_or_default();
|
||||
let mut extensions = HashMap::new();
|
||||
if let Some(serialized) = region_stat.serialize_to_vec() {
|
||||
extensions.insert(REGION_STATISTIC_KEY.to_string(), serialized);
|
||||
}
|
||||
|
||||
RegionStat {
|
||||
region_id: stat.region_id.as_u64(),
|
||||
engine: stat.engine,
|
||||
role: RegionRole::from(stat.role).into(),
|
||||
// TODO(weny): w/rcus
|
||||
rcus: 0,
|
||||
wcus: 0,
|
||||
approximate_bytes: region_stat.estimated_disk_size() as i64,
|
||||
extensions,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -37,6 +37,7 @@ use crate::region_server::RegionServer;
|
||||
pub struct RegionHeartbeatResponseHandler {
|
||||
region_server: RegionServer,
|
||||
catchup_tasks: TaskTracker<()>,
|
||||
downgrade_tasks: TaskTracker<()>,
|
||||
}
|
||||
|
||||
/// Handler of the instruction.
|
||||
@@ -47,12 +48,22 @@ pub type InstructionHandler =
|
||||
pub struct HandlerContext {
|
||||
region_server: RegionServer,
|
||||
catchup_tasks: TaskTracker<()>,
|
||||
downgrade_tasks: TaskTracker<()>,
|
||||
}
|
||||
|
||||
impl HandlerContext {
|
||||
fn region_ident_to_region_id(region_ident: &RegionIdent) -> RegionId {
|
||||
RegionId::new(region_ident.table_id, region_ident.region_number)
|
||||
}
|
||||
|
||||
/// Builds a [HandlerContext] around `region_server` with newly created catch-up and
/// downgrade task trackers. Only compiled for tests.
#[cfg(test)]
|
||||
pub fn new_for_test(region_server: RegionServer) -> Self {
|
||||
Self {
|
||||
region_server,
|
||||
catchup_tasks: TaskTracker::new(),
|
||||
downgrade_tasks: TaskTracker::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RegionHeartbeatResponseHandler {
|
||||
@@ -61,6 +72,7 @@ impl RegionHeartbeatResponseHandler {
|
||||
Self {
|
||||
region_server,
|
||||
catchup_tasks: TaskTracker::new(),
|
||||
downgrade_tasks: TaskTracker::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -107,11 +119,13 @@ impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler {
|
||||
let mailbox = ctx.mailbox.clone();
|
||||
let region_server = self.region_server.clone();
|
||||
let catchup_tasks = self.catchup_tasks.clone();
|
||||
let downgrade_tasks = self.downgrade_tasks.clone();
|
||||
let handler = Self::build_handler(instruction)?;
|
||||
let _handle = common_runtime::spawn_global(async move {
|
||||
let reply = handler(HandlerContext {
|
||||
region_server,
|
||||
catchup_tasks,
|
||||
downgrade_tasks,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -129,6 +143,7 @@ mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_meta::heartbeat::mailbox::{
|
||||
HeartbeatMailbox, IncomingMessage, MailboxRef, MessageMeta,
|
||||
@@ -138,6 +153,7 @@ mod tests {
|
||||
use mito2::engine::MITO_ENGINE_NAME;
|
||||
use mito2::test_util::{CreateRequestBuilder, TestEnv};
|
||||
use store_api::path_utils::region_dir;
|
||||
use store_api::region_engine::RegionRole;
|
||||
use store_api::region_request::{RegionCloseRequest, RegionRequest};
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::sync::mpsc::{self, Receiver};
|
||||
@@ -197,6 +213,8 @@ mod tests {
|
||||
// Downgrade region
|
||||
let instruction = Instruction::DowngradeRegion(DowngradeRegion {
|
||||
region_id: RegionId::new(2048, 1),
|
||||
flush_timeout: Some(Duration::from_secs(1)),
|
||||
reject_write: false,
|
||||
});
|
||||
assert!(heartbeat_handler
|
||||
.is_acceptable(&heartbeat_env.create_handler_ctx((meta.clone(), instruction))));
|
||||
@@ -205,7 +223,7 @@ mod tests {
|
||||
let instruction = Instruction::UpgradeRegion(UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id: None,
|
||||
wait_for_replay_timeout: None,
|
||||
replay_timeout: None,
|
||||
location_id: None,
|
||||
});
|
||||
assert!(
|
||||
@@ -279,7 +297,9 @@ mod tests {
|
||||
}
|
||||
|
||||
assert_matches!(
|
||||
region_server.set_writable(region_id, true).unwrap_err(),
|
||||
region_server
|
||||
.set_region_role(region_id, RegionRole::Leader)
|
||||
.unwrap_err(),
|
||||
error::Error::RegionNotFound { .. }
|
||||
);
|
||||
}
|
||||
@@ -392,7 +412,11 @@ mod tests {
|
||||
// Should be ok, if we try to downgrade it twice.
|
||||
for _ in 0..2 {
|
||||
let meta = MessageMeta::new_test(1, "test", "dn-1", "me-0");
|
||||
let instruction = Instruction::DowngradeRegion(DowngradeRegion { region_id });
|
||||
let instruction = Instruction::DowngradeRegion(DowngradeRegion {
|
||||
region_id,
|
||||
flush_timeout: Some(Duration::from_secs(1)),
|
||||
reject_write: false,
|
||||
});
|
||||
|
||||
let mut ctx = heartbeat_env.create_handler_ctx((meta, instruction));
|
||||
let control = heartbeat_handler.handle(&mut ctx).await.unwrap();
|
||||
@@ -413,6 +437,8 @@ mod tests {
|
||||
let meta = MessageMeta::new_test(1, "test", "dn-1", "me-0");
|
||||
let instruction = Instruction::DowngradeRegion(DowngradeRegion {
|
||||
region_id: RegionId::new(2048, 1),
|
||||
flush_timeout: Some(Duration::from_secs(1)),
|
||||
reject_write: false,
|
||||
});
|
||||
let mut ctx = heartbeat_env.create_handler_ctx((meta, instruction));
|
||||
let control = heartbeat_handler.handle(&mut ctx).await.unwrap();
|
||||
|
||||
@@ -13,38 +13,446 @@
|
||||
// limitations under the License.
|
||||
|
||||
use common_meta::instruction::{DowngradeRegion, DowngradeRegionReply, InstructionReply};
|
||||
use common_telemetry::tracing::info;
|
||||
use common_telemetry::warn;
|
||||
use futures_util::future::BoxFuture;
|
||||
use store_api::region_engine::SetReadonlyResponse;
|
||||
use store_api::region_engine::{SetRegionRoleStateResponse, SettableRegionRoleState};
|
||||
use store_api::region_request::{RegionFlushRequest, RegionRequest};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::heartbeat::handler::HandlerContext;
|
||||
use crate::heartbeat::task_tracker::WaitResult;
|
||||
|
||||
impl HandlerContext {
|
||||
/// Asks the region server to set `region_id` to the follower role state gracefully
/// and converts the outcome into a [DowngradeRegionReply]:
///
/// - success: `exists: true`, carrying the returned `last_entry_id`;
/// - region not found: `exists: false`, no `last_entry_id`, no error;
/// - error: `exists: true`, with the error's `Debug` output in `error`.
async fn downgrade_to_follower_gracefully(&self, region_id: RegionId) -> InstructionReply {
|
||||
match self
|
||||
.region_server
|
||||
.set_region_role_state_gracefully(region_id, SettableRegionRoleState::Follower)
|
||||
.await
|
||||
{
|
||||
Ok(SetRegionRoleStateResponse::Success { last_entry_id }) => {
|
||||
InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
last_entry_id,
|
||||
exists: true,
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
Ok(SetRegionRoleStateResponse::NotFound) => {
|
||||
InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
last_entry_id: None,
|
||||
exists: false,
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
// The error is reported to the caller in the reply instead of being propagated.
Err(err) => InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
last_entry_id: None,
|
||||
exists: true,
|
||||
error: Some(format!("{err:?}")),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn handle_downgrade_region_instruction(
|
||||
self,
|
||||
DowngradeRegion { region_id }: DowngradeRegion,
|
||||
DowngradeRegion {
|
||||
region_id,
|
||||
flush_timeout,
|
||||
reject_write,
|
||||
}: DowngradeRegion,
|
||||
) -> BoxFuture<'static, InstructionReply> {
|
||||
Box::pin(async move {
|
||||
match self.region_server.set_readonly_gracefully(region_id).await {
|
||||
Ok(SetReadonlyResponse::Success { last_entry_id }) => {
|
||||
InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
last_entry_id,
|
||||
exists: true,
|
||||
error: None,
|
||||
})
|
||||
let Some(writable) = self.region_server.is_region_leader(region_id) else {
|
||||
warn!("Region: {region_id} is not found");
|
||||
return InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
last_entry_id: None,
|
||||
exists: false,
|
||||
error: None,
|
||||
});
|
||||
};
|
||||
|
||||
let region_server_moved = self.region_server.clone();
|
||||
|
||||
// Ignores flush request
|
||||
if !writable {
|
||||
return self.downgrade_to_follower_gracefully(region_id).await;
|
||||
}
|
||||
|
||||
// If flush_timeout is not set, directly convert region to follower.
|
||||
let Some(flush_timeout) = flush_timeout else {
|
||||
return self.downgrade_to_follower_gracefully(region_id).await;
|
||||
};
|
||||
|
||||
if reject_write {
|
||||
// Sets region to downgrading, the downgrading region will reject all write requests.
|
||||
match self
|
||||
.region_server
|
||||
.set_region_role_state_gracefully(
|
||||
region_id,
|
||||
SettableRegionRoleState::DowngradingLeader,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(SetRegionRoleStateResponse::Success { .. }) => {}
|
||||
Ok(SetRegionRoleStateResponse::NotFound) => {
|
||||
warn!("Region: {region_id} is not found");
|
||||
return InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
last_entry_id: None,
|
||||
exists: false,
|
||||
error: None,
|
||||
});
|
||||
}
|
||||
Err(err) => {
|
||||
warn!(err; "Failed to convert region to downgrading leader");
|
||||
return InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
last_entry_id: None,
|
||||
exists: true,
|
||||
error: Some(format!("{err:?}")),
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(SetReadonlyResponse::NotFound) => {
|
||||
InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
last_entry_id: None,
|
||||
exists: false,
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
Err(err) => InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
}
|
||||
|
||||
let register_result = self
|
||||
.downgrade_tasks
|
||||
.try_register(
|
||||
region_id,
|
||||
Box::pin(async move {
|
||||
info!("Flush region: {region_id} before converting region to follower");
|
||||
region_server_moved
|
||||
.handle_request(
|
||||
region_id,
|
||||
RegionRequest::Flush(RegionFlushRequest {
|
||||
row_group_size: None,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
if register_result.is_busy() {
|
||||
warn!("Another flush task is running for the region: {region_id}");
|
||||
}
|
||||
|
||||
let mut watcher = register_result.into_watcher();
|
||||
let result = self.catchup_tasks.wait(&mut watcher, flush_timeout).await;
|
||||
|
||||
match result {
|
||||
WaitResult::Timeout => InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
last_entry_id: None,
|
||||
exists: true,
|
||||
error: Some(format!("{err:?}")),
|
||||
error: Some(format!("Flush region: {region_id} is timeout")),
|
||||
}),
|
||||
WaitResult::Finish(Ok(_)) => self.downgrade_to_follower_gracefully(region_id).await,
|
||||
WaitResult::Finish(Err(err)) => {
|
||||
InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
last_entry_id: None,
|
||||
exists: true,
|
||||
error: Some(format!("{err:?}")),
|
||||
})
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_meta::instruction::{DowngradeRegion, InstructionReply};
|
||||
use mito2::engine::MITO_ENGINE_NAME;
|
||||
use store_api::region_engine::{RegionRole, SetRegionRoleStateResponse};
|
||||
use store_api::region_request::RegionRequest;
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::time::Instant;
|
||||
|
||||
use crate::error;
|
||||
use crate::heartbeat::handler::HandlerContext;
|
||||
use crate::tests::{mock_region_server, MockRegionEngine};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_not_exist() {
|
||||
let mut mock_region_server = mock_region_server();
|
||||
let (mock_engine, _) = MockRegionEngine::new(MITO_ENGINE_NAME);
|
||||
mock_region_server.register_engine(mock_engine);
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
let waits = vec![None, Some(Duration::from_millis(100u64))];
|
||||
|
||||
for flush_timeout in waits {
|
||||
let reply = handler_context
|
||||
.clone()
|
||||
.handle_downgrade_region_instruction(DowngradeRegion {
|
||||
region_id,
|
||||
flush_timeout,
|
||||
reject_write: false,
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, InstructionReply::DowngradeRegion(_));
|
||||
|
||||
if let InstructionReply::DowngradeRegion(reply) = reply {
|
||||
assert!(!reply.exists);
|
||||
assert!(reply.error.is_none());
|
||||
assert!(reply.last_entry_id.is_none());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_readonly() {
|
||||
let mock_region_server = mock_region_server();
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
let (mock_engine, _) =
|
||||
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
|
||||
region_engine.mock_role = Some(Some(RegionRole::Follower));
|
||||
region_engine.handle_request_mock_fn = Some(Box::new(|_, req| {
|
||||
if let RegionRequest::Flush(_) = req {
|
||||
// Should be unreachable.
|
||||
unreachable!();
|
||||
};
|
||||
|
||||
Ok(0)
|
||||
}));
|
||||
region_engine.handle_set_readonly_gracefully_mock_fn = Some(Box::new(|_| {
|
||||
Ok(SetRegionRoleStateResponse::success(Some(1024)))
|
||||
}))
|
||||
});
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
let waits = vec![None, Some(Duration::from_millis(100u64))];
|
||||
for flush_timeout in waits {
|
||||
let reply = handler_context
|
||||
.clone()
|
||||
.handle_downgrade_region_instruction(DowngradeRegion {
|
||||
region_id,
|
||||
flush_timeout,
|
||||
reject_write: false,
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, InstructionReply::DowngradeRegion(_));
|
||||
|
||||
if let InstructionReply::DowngradeRegion(reply) = reply {
|
||||
assert!(reply.exists);
|
||||
assert!(reply.error.is_none());
|
||||
assert_eq!(reply.last_entry_id.unwrap(), 1024);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_flush_timeout() {
|
||||
let mock_region_server = mock_region_server();
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
let (mock_engine, _) =
|
||||
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
|
||||
region_engine.mock_role = Some(Some(RegionRole::Leader));
|
||||
region_engine.handle_request_delay = Some(Duration::from_secs(100));
|
||||
region_engine.handle_set_readonly_gracefully_mock_fn = Some(Box::new(|_| {
|
||||
Ok(SetRegionRoleStateResponse::success(Some(1024)))
|
||||
}))
|
||||
});
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
let flush_timeout = Duration::from_millis(100);
|
||||
let reply = handler_context
|
||||
.clone()
|
||||
.handle_downgrade_region_instruction(DowngradeRegion {
|
||||
region_id,
|
||||
flush_timeout: Some(flush_timeout),
|
||||
reject_write: false,
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, InstructionReply::DowngradeRegion(_));
|
||||
|
||||
if let InstructionReply::DowngradeRegion(reply) = reply {
|
||||
assert!(reply.exists);
|
||||
assert!(reply.error.unwrap().contains("timeout"));
|
||||
assert!(reply.last_entry_id.is_none());
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_flush_timeout_and_retry() {
|
||||
let mock_region_server = mock_region_server();
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
let (mock_engine, _) =
|
||||
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
|
||||
region_engine.mock_role = Some(Some(RegionRole::Leader));
|
||||
region_engine.handle_request_delay = Some(Duration::from_millis(300));
|
||||
region_engine.handle_set_readonly_gracefully_mock_fn = Some(Box::new(|_| {
|
||||
Ok(SetRegionRoleStateResponse::success(Some(1024)))
|
||||
}))
|
||||
});
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
let waits = vec![
|
||||
Some(Duration::from_millis(100u64)),
|
||||
Some(Duration::from_millis(100u64)),
|
||||
];
|
||||
|
||||
for flush_timeout in waits {
|
||||
let reply = handler_context
|
||||
.clone()
|
||||
.handle_downgrade_region_instruction(DowngradeRegion {
|
||||
region_id,
|
||||
flush_timeout,
|
||||
reject_write: false,
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, InstructionReply::DowngradeRegion(_));
|
||||
if let InstructionReply::DowngradeRegion(reply) = reply {
|
||||
assert!(reply.exists);
|
||||
assert!(reply.error.unwrap().contains("timeout"));
|
||||
assert!(reply.last_entry_id.is_none());
|
||||
}
|
||||
}
|
||||
let timer = Instant::now();
|
||||
let reply = handler_context
|
||||
.handle_downgrade_region_instruction(DowngradeRegion {
|
||||
region_id,
|
||||
flush_timeout: Some(Duration::from_millis(500)),
|
||||
reject_write: false,
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, InstructionReply::DowngradeRegion(_));
|
||||
// Must less than 300 ms.
|
||||
assert!(timer.elapsed().as_millis() < 300);
|
||||
|
||||
if let InstructionReply::DowngradeRegion(reply) = reply {
|
||||
assert!(reply.exists);
|
||||
assert!(reply.error.is_none());
|
||||
assert_eq!(reply.last_entry_id.unwrap(), 1024);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_flush_timeout_and_retry_error() {
|
||||
let mock_region_server = mock_region_server();
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
let (mock_engine, _) =
|
||||
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
|
||||
region_engine.mock_role = Some(Some(RegionRole::Leader));
|
||||
region_engine.handle_request_delay = Some(Duration::from_millis(300));
|
||||
region_engine.handle_request_mock_fn = Some(Box::new(|_, _| {
|
||||
error::UnexpectedSnafu {
|
||||
violated: "mock flush failed",
|
||||
}
|
||||
.fail()
|
||||
}));
|
||||
region_engine.handle_set_readonly_gracefully_mock_fn = Some(Box::new(|_| {
|
||||
Ok(SetRegionRoleStateResponse::success(Some(1024)))
|
||||
}))
|
||||
});
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
let waits = vec![
|
||||
Some(Duration::from_millis(100u64)),
|
||||
Some(Duration::from_millis(100u64)),
|
||||
];
|
||||
|
||||
for flush_timeout in waits {
|
||||
let reply = handler_context
|
||||
.clone()
|
||||
.handle_downgrade_region_instruction(DowngradeRegion {
|
||||
region_id,
|
||||
flush_timeout,
|
||||
reject_write: false,
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, InstructionReply::DowngradeRegion(_));
|
||||
if let InstructionReply::DowngradeRegion(reply) = reply {
|
||||
assert!(reply.exists);
|
||||
assert!(reply.error.unwrap().contains("timeout"));
|
||||
assert!(reply.last_entry_id.is_none());
|
||||
}
|
||||
}
|
||||
let timer = Instant::now();
|
||||
let reply = handler_context
|
||||
.handle_downgrade_region_instruction(DowngradeRegion {
|
||||
region_id,
|
||||
flush_timeout: Some(Duration::from_millis(500)),
|
||||
reject_write: false,
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, InstructionReply::DowngradeRegion(_));
|
||||
// Must less than 300 ms.
|
||||
assert!(timer.elapsed().as_millis() < 300);
|
||||
|
||||
if let InstructionReply::DowngradeRegion(reply) = reply {
|
||||
assert!(reply.exists);
|
||||
assert!(reply.error.unwrap().contains("flush failed"));
|
||||
assert!(reply.last_entry_id.is_none());
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_set_region_readonly_not_found() {
|
||||
let mock_region_server = mock_region_server();
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
let (mock_engine, _) =
|
||||
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
|
||||
region_engine.mock_role = Some(Some(RegionRole::Leader));
|
||||
region_engine.handle_set_readonly_gracefully_mock_fn =
|
||||
Some(Box::new(|_| Ok(SetRegionRoleStateResponse::NotFound)));
|
||||
});
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
let reply = handler_context
|
||||
.clone()
|
||||
.handle_downgrade_region_instruction(DowngradeRegion {
|
||||
region_id,
|
||||
flush_timeout: None,
|
||||
reject_write: false,
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, InstructionReply::DowngradeRegion(_));
|
||||
if let InstructionReply::DowngradeRegion(reply) = reply {
|
||||
assert!(!reply.exists);
|
||||
assert!(reply.error.is_none());
|
||||
assert!(reply.last_entry_id.is_none());
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_set_region_readonly_error() {
|
||||
let mock_region_server = mock_region_server();
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
let (mock_engine, _) =
|
||||
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
|
||||
region_engine.mock_role = Some(Some(RegionRole::Leader));
|
||||
region_engine.handle_set_readonly_gracefully_mock_fn = Some(Box::new(|_| {
|
||||
error::UnexpectedSnafu {
|
||||
violated: "Failed to set region to readonly",
|
||||
}
|
||||
.fail()
|
||||
}));
|
||||
});
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
let reply = handler_context
|
||||
.clone()
|
||||
.handle_downgrade_region_instruction(DowngradeRegion {
|
||||
region_id,
|
||||
flush_timeout: None,
|
||||
reject_write: false,
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, InstructionReply::DowngradeRegion(_));
|
||||
if let InstructionReply::DowngradeRegion(reply) = reply {
|
||||
assert!(reply.exists);
|
||||
assert!(reply
|
||||
.error
|
||||
.unwrap()
|
||||
.contains("Failed to set region to readonly"));
|
||||
assert!(reply.last_entry_id.is_none());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,12 +26,12 @@ impl HandlerContext {
|
||||
UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id,
|
||||
wait_for_replay_timeout,
|
||||
replay_timeout,
|
||||
location_id,
|
||||
}: UpgradeRegion,
|
||||
) -> BoxFuture<'static, InstructionReply> {
|
||||
Box::pin(async move {
|
||||
let Some(writable) = self.region_server.is_writable(region_id) else {
|
||||
let Some(writable) = self.region_server.is_region_leader(region_id) else {
|
||||
return InstructionReply::UpgradeRegion(UpgradeRegionReply {
|
||||
ready: false,
|
||||
exists: false,
|
||||
@@ -78,7 +78,7 @@ impl HandlerContext {
|
||||
}
|
||||
|
||||
// Returns immediately
|
||||
let Some(wait_for_replay_timeout) = wait_for_replay_timeout else {
|
||||
let Some(replay_timeout) = replay_timeout else {
|
||||
return InstructionReply::UpgradeRegion(UpgradeRegionReply {
|
||||
ready: false,
|
||||
exists: true,
|
||||
@@ -88,10 +88,7 @@ impl HandlerContext {
|
||||
|
||||
// We don't care that it returns a newly registered or running task.
|
||||
let mut watcher = register_result.into_watcher();
|
||||
let result = self
|
||||
.catchup_tasks
|
||||
.wait(&mut watcher, wait_for_replay_timeout)
|
||||
.await;
|
||||
let result = self.catchup_tasks.wait(&mut watcher, replay_timeout).await;
|
||||
|
||||
match result {
|
||||
WaitResult::Timeout => InstructionReply::UpgradeRegion(UpgradeRegionReply {
|
||||
@@ -129,7 +126,6 @@ mod tests {
|
||||
|
||||
use crate::error;
|
||||
use crate::heartbeat::handler::HandlerContext;
|
||||
use crate::heartbeat::task_tracker::TaskTracker;
|
||||
use crate::tests::{mock_region_server, MockRegionEngine};
|
||||
|
||||
#[tokio::test]
|
||||
@@ -138,21 +134,18 @@ mod tests {
|
||||
let (mock_engine, _) = MockRegionEngine::new(MITO_ENGINE_NAME);
|
||||
mock_region_server.register_engine(mock_engine);
|
||||
|
||||
let handler_context = HandlerContext {
|
||||
region_server: mock_region_server,
|
||||
catchup_tasks: TaskTracker::new(),
|
||||
};
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
let waits = vec![None, Some(Duration::from_millis(100u64))];
|
||||
|
||||
for wait_for_replay_timeout in waits {
|
||||
for replay_timeout in waits {
|
||||
let reply = handler_context
|
||||
.clone()
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id: None,
|
||||
wait_for_replay_timeout,
|
||||
replay_timeout,
|
||||
location_id: None,
|
||||
})
|
||||
.await;
|
||||
@@ -180,20 +173,17 @@ mod tests {
|
||||
});
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
|
||||
let handler_context = HandlerContext {
|
||||
region_server: mock_region_server,
|
||||
catchup_tasks: TaskTracker::new(),
|
||||
};
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
let waits = vec![None, Some(Duration::from_millis(100u64))];
|
||||
|
||||
for wait_for_replay_timeout in waits {
|
||||
for replay_timeout in waits {
|
||||
let reply = handler_context
|
||||
.clone()
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id: None,
|
||||
wait_for_replay_timeout,
|
||||
replay_timeout,
|
||||
location_id: None,
|
||||
})
|
||||
.await;
|
||||
@@ -222,20 +212,17 @@ mod tests {
|
||||
});
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
|
||||
let handler_context = HandlerContext {
|
||||
region_server: mock_region_server,
|
||||
catchup_tasks: TaskTracker::new(),
|
||||
};
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
let waits = vec![None, Some(Duration::from_millis(100u64))];
|
||||
|
||||
for wait_for_replay_timeout in waits {
|
||||
for replay_timeout in waits {
|
||||
let reply = handler_context
|
||||
.clone()
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id: None,
|
||||
wait_for_replay_timeout,
|
||||
replay_timeout,
|
||||
location_id: None,
|
||||
})
|
||||
.await;
|
||||
@@ -269,17 +256,14 @@ mod tests {
|
||||
Some(Duration::from_millis(100u64)),
|
||||
];
|
||||
|
||||
let handler_context = HandlerContext {
|
||||
region_server: mock_region_server,
|
||||
catchup_tasks: TaskTracker::new(),
|
||||
};
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
for wait_for_replay_timeout in waits {
|
||||
for replay_timeout in waits {
|
||||
let reply = handler_context
|
||||
.clone()
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
wait_for_replay_timeout,
|
||||
replay_timeout,
|
||||
last_entry_id: None,
|
||||
location_id: None,
|
||||
})
|
||||
@@ -298,7 +282,7 @@ mod tests {
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id: None,
|
||||
wait_for_replay_timeout: Some(Duration::from_millis(500)),
|
||||
replay_timeout: Some(Duration::from_millis(500)),
|
||||
location_id: None,
|
||||
})
|
||||
.await;
|
||||
@@ -333,17 +317,14 @@ mod tests {
|
||||
});
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
|
||||
let handler_context = HandlerContext {
|
||||
region_server: mock_region_server,
|
||||
catchup_tasks: TaskTracker::new(),
|
||||
};
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
let reply = handler_context
|
||||
.clone()
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id: None,
|
||||
wait_for_replay_timeout: None,
|
||||
replay_timeout: None,
|
||||
location_id: None,
|
||||
})
|
||||
.await;
|
||||
@@ -361,7 +342,7 @@ mod tests {
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id: None,
|
||||
wait_for_replay_timeout: Some(Duration::from_millis(200)),
|
||||
replay_timeout: Some(Duration::from_millis(200)),
|
||||
location_id: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -16,6 +16,7 @@ use std::collections::HashMap;
|
||||
use std::fmt::Debug;
|
||||
use std::ops::Deref;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::time::Duration;
|
||||
|
||||
use api::region::RegionResponse;
|
||||
use api::v1::region::{region_request, RegionResponse as RegionResponseV1};
|
||||
@@ -53,15 +54,21 @@ use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::metric_engine_consts::{
|
||||
FILE_ENGINE_NAME, LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME,
|
||||
};
|
||||
use store_api::region_engine::{RegionEngineRef, RegionRole, SetReadonlyResponse};
|
||||
use store_api::region_engine::{
|
||||
RegionEngineRef, RegionRole, RegionStatistic, SetRegionRoleStateResponse,
|
||||
SettableRegionRoleState,
|
||||
};
|
||||
use store_api::region_request::{
|
||||
AffectedRows, RegionCloseRequest, RegionOpenRequest, RegionRequest,
|
||||
};
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::sync::{Semaphore, SemaphorePermit};
|
||||
use tokio::time::timeout;
|
||||
use tonic::{Request, Response, Result as TonicResult};
|
||||
|
||||
use crate::error::{
|
||||
self, BuildRegionRequestsSnafu, DataFusionSnafu, DecodeLogicalPlanSnafu,
|
||||
self, BuildRegionRequestsSnafu, ConcurrentQueryLimiterClosedSnafu,
|
||||
ConcurrentQueryLimiterTimeoutSnafu, DataFusionSnafu, DecodeLogicalPlanSnafu,
|
||||
ExecuteLogicalPlanSnafu, FindLogicalRegionsSnafu, HandleBatchOpenRequestSnafu,
|
||||
HandleRegionRequestSnafu, NewPlanDecoderSnafu, RegionEngineNotFoundSnafu, RegionNotFoundSnafu,
|
||||
RegionNotReadySnafu, Result, StopRegionEngineSnafu, UnexpectedSnafu, UnsupportedOutputSnafu,
|
||||
@@ -90,6 +97,8 @@ impl RegionServer {
|
||||
runtime,
|
||||
event_listener,
|
||||
Arc::new(DummyTableProviderFactory),
|
||||
0,
|
||||
Duration::from_millis(0),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -98,6 +107,8 @@ impl RegionServer {
|
||||
runtime: Runtime,
|
||||
event_listener: RegionServerEventListenerRef,
|
||||
table_provider_factory: TableProviderFactoryRef,
|
||||
max_concurrent_queries: usize,
|
||||
concurrent_query_limiter_timeout: Duration,
|
||||
) -> Self {
|
||||
Self {
|
||||
inner: Arc::new(RegionServerInner::new(
|
||||
@@ -105,6 +116,10 @@ impl RegionServer {
|
||||
runtime,
|
||||
event_listener,
|
||||
table_provider_factory,
|
||||
RegionServerParallelism::from_opts(
|
||||
max_concurrent_queries,
|
||||
concurrent_query_limiter_timeout,
|
||||
),
|
||||
)),
|
||||
}
|
||||
}
|
||||
@@ -167,6 +182,11 @@ impl RegionServer {
|
||||
&self,
|
||||
request: api::v1::region::QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
let _permit = if let Some(p) = &self.inner.parallelism {
|
||||
Some(p.acquire().await?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let region_id = RegionId::from_u64(request.region_id);
|
||||
let provider = self.table_provider(region_id).await?;
|
||||
let catalog_list = Arc::new(DummyCatalogList::with_table_provider(provider));
|
||||
@@ -200,6 +220,11 @@ impl RegionServer {
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn handle_read(&self, request: QueryRequest) -> Result<SendableRecordBatchStream> {
|
||||
let _permit = if let Some(p) = &self.inner.parallelism {
|
||||
Some(p.acquire().await?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let provider = self.table_provider(request.region_id).await?;
|
||||
|
||||
struct RegionDataSourceInjector {
|
||||
@@ -252,37 +277,47 @@ impl RegionServer {
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn is_writable(&self, region_id: RegionId) -> Option<bool> {
|
||||
// TODO(weny): Finds a better way.
|
||||
pub fn is_region_leader(&self, region_id: RegionId) -> Option<bool> {
|
||||
self.inner.region_map.get(®ion_id).and_then(|engine| {
|
||||
engine.role(region_id).map(|role| match role {
|
||||
RegionRole::Follower => false,
|
||||
RegionRole::Leader => true,
|
||||
RegionRole::DowngradingLeader => true,
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
pub fn set_writable(&self, region_id: RegionId, writable: bool) -> Result<()> {
|
||||
pub fn set_region_role(&self, region_id: RegionId, role: RegionRole) -> Result<()> {
|
||||
let engine = self
|
||||
.inner
|
||||
.region_map
|
||||
.get(®ion_id)
|
||||
.with_context(|| RegionNotFoundSnafu { region_id })?;
|
||||
engine
|
||||
.set_writable(region_id, writable)
|
||||
.set_region_role(region_id, role)
|
||||
.with_context(|_| HandleRegionRequestSnafu { region_id })
|
||||
}
|
||||
|
||||
pub async fn set_readonly_gracefully(
|
||||
/// Set region role state gracefully.
|
||||
///
|
||||
/// For [SettableRegionRoleState::Follower]:
|
||||
/// After the call returns, the engine ensures that
|
||||
/// no **further** write or flush operations will succeed in this region.
|
||||
///
|
||||
/// For [SettableRegionRoleState::DowngradingLeader]:
|
||||
/// After the call returns, the engine ensures that
|
||||
/// no **further** write operations will succeed in this region.
|
||||
pub async fn set_region_role_state_gracefully(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
) -> Result<SetReadonlyResponse> {
|
||||
state: SettableRegionRoleState,
|
||||
) -> Result<SetRegionRoleStateResponse> {
|
||||
match self.inner.region_map.get(®ion_id) {
|
||||
Some(engine) => Ok(engine
|
||||
.set_readonly_gracefully(region_id)
|
||||
.set_region_role_state_gracefully(region_id, state)
|
||||
.await
|
||||
.with_context(|_| HandleRegionRequestSnafu { region_id })?),
|
||||
None => Ok(SetReadonlyResponse::NotFound),
|
||||
None => Ok(SetRegionRoleStateResponse::NotFound),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -290,9 +325,9 @@ impl RegionServer {
|
||||
self.inner.runtime.clone()
|
||||
}
|
||||
|
||||
pub fn region_disk_usage(&self, region_id: RegionId) -> Option<i64> {
|
||||
pub fn region_statistic(&self, region_id: RegionId) -> Option<RegionStatistic> {
|
||||
match self.inner.region_map.get(®ion_id) {
|
||||
Some(e) => e.region_disk_usage(region_id),
|
||||
Some(e) => e.region_statistic(region_id),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
@@ -450,6 +485,36 @@ struct RegionServerInner {
|
||||
runtime: Runtime,
|
||||
event_listener: RegionServerEventListenerRef,
|
||||
table_provider_factory: TableProviderFactoryRef,
|
||||
// The number of queries allowed to be executed at the same time.
|
||||
// Act as last line of defense on datanode to prevent query overloading.
|
||||
parallelism: Option<RegionServerParallelism>,
|
||||
}
|
||||
|
||||
struct RegionServerParallelism {
|
||||
semaphore: Semaphore,
|
||||
timeout: Duration,
|
||||
}
|
||||
|
||||
impl RegionServerParallelism {
|
||||
pub fn from_opts(
|
||||
max_concurrent_queries: usize,
|
||||
concurrent_query_limiter_timeout: Duration,
|
||||
) -> Option<Self> {
|
||||
if max_concurrent_queries == 0 {
|
||||
return None;
|
||||
}
|
||||
Some(RegionServerParallelism {
|
||||
semaphore: Semaphore::new(max_concurrent_queries),
|
||||
timeout: concurrent_query_limiter_timeout,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn acquire(&self) -> Result<SemaphorePermit> {
|
||||
timeout(self.timeout, self.semaphore.acquire())
|
||||
.await
|
||||
.context(ConcurrentQueryLimiterTimeoutSnafu)?
|
||||
.context(ConcurrentQueryLimiterClosedSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
enum CurrentEngine {
|
||||
@@ -478,6 +543,7 @@ impl RegionServerInner {
|
||||
runtime: Runtime,
|
||||
event_listener: RegionServerEventListenerRef,
|
||||
table_provider_factory: TableProviderFactoryRef,
|
||||
parallelism: Option<RegionServerParallelism>,
|
||||
) -> Self {
|
||||
Self {
|
||||
engines: RwLock::new(HashMap::new()),
|
||||
@@ -486,6 +552,7 @@ impl RegionServerInner {
|
||||
runtime,
|
||||
event_listener,
|
||||
table_provider_factory,
|
||||
parallelism,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -788,7 +855,7 @@ impl RegionServerInner {
|
||||
info!("Region {region_id} is deregistered from engine {engine_type}");
|
||||
self.region_map
|
||||
.remove(®ion_id)
|
||||
.map(|(id, engine)| engine.set_writable(id, false));
|
||||
.map(|(id, engine)| engine.set_region_role(id, RegionRole::Follower));
|
||||
self.event_listener.on_region_deregistered(region_id);
|
||||
}
|
||||
RegionChange::Catchup => {
|
||||
@@ -843,7 +910,7 @@ impl RegionServerInner {
|
||||
|
||||
let result = self
|
||||
.query_engine
|
||||
.execute(request.plan.into(), query_ctx)
|
||||
.execute(request.plan, query_ctx)
|
||||
.await
|
||||
.context(ExecuteLogicalPlanSnafu)?;
|
||||
|
||||
@@ -1284,4 +1351,23 @@ mod tests {
|
||||
assert(result);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_server_parallism() {
|
||||
let p = RegionServerParallelism::from_opts(2, Duration::from_millis(1)).unwrap();
|
||||
let first_query = p.acquire().await;
|
||||
assert!(first_query.is_ok());
|
||||
let second_query = p.acquire().await;
|
||||
assert!(second_query.is_ok());
|
||||
let third_query = p.acquire().await;
|
||||
assert!(third_query.is_err());
|
||||
let err = third_query.unwrap_err();
|
||||
assert_eq!(
|
||||
err.output_msg(),
|
||||
"Failed to acquire permit under timeouts: deadline has elapsed".to_string()
|
||||
);
|
||||
drop(first_query);
|
||||
let forth_query = p.acquire().await;
|
||||
assert!(forth_query.is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user