Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2026-01-10 23:32:55 +00:00

Compare commits: 93 commits, chore/udap ... transform-
| SHA1 |
|---|
| 6247de2d50 |
| a2eb46132f |
| 3f9bf48161 |
| 9bd2e006b5 |
| 031421ca91 |
| 999f3a40c2 |
| 50d28e0a00 |
| 770a850437 |
| 65e53b5bc4 |
| 9a6c7aa4d6 |
| 4f446b95d8 |
| 9ad4200f55 |
| 53d456651f |
| f11c5acb0f |
| 8536a1ec6e |
| fce8c968da |
| 98a6ac973c |
| 8f79e421c3 |
| e8b326382f |
| 56781e7fbc |
| 7d342b3d95 |
| a22667bf3c |
| 29b9b7db0c |
| a66909a562 |
| 8137b8ff3d |
| 7c5cd2922a |
| a1d0dcf2c3 |
| c391171f99 |
| f44862aaac |
| 8bf795d88c |
| 3bbf4e0232 |
| 83da3950da |
| 957b5effd5 |
| f59e28006a |
| 3e5bbdf71e |
| b8ac19c480 |
| 92b274a856 |
| 6bdac25f0a |
| a9f3c4b17c |
| e003eaab36 |
| 6e590da412 |
| ff5fa40b85 |
| d4aa4159d4 |
| 960f6d821b |
| 9c5d044238 |
| be72d3bedb |
| 1ff29d8fde |
| 39ab1a6415 |
| 70c354eed6 |
| 23bf663d58 |
| 817648eac5 |
| 758ad0a8c5 |
| 8b60c27c2e |
| ea6df9ba49 |
| 69420793e2 |
| 0da112b335 |
| dcc08f6b3e |
| a34035a1f2 |
| fd8eba36a8 |
| 9712295177 |
| d275cdd570 |
| 83eb777d21 |
| 8ed5bc5305 |
| 9ded314905 |
| 702a55a235 |
| f3e5a5a7aa |
| 9c79baca4b |
| 03f2fa219d |
| 0ee455a980 |
| eab9e3a48d |
| 1008af5324 |
| 2485f66077 |
| 4f3afb13b6 |
| 32a0023010 |
| 4e9c251041 |
| e328c7067c |
| 8b307e4548 |
| ff38abde2e |
| aa9a265984 |
| 9d3ee6384a |
| fcde0a4874 |
| 5d42e63ab0 |
| 0c01532a37 |
| 6d503b047a |
| 5d28f7a912 |
| a50eea76a6 |
| 03b29439e2 |
| 712f4ca0ef |
| 60bacff57e |
| 6208772ba4 |
| 67184c0498 |
| 1dd908fdf7 |
| 8179b4798e |
@@ -40,7 +40,7 @@ runs:

- name: Install PyArrow Package
shell: pwsh
run: pip install pyarrow
run: pip install pyarrow numpy

- name: Install WSL distribution
uses: Vampire/setup-wsl@v2
@@ -18,7 +18,7 @@ runs:
--set replicaCount=${{ inputs.etcd-replicas }} \
--set resources.requests.cpu=50m \
--set resources.requests.memory=128Mi \
--set resources.limits.cpu=1000m \
--set resources.limits.cpu=1500m \
--set resources.limits.memory=2Gi \
--set auth.rbac.create=false \
--set auth.rbac.token.enabled=false \
10  .github/workflows/develop.yml  (vendored)
@@ -436,7 +436,7 @@ jobs:
timeout-minutes: 60
strategy:
matrix:
target: ["fuzz_migrate_mito_regions", "fuzz_failover_mito_regions", "fuzz_failover_metric_regions"]
target: ["fuzz_migrate_mito_regions", "fuzz_migrate_metric_regions", "fuzz_failover_mito_regions", "fuzz_failover_metric_regions"]
mode:
- name: "Remote WAL"
minio: true
@@ -449,6 +449,12 @@ jobs:
minio: true
kafka: false
values: "with-minio.yaml"
- target: "fuzz_migrate_metric_regions"
mode:
name: "Local WAL"
minio: true
kafka: false
values: "with-minio.yaml"
steps:
- name: Remove unused software
run: |
@@ -688,7 +694,7 @@ jobs:
with:
python-version: '3.10'
- name: Install PyArrow Package
run: pip install pyarrow
run: pip install pyarrow numpy
- name: Setup etcd server
working-directory: tests-integration/fixtures/etcd
run: docker compose -f docker-compose-standalone.yml up -d --wait
2  .github/workflows/nightly-ci.yml  (vendored)
@@ -92,7 +92,7 @@ jobs:
with:
python-version: "3.10"
- name: Install PyArrow Package
run: pip install pyarrow
run: pip install pyarrow numpy
- name: Install WSL distribution
uses: Vampire/setup-wsl@v2
with:
57  Cargo.lock  (generated)
@@ -200,12 +200,6 @@ version = "1.0.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"

[[package]]
name = "anymap"
version = "1.0.0-beta.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f1f8f5a6f3d50d89e3797d7593a50f96bb2aaa20ca0cc7be1fb673232c91d72"

[[package]]
name = "anymap2"
version = "0.13.0"
@@ -1794,7 +1788,6 @@ dependencies = [
"tokio-stream",
"tonic 0.11.0",
"tracing",
"tracing-subscriber",
]

[[package]]
@@ -1808,6 +1801,17 @@ dependencies = [
"winapi",
]

[[package]]
name = "clocksource"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "129026dd5a8a9592d96916258f3a5379589e513ea5e86aeb0bd2530286e44e9e"
dependencies = [
"libc",
"time",
"winapi",
]

[[package]]
name = "cmake"
version = "0.1.51"
@@ -2144,6 +2148,7 @@ dependencies = [
"paste",
"prost 0.12.6",
"snafu 0.8.5",
"store-api",
"table",
]

@@ -2332,16 +2337,24 @@ name = "common-runtime"
version = "0.9.5"
dependencies = [
"async-trait",
"clap 4.5.19",
"common-error",
"common-macro",
"common-telemetry",
"futures",
"lazy_static",
"num_cpus",
"once_cell",
"parking_lot 0.12.3",
"paste",
"pin-project",
"prometheus",
"rand",
"ratelimit",
"serde",
"serde_json",
"snafu 0.8.5",
"tempfile",
"tokio",
"tokio-metrics",
"tokio-metrics-collector",
@@ -4518,7 +4531,7 @@ dependencies = [
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=b4d301184eb0d01fd4d1042fcc7c5dfb54f3c1e3#b4d301184eb0d01fd4d1042fcc7c5dfb54f3c1e3"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=255f87a3318ace3f88a67f76995a0e14910983f4#255f87a3318ace3f88a67f76995a0e14910983f4"
dependencies = [
"prost 0.12.6",
"serde",
@@ -5511,7 +5524,7 @@ dependencies = [
[[package]]
name = "jsonb"
version = "0.4.1"
source = "git+https://github.com/datafuselabs/jsonb.git?rev=46ad50fc71cf75afbf98eec455f7892a6387c1fc#46ad50fc71cf75afbf98eec455f7892a6387c1fc"
source = "git+https://github.com/databendlabs/jsonb.git?rev=46ad50fc71cf75afbf98eec455f7892a6387c1fc#46ad50fc71cf75afbf98eec455f7892a6387c1fc"
dependencies = [
"byteorder",
"fast-float",
@@ -6408,9 +6421,9 @@ dependencies = [
[[package]]
name = "meter-core"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-meter.git?rev=80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd#80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd"
source = "git+https://github.com/GreptimeTeam/greptime-meter.git?rev=a10facb353b41460eeb98578868ebf19c2084fac#a10facb353b41460eeb98578868ebf19c2084fac"
dependencies = [
"anymap",
"anymap2",
"once_cell",
"parking_lot 0.12.3",
]
@@ -6418,7 +6431,7 @@ dependencies = [
[[package]]
name = "meter-macros"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-meter.git?rev=80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd#80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd"
source = "git+https://github.com/GreptimeTeam/greptime-meter.git?rev=a10facb353b41460eeb98578868ebf19c2084fac#a10facb353b41460eeb98578868ebf19c2084fac"
dependencies = [
"meter-core",
]
@@ -8320,6 +8333,7 @@ dependencies = [
"datanode",
"frontend",
"meta-srv",
"serde",
"snafu 0.8.5",
]

@@ -8613,9 +8627,9 @@ dependencies = [

[[package]]
name = "promql-parser"
version = "0.4.1"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c1ad4a4cfa84ec4aa5831c82e57af0a3faf3f0af83bee13fa1390b2d0a32dc9"
checksum = "7fe99e6f80a79abccf1e8fb48dd63473a36057e600cc6ea36147c8318698ae6f"
dependencies = [
"cfgrammar",
"chrono",
@@ -8623,6 +8637,8 @@ dependencies = [
"lrlex",
"lrpar",
"regex",
"serde",
"serde_json",
]

[[package]]
@@ -9016,6 +9032,7 @@ dependencies = [
"table",
"tokio",
"tokio-stream",
"uuid",
]

[[package]]
@@ -9195,6 +9212,17 @@ dependencies = [
"rand",
]

[[package]]
name = "ratelimit"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c1bb13e2dcfa2232ac6887157aad8d9b3fe4ca57f7c8d4938ff5ea9be742300"
dependencies = [
"clocksource",
"parking_lot 0.12.3",
"thiserror",
]

[[package]]
name = "raw-cpuid"
version = "11.2.0"
@@ -11471,6 +11499,7 @@ dependencies = [
"datatypes",
"derive_builder 0.12.0",
"futures",
"humantime",
"serde",
"serde_json",
"snafu 0.8.5",
25  Cargo.toml
@@ -121,13 +121,13 @@ etcd-client = { version = "0.13" }
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "b4d301184eb0d01fd4d1042fcc7c5dfb54f3c1e3" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "255f87a3318ace3f88a67f76995a0e14910983f4" }
humantime = "2.1"
humantime-serde = "1.1"
itertools = "0.10"
jsonb = { git = "https://github.com/datafuselabs/jsonb.git", rev = "46ad50fc71cf75afbf98eec455f7892a6387c1fc", default-features = false }
jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "46ad50fc71cf75afbf98eec455f7892a6387c1fc", default-features = false }
lazy_static = "1.4"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd" }
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "a10facb353b41460eeb98578868ebf19c2084fac" }
mockall = "0.11.4"
moka = "0.12"
notify = "6.1"
@@ -140,14 +140,16 @@ opentelemetry-proto = { version = "0.5", features = [
"with-serde",
"logs",
] }
parking_lot = "0.12"
parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0"
pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] }
promql-parser = { version = "0.4.1" }
promql-parser = { version = "0.4.3", features = ["ser"] }
prost = "0.12"
raft-engine = { version = "0.4.1", default-features = false }
rand = "0.8"
ratelimit = "0.9"
regex = "1.8"
regex-automata = { version = "0.4" }
reqwest = { version = "0.12", default-features = false, features = [
@@ -178,13 +180,16 @@ sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "5
] }
strum = { version = "0.25", features = ["derive"] }
tempfile = "3"
tokio = { version = "1.36", features = ["full"] }
tokio = { version = "1.40", features = ["full"] }
tokio-postgres = "0.7"
tokio-stream = { version = "0.1" }
tokio-util = { version = "0.7", features = ["io-util", "compat"] }
toml = "0.8.8"
tonic = { version = "0.11", features = ["tls", "gzip", "zstd"] }
tower = { version = "0.4" }
tracing-appender = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
typetag = "0.2"
uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
zstd = "0.13"

@@ -259,16 +264,18 @@ tokio-rustls = { git = "https://github.com/GreptimeTeam/tokio-rustls" }

[workspace.dependencies.meter-macros]
git = "https://github.com/GreptimeTeam/greptime-meter.git"
rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd"
rev = "a10facb353b41460eeb98578868ebf19c2084fac"

[profile.release]
debug = 1
# debug = 1
split-debuginfo = "off"

[profile.nightly]
inherits = "release"
strip = "debuginfo"
split-debuginfo = "off"
# strip = "debuginfo"
lto = "thin"
debug = false
# debug = false
incremental = false

[profile.ci]
@@ -83,7 +83,7 @@
| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system <br/>can still successfully replay memtable data without throwing an <br/>out-of-range error. <br/>However, enabling this option might lead to unexpected data loss, <br/>as the system will skip over missing entries instead of treating <br/>them as critical errors. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
| `metadata_store` | -- | -- | Metadata storage options. |
| `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. |
| `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. |
@@ -116,7 +116,9 @@
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
| `region_engine.mito.max_background_flushes` | Integer | Auto | Max number of running background flush jobs (default: 1/2 of cpu cores). |
| `region_engine.mito.max_background_compactions` | Integer | Auto | Max number of running background compaction jobs (default: 1/4 of cpu cores). |
| `region_engine.mito.max_background_purges` | Integer | Auto | Max number of running background purge jobs (default: number of cpu cores). |
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
| `region_engine.mito.global_write_buffer_size` | String | Auto | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
| `region_engine.mito.global_write_buffer_reject_size` | String | Auto | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`. |
@@ -410,7 +412,7 @@
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
| `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
| `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system <br/>can still successfully replay memtable data without throwing an <br/>out-of-range error. <br/>However, enabling this option might lead to unexpected data loss, <br/>as the system will skip over missing entries instead of treating <br/>them as critical errors. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
@@ -437,7 +439,9 @@
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
| `region_engine.mito.max_background_flushes` | Integer | Auto | Max number of running background flush jobs (default: 1/2 of cpu cores). |
| `region_engine.mito.max_background_compactions` | Integer | Auto | Max number of running background compaction jobs (default: 1/4 of cpu cores). |
| `region_engine.mito.max_background_purges` | Integer | Auto | Max number of running background purge jobs (default: number of cpu cores). |
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
| `region_engine.mito.global_write_buffer_size` | String | Auto | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
| `region_engine.mito.global_write_buffer_reject_size` | String | Auto | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
@@ -215,12 +215,12 @@ dump_index_interval = "60s"

## Ignore missing entries during read WAL.
## **It's only used when the provider is `kafka`**.
##
## This option ensures that when Kafka messages are deleted, the system
## can still successfully replay memtable data without throwing an
## out-of-range error.
## However, enabling this option might lead to unexpected data loss,
## as the system will skip over missing entries instead of treating
##
## This option ensures that when Kafka messages are deleted, the system
## can still successfully replay memtable data without throwing an
## out-of-range error.
## However, enabling this option might lead to unexpected data loss,
## as the system will skip over missing entries instead of treating
## them as critical errors.
overwrite_entry_start_id = false

@@ -416,8 +416,17 @@ manifest_checkpoint_distance = 10
## Whether to compress manifest and checkpoint file by gzip (default false).
compress_manifest = false

## Max number of running background jobs
max_background_jobs = 4
## Max number of running background flush jobs (default: 1/2 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_flushes = 4

## Max number of running background compaction jobs (default: 1/4 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_compactions = 2

## Max number of running background purge jobs (default: number of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_purges = 8

## Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h"
@@ -637,7 +646,7 @@ url = ""
headers = { }

## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
#+ [tracing]
## The tokio console address.
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"
#+ tokio_console_addr = "127.0.0.1"

@@ -101,8 +101,8 @@ threshold = "10s"
sample_ratio = 1.0

## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
#+ [tracing]
## The tokio console address.
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"
#+ tokio_console_addr = "127.0.0.1"

@@ -231,7 +231,7 @@ url = ""
headers = { }

## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
#+ [tracing]
## The tokio console address.
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"
#+ tokio_console_addr = "127.0.0.1"

@@ -218,7 +218,7 @@ url = ""
headers = { }

## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
#+ [tracing]
## The tokio console address.
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"
#+ tokio_console_addr = "127.0.0.1"

@@ -239,12 +239,12 @@ backoff_deadline = "5mins"

## Ignore missing entries during read WAL.
## **It's only used when the provider is `kafka`**.
##
## This option ensures that when Kafka messages are deleted, the system
## can still successfully replay memtable data without throwing an
## out-of-range error.
## However, enabling this option might lead to unexpected data loss,
## as the system will skip over missing entries instead of treating
##
## This option ensures that when Kafka messages are deleted, the system
## can still successfully replay memtable data without throwing an
## out-of-range error.
## However, enabling this option might lead to unexpected data loss,
## as the system will skip over missing entries instead of treating
## them as critical errors.
overwrite_entry_start_id = false

@@ -454,8 +454,17 @@ manifest_checkpoint_distance = 10
## Whether to compress manifest and checkpoint file by gzip (default false).
compress_manifest = false

## Max number of running background jobs
max_background_jobs = 4
## Max number of running background flush jobs (default: 1/2 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_flushes = 4

## Max number of running background compaction jobs (default: 1/4 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_compactions = 2

## Max number of running background purge jobs (default: number of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_purges = 8

## Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h"
@@ -681,7 +690,7 @@ url = ""
headers = { }

## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
#+ [tracing]
## The tokio console address.
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"
#+ tokio_console_addr = "127.0.0.1"
16  docs/how-to/how-to-change-log-level-on-the-fly.md  (new file)
@@ -0,0 +1,16 @@
# Change Log Level on the Fly

## HTTP API

Example:
```bash
curl --data "trace;flow=debug" 127.0.0.1:4000/debug/log_level
```
And the database will reply with something like:
```bash
Log Level changed from Some("info") to "trace;flow=debug"%
```

The data is a string in the format `global_level;module1=level1;module2=level2;...`, following the same rules as `RUST_LOG`.

The module is the module name of the log, and the level is the log level. The log level can be one of the following: `trace`, `debug`, `info`, `warn`, `error`, `off` (case insensitive).
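For illustration only, here is a sketch of a second request that keeps the global level at `info` while raising a single module to `debug`; the module name `mito2` is an assumed example and does not come from the change above:

```bash
# Hypothetical example (module name "mito2" is assumed, not taken from the diff):
# keep the global level at "info" and enable debug logs for only one module.
curl --data "info;mito2=debug" 127.0.0.1:4000/debug/log_level
```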
@@ -116,6 +116,7 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
ConcreteDataType::binary_datatype()
}
}
ColumnDataType::Json => ConcreteDataType::json_datatype(),
ColumnDataType::String => ConcreteDataType::string_datatype(),
ColumnDataType::Date => ConcreteDataType::date_datatype(),
ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
@@ -417,6 +418,10 @@ pub fn values_with_capacity(datatype: ColumnDataType, capacity: usize) -> Values
decimal128_values: Vec::with_capacity(capacity),
..Default::default()
},
ColumnDataType::Json => Values {
string_values: Vec::with_capacity(capacity),
..Default::default()
},
}
}
@@ -39,9 +39,12 @@ use crate::CatalogManager;
const REGION_ID: &str = "region_id";
const TABLE_ID: &str = "table_id";
const REGION_NUMBER: &str = "region_number";
const REGION_ROWS: &str = "region_rows";
const DISK_SIZE: &str = "disk_size";
const MEMTABLE_SIZE: &str = "memtable_size";
const MANIFEST_SIZE: &str = "manifest_size";
const SST_SIZE: &str = "sst_size";
const INDEX_SIZE: &str = "index_size";
const ENGINE: &str = "engine";
const REGION_ROLE: &str = "region_role";

@@ -52,9 +55,12 @@ const INIT_CAPACITY: usize = 42;
/// - `region_id`: The region id.
/// - `table_id`: The table id.
/// - `region_number`: The region number.
/// - `region_rows`: The number of rows in region.
/// - `memtable_size`: The memtable size in bytes.
/// - `disk_size`: The approximate disk size in bytes.
/// - `manifest_size`: The manifest size in bytes.
/// - `sst_size`: The sst size in bytes.
/// - `sst_size`: The sst data files size in bytes.
/// - `index_size`: The sst index files size in bytes.
/// - `engine`: The engine type.
/// - `region_role`: The region role.
///
@@ -76,9 +82,12 @@ impl InformationSchemaRegionStatistics {
ColumnSchema::new(REGION_ID, ConcreteDataType::uint64_datatype(), false),
ColumnSchema::new(TABLE_ID, ConcreteDataType::uint32_datatype(), false),
ColumnSchema::new(REGION_NUMBER, ConcreteDataType::uint32_datatype(), false),
ColumnSchema::new(REGION_ROWS, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(DISK_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(MEMTABLE_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(MANIFEST_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(SST_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(INDEX_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(REGION_ROLE, ConcreteDataType::string_datatype(), true),
]))
@@ -135,9 +144,12 @@ struct InformationSchemaRegionStatisticsBuilder {
region_ids: UInt64VectorBuilder,
table_ids: UInt32VectorBuilder,
region_numbers: UInt32VectorBuilder,
region_rows: UInt64VectorBuilder,
disk_sizes: UInt64VectorBuilder,
memtable_sizes: UInt64VectorBuilder,
manifest_sizes: UInt64VectorBuilder,
sst_sizes: UInt64VectorBuilder,
index_sizes: UInt64VectorBuilder,
engines: StringVectorBuilder,
region_roles: StringVectorBuilder,
}
@@ -150,9 +162,12 @@ impl InformationSchemaRegionStatisticsBuilder {
region_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
table_ids: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
region_numbers: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
region_rows: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
disk_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
memtable_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
manifest_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
sst_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
index_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
engines: StringVectorBuilder::with_capacity(INIT_CAPACITY),
region_roles: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
@@ -177,9 +192,12 @@ impl InformationSchemaRegionStatisticsBuilder {
(REGION_ID, &Value::from(region_stat.id.as_u64())),
(TABLE_ID, &Value::from(region_stat.id.table_id())),
(REGION_NUMBER, &Value::from(region_stat.id.region_number())),
(REGION_ROWS, &Value::from(region_stat.num_rows)),
(DISK_SIZE, &Value::from(region_stat.approximate_bytes)),
(MEMTABLE_SIZE, &Value::from(region_stat.memtable_size)),
(MANIFEST_SIZE, &Value::from(region_stat.manifest_size)),
(SST_SIZE, &Value::from(region_stat.sst_size)),
(INDEX_SIZE, &Value::from(region_stat.index_size)),
(ENGINE, &Value::from(region_stat.engine.as_str())),
(REGION_ROLE, &Value::from(region_stat.role.to_string())),
];
@@ -192,9 +210,12 @@ impl InformationSchemaRegionStatisticsBuilder {
self.table_ids.push(Some(region_stat.id.table_id()));
self.region_numbers
.push(Some(region_stat.id.region_number()));
self.region_rows.push(Some(region_stat.num_rows));
self.disk_sizes.push(Some(region_stat.approximate_bytes));
self.memtable_sizes.push(Some(region_stat.memtable_size));
self.manifest_sizes.push(Some(region_stat.manifest_size));
self.sst_sizes.push(Some(region_stat.sst_size));
self.index_sizes.push(Some(region_stat.index_size));
self.engines.push(Some(&region_stat.engine));
self.region_roles.push(Some(&region_stat.role.to_string()));
}
@@ -204,9 +225,12 @@ impl InformationSchemaRegionStatisticsBuilder {
Arc::new(self.region_ids.finish()),
Arc::new(self.table_ids.finish()),
Arc::new(self.region_numbers.finish()),
Arc::new(self.region_rows.finish()),
Arc::new(self.disk_sizes.finish()),
Arc::new(self.memtable_sizes.finish()),
Arc::new(self.manifest_sizes.finish()),
Arc::new(self.sst_sizes.finish()),
Arc::new(self.index_sizes.finish()),
Arc::new(self.engines.finish()),
Arc::new(self.region_roles.finish()),
];
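As a usage illustration (not part of this changeset): once these columns are populated, the new statistics can be read back through the SQL interface. The HTTP SQL endpoint, port, and the `information_schema.region_statistics` table name are assumptions inferred from the code above rather than anything this diff states.

```bash
# Hedged sketch: query the region statistics virtual table over the HTTP SQL API.
# The endpoint, port, database, and table name are assumed defaults and may differ.
curl -X POST 'http://127.0.0.1:4000/v1/sql?db=public' \
  --data-urlencode 'sql=SELECT region_id, region_rows, disk_size, sst_size, index_size, region_role FROM information_schema.region_statistics'
```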
@@ -74,7 +74,7 @@ impl MemoryTableBuilder {
/// Construct the `information_schema.{table_name}` virtual table
pub async fn memory_records(&mut self) -> Result<RecordBatch> {
if self.columns.is_empty() {
RecordBatch::new_empty(self.schema.clone()).context(CreateRecordBatchSnafu)
Ok(RecordBatch::new_empty(self.schema.clone()))
} else {
RecordBatch::new(self.schema.clone(), std::mem::take(&mut self.columns))
.context(CreateRecordBatchSnafu)
@@ -28,7 +28,7 @@ enum_dispatch = "0.3"
futures-util.workspace = true
lazy_static.workspace = true
moka = { workspace = true, features = ["future"] }
parking_lot = "0.12"
parking_lot.workspace = true
prometheus.workspace = true
prost.workspace = true
query.workspace = true
@@ -45,7 +45,6 @@ common-grpc-expr.workspace = true
datanode.workspace = true
derive-new = "0.5"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

[dev-dependencies.substrait_proto]
package = "substrait"
@@ -78,7 +78,7 @@ table.workspace = true
tokio.workspace = true
toml.workspace = true
tonic.workspace = true
tracing-appender = "0.2"
tracing-appender.workspace = true

[target.'cfg(not(windows))'.dependencies]
tikv-jemallocator = "0.6"
@@ -174,7 +174,7 @@ impl Repl {

let plan = query_engine
.planner()
.plan(stmt, query_ctx.clone())
.plan(&stmt, query_ctx.clone())
.await
.context(PlanStatementSnafu)?;
@@ -272,9 +272,10 @@ impl StartCommand {
info!("Datanode start command: {:#?}", self);
info!("Datanode options: {:#?}", opts);

let plugin_opts = opts.plugins;
let opts = opts.component;
let mut plugins = Plugins::new();
plugins::setup_datanode_plugins(&mut plugins, &opts)
plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &opts)
.await
.context(StartDatanodeSnafu)?;
@@ -266,9 +266,10 @@ impl StartCommand {
info!("Frontend start command: {:#?}", self);
info!("Frontend options: {:#?}", opts);

let plugin_opts = opts.plugins;
let opts = opts.component;
let mut plugins = Plugins::new();
plugins::setup_frontend_plugins(&mut plugins, &opts)
plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &opts)
.await
.context(StartFrontendSnafu)?;

@@ -342,6 +343,8 @@ impl StartCommand {
// Some queries are expected to take long time.
let channel_config = ChannelConfig {
timeout: None,
tcp_nodelay: opts.datanode.client.tcp_nodelay,
connect_timeout: Some(opts.datanode.client.connect_timeout),
..Default::default()
};
let client = NodeClients::new(channel_config);
@@ -472,7 +475,7 @@ mod tests {
};

let mut plugins = Plugins::new();
plugins::setup_frontend_plugins(&mut plugins, &fe_opts)
plugins::setup_frontend_plugins(&mut plugins, &[], &fe_opts)
.await
.unwrap();
@@ -273,9 +273,10 @@ impl StartCommand {
info!("Metasrv start command: {:#?}", self);
info!("Metasrv options: {:#?}", opts);

let plugin_opts = opts.plugins;
let opts = opts.component;
let mut plugins = Plugins::new();
plugins::setup_metasrv_plugins(&mut plugins, &opts)
plugins::setup_metasrv_plugins(&mut plugins, &plugin_opts, &opts)
.await
.context(StartMetaServerSnafu)?;
@@ -15,6 +15,7 @@
use clap::Parser;
use common_config::Configurable;
use common_runtime::global::RuntimeOptions;
use plugins::PluginOptions;
use serde::{Deserialize, Serialize};

#[derive(Parser, Default, Debug, Clone)]
@@ -40,6 +41,8 @@ pub struct GlobalOptions {
pub struct GreptimeOptions<T> {
/// The runtime options.
pub runtime: RuntimeOptions,
/// The plugin options.
pub plugins: Vec<PluginOptions>,

/// The options of each component (like Datanode or Standalone) of GreptimeDB.
#[serde(flatten)]
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::net::SocketAddr;
use std::sync::Arc;
use std::{fs, path};

@@ -250,6 +251,13 @@ pub struct Instance {
_guard: Vec<WorkerGuard>,
}

impl Instance {
/// Find the socket addr of a server by its `name`.
pub async fn server_addr(&self, name: &str) -> Option<SocketAddr> {
self.frontend.server_handlers().addr(name).await
}
}

#[async_trait]
impl App for Instance {
fn name(&self) -> &str {
@@ -340,7 +348,8 @@ pub struct StartCommand {
}

impl StartCommand {
fn load_options(
/// Load the GreptimeDB options from various sources (command line, config file or env).
pub fn load_options(
&self,
global_options: &GlobalOptions,
) -> Result<GreptimeOptions<StandaloneOptions>> {
@@ -430,7 +439,8 @@ impl StartCommand {
#[allow(unreachable_code)]
#[allow(unused_variables)]
#[allow(clippy::diverging_sub_expression)]
async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
/// Build GreptimeDB instance with the loaded options.
pub async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
common_runtime::init_global_runtimes(&opts.runtime);

let guard = common_telemetry::init_global_logging(
@@ -445,15 +455,16 @@ impl StartCommand {
info!("Standalone options: {opts:#?}");

let mut plugins = Plugins::new();
let plugin_opts = opts.plugins;
let opts = opts.component;
let fe_opts = opts.frontend_options();
let dn_opts = opts.datanode_options();

plugins::setup_frontend_plugins(&mut plugins, &fe_opts)
plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &fe_opts)
.await
.context(StartFrontendSnafu)?;

plugins::setup_datanode_plugins(&mut plugins, &dn_opts)
plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &dn_opts)
.await
.context(StartDatanodeSnafu)?;

@@ -725,12 +736,14 @@ impl InformationExtension for StandaloneInformationExtension {
id: stat.region_id,
rcus: 0,
wcus: 0,
approximate_bytes: region_stat.estimated_disk_size() as i64,
approximate_bytes: region_stat.estimated_disk_size(),
engine: stat.engine,
role: RegionRole::from(stat.role).into(),
num_rows: region_stat.num_rows,
memtable_size: region_stat.memtable_size,
manifest_size: region_stat.manifest_size,
sst_size: region_stat.sst_size,
index_size: region_stat.index_size,
}
})
.collect::<Vec<_>>();
@@ -762,7 +775,7 @@ mod tests {
};

let mut plugins = Plugins::new();
plugins::setup_frontend_plugins(&mut plugins, &fe_opts)
plugins::setup_frontend_plugins(&mut plugins, &[], &fe_opts)
.await
.unwrap();
@@ -20,7 +20,7 @@ use common_config::Configurable;
use common_grpc::channel_manager::{
DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
};
use common_telemetry::logging::{LoggingOptions, DEFAULT_OTLP_ENDPOINT};
use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, DEFAULT_OTLP_ENDPOINT};
use common_wal::config::raft_engine::RaftEngineConfig;
use common_wal::config::DatanodeWalConfig;
use datanode::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
@@ -159,8 +159,20 @@ fn test_load_metasrv_example_config() {
level: Some("info".to_string()),
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
tracing_sample_ratio: Some(Default::default()),
slow_query: SlowQueryOptions {
enable: false,
threshold: Some(Duration::from_secs(10)),
sample_ratio: Some(1.0),
},
..Default::default()
},
datanode: meta_srv::metasrv::DatanodeOptions {
client: meta_srv::metasrv::DatanodeClientOptions {
timeout: Duration::from_secs(10),
connect_timeout: Duration::from_secs(10),
tcp_nodelay: true,
},
},
export_metrics: ExportMetricsOption {
self_import: Some(Default::default()),
remote_write: Some(Default::default()),
@@ -16,6 +16,7 @@ use std::sync::Arc;
mod json_get;
mod json_is;
mod json_path_exists;
mod json_path_match;
mod json_to_string;
mod parse_json;

@@ -49,5 +50,6 @@ impl JsonFunction {
registry.register(Arc::new(JsonIsObject));

registry.register(Arc::new(json_path_exists::JsonPathExistsFunction));
registry.register(Arc::new(json_path_match::JsonPathMatchFunction));
}
}
202  src/common/function/src/scalars/json/json_path_match.rs  (new file)
@@ -0,0 +1,202 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::{self, Display};

use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
use snafu::ensure;

use crate::function::{Function, FunctionContext};

/// Check if the given JSON data match the given JSON path's predicate.
#[derive(Clone, Debug, Default)]
pub struct JsonPathMatchFunction;

const NAME: &str = "json_path_match";

impl Function for JsonPathMatchFunction {
    fn name(&self) -> &str {
        NAME
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::boolean_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::exact(
            vec![
                ConcreteDataType::json_datatype(),
                ConcreteDataType::string_datatype(),
            ],
            Volatility::Immutable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure!(
            columns.len() == 2,
            InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The length of the args is not correct, expect exactly two, have: {}",
                    columns.len()
                ),
            }
        );
        let jsons = &columns[0];
        let paths = &columns[1];

        let size = jsons.len();
        let mut results = BooleanVectorBuilder::with_capacity(size);

        for i in 0..size {
            let json = jsons.get_ref(i);
            let path = paths.get_ref(i);

            match json.data_type() {
                // JSON data type uses binary vector
                ConcreteDataType::Binary(_) => {
                    let json = json.as_binary();
                    let path = path.as_string();
                    let result = match (json, path) {
                        (Ok(Some(json)), Ok(Some(path))) => {
                            if !jsonb::is_null(json) {
                                let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
                                match json_path {
                                    Ok(json_path) => jsonb::path_match(json, json_path).ok(),
                                    Err(_) => None,
                                }
                            } else {
                                None
                            }
                        }
                        _ => None,
                    };

                    results.push(result);
                }

                _ => {
                    return UnsupportedInputDataTypeSnafu {
                        function: NAME,
                        datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
                    }
                    .fail();
                }
            }
        }

        Ok(results.to_vector())
    }
}

impl Display for JsonPathMatchFunction {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "JSON_PATH_MATCH")
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_query::prelude::TypeSignature;
    use datatypes::vectors::{BinaryVector, StringVector};

    use super::*;

    #[test]
    fn test_json_path_match_function() {
        let json_path_match = JsonPathMatchFunction;

        assert_eq!("json_path_match", json_path_match.name());
        assert_eq!(
            ConcreteDataType::boolean_datatype(),
            json_path_match
                .return_type(&[ConcreteDataType::json_datatype()])
                .unwrap()
        );

        assert!(matches!(json_path_match.signature(),
            Signature {
                type_signature: TypeSignature::Exact(valid_types),
                volatility: Volatility::Immutable
            } if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()],
        ));

        let json_strings = [
            Some(r#"{"a": {"b": 2}, "b": 2, "c": 3}"#.to_string()),
            Some(r#"{"a": 1, "b": [1,2,3]}"#.to_string()),
            Some(r#"{"a": 1 ,"b": [1,2,3]}"#.to_string()),
            Some(r#"[1,2,3]"#.to_string()),
            Some(r#"{"a":1,"b":[1,2,3]}"#.to_string()),
            Some(r#"null"#.to_string()),
            Some(r#"null"#.to_string()),
        ];

        let paths = vec![
            Some("$.a.b == 2".to_string()),
            Some("$.b[1 to last] >= 2".to_string()),
            Some("$.c > 0".to_string()),
            Some("$[0 to last] > 0".to_string()),
            Some(r#"null"#.to_string()),
            Some("$.c > 0".to_string()),
            Some(r#"null"#.to_string()),
        ];

        let results = [
            Some(true),
            Some(true),
            Some(false),
            Some(true),
            None,
            None,
            None,
        ];

        let jsonbs = json_strings
            .into_iter()
            .map(|s| s.map(|json| jsonb::parse_value(json.as_bytes()).unwrap().to_vec()))
            .collect::<Vec<_>>();

        let json_vector = BinaryVector::from(jsonbs);
        let path_vector = StringVector::from(paths);
        let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
        let vector = json_path_match
            .eval(FunctionContext::default(), &args)
            .unwrap();

        assert_eq!(7, vector.len());
        for (i, expected) in results.iter().enumerate() {
            let result = vector.get_ref(i);

            match expected {
                Some(expected_value) => {
                    assert!(!result.is_null());
                    let result_value = result.as_boolean().unwrap().unwrap();
                    assert_eq!(*expected_value, result_value);
                }
                None => {
                    assert!(result.is_null());
                }
            }
        }
    }
}
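As a usage illustration (not part of the new file above): once `json_path_match` is registered, it can be invoked from SQL. The HTTP SQL endpoint, port, and the availability of the `parse_json` helper are assumptions based on the surrounding changeset rather than anything this diff states.

```bash
# Hedged sketch: call the new json_path_match UDF over the HTTP SQL API, using
# one of the predicates exercised by the unit test above. Endpoint and port are
# assumed defaults and may differ in a real deployment.
curl -X POST 'http://127.0.0.1:4000/v1/sql?db=public' \
  --data-urlencode "sql=SELECT json_path_match(parse_json('{\"a\": {\"b\": 2}}'), '\$.a.b == 2')"
```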
@@ -325,7 +325,7 @@ mod tests {
});
let addr = ([127, 0, 0, 1], port).into();

let server = Server::bind(&addr).serve(make_svc);
let server = Server::try_bind(&addr).unwrap().serve(make_svc);
let graceful = server.with_graceful_shutdown(async {
rx.await.ok();
});
@@ -18,6 +18,7 @@ common-time.workspace = true
datatypes.workspace = true
prost.workspace = true
snafu.workspace = true
store-api.workspace = true
table.workspace = true

[dev-dependencies]
@@ -22,12 +22,13 @@ use api::v1::{
use common_query::AddColumnLocation;
use datatypes::schema::{ColumnSchema, RawSchema};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::region_request::ChangeOption;
use table::metadata::TableId;
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, ChangeColumnTypeRequest};

use crate::error::{
InvalidColumnDefSnafu, MissingFieldSnafu, MissingTimestampColumnSnafu, Result,
UnknownLocationTypeSnafu,
InvalidChangeTableOptionRequestSnafu, InvalidColumnDefSnafu, MissingFieldSnafu,
MissingTimestampColumnSnafu, Result, UnknownLocationTypeSnafu,
};

const LOCATION_TYPE_FIRST: i32 = LocationType::First as i32;
@@ -92,6 +93,15 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterExpr) -> Result<Alter
Kind::RenameTable(RenameTable { new_table_name }) => {
AlterKind::RenameTable { new_table_name }
}
Kind::ChangeTableOptions(api::v1::ChangeTableOptions {
change_table_options,
}) => AlterKind::ChangeTableOptions {
options: change_table_options
.iter()
.map(ChangeOption::try_from)
.collect::<std::result::Result<Vec<_>, _>>()
.context(InvalidChangeTableOptionRequestSnafu)?,
},
};

let request = AlterTableRequest {
@@ -19,6 +19,7 @@ use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use snafu::{Location, Snafu};
use store_api::metadata::MetadataError;

#[derive(Snafu)]
#[snafu(visibility(pub))]
@@ -118,6 +119,12 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Invalid change table option request"))]
InvalidChangeTableOptionRequest {
#[snafu(source)]
error: MetadataError,
},
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -141,6 +148,7 @@ impl ErrorExt for Error {
Error::UnknownColumnDataType { .. } | Error::InvalidFulltextColumnType { .. } => {
StatusCode::InvalidArguments
}
Error::InvalidChangeTableOptionRequest { .. } => StatusCode::InvalidArguments,
}
}
@@ -60,7 +60,7 @@ table.workspace = true
tokio.workspace = true
tokio-postgres = { workspace = true, optional = true }
tonic.workspace = true
typetag = "0.2"
typetag.workspace = true

[dev-dependencies]
chrono.workspace = true
@@ -78,17 +78,21 @@ pub struct RegionStat {
/// The write capacity units during this period
pub wcus: i64,
/// Approximate bytes of this region
pub approximate_bytes: i64,
pub approximate_bytes: u64,
/// The engine name.
pub engine: String,
/// The region role.
pub role: RegionRole,
/// The number of rows
pub num_rows: u64,
/// The size of the memtable in bytes.
pub memtable_size: u64,
/// The size of the manifest in bytes.
pub manifest_size: u64,
/// The size of the SST files in bytes.
/// The size of the SST data files in bytes.
pub sst_size: u64,
/// The size of the SST index files in bytes.
pub index_size: u64,
}

impl Stat {
@@ -178,12 +182,14 @@ impl From<&api::v1::meta::RegionStat> for RegionStat {
id: RegionId::from_u64(value.region_id),
rcus: value.rcus,
wcus: value.wcus,
approximate_bytes: value.approximate_bytes,
approximate_bytes: value.approximate_bytes as u64,
engine: value.engine.to_string(),
role: RegionRole::from(value.role()),
num_rows: region_stat.num_rows,
memtable_size: region_stat.memtable_size,
manifest_size: region_stat.manifest_size,
sst_size: region_stat.sst_size,
index_size: region_stat.index_size,
}
}
}
@@ -43,10 +43,10 @@ impl AlterLogicalTablesProcedure {
&self.data.physical_columns,
);

// Updates physical table's metadata
// Updates physical table's metadata, and we don't need to touch per-region settings.
self.context
.table_metadata_manager
.update_table_info(physical_table_info, new_raw_table_info)
.update_table_info(physical_table_info, None, new_raw_table_info)
.await?;

Ok(())
@@ -43,10 +43,10 @@ use crate::ddl::DdlContext;
use crate::error::{Error, Result};
use crate::instruction::CacheIdent;
use crate::key::table_info::TableInfoValue;
use crate::key::DeserializedValueWithBytes;
use crate::key::{DeserializedValueWithBytes, RegionDistribution};
use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
use crate::rpc::ddl::AlterTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders};
use crate::rpc::router::{find_leader_regions, find_leaders, region_distribution};
use crate::{metrics, ClusterId};

/// The alter table procedure
@@ -101,6 +101,9 @@ impl AlterTableProcedure {
.get_physical_table_route(table_id)
.await?;

self.data.region_distribution =
Some(region_distribution(&physical_table_route.region_routes));

let leaders = find_leaders(&physical_table_route.region_routes);
let mut alter_region_tasks = Vec::with_capacity(leaders.len());

@@ -161,8 +164,14 @@ impl AlterTableProcedure {
self.on_update_metadata_for_rename(new_table_name.to_string(), table_info_value)
.await?;
} else {
self.on_update_metadata_for_alter(new_info.into(), table_info_value)
.await?;
// region distribution is set in submit_alter_region_requests
let region_distribution = self.data.region_distribution.as_ref().unwrap().clone();
self.on_update_metadata_for_alter(
new_info.into(),
region_distribution,
table_info_value,
)
.await?;
}

info!("Updated table metadata for table {table_ref}, table_id: {table_id}");
@@ -271,6 +280,8 @@ pub struct AlterTableData {
table_id: TableId,
/// Table info value before alteration.
table_info_value: Option<DeserializedValueWithBytes<TableInfoValue>>,
/// Region distribution for table in case we need to update region options.
region_distribution: Option<RegionDistribution>,
}

impl AlterTableData {
@@ -281,6 +292,7 @@ impl AlterTableData {
table_id,
cluster_id,
table_info_value: None,
region_distribution: None,
}
}

@@ -106,6 +106,7 @@ fn create_proto_alter_kind(
})))
}
Kind::RenameTable(_) => Ok(None),
Kind::ChangeTableOptions(v) => Ok(Some(alter_request::Kind::ChangeTableOptions(v.clone()))),
}
}
@@ -20,7 +20,7 @@ use table::requests::AlterKind;
use crate::ddl::alter_table::AlterTableProcedure;
use crate::error::{self, Result};
use crate::key::table_info::TableInfoValue;
use crate::key::DeserializedValueWithBytes;
use crate::key::{DeserializedValueWithBytes, RegionDistribution};

impl AlterTableProcedure {
/// Builds new_meta
@@ -51,7 +51,9 @@ impl AlterTableProcedure {
AlterKind::RenameTable { new_table_name } => {
new_info.name = new_table_name.to_string();
}
AlterKind::DropColumns { .. } | AlterKind::ChangeColumnTypes { .. } => {}
AlterKind::DropColumns { .. }
| AlterKind::ChangeColumnTypes { .. }
| AlterKind::ChangeTableOptions { .. } => {}
}

Ok(new_info)
@@ -75,11 +77,16 @@ impl AlterTableProcedure {
pub(crate) async fn on_update_metadata_for_alter(
&self,
new_table_info: RawTableInfo,
region_distribution: RegionDistribution,
current_table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
) -> Result<()> {
let table_metadata_manager = &self.context.table_metadata_manager;
table_metadata_manager
.update_table_info(current_table_info_value, new_table_info)
.update_table_info(
current_table_info_value,
Some(region_distribution),
new_table_info,
)
.await?;

Ok(())
@@ -58,10 +58,10 @@ impl CreateLogicalTablesProcedure {
|
||||
&new_table_info.name,
|
||||
);
|
||||
|
||||
// Update physical table's metadata
|
||||
// Update physical table's metadata; we don't need to touch per-region settings.
|
||||
self.context
|
||||
.table_metadata_manager
|
||||
.update_table_info(&physical_table_info, new_table_info)
|
||||
.update_table_info(&physical_table_info, None, new_table_info)
|
||||
.await?;
|
||||
|
||||
// Invalidate the physical table cache
|
||||
|
||||
@@ -29,7 +29,10 @@ use crate::test_util::MockDatanodeHandler;
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for () {
|
||||
async fn handle(&self, _peer: &Peer, _request: RegionRequest) -> Result<RegionResponse> {
|
||||
unreachable!()
|
||||
Ok(RegionResponse {
|
||||
affected_rows: 0,
|
||||
extensions: Default::default(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
|
||||
@@ -19,13 +19,14 @@ use std::sync::Arc;
|
||||
use api::v1::alter_expr::Kind;
|
||||
use api::v1::region::{region_request, RegionRequest};
|
||||
use api::v1::{
|
||||
AddColumn, AddColumns, AlterExpr, ColumnDataType, ColumnDef as PbColumnDef, DropColumn,
|
||||
DropColumns, SemanticType,
|
||||
AddColumn, AddColumns, AlterExpr, ChangeTableOption, ChangeTableOptions, ColumnDataType,
|
||||
ColumnDef as PbColumnDef, DropColumn, DropColumns, SemanticType,
|
||||
};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use store_api::storage::RegionId;
|
||||
use table::requests::TTL_KEY;
|
||||
use tokio::sync::mpsc::{self};
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
@@ -34,6 +35,7 @@ use crate::ddl::test_util::create_table::test_create_table_task;
|
||||
use crate::ddl::test_util::datanode_handler::{
|
||||
DatanodeWatcher, RequestOutdatedErrorDatanodeHandler,
|
||||
};
|
||||
use crate::key::datanode_table::DatanodeTableKey;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::peer::Peer;
|
||||
@@ -293,12 +295,21 @@ async fn test_on_update_metadata_add_columns() {
|
||||
let table_name = "foo";
|
||||
let table_id = 1024;
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
|
||||
let region_id = RegionId::new(table_id, 0);
|
||||
let mock_table_routes = vec![RegionRoute {
|
||||
region: Region::new_test(region_id),
|
||||
leader_peer: Some(Peer::default()),
|
||||
follower_peers: vec![],
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
}];
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![]),
|
||||
TableRouteValue::physical(mock_table_routes),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
@@ -326,6 +337,7 @@ async fn test_on_update_metadata_add_columns() {
|
||||
let mut procedure =
|
||||
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.submit_alter_region_requests().await.unwrap();
|
||||
procedure.on_update_metadata().await.unwrap();
|
||||
|
||||
let table_info = ddl_context
|
||||
@@ -343,3 +355,76 @@ async fn test_on_update_metadata_add_columns() {
|
||||
table_info.meta.next_column_id
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_update_table_options() {
|
||||
let node_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(node_manager);
|
||||
let cluster_id = 1;
|
||||
let table_name = "foo";
|
||||
let table_id = 1024;
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
|
||||
let region_id = RegionId::new(table_id, 0);
|
||||
let mock_table_routes = vec![RegionRoute {
|
||||
region: Region::new_test(region_id),
|
||||
leader_peer: Some(Peer::default()),
|
||||
follower_peers: vec![],
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
}];
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(mock_table_routes),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let task = AlterTableTask {
|
||||
alter_table: AlterExpr {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: table_name.to_string(),
|
||||
kind: Some(Kind::ChangeTableOptions(ChangeTableOptions {
|
||||
change_table_options: vec![ChangeTableOption {
|
||||
key: TTL_KEY.to_string(),
|
||||
value: "1d".to_string(),
|
||||
}],
|
||||
})),
|
||||
},
|
||||
};
|
||||
let mut procedure =
|
||||
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.submit_alter_region_requests().await.unwrap();
|
||||
procedure.on_update_metadata().await.unwrap();
|
||||
|
||||
let table_info = ddl_context
|
||||
.table_metadata_manager
|
||||
.table_info_manager()
|
||||
.get(table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.into_inner()
|
||||
.table_info;
|
||||
|
||||
let datanode_key = DatanodeTableKey::new(0, table_id);
|
||||
let region_info = ddl_context
|
||||
.table_metadata_manager
|
||||
.datanode_table_manager()
|
||||
.get(&datanode_key)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.region_info;
|
||||
|
||||
assert_eq!(
|
||||
region_info.region_options,
|
||||
HashMap::from(&table_info.meta.options)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -652,6 +652,18 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Datanode table info not found, table id: {}, datanode id: {}",
|
||||
table_id,
|
||||
datanode_id
|
||||
))]
|
||||
DatanodeTableInfoNotFound {
|
||||
datanode_id: DatanodeId,
|
||||
table_id: TableId,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -752,6 +764,7 @@ impl ErrorExt for Error {
|
||||
PostgresExecution { .. } => StatusCode::Internal,
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
ConnectPostgres { .. } => StatusCode::Internal,
|
||||
Error::DatanodeTableInfoNotFound { .. } => StatusCode::Internal,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -133,7 +133,6 @@ use self::flow::flow_name::FlowNameValue;
|
||||
use self::schema_name::{SchemaManager, SchemaNameKey, SchemaNameValue};
|
||||
use self::table_route::{TableRouteManager, TableRouteValue};
|
||||
use self::tombstone::TombstoneManager;
|
||||
use crate::ddl::utils::region_storage_path;
|
||||
use crate::error::{self, Result, SerdeJsonSnafu};
|
||||
use crate::key::node_address::NodeAddressValue;
|
||||
use crate::key::table_route::TableRouteKey;
|
||||
@@ -593,8 +592,6 @@ impl TableMetadataManager {
|
||||
table_info.meta.region_numbers = region_numbers;
|
||||
let table_id = table_info.ident.table_id;
|
||||
let engine = table_info.meta.engine.clone();
|
||||
let region_storage_path =
|
||||
region_storage_path(&table_info.catalog_name, &table_info.schema_name);
|
||||
|
||||
// Creates table name.
|
||||
let table_name = TableNameKey::new(
|
||||
@@ -606,7 +603,7 @@ impl TableMetadataManager {
|
||||
.table_name_manager()
|
||||
.build_create_txn(&table_name, table_id)?;
|
||||
|
||||
let region_options = (&table_info.meta.options).into();
|
||||
let region_options = table_info.to_region_options();
|
||||
// Creates table info.
|
||||
let table_info_value = TableInfoValue::new(table_info);
|
||||
let (create_table_info_txn, on_create_table_info_failure) = self
|
||||
@@ -625,6 +622,7 @@ impl TableMetadataManager {
|
||||
]);
|
||||
|
||||
if let TableRouteValue::Physical(x) = &table_route_value {
|
||||
let region_storage_path = table_info_value.region_storage_path();
|
||||
let create_datanode_table_txn = self.datanode_table_manager().build_create_txn(
|
||||
table_id,
|
||||
&engine,
|
||||
@@ -926,13 +924,15 @@ impl TableMetadataManager {
|
||||
}
|
||||
|
||||
/// Updates table info and returns an error if different metadata exists.
|
||||
/// It also cascades the update to the redundant table options of each region
/// if `region_distribution` is present.
|
||||
pub async fn update_table_info(
|
||||
&self,
|
||||
current_table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
|
||||
region_distribution: Option<RegionDistribution>,
|
||||
new_table_info: RawTableInfo,
|
||||
) -> Result<()> {
|
||||
let table_id = current_table_info_value.table_info.ident.table_id;
|
||||
|
||||
let new_table_info_value = current_table_info_value.update(new_table_info);
|
||||
|
||||
// Updates table info.
|
||||
@@ -940,8 +940,19 @@ impl TableMetadataManager {
|
||||
.table_info_manager()
|
||||
.build_update_txn(table_id, current_table_info_value, &new_table_info_value)?;
|
||||
|
||||
let mut r = self.kv_backend.txn(update_table_info_txn).await?;
|
||||
let txn = if let Some(region_distribution) = region_distribution {
|
||||
// Region options derived from the table info.
|
||||
let new_region_options = new_table_info_value.table_info.to_region_options();
|
||||
let update_datanode_table_options_txn = self
|
||||
.datanode_table_manager
|
||||
.build_update_table_options_txn(table_id, region_distribution, new_region_options)
|
||||
.await?;
|
||||
Txn::merge_all([update_table_info_txn, update_datanode_table_options_txn])
|
||||
} else {
|
||||
update_table_info_txn
|
||||
};
|
||||
|
||||
let mut r = self.kv_backend.txn(txn).await?;
|
||||
// Checks whether metadata was already updated.
|
||||
if !r.succeeded {
|
||||
let mut set = TxnOpGetResponseSet::from(&mut r.responses);
|
||||
@@ -1669,12 +1680,12 @@ mod tests {
|
||||
DeserializedValueWithBytes::from_inner(TableInfoValue::new(table_info.clone()));
|
||||
// should be ok.
|
||||
table_metadata_manager
|
||||
.update_table_info(¤t_table_info_value, new_table_info.clone())
|
||||
.update_table_info(¤t_table_info_value, None, new_table_info.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
// if table info was updated, it should be ok.
|
||||
table_metadata_manager
|
||||
.update_table_info(¤t_table_info_value, new_table_info.clone())
|
||||
.update_table_info(¤t_table_info_value, None, new_table_info.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -1696,7 +1707,7 @@ mod tests {
|
||||
// if the current_table_info_value is wrong, it should return an error.
|
||||
// The ABA problem.
|
||||
assert!(table_metadata_manager
|
||||
.update_table_info(&wrong_table_info_value, new_table_info)
|
||||
.update_table_info(&wrong_table_info_value, None, new_table_info)
|
||||
.await
|
||||
.is_err())
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ use store_api::storage::RegionNumber;
|
||||
use table::metadata::TableId;
|
||||
|
||||
use super::MetadataKey;
|
||||
use crate::error::{InvalidMetadataSnafu, Result};
|
||||
use crate::error::{DatanodeTableInfoNotFoundSnafu, InvalidMetadataSnafu, Result};
|
||||
use crate::key::{
|
||||
MetadataValue, RegionDistribution, DATANODE_TABLE_KEY_PATTERN, DATANODE_TABLE_KEY_PREFIX,
|
||||
};
|
||||
@@ -209,6 +209,49 @@ impl DatanodeTableManager {
|
||||
Ok(txn)
|
||||
}
|
||||
|
||||
/// Builds a transaction to update the redundant table options (including WAL options)
/// for the given table id, if provided.
|
||||
///
|
||||
/// Note that the provided `new_region_options` must be a
|
||||
/// complete set of all options rather than incremental changes.
|
||||
pub(crate) async fn build_update_table_options_txn(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
region_distribution: RegionDistribution,
|
||||
new_region_options: HashMap<String, String>,
|
||||
) -> Result<Txn> {
|
||||
assert!(!region_distribution.is_empty());
|
||||
// safety: region_distribution must not be empty
|
||||
let (any_datanode, _) = region_distribution.first_key_value().unwrap();
|
||||
|
||||
let mut region_info = self
|
||||
.kv_backend
|
||||
.get(&DatanodeTableKey::new(*any_datanode, table_id).to_bytes())
|
||||
.await
|
||||
.transpose()
|
||||
.context(DatanodeTableInfoNotFoundSnafu {
|
||||
datanode_id: *any_datanode,
|
||||
table_id,
|
||||
})?
|
||||
.and_then(|r| DatanodeTableValue::try_from_raw_value(&r.value))?
|
||||
.region_info;
|
||||
// substitute region options only.
|
||||
region_info.region_options = new_region_options;
|
||||
|
||||
let mut txns = Vec::with_capacity(region_distribution.len());
|
||||
|
||||
for (datanode, regions) in region_distribution.into_iter() {
|
||||
let key = DatanodeTableKey::new(datanode, table_id);
|
||||
let key_bytes = key.to_bytes();
|
||||
let value_bytes = DatanodeTableValue::new(table_id, regions, region_info.clone())
|
||||
.try_as_raw_value()?;
|
||||
txns.push(TxnOp::Put(key_bytes, value_bytes));
|
||||
}
|
||||
|
||||
let txn = Txn::new().and_then(txns);
|
||||
Ok(txn)
|
||||
}
|
||||
|
||||
/// Builds the update datanode table transactions. They are only executed when the primary key comparison succeeds.
|
||||
pub(crate) fn build_update_txn(
|
||||
&self,
|
||||
|
||||
@@ -23,6 +23,7 @@ use table::table_name::TableName;
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use super::TABLE_INFO_KEY_PATTERN;
|
||||
use crate::ddl::utils::region_storage_path;
|
||||
use crate::error::{InvalidMetadataSnafu, Result};
|
||||
use crate::key::txn_helper::TxnOpGetResponseSet;
|
||||
use crate::key::{DeserializedValueWithBytes, MetadataKey, MetadataValue, TABLE_INFO_KEY_PREFIX};
|
||||
@@ -125,6 +126,11 @@ impl TableInfoValue {
|
||||
table_name: self.table_info.name.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds storage path for all regions in table.
|
||||
pub fn region_storage_path(&self) -> String {
|
||||
region_storage_path(&self.table_info.catalog_name, &self.table_info.schema_name)
|
||||
}
|
||||
}
|
||||
|
||||
pub type TableInfoManagerRef = Arc<TableInfoManager>;
|
||||
|
||||
@@ -17,6 +17,7 @@ use std::slice;
|
||||
use std::sync::Arc;
|
||||
|
||||
use datafusion::arrow::util::pretty::pretty_format_batches;
|
||||
use datatypes::prelude::DataType;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{Helper, VectorRef};
|
||||
@@ -58,13 +59,18 @@ impl RecordBatch {
|
||||
}
|
||||
|
||||
/// Create an empty [`RecordBatch`] from `schema`.
|
||||
pub fn new_empty(schema: SchemaRef) -> Result<RecordBatch> {
|
||||
pub fn new_empty(schema: SchemaRef) -> RecordBatch {
|
||||
let df_record_batch = DfRecordBatch::new_empty(schema.arrow_schema().clone());
|
||||
Ok(RecordBatch {
|
||||
let columns = schema
|
||||
.column_schemas()
|
||||
.iter()
|
||||
.map(|col| col.data_type.create_mutable_vector(0).to_vector())
|
||||
.collect();
|
||||
RecordBatch {
|
||||
schema,
|
||||
columns: vec![],
|
||||
columns,
|
||||
df_record_batch,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_project(&self, indices: &[usize]) -> Result<Self> {
|
||||
|
||||
@@ -4,21 +4,36 @@ version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[lib]
|
||||
path = "src/lib.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "common-runtime-bin"
|
||||
path = "src/bin.rs"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-trait.workspace = true
|
||||
clap.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
futures.workspace = true
|
||||
lazy_static.workspace = true
|
||||
num_cpus.workspace = true
|
||||
once_cell.workspace = true
|
||||
parking_lot.workspace = true
|
||||
paste.workspace = true
|
||||
pin-project.workspace = true
|
||||
prometheus.workspace = true
|
||||
rand.workspace = true
|
||||
ratelimit.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
snafu.workspace = true
|
||||
tempfile.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-metrics = "0.3"
|
||||
tokio-metrics-collector = { git = "https://github.com/MichaelScofield/tokio-metrics-collector.git", rev = "89d692d5753d28564a7aac73c6ac5aba22243ba0" }
|
||||
|
||||
60
src/common/runtime/README.md
Normal file
@@ -0,0 +1,60 @@
# Greptime Runtime

## Run the performance test for different priorities & workload types

```
# run this from within this subcrate
cargo run --release -- --loop-cnt 500
```

## Related PRs & issues

- Preliminary support for CPU limitation

ISSUE: https://github.com/GreptimeTeam/greptimedb/issues/3685

PR: https://github.com/GreptimeTeam/greptimedb/pull/4782

## CPU resource constraints (ThrottleableRuntime)

To constrain CPU usage we adopt rate limiting. When a future is created, we first wrap it in another future that intercepts its poll calls at runtime. Using the ratelimit library, we can then allow only a limited number of polls within a given time frame for the batch of tasks running under a certain priority (the current token generation interval is 10ms).

The runtime used by default can be switched via

``` rust
pub type Runtime = DefaultRuntime;
```

in `runtime.rs`.
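For reference, below is a minimal sketch of how a caller could opt into the throttled runtime through the builder introduced in this change; the import paths mirror the new `common_runtime` modules added further down and are an assumption about the final public surface, not a definitive API.

``` rust
use common_runtime::runtime::{BuilderBuild, Priority, RuntimeTrait};
use common_runtime::runtime_throttleable::ThrottleableRuntime;
use common_runtime::Builder;

fn main() {
    // Building with an explicit `Priority` and annotating the result type
    // selects the `BuilderBuild<ThrottleableRuntime>` impl of the builder.
    let runtime: ThrottleableRuntime = Builder::default()
        .runtime_name("example")
        .thread_name("example-worker")
        .worker_threads(4)
        .priority(Priority::Low)
        .build()
        .expect("failed to create runtime");

    // Futures spawned on this runtime are wrapped in a `ThrottleFuture`,
    // so their polls are rate limited according to the priority above.
    let handle = runtime.spawn(async { 21 * 2 });
    let answer = runtime.block_on(handle).expect("task panicked");
    assert_eq!(answer, 42);
}
```

`Priority::VeryHigh` effectively disables throttling, since `ratelimiter_count` creates no rate limiter for it.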
We tested four types of workloads with 5 priorities, set up as follows:

``` rust
impl Priority {
    fn ratelimiter_count(&self) -> Result<Option<Ratelimiter>> {
        let max = 8000;
        let gen_per_10ms = match self {
            Priority::VeryLow => Some(2000),
            Priority::Low => Some(4000),
            Priority::Middle => Some(6000),
            Priority::High => Some(8000),
            Priority::VeryHigh => None,
        };
        if let Some(gen_per_10ms) = gen_per_10ms {
            Ratelimiter::builder(gen_per_10ms, Duration::from_millis(10)) // tokens generated per 10ms
                .max_tokens(max) // tokens reserved for batched requests
                .build()
                .context(BuildRuntimeRateLimiterSnafu)
                .map(Some)
        } else {
            Ok(None)
        }
    }
}
```

These are the preliminary experimental results so far:



## TODO

- Introduce a PID controller to achieve more accurate limitation.
BIN
src/common/runtime/resources/rdme-exp.png
Normal file
Binary file not shown. (226 KiB)
205
src/common/runtime/src/bin.rs
Normal file
@@ -0,0 +1,205 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use clap::Parser;
|
||||
|
||||
#[derive(Debug, Default, Parser)]
|
||||
pub struct Command {
|
||||
#[clap(long)]
|
||||
loop_cnt: usize,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let cmd = Command::parse();
|
||||
|
||||
test_diff_priority_cpu::test_diff_workload_priority(cmd.loop_cnt);
|
||||
}
|
||||
|
||||
mod test_diff_priority_cpu {
|
||||
use std::path::PathBuf;
|
||||
|
||||
use common_runtime::runtime::{BuilderBuild, Priority, RuntimeTrait};
|
||||
use common_runtime::{Builder, Runtime};
|
||||
use common_telemetry::debug;
|
||||
use tempfile::TempDir;
|
||||
|
||||
fn compute_pi_str(precision: usize) -> String {
|
||||
let mut pi = 0.0;
|
||||
let mut sign = 1.0;
|
||||
|
||||
for i in 0..precision {
|
||||
pi += sign / (2 * i + 1) as f64;
|
||||
sign *= -1.0;
|
||||
}
|
||||
|
||||
pi *= 4.0;
|
||||
format!("{:.prec$}", pi, prec = precision)
|
||||
}
|
||||
|
||||
macro_rules! def_workload_enum {
|
||||
($($variant:ident),+) => {
|
||||
#[derive(Debug)]
|
||||
enum WorkloadType {
|
||||
$($variant),+
|
||||
}
|
||||
|
||||
/// array of workloads for iteration
|
||||
const WORKLOADS: &'static [WorkloadType] = &[
|
||||
$( WorkloadType::$variant ),+
|
||||
];
|
||||
};
|
||||
}
|
||||
|
||||
def_workload_enum!(
|
||||
ComputeHeavily,
|
||||
ComputeHeavily2,
|
||||
WriteFile,
|
||||
SpawnBlockingWriteFile
|
||||
);
|
||||
|
||||
async fn workload_compute_heavily() {
|
||||
let prefix = 10;
|
||||
|
||||
for _ in 0..3000 {
|
||||
let _ = compute_pi_str(prefix);
|
||||
tokio::task::yield_now().await;
|
||||
}
|
||||
}
|
||||
async fn workload_compute_heavily2() {
|
||||
let prefix = 30;
|
||||
for _ in 0..2000 {
|
||||
let _ = compute_pi_str(prefix);
|
||||
tokio::task::yield_now().await;
|
||||
}
|
||||
}
|
||||
async fn workload_write_file(_idx: u64, tempdir: PathBuf) {
|
||||
use tokio::io::AsyncWriteExt;
|
||||
let prefix = 50;
|
||||
|
||||
let mut file = tokio::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.append(true)
|
||||
.create(true)
|
||||
.open(tempdir.join(format!("pi_{}", prefix)))
|
||||
.await
|
||||
.unwrap();
|
||||
for i in 0..200 {
|
||||
let pi = compute_pi_str(prefix);
|
||||
|
||||
if i % 2 == 0 {
|
||||
file.write_all(pi.as_bytes()).await.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
async fn workload_spawn_blocking_write_file(tempdir: PathBuf) {
|
||||
use std::io::Write;
|
||||
let prefix = 100;
|
||||
let mut file = Some(
|
||||
std::fs::OpenOptions::new()
|
||||
.append(true)
|
||||
.create(true)
|
||||
.open(tempdir.join(format!("pi_{}", prefix)))
|
||||
.unwrap(),
|
||||
);
|
||||
for i in 0..100 {
|
||||
let pi = compute_pi_str(prefix);
|
||||
if i % 2 == 0 {
|
||||
let mut file1 = file.take().unwrap();
|
||||
file = Some(
|
||||
tokio::task::spawn_blocking(move || {
|
||||
file1.write_all(pi.as_bytes()).unwrap();
|
||||
file1
|
||||
})
|
||||
.await
|
||||
.unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn test_diff_workload_priority(loop_cnt: usize) {
|
||||
let tempdir = tempfile::tempdir().unwrap();
|
||||
let priorities = [
|
||||
Priority::VeryLow,
|
||||
Priority::Low,
|
||||
Priority::Middle,
|
||||
Priority::High,
|
||||
Priority::VeryHigh,
|
||||
];
|
||||
for wl in WORKLOADS {
|
||||
for p in priorities.iter() {
|
||||
let runtime: Runtime = Builder::default()
|
||||
.runtime_name("test")
|
||||
.thread_name("test")
|
||||
.worker_threads(8)
|
||||
.priority(*p)
|
||||
.build()
|
||||
.expect("Fail to create runtime");
|
||||
let runtime2 = runtime.clone();
|
||||
runtime.block_on(test_spec_priority_and_workload(
|
||||
*p, runtime2, wl, &tempdir, loop_cnt,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn test_spec_priority_and_workload(
|
||||
priority: Priority,
|
||||
runtime: Runtime,
|
||||
workload_id: &WorkloadType,
|
||||
tempdir: &TempDir,
|
||||
loop_cnt: usize,
|
||||
) {
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await;
|
||||
debug!(
|
||||
"testing cpu usage for priority {:?} workload_id {:?}",
|
||||
priority, workload_id,
|
||||
);
|
||||
// start monitor thread
|
||||
let mut tasks = vec![];
|
||||
let start = std::time::Instant::now();
|
||||
for i in 0..loop_cnt {
|
||||
// persist cpu usage in json: {priority}.{workload_id}
|
||||
match *workload_id {
|
||||
WorkloadType::ComputeHeavily => {
|
||||
tasks.push(runtime.spawn(workload_compute_heavily()));
|
||||
}
|
||||
WorkloadType::ComputeHeavily2 => {
|
||||
tasks.push(runtime.spawn(workload_compute_heavily2()));
|
||||
}
|
||||
WorkloadType::SpawnBlockingWriteFile => {
|
||||
tasks.push(runtime.spawn(workload_spawn_blocking_write_file(
|
||||
tempdir.path().to_path_buf(),
|
||||
)));
|
||||
}
|
||||
WorkloadType::WriteFile => {
|
||||
tasks.push(
|
||||
runtime.spawn(workload_write_file(i as u64, tempdir.path().to_path_buf())),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
for task in tasks {
|
||||
task.await.unwrap();
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
debug!(
|
||||
"test cpu usage for priority {:?} workload_id {:?} elapsed {}ms",
|
||||
priority,
|
||||
workload_id,
|
||||
elapsed.as_millis()
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -33,6 +33,14 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build runtime rate limiter"))]
|
||||
BuildRuntimeRateLimiter {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: ratelimit::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Repeated task {} is already started", name))]
|
||||
IllegalState {
|
||||
name: String,
|
||||
|
||||
@@ -21,6 +21,7 @@ use once_cell::sync::Lazy;
|
||||
use paste::paste;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::runtime::{BuilderBuild, RuntimeTrait};
|
||||
use crate::{Builder, JoinHandle, Runtime};
|
||||
|
||||
const GLOBAL_WORKERS: usize = 8;
|
||||
|
||||
@@ -17,6 +17,8 @@ pub mod global;
|
||||
mod metrics;
|
||||
mod repeated_task;
|
||||
pub mod runtime;
|
||||
pub mod runtime_default;
|
||||
pub mod runtime_throttleable;
|
||||
|
||||
pub use global::{
|
||||
block_on_compact, block_on_global, compact_runtime, create_runtime, global_runtime,
|
||||
|
||||
@@ -23,6 +23,7 @@ use tokio::task::JoinHandle;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::error::{IllegalStateSnafu, Result, WaitGcTaskStopSnafu};
|
||||
use crate::runtime::RuntimeTrait;
|
||||
use crate::Runtime;
|
||||
|
||||
/// Task to execute repeatedly.
|
||||
|
||||
@@ -19,24 +19,20 @@ use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
use snafu::ResultExt;
|
||||
use tokio::runtime::{Builder as RuntimeBuilder, Handle};
|
||||
use tokio::runtime::Builder as RuntimeBuilder;
|
||||
use tokio::sync::oneshot;
|
||||
pub use tokio::task::{JoinError, JoinHandle};
|
||||
|
||||
use crate::error::*;
|
||||
use crate::metrics::*;
|
||||
use crate::runtime_default::DefaultRuntime;
|
||||
use crate::runtime_throttleable::ThrottleableRuntime;
|
||||
|
||||
// configurations
|
||||
pub type Runtime = DefaultRuntime;
|
||||
|
||||
static RUNTIME_ID: AtomicUsize = AtomicUsize::new(0);
|
||||
|
||||
/// A runtime to run future tasks
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Runtime {
|
||||
name: String,
|
||||
handle: Handle,
|
||||
// Used to receive a drop signal when dropper is dropped, inspired by databend
|
||||
_dropper: Arc<Dropper>,
|
||||
}
|
||||
|
||||
/// Dropping the dropper will cause runtime to shutdown.
|
||||
#[derive(Debug)]
|
||||
pub struct Dropper {
|
||||
@@ -50,45 +46,42 @@ impl Drop for Dropper {
|
||||
}
|
||||
}
|
||||
|
||||
impl Runtime {
|
||||
pub fn builder() -> Builder {
|
||||
pub trait RuntimeTrait {
|
||||
/// Get a runtime builder
|
||||
fn builder() -> Builder {
|
||||
Builder::default()
|
||||
}
|
||||
|
||||
/// Spawn a future and execute it in this thread pool
|
||||
///
|
||||
/// Similar to tokio::runtime::Runtime::spawn()
|
||||
pub fn spawn<F>(&self, future: F) -> JoinHandle<F::Output>
|
||||
fn spawn<F>(&self, future: F) -> JoinHandle<F::Output>
|
||||
where
|
||||
F: Future + Send + 'static,
|
||||
F::Output: Send + 'static,
|
||||
{
|
||||
self.handle.spawn(future)
|
||||
}
|
||||
F::Output: Send + 'static;
|
||||
|
||||
/// Run the provided function on an executor dedicated to blocking
|
||||
/// operations.
|
||||
pub fn spawn_blocking<F, R>(&self, func: F) -> JoinHandle<R>
|
||||
fn spawn_blocking<F, R>(&self, func: F) -> JoinHandle<R>
|
||||
where
|
||||
F: FnOnce() -> R + Send + 'static,
|
||||
R: Send + 'static,
|
||||
{
|
||||
self.handle.spawn_blocking(func)
|
||||
}
|
||||
R: Send + 'static;
|
||||
|
||||
/// Run a future to complete, this is the runtime's entry point
|
||||
pub fn block_on<F: Future>(&self, future: F) -> F::Output {
|
||||
self.handle.block_on(future)
|
||||
}
|
||||
fn block_on<F: Future>(&self, future: F) -> F::Output;
|
||||
|
||||
pub fn name(&self) -> &str {
|
||||
&self.name
|
||||
}
|
||||
/// Get the name of the runtime
|
||||
fn name(&self) -> &str;
|
||||
}
|
||||
|
||||
pub trait BuilderBuild<R: RuntimeTrait> {
|
||||
fn build(&mut self) -> Result<R>;
|
||||
}
|
||||
|
||||
pub struct Builder {
|
||||
runtime_name: String,
|
||||
thread_name: String,
|
||||
priority: Priority,
|
||||
builder: RuntimeBuilder,
|
||||
}
|
||||
|
||||
@@ -98,11 +91,17 @@ impl Default for Builder {
|
||||
runtime_name: format!("runtime-{}", RUNTIME_ID.fetch_add(1, Ordering::Relaxed)),
|
||||
thread_name: "default-worker".to_string(),
|
||||
builder: RuntimeBuilder::new_multi_thread(),
|
||||
priority: Priority::VeryHigh,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Builder {
|
||||
pub fn priority(&mut self, priority: Priority) -> &mut Self {
|
||||
self.priority = priority;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the number of worker threads the Runtime will use.
|
||||
///
|
||||
/// This can be any number above 0. The default value is the number of cores available to the system.
|
||||
@@ -139,8 +138,10 @@ impl Builder {
|
||||
self.thread_name = val.into();
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build(&mut self) -> Result<Runtime> {
|
||||
impl BuilderBuild<DefaultRuntime> for Builder {
|
||||
fn build(&mut self) -> Result<DefaultRuntime> {
|
||||
let runtime = self
|
||||
.builder
|
||||
.enable_all()
|
||||
@@ -163,18 +164,53 @@ impl Builder {
|
||||
#[cfg(tokio_unstable)]
|
||||
register_collector(name.clone(), &handle);
|
||||
|
||||
Ok(Runtime {
|
||||
name,
|
||||
Ok(DefaultRuntime::new(
|
||||
&name,
|
||||
handle,
|
||||
_dropper: Arc::new(Dropper {
|
||||
Arc::new(Dropper {
|
||||
close: Some(send_stop),
|
||||
}),
|
||||
})
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl BuilderBuild<ThrottleableRuntime> for Builder {
|
||||
fn build(&mut self) -> Result<ThrottleableRuntime> {
|
||||
let runtime = self
|
||||
.builder
|
||||
.enable_all()
|
||||
.thread_name(self.thread_name.clone())
|
||||
.on_thread_start(on_thread_start(self.thread_name.clone()))
|
||||
.on_thread_stop(on_thread_stop(self.thread_name.clone()))
|
||||
.on_thread_park(on_thread_park(self.thread_name.clone()))
|
||||
.on_thread_unpark(on_thread_unpark(self.thread_name.clone()))
|
||||
.build()
|
||||
.context(BuildRuntimeSnafu)?;
|
||||
|
||||
let name = self.runtime_name.clone();
|
||||
let handle = runtime.handle().clone();
|
||||
let (send_stop, recv_stop) = oneshot::channel();
|
||||
// Block the runtime to shutdown.
|
||||
let _ = thread::Builder::new()
|
||||
.name(format!("{}-blocker", self.thread_name))
|
||||
.spawn(move || runtime.block_on(recv_stop));
|
||||
|
||||
#[cfg(tokio_unstable)]
|
||||
register_collector(name.clone(), &handle);
|
||||
|
||||
ThrottleableRuntime::new(
|
||||
&name,
|
||||
self.priority,
|
||||
handle,
|
||||
Arc::new(Dropper {
|
||||
close: Some(send_stop),
|
||||
}),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(tokio_unstable)]
|
||||
pub fn register_collector(name: String, handle: &Handle) {
|
||||
pub fn register_collector(name: String, handle: &tokio::runtime::Handle) {
|
||||
let name = name.replace("-", "_");
|
||||
let monitor = tokio_metrics::RuntimeMonitor::new(handle);
|
||||
let collector = tokio_metrics_collector::RuntimeCollector::new(monitor, name);
|
||||
@@ -213,8 +249,18 @@ fn on_thread_unpark(thread_name: String) -> impl Fn() + 'static {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
||||
pub enum Priority {
|
||||
VeryLow = 0,
|
||||
Low = 1,
|
||||
Middle = 2,
|
||||
High = 3,
|
||||
VeryHigh = 4,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
@@ -235,12 +281,12 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_metric() {
|
||||
let runtime = Builder::default()
|
||||
let runtime: Runtime = Builder::default()
|
||||
.worker_threads(5)
|
||||
.thread_name("test_runtime_metric")
|
||||
.build()
|
||||
.unwrap();
|
||||
// wait threads created
|
||||
// wait for threads to be created
|
||||
thread::sleep(Duration::from_millis(50));
|
||||
|
||||
let _handle = runtime.spawn(async {
|
||||
|
||||
77
src/common/runtime/src/runtime_default.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::future::Future;
|
||||
use std::sync::Arc;
|
||||
|
||||
use tokio::runtime::Handle;
|
||||
pub use tokio::task::JoinHandle;
|
||||
|
||||
use crate::runtime::{Dropper, RuntimeTrait};
|
||||
use crate::Builder;
|
||||
|
||||
/// A runtime to run future tasks
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DefaultRuntime {
|
||||
name: String,
|
||||
handle: Handle,
|
||||
// Used to receive a drop signal when dropper is dropped, inspired by databend
|
||||
_dropper: Arc<Dropper>,
|
||||
}
|
||||
|
||||
impl DefaultRuntime {
|
||||
pub(crate) fn new(name: &str, handle: Handle, dropper: Arc<Dropper>) -> Self {
|
||||
Self {
|
||||
name: name.to_string(),
|
||||
handle,
|
||||
_dropper: dropper,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RuntimeTrait for DefaultRuntime {
|
||||
fn builder() -> Builder {
|
||||
Builder::default()
|
||||
}
|
||||
|
||||
/// Spawn a future and execute it in this thread pool
|
||||
///
|
||||
/// Similar to tokio::runtime::Runtime::spawn()
|
||||
fn spawn<F>(&self, future: F) -> JoinHandle<F::Output>
|
||||
where
|
||||
F: Future + Send + 'static,
|
||||
F::Output: Send + 'static,
|
||||
{
|
||||
self.handle.spawn(future)
|
||||
}
|
||||
|
||||
/// Run the provided function on an executor dedicated to blocking
|
||||
/// operations.
|
||||
fn spawn_blocking<F, R>(&self, func: F) -> JoinHandle<R>
|
||||
where
|
||||
F: FnOnce() -> R + Send + 'static,
|
||||
R: Send + 'static,
|
||||
{
|
||||
self.handle.spawn_blocking(func)
|
||||
}
|
||||
|
||||
/// Run a future to complete, this is the runtime's entry point
|
||||
fn block_on<F: Future>(&self, future: F) -> F::Output {
|
||||
self.handle.block_on(future)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
&self.name
|
||||
}
|
||||
}
|
||||
285
src/common/runtime/src/runtime_throttleable.rs
Normal file
@@ -0,0 +1,285 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Debug;
|
||||
use std::future::Future;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
use std::time::Duration;
|
||||
|
||||
use futures::FutureExt;
|
||||
use ratelimit::Ratelimiter;
|
||||
use snafu::ResultExt;
|
||||
use tokio::runtime::Handle;
|
||||
pub use tokio::task::JoinHandle;
|
||||
use tokio::time::Sleep;
|
||||
|
||||
use crate::error::{BuildRuntimeRateLimiterSnafu, Result};
|
||||
use crate::runtime::{Dropper, Priority, RuntimeTrait};
|
||||
use crate::Builder;
|
||||
|
||||
struct RuntimeRateLimiter {
|
||||
pub ratelimiter: Option<Ratelimiter>,
|
||||
}
|
||||
|
||||
impl Debug for RuntimeRateLimiter {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("RuntimeThrottleShareWithFuture")
|
||||
.field(
|
||||
"ratelimiter_max_tokens",
|
||||
&self.ratelimiter.as_ref().map(|v| v.max_tokens()),
|
||||
)
|
||||
.field(
|
||||
"ratelimiter_refill_amount",
|
||||
&self.ratelimiter.as_ref().map(|v| v.refill_amount()),
|
||||
)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// A runtime to run future tasks
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ThrottleableRuntime {
|
||||
name: String,
|
||||
handle: Handle,
|
||||
shared_with_future: Arc<RuntimeRateLimiter>,
|
||||
// Used to receive a drop signal when dropper is dropped, inspired by databend
|
||||
_dropper: Arc<Dropper>,
|
||||
}
|
||||
|
||||
impl ThrottleableRuntime {
|
||||
pub(crate) fn new(
|
||||
name: &str,
|
||||
priority: Priority,
|
||||
handle: Handle,
|
||||
dropper: Arc<Dropper>,
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
name: name.to_string(),
|
||||
handle,
|
||||
shared_with_future: Arc::new(RuntimeRateLimiter {
|
||||
ratelimiter: priority.ratelimiter_count()?,
|
||||
}),
|
||||
_dropper: dropper,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl RuntimeTrait for ThrottleableRuntime {
|
||||
fn builder() -> Builder {
|
||||
Builder::default()
|
||||
}
|
||||
|
||||
/// Spawn a future and execute it in this thread pool
|
||||
///
|
||||
/// Similar to tokio::runtime::Runtime::spawn()
|
||||
fn spawn<F>(&self, future: F) -> JoinHandle<F::Output>
|
||||
where
|
||||
F: Future + Send + 'static,
|
||||
F::Output: Send + 'static,
|
||||
{
|
||||
self.handle
|
||||
.spawn(ThrottleFuture::new(self.shared_with_future.clone(), future))
|
||||
}
|
||||
|
||||
/// Run the provided function on an executor dedicated to blocking
|
||||
/// operations.
|
||||
fn spawn_blocking<F, R>(&self, func: F) -> JoinHandle<R>
|
||||
where
|
||||
F: FnOnce() -> R + Send + 'static,
|
||||
R: Send + 'static,
|
||||
{
|
||||
self.handle.spawn_blocking(func)
|
||||
}
|
||||
|
||||
/// Run a future to complete, this is the runtime's entry point
|
||||
fn block_on<F: Future>(&self, future: F) -> F::Output {
|
||||
self.handle.block_on(future)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
&self.name
|
||||
}
|
||||
}
|
||||
|
||||
enum State {
|
||||
Pollable,
|
||||
Throttled(Pin<Box<Sleep>>),
|
||||
}
|
||||
|
||||
impl State {
|
||||
fn unwrap_backoff(&mut self) -> &mut Pin<Box<Sleep>> {
|
||||
match self {
|
||||
State::Throttled(sleep) => sleep,
|
||||
_ => panic!("unwrap_backoff failed"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pin_project::pin_project]
|
||||
pub struct ThrottleFuture<F: Future + Send + 'static> {
|
||||
#[pin]
|
||||
future: F,
|
||||
|
||||
/// RateLimiter of this future
|
||||
handle: Arc<RuntimeRateLimiter>,
|
||||
|
||||
state: State,
|
||||
}
|
||||
|
||||
impl<F> ThrottleFuture<F>
|
||||
where
|
||||
F: Future + Send + 'static,
|
||||
F::Output: Send + 'static,
|
||||
{
|
||||
fn new(handle: Arc<RuntimeRateLimiter>, future: F) -> Self {
|
||||
Self {
|
||||
future,
|
||||
handle,
|
||||
state: State::Pollable,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<F> Future for ThrottleFuture<F>
|
||||
where
|
||||
F: Future + Send + 'static,
|
||||
F::Output: Send + 'static,
|
||||
{
|
||||
type Output = F::Output;
|
||||
|
||||
fn poll(self: std::pin::Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
let this = self.project();
|
||||
|
||||
match this.state {
|
||||
State::Pollable => {}
|
||||
State::Throttled(ref mut sleep) => match sleep.poll_unpin(cx) {
|
||||
Poll::Ready(_) => {
|
||||
*this.state = State::Pollable;
|
||||
}
|
||||
Poll::Pending => return Poll::Pending,
|
||||
},
|
||||
};
|
||||
|
||||
if let Some(ratelimiter) = &this.handle.ratelimiter {
|
||||
if let Err(wait) = ratelimiter.try_wait() {
|
||||
*this.state = State::Throttled(Box::pin(tokio::time::sleep(wait)));
|
||||
match this.state.unwrap_backoff().poll_unpin(cx) {
|
||||
Poll::Ready(_) => {
|
||||
*this.state = State::Pollable;
|
||||
}
|
||||
Poll::Pending => {
|
||||
return Poll::Pending;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let poll_res = this.future.poll(cx);
|
||||
|
||||
match poll_res {
|
||||
Poll::Ready(r) => Poll::Ready(r),
|
||||
Poll::Pending => Poll::Pending,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Priority {
|
||||
fn ratelimiter_count(&self) -> Result<Option<Ratelimiter>> {
|
||||
let max = 8000;
|
||||
let gen_per_10ms = match self {
|
||||
Priority::VeryLow => Some(2000),
|
||||
Priority::Low => Some(4000),
|
||||
Priority::Middle => Some(6000),
|
||||
Priority::High => Some(8000),
|
||||
Priority::VeryHigh => None,
|
||||
};
|
||||
if let Some(gen_per_10ms) = gen_per_10ms {
|
||||
Ratelimiter::builder(gen_per_10ms, Duration::from_millis(10)) // generate poll count per 10ms
|
||||
.max_tokens(max) // reserved token for batch request
|
||||
.build()
|
||||
.context(BuildRuntimeRateLimiterSnafu)
|
||||
.map(Some)
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use tokio::fs::File;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::time::Duration;
|
||||
|
||||
use super::*;
|
||||
use crate::runtime::BuilderBuild;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_throttleable_runtime_spawn_simple() {
|
||||
for p in [
|
||||
Priority::VeryLow,
|
||||
Priority::Low,
|
||||
Priority::Middle,
|
||||
Priority::High,
|
||||
Priority::VeryHigh,
|
||||
] {
|
||||
let runtime: ThrottleableRuntime = Builder::default()
|
||||
.runtime_name("test")
|
||||
.thread_name("test")
|
||||
.worker_threads(8)
|
||||
.priority(p)
|
||||
.build()
|
||||
.expect("Fail to create runtime");
|
||||
|
||||
// Spawn a simple future that returns 42
|
||||
let handle = runtime.spawn(async {
|
||||
tokio::time::sleep(Duration::from_millis(10)).await;
|
||||
42
|
||||
});
|
||||
let result = handle.await.expect("Task panicked");
|
||||
assert_eq!(result, 42);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_throttleable_runtime_spawn_complex() {
|
||||
let tempdir = tempfile::tempdir().unwrap();
|
||||
for p in [
|
||||
Priority::VeryLow,
|
||||
Priority::Low,
|
||||
Priority::Middle,
|
||||
Priority::High,
|
||||
Priority::VeryHigh,
|
||||
] {
|
||||
let runtime: ThrottleableRuntime = Builder::default()
|
||||
.runtime_name("test")
|
||||
.thread_name("test")
|
||||
.worker_threads(8)
|
||||
.priority(p)
|
||||
.build()
|
||||
.expect("Fail to create runtime");
|
||||
let tempdirpath = tempdir.path().to_path_buf();
|
||||
let handle = runtime.spawn(async move {
|
||||
let mut file = File::create(tempdirpath.join("test.txt")).await.unwrap();
|
||||
file.write_all(b"Hello, world!").await.unwrap();
|
||||
42
|
||||
});
|
||||
let result = handle.await.expect("Task panicked");
|
||||
assert_eq!(result, 42);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -26,13 +26,13 @@ opentelemetry = { version = "0.21.0", default-features = false, features = [
|
||||
opentelemetry-otlp = { version = "0.14.0", features = ["tokio"] }
|
||||
opentelemetry-semantic-conventions = "0.13.0"
|
||||
opentelemetry_sdk = { version = "0.21.0", features = ["rt-tokio"] }
|
||||
parking_lot = { version = "0.12" }
|
||||
parking_lot.workspace = true
|
||||
prometheus.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
tokio.workspace = true
|
||||
tracing = "0.1"
|
||||
tracing-appender = "0.2"
|
||||
tracing-appender.workspace = true
|
||||
tracing-log = "0.1"
|
||||
tracing-opentelemetry = "0.22.0"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
|
||||
tracing-subscriber.workspace = true
|
||||
|
||||
@@ -192,6 +192,7 @@ pub fn init_global_logging(
|
||||
if opts.log_format == LogFormat::Json {
|
||||
Some(
|
||||
Layer::new()
|
||||
.with_thread_ids(true)
|
||||
.json()
|
||||
.with_writer(writer)
|
||||
.with_ansi(atty::is(atty::Stream::Stdout))
|
||||
@@ -200,6 +201,7 @@ pub fn init_global_logging(
|
||||
} else {
|
||||
Some(
|
||||
Layer::new()
|
||||
.with_thread_ids(true)
|
||||
.with_writer(writer)
|
||||
.with_ansi(atty::is(atty::Stream::Stdout))
|
||||
.boxed(),
|
||||
@@ -228,13 +230,20 @@ pub fn init_global_logging(
|
||||
if opts.log_format == LogFormat::Json {
|
||||
Some(
|
||||
Layer::new()
|
||||
.with_thread_ids(true)
|
||||
.json()
|
||||
.with_writer(writer)
|
||||
.with_ansi(false)
|
||||
.boxed(),
|
||||
)
|
||||
} else {
|
||||
Some(Layer::new().with_writer(writer).with_ansi(false).boxed())
|
||||
Some(
|
||||
Layer::new()
|
||||
.with_thread_ids(true)
|
||||
.with_writer(writer)
|
||||
.with_ansi(false)
|
||||
.boxed(),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
@@ -260,6 +269,7 @@ pub fn init_global_logging(
|
||||
Some(
|
||||
Layer::new()
|
||||
.json()
|
||||
.with_thread_ids(true)
|
||||
.with_writer(writer)
|
||||
.with_ansi(false)
|
||||
.with_filter(filter::LevelFilter::ERROR)
|
||||
@@ -268,6 +278,7 @@ pub fn init_global_logging(
|
||||
} else {
|
||||
Some(
|
||||
Layer::new()
|
||||
.with_thread_ids(true)
|
||||
.with_writer(writer)
|
||||
.with_ansi(false)
|
||||
.with_filter(filter::LevelFilter::ERROR)
|
||||
|
||||
@@ -23,6 +23,7 @@ use common_function::function::FunctionRef;
|
||||
use common_function::scalars::aggregate::AggregateFunctionMetaRef;
|
||||
use common_query::prelude::ScalarUdf;
|
||||
use common_query::Output;
|
||||
use common_runtime::runtime::{BuilderBuild, RuntimeTrait};
|
||||
use common_runtime::Runtime;
|
||||
use datafusion_expr::LogicalPlan;
|
||||
use query::dataframe::DataFrame;
|
||||
|
||||
@@ -189,6 +189,13 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid JSON text: {}", value))]
|
||||
InvalidJson {
|
||||
value: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Value exceeds the precision {} bound", precision))]
|
||||
ValueExceedsPrecision {
|
||||
precision: u8,
|
||||
@@ -222,7 +229,8 @@ impl ErrorExt for Error {
|
||||
| DefaultValueType { .. }
|
||||
| DuplicateMeta { .. }
|
||||
| InvalidTimestampPrecision { .. }
|
||||
| InvalidPrecisionOrScale { .. } => StatusCode::InvalidArguments,
|
||||
| InvalidPrecisionOrScale { .. }
|
||||
| InvalidJson { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
ValueExceedsPrecision { .. }
|
||||
| CastType { .. }
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(let_chains)]
|
||||
#![feature(assert_matches)]
|
||||
|
||||
pub mod arrow_array;
|
||||
pub mod data_type;
|
||||
|
||||
@@ -36,6 +36,36 @@ impl BinaryVector {
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
&self.array
|
||||
}
|
||||
|
||||
/// Creates a new binary vector of JSONB from a binary vector.
|
||||
/// The binary vector must contain valid JSON strings.
|
||||
pub fn convert_binary_to_json(&self) -> Result<BinaryVector> {
|
||||
let arrow_array = self.to_arrow_array();
|
||||
let mut vector = vec![];
|
||||
for binary in arrow_array
|
||||
.as_any()
|
||||
.downcast_ref::<BinaryArray>()
|
||||
.unwrap()
|
||||
.iter()
|
||||
{
|
||||
let jsonb = if let Some(binary) = binary {
|
||||
match jsonb::from_slice(binary) {
|
||||
Ok(jsonb) => Some(jsonb.to_vec()),
|
||||
Err(_) => {
|
||||
let s = String::from_utf8_lossy(binary);
|
||||
return error::InvalidJsonSnafu {
|
||||
value: s.to_string(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
vector.push(jsonb);
|
||||
}
|
||||
Ok(BinaryVector::from(vector))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BinaryArray> for BinaryVector {
|
||||
@@ -233,6 +263,8 @@ vectors::impl_try_from_arrow_array_for_vector!(BinaryArray, BinaryVector);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_base::bytes::Bytes;
|
||||
use serde_json;
|
||||
@@ -383,4 +415,52 @@ mod tests {
|
||||
assert_eq!(b"four", vector.get_data(3).unwrap());
|
||||
assert_eq!(builder.len(), 4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_binary_json_conversion() {
|
||||
// json strings
|
||||
let json_strings = vec![
|
||||
b"{\"hello\": \"world\"}".to_vec(),
|
||||
b"{\"foo\": 1}".to_vec(),
|
||||
b"123".to_vec(),
|
||||
];
|
||||
let json_vector = BinaryVector::from(json_strings.clone())
|
||||
.convert_binary_to_json()
|
||||
.unwrap();
|
||||
let jsonbs = json_strings
|
||||
.iter()
|
||||
.map(|v| jsonb::parse_value(v).unwrap().to_vec())
|
||||
.collect::<Vec<_>>();
|
||||
for i in 0..3 {
|
||||
assert_eq!(
|
||||
json_vector.get_ref(i).as_binary().unwrap().unwrap(),
|
||||
jsonbs.get(i).unwrap().as_slice()
|
||||
);
|
||||
}
|
||||
|
||||
// jsonb
|
||||
let json_vector = BinaryVector::from(jsonbs.clone())
|
||||
.convert_binary_to_json()
|
||||
.unwrap();
|
||||
for i in 0..3 {
|
||||
assert_eq!(
|
||||
json_vector.get_ref(i).as_binary().unwrap().unwrap(),
|
||||
jsonbs.get(i).unwrap().as_slice()
|
||||
);
|
||||
}
|
||||
|
||||
// binary with jsonb header (0x80, 0x40, 0x20)
|
||||
let binary_with_jsonb_header: Vec<u8> = [0x80, 0x23, 0x40, 0x22].to_vec();
|
||||
let error = BinaryVector::from(vec![binary_with_jsonb_header])
|
||||
.convert_binary_to_json()
|
||||
.unwrap_err();
|
||||
assert_matches!(error, error::Error::InvalidJson { .. });
|
||||
|
||||
// invalid json string
|
||||
let json_strings = vec![b"{\"hello\": \"world\"".to_vec()];
|
||||
let error = BinaryVector::from(json_strings)
|
||||
.convert_binary_to_json()
|
||||
.unwrap_err();
|
||||
assert_matches!(error, error::Error::InvalidJson { .. });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,8 @@ mod find_unique;
|
||||
mod replicate;
|
||||
mod take;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_base::BitVec;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
@@ -89,6 +91,12 @@ macro_rules! impl_scalar_vector_op {
|
||||
}
|
||||
|
||||
fn cast(&self, to_type: &ConcreteDataType) -> Result<VectorRef> {
|
||||
if to_type == &ConcreteDataType::json_datatype() {
|
||||
if let Some(vector) = self.as_any().downcast_ref::<BinaryVector>() {
|
||||
let json_vector = vector.convert_binary_to_json()?;
|
||||
return Ok(Arc::new(json_vector) as VectorRef);
|
||||
}
|
||||
}
|
||||
cast::cast_non_constant!(self, to_type)
|
||||
}
|
||||
|
||||
|
||||
@@ -91,8 +91,9 @@ impl RegionEngine for FileRegionEngine {
|
||||
request: ScanRequest,
|
||||
) -> Result<RegionScannerRef, BoxedError> {
|
||||
let stream = self.handle_query(region_id, request).await?;
|
||||
let metadata = self.get_metadata(region_id).await?;
|
||||
// We don't support enabling append mode for file engine.
|
||||
let scanner = Box::new(SinglePartitionScanner::new(stream, false));
|
||||
let scanner = Box::new(SinglePartitionScanner::new(stream, false, metadata));
|
||||
Ok(scanner)
|
||||
}
|
||||
|
||||
|
||||
@@ -271,10 +271,17 @@ impl FlowWorkerManager {
|
||||
let rows_proto: Vec<v1::Row> = insert
|
||||
.into_iter()
|
||||
.map(|(mut row, _ts)| {
|
||||
// `update_at` col
|
||||
row.extend([Value::from(common_time::Timestamp::new_millisecond(
|
||||
now,
|
||||
))]);
|
||||
// extend `update_at` col if needed
|
||||
// if schema include a millisecond timestamp here, and result row doesn't have it, add it
|
||||
if row.len() < proto_schema.len()
|
||||
&& proto_schema[row.len()].datatype
|
||||
== greptime_proto::v1::ColumnDataType::TimestampMillisecond
|
||||
as i32
|
||||
{
|
||||
row.extend([Value::from(
|
||||
common_time::Timestamp::new_millisecond(now),
|
||||
)]);
|
||||
}
|
||||
// ts col, if auto create
|
||||
if is_ts_placeholder {
|
||||
ensure!(
|
||||
@@ -291,6 +298,17 @@ impl FlowWorkerManager {
|
||||
common_time::Timestamp::new_millisecond(0),
|
||||
)]);
|
||||
}
|
||||
if row.len() != proto_schema.len() {
|
||||
InternalSnafu {
|
||||
reason: format!(
|
||||
"Flow output row length mismatch, expect {} got {}, the columns in schema are: {:?}",
|
||||
proto_schema.len(),
|
||||
row.len(),
|
||||
proto_schema.iter().map(|c|&c.column_name).collect_vec()
|
||||
),
|
||||
}
|
||||
.fail()?;
|
||||
}
|
||||
Ok(row.into())
|
||||
})
|
||||
.collect::<Result<Vec<_>, Error>>()?;
|
||||
|
||||
@@ -106,7 +106,7 @@ pub async fn sql_to_flow_plan(
|
||||
.context(ExternalSnafu)?;
|
||||
let plan = engine
|
||||
.planner()
|
||||
.plan(stmt, query_ctx)
|
||||
.plan(&stmt, query_ctx)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
|
||||
@@ -278,7 +278,7 @@ mod test {
|
||||
let stmt = QueryLanguageParser::parse_sql(sql, &QueryContext::arc()).unwrap();
|
||||
let plan = engine
|
||||
.planner()
|
||||
.plan(stmt, QueryContext::arc())
|
||||
.plan(&stmt, QueryContext::arc())
|
||||
.await
|
||||
.unwrap();
|
||||
let plan = apply_df_optimizer(plan).await.unwrap();
|
||||
@@ -300,7 +300,7 @@ mod test {
|
||||
let stmt = QueryLanguageParser::parse_sql(sql, &QueryContext::arc()).unwrap();
|
||||
let plan = engine
|
||||
.planner()
|
||||
.plan(stmt, QueryContext::arc())
|
||||
.plan(&stmt, QueryContext::arc())
|
||||
.await
|
||||
.unwrap();
|
||||
let plan = apply_df_optimizer(plan).await;
|
||||
|
||||
@@ -313,6 +313,14 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to init plugin"))]
|
||||
// this comment is to bypass the unused snafu check in "check-snafu.py"
|
||||
InitPlugin {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -375,8 +383,9 @@ impl ErrorExt for Error {
|
||||
| Error::ExecLogicalPlan { source, .. } => source.status_code(),
|
||||
|
||||
Error::InvokeRegionServer { source, .. } => source.status_code(),
|
||||
|
||||
Error::External { source, .. } => source.status_code(),
|
||||
Error::External { source, .. } | Error::InitPlugin { source, .. } => {
|
||||
source.status_code()
|
||||
}
|
||||
Error::FindTableRoute { source, .. } => source.status_code(),
|
||||
|
||||
#[cfg(feature = "python")]
|
||||
|
||||
@@ -225,11 +225,45 @@ impl Instance {
|
||||
async fn query_statement(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Output> {
|
||||
check_permission(self.plugins.clone(), &stmt, &query_ctx)?;
|
||||
|
||||
let stmt = QueryStatement::Sql(stmt);
|
||||
self.statement_executor
|
||||
.execute_stmt(stmt, query_ctx)
|
||||
.await
|
||||
.context(TableOperationSnafu)
|
||||
let query_interceptor = self.plugins.get::<SqlQueryInterceptorRef<Error>>();
|
||||
let query_interceptor = query_interceptor.as_ref();
|
||||
|
||||
let output = match stmt {
|
||||
Statement::Query(_) | Statement::Explain(_) | Statement::Delete(_) => {
|
||||
let stmt = QueryStatement::Sql(stmt);
|
||||
let plan = self
|
||||
.statement_executor
|
||||
.plan(&stmt, query_ctx.clone())
|
||||
.await?;
|
||||
|
||||
let QueryStatement::Sql(stmt) = stmt else {
|
||||
unreachable!()
|
||||
};
|
||||
query_interceptor.pre_execute(&stmt, Some(&plan), query_ctx.clone())?;
|
||||
|
||||
self.statement_executor.exec_plan(plan, query_ctx).await
|
||||
}
|
||||
Statement::Tql(tql) => {
|
||||
let plan = self
|
||||
.statement_executor
|
||||
.plan_tql(tql.clone(), &query_ctx)
|
||||
.await?;
|
||||
|
||||
query_interceptor.pre_execute(
|
||||
&Statement::Tql(tql),
|
||||
Some(&plan),
|
||||
query_ctx.clone(),
|
||||
)?;
|
||||
|
||||
self.statement_executor.exec_plan(plan, query_ctx).await
|
||||
}
|
||||
_ => {
|
||||
query_interceptor.pre_execute(&stmt, None, query_ctx.clone())?;
|
||||
|
||||
self.statement_executor.execute_sql(stmt, query_ctx).await
|
||||
}
|
||||
};
|
||||
output.context(TableOperationSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -255,14 +289,6 @@ impl SqlQueryHandler for Instance {
|
||||
Ok(stmts) => {
|
||||
let mut results = Vec::with_capacity(stmts.len());
|
||||
for stmt in stmts {
|
||||
// TODO(sunng87): figure out at which stage we can call
|
||||
// this hook after ArrowFlight adoption. We need to provide
|
||||
// LogicalPlan as to this hook.
|
||||
if let Err(e) = query_interceptor.pre_execute(&stmt, None, query_ctx.clone()) {
|
||||
results.push(Err(e));
|
||||
break;
|
||||
}
|
||||
|
||||
if let Err(e) = checker
|
||||
.check_permission(
|
||||
query_ctx.current_user(),
|
||||
@@ -341,7 +367,7 @@ impl SqlQueryHandler for Instance {
|
||||
let plan = self
|
||||
.query_engine
|
||||
.planner()
|
||||
.plan(QueryStatement::Sql(stmt), query_ctx.clone())
|
||||
.plan(&QueryStatement::Sql(stmt), query_ctx.clone())
|
||||
.await
|
||||
.context(PlanStatementSnafu)?;
|
||||
self.query_engine
|
||||
|
||||
@@ -113,8 +113,8 @@ impl OpenTelemetryProtocolHandler for Instance {
|
||||
.plugins
|
||||
.get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
|
||||
interceptor_ref.pre_execute(ctx.clone())?;
|
||||
let (requests, rows) = otlp::logs::to_grpc_insert_requests(request, pipeline, table_name)?;
|
||||
|
||||
let (requests, rows) = otlp::logs::to_grpc_insert_requests(request, pipeline, table_name)?;
|
||||
self.handle_log_inserts(requests, ctx)
|
||||
.await
|
||||
.inspect(|_| OTLP_LOGS_ROWS.inc_by(rows as u64))
|
||||
|
||||
@@ -62,6 +62,8 @@ impl IndexApplier for PredicatesIndexApplier {
|
||||
break;
|
||||
}
|
||||
|
||||
common_telemetry::info!("Predicate apply, apply name start, name: {}", name);
|
||||
|
||||
let Some(meta) = metadata.metas.get(name) else {
|
||||
match context.index_not_found_strategy {
|
||||
IndexNotFoundStrategy::ReturnEmpty => {
|
||||
@@ -85,6 +87,8 @@ impl IndexApplier for PredicatesIndexApplier {
|
||||
let bm = mapper.map_values(&values).await?;
|
||||
|
||||
bitmap &= bm;
|
||||
|
||||
common_telemetry::info!("Predicate apply, apply name end, name: {}", name);
|
||||
}
|
||||
|
||||
output.matched_segment_ids = bitmap;
|
||||
@@ -114,17 +118,17 @@ impl PredicatesIndexApplier {
|
||||
.partition_in_place(|(_, ps)| ps.iter().any(|p| matches!(p, Predicate::InList(_))));
|
||||
let mut iter = predicates.into_iter();
|
||||
for _ in 0..in_list_index {
|
||||
let (tag_name, predicates) = iter.next().unwrap();
|
||||
let (column_name, predicates) = iter.next().unwrap();
|
||||
let fst_applier = Box::new(KeysFstApplier::try_from(predicates)?) as _;
|
||||
fst_appliers.push((tag_name, fst_applier));
|
||||
fst_appliers.push((column_name, fst_applier));
|
||||
}
|
||||
|
||||
for (tag_name, predicates) in iter {
|
||||
for (column_name, predicates) in iter {
|
||||
if predicates.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let fst_applier = Box::new(IntersectionFstApplier::try_from(predicates)?) as _;
|
||||
fst_appliers.push((tag_name, fst_applier));
|
||||
fst_appliers.push((column_name, fst_applier));
|
||||
}
|
||||
|
||||
Ok(PredicatesIndexApplier { fst_appliers })
|
||||
|
||||
@@ -17,6 +17,7 @@ use std::any::Any;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_macro::stack_trace_debug;
|
||||
use common_runtime::error::Error as RuntimeError;
|
||||
use common_runtime::JoinError;
|
||||
use serde_json::error::Error as JsonError;
|
||||
use snafu::{Location, Snafu};
|
||||
use store_api::storage::RegionId;
|
||||
@@ -306,6 +307,14 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Join error"))]
|
||||
Join {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: JoinError,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
|
||||
@@ -31,8 +31,8 @@ use store_api::storage::RegionId;
|
||||
|
||||
use crate::error::{
|
||||
AddEntryLogBatchSnafu, DiscontinuousLogIndexSnafu, Error, FetchEntrySnafu,
|
||||
IllegalNamespaceSnafu, IllegalStateSnafu, InvalidProviderSnafu, OverrideCompactedEntrySnafu,
|
||||
RaftEngineSnafu, Result, StartGcTaskSnafu, StopGcTaskSnafu,
|
||||
IllegalNamespaceSnafu, IllegalStateSnafu, InvalidProviderSnafu, JoinSnafu,
|
||||
OverrideCompactedEntrySnafu, RaftEngineSnafu, Result, StartGcTaskSnafu, StopGcTaskSnafu,
|
||||
};
|
||||
use crate::metrics;
|
||||
use crate::raft_engine::backend::SYSTEM_NAMESPACE;
|
||||
@@ -250,6 +250,12 @@ impl LogStore for RaftEngineLogStore {
|
||||
.engine
|
||||
.write(&mut batch, sync)
|
||||
.context(RaftEngineSnafu)?;
|
||||
let engine = self.engine.clone();
|
||||
let _ = common_runtime::spawn_blocking_global(move || {
|
||||
engine.write(&mut batch, sync).context(RaftEngineSnafu)
|
||||
})
|
||||
.await
|
||||
.context(JoinSnafu)?;
|
||||
|
||||
Ok(AppendBatchResponse { last_entry_ids })
|
||||
}
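The hunk above moves the synchronous raft-engine write onto a blocking thread pool and surfaces task failure through the new Join error variant. A minimal standalone sketch of that pattern, using plain tokio::task::spawn_blocking rather than GreptimeDB's common_runtime::spawn_blocking_global wrapper (the function body and numbers below are invented for illustration):

use std::time::Duration;

// Stand-in for the synchronous `engine.write(&mut batch, sync)` call.
fn blocking_write() -> std::io::Result<usize> {
    std::thread::sleep(Duration::from_millis(10));
    Ok(42)
}

#[tokio::main]
async fn main() {
    let written = tokio::task::spawn_blocking(blocking_write)
        .await
        .expect("join error; the real code maps this to the new Join variant")
        .expect("write error");
    println!("wrote {written} bytes");
}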
|
||||
|
||||
@@ -29,4 +29,4 @@ futures = "0.3"
|
||||
meta-srv = { workspace = true, features = ["mock"] }
|
||||
tower.workspace = true
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
tracing-subscriber.workspace = true
|
||||
|
||||
@@ -42,7 +42,7 @@ humantime-serde.workspace = true
|
||||
itertools.workspace = true
|
||||
lazy_static.workspace = true
|
||||
once_cell.workspace = true
|
||||
parking_lot = "0.12"
|
||||
parking_lot.workspace = true
|
||||
prometheus.workspace = true
|
||||
prost.workspace = true
|
||||
rand.workspace = true
|
||||
@@ -59,7 +59,7 @@ tokio-stream = { workspace = true, features = ["net"] }
|
||||
toml.workspace = true
|
||||
tonic.workspace = true
|
||||
tower.workspace = true
|
||||
typetag = "0.2"
|
||||
typetag.workspace = true
|
||||
url = "2.3"
|
||||
|
||||
[dev-dependencies]
|
||||
@@ -69,4 +69,4 @@ common-meta = { workspace = true, features = ["testing"] }
|
||||
common-procedure-test.workspace = true
|
||||
session.workspace = true
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
tracing-subscriber.workspace = true
|
||||
|
||||
@@ -18,7 +18,6 @@ use common_error::status_code::StatusCode;
|
||||
use common_macro::stack_trace_debug;
|
||||
use common_meta::DatanodeId;
|
||||
use common_runtime::JoinError;
|
||||
use rand::distributions::WeightedError;
|
||||
use snafu::{Location, Snafu};
|
||||
use store_api::storage::RegionId;
|
||||
use table::metadata::TableId;
|
||||
@@ -32,6 +31,14 @@ use crate::pubsub::Message;
|
||||
#[snafu(visibility(pub))]
|
||||
#[stack_trace_debug]
|
||||
pub enum Error {
|
||||
#[snafu(display("Failed to choose items"))]
|
||||
ChooseItems {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: rand::distributions::WeightedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Exceeded deadline, operation: {}", operation))]
|
||||
ExceededDeadline {
|
||||
#[snafu(implicit)]
|
||||
@@ -643,20 +650,6 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to set weight array"))]
|
||||
WeightArray {
|
||||
#[snafu(source)]
|
||||
error: WeightedError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Weight array is not set"))]
|
||||
NotSetWeightArray {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Unexpected table route type: {}", err_msg))]
|
||||
UnexpectedLogicalRouteTable {
|
||||
#[snafu(implicit)]
|
||||
@@ -759,10 +752,9 @@ impl ErrorExt for Error {
|
||||
| Error::NoEnoughAvailableNode { .. }
|
||||
| Error::PublishMessage { .. }
|
||||
| Error::Join { .. }
|
||||
| Error::WeightArray { .. }
|
||||
| Error::NotSetWeightArray { .. }
|
||||
| Error::PeerUnavailable { .. }
|
||||
| Error::ExceededDeadline { .. } => StatusCode::Internal,
|
||||
| Error::ExceededDeadline { .. }
|
||||
| Error::ChooseItems { .. } => StatusCode::Internal,
|
||||
|
||||
Error::Unsupported { .. } => StatusCode::Unsupported,
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::fmt::{Debug, Display};
|
||||
use std::ops::Range;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
@@ -113,6 +114,34 @@ impl HeartbeatAccumulator {
|
||||
}
|
||||
}

#[derive(Copy, Clone)]
pub struct PusherId {
pub role: Role,
pub id: u64,
}

impl Debug for PusherId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}-{}", self.role, self.id)
}
}

impl Display for PusherId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}-{}", self.role, self.id)
}
}

impl PusherId {
pub fn new(role: Role, id: u64) -> Self {
Self { role, id }
}

pub fn string_key(&self) -> String {
format!("{}-{}", self.role as i32, self.id)
}
}
|
||||
|
||||
/// The pusher of the heartbeat response.
|
||||
pub struct Pusher {
|
||||
sender: Sender<std::result::Result<HeartbeatResponse, tonic::Status>>,
|
||||
@@ -154,10 +183,11 @@ impl Pusher {
|
||||
pub struct Pushers(Arc<RwLock<BTreeMap<String, Pusher>>>);
|
||||
|
||||
impl Pushers {
|
||||
async fn push(&self, pusher_id: &str, mailbox_message: MailboxMessage) -> Result<()> {
|
||||
async fn push(&self, pusher_id: PusherId, mailbox_message: MailboxMessage) -> Result<()> {
|
||||
let pusher_id = pusher_id.string_key();
|
||||
let pushers = self.0.read().await;
|
||||
let pusher = pushers
|
||||
.get(pusher_id)
|
||||
.get(&pusher_id)
|
||||
.context(error::PusherNotFoundSnafu { pusher_id })?;
|
||||
pusher
|
||||
.push(HeartbeatResponse {
|
||||
@@ -234,21 +264,19 @@ pub struct HeartbeatHandlerGroup {
|
||||
|
||||
impl HeartbeatHandlerGroup {
|
||||
/// Registers the heartbeat response [`Pusher`] with the given key to the group.
|
||||
pub async fn register_pusher(&self, key: impl AsRef<str>, pusher: Pusher) {
|
||||
let key = key.as_ref();
|
||||
pub async fn register_pusher(&self, pusher_id: PusherId, pusher: Pusher) {
|
||||
METRIC_META_HEARTBEAT_CONNECTION_NUM.inc();
|
||||
info!("Pusher register: {}", key);
|
||||
let _ = self.pushers.insert(key.to_string(), pusher).await;
|
||||
info!("Pusher register: {}", pusher_id);
|
||||
let _ = self.pushers.insert(pusher_id.string_key(), pusher).await;
|
||||
}
|
||||
|
||||
/// Deregisters the heartbeat response [`Pusher`] with the given key from the group.
|
||||
///
|
||||
/// Returns the [`Pusher`] if it exists.
|
||||
pub async fn deregister_push(&self, key: impl AsRef<str>) -> Option<Pusher> {
|
||||
let key = key.as_ref();
|
||||
pub async fn deregister_push(&self, pusher_id: PusherId) -> Option<Pusher> {
|
||||
METRIC_META_HEARTBEAT_CONNECTION_NUM.dec();
|
||||
info!("Pusher unregister: {}", key);
|
||||
self.pushers.remove(key).await
|
||||
info!("Pusher unregister: {}", pusher_id);
|
||||
self.pushers.remove(&pusher_id.string_key()).await
|
||||
}
|
||||
|
||||
/// Returns the [`Pushers`] of the group.
|
||||
@@ -417,7 +445,7 @@ impl Mailbox for HeartbeatMailbox {
|
||||
let _ = self.timeouts.insert(message_id, deadline);
|
||||
self.timeout_notify.notify_one();
|
||||
|
||||
self.pushers.push(&pusher_id, msg).await?;
|
||||
self.pushers.push(pusher_id, msg).await?;
|
||||
|
||||
Ok(MailboxReceiver::new(message_id, rx, *ch))
|
||||
}
|
||||
@@ -720,7 +748,7 @@ mod tests {
|
||||
use common_meta::sequence::SequenceBuilder;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use super::{HeartbeatHandlerGroupBuilder, Pushers};
|
||||
use super::{HeartbeatHandlerGroupBuilder, PusherId, Pushers};
|
||||
use crate::error;
|
||||
use crate::handler::collect_stats_handler::CollectStatsHandler;
|
||||
use crate::handler::response_header_handler::ResponseHeaderHandler;
|
||||
@@ -761,11 +789,10 @@ mod tests {
|
||||
protocol_version: PROTOCOL_VERSION,
|
||||
..Default::default()
|
||||
};
|
||||
let pusher_id = PusherId::new(Role::Datanode, datanode_id);
|
||||
let pusher: Pusher = Pusher::new(pusher_tx, &res_header);
|
||||
let handler_group = HeartbeatHandlerGroup::default();
|
||||
handler_group
|
||||
.register_pusher(format!("{}-{}", Role::Datanode as i32, datanode_id), pusher)
|
||||
.await;
|
||||
handler_group.register_pusher(pusher_id, pusher).await;
|
||||
|
||||
let kv_backend = Arc::new(MemoryKvBackend::new());
|
||||
let seq = SequenceBuilder::new("test_seq", kv_backend).build();
|
||||
|
||||
@@ -93,9 +93,11 @@ mod tests {
|
||||
approximate_bytes: 0,
|
||||
engine: default_engine().to_string(),
|
||||
role: RegionRole::Follower,
|
||||
num_rows: 0,
|
||||
memtable_size: 0,
|
||||
manifest_size: 0,
|
||||
sst_size: 0,
|
||||
index_size: 0,
|
||||
}
|
||||
}
|
||||
acc.stat = Some(Stat {
|
||||
|
||||
@@ -135,9 +135,11 @@ mod test {
|
||||
wcus: 0,
|
||||
approximate_bytes: 0,
|
||||
engine: String::new(),
|
||||
num_rows: 0,
|
||||
memtable_size: 0,
|
||||
manifest_size: 0,
|
||||
sst_size: 0,
|
||||
index_size: 0,
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -184,22 +184,26 @@ pub struct MetasrvInfo {
// Options for datanode.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct DatanodeOptions {
pub client_options: DatanodeClientOptions,
pub client: DatanodeClientOptions,
}

// Options for datanode client.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DatanodeClientOptions {
pub timeout_millis: u64,
pub connect_timeout_millis: u64,
#[serde(with = "humantime_serde")]
pub timeout: Duration,
#[serde(with = "humantime_serde")]
pub connect_timeout: Duration,
pub tcp_nodelay: bool,
}

impl Default for DatanodeClientOptions {
fn default() -> Self {
Self {
timeout_millis: channel_manager::DEFAULT_GRPC_REQUEST_TIMEOUT_SECS * 1000,
connect_timeout_millis: channel_manager::DEFAULT_GRPC_CONNECT_TIMEOUT_SECS * 1000,
timeout: Duration::from_secs(channel_manager::DEFAULT_GRPC_REQUEST_TIMEOUT_SECS),
connect_timeout: Duration::from_secs(
channel_manager::DEFAULT_GRPC_CONNECT_TIMEOUT_SECS,
),
tcp_nodelay: true,
}
}
|
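Switching these fields from millisecond integers to Duration with humantime_serde means the TOML config now takes human-readable strings. A self-contained sketch of the mechanism (standalone structs, not the real metasrv config layout), assuming the serde, humantime-serde and toml crates are available:

use std::time::Duration;
use serde::Deserialize;

#[derive(Deserialize)]
struct ClientOptions {
    #[serde(with = "humantime_serde")]
    timeout: Duration,
    #[serde(with = "humantime_serde")]
    connect_timeout: Duration,
    tcp_nodelay: bool,
}

fn main() {
    // "3s" / "500ms" replace the old timeout_millis / connect_timeout_millis integers.
    let opts: ClientOptions =
        toml::from_str("timeout = '3s'\nconnect_timeout = '500ms'\ntcp_nodelay = true").unwrap();
    assert_eq!(opts.timeout, Duration::from_secs(3));
    assert_eq!(opts.connect_timeout, Duration::from_millis(500));
    assert!(opts.tcp_nodelay);
}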
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::{Arc, Mutex, RwLock};
|
||||
use std::time::Duration;
|
||||
|
||||
use client::client_manager::NodeClients;
|
||||
use common_base::Plugins;
|
||||
@@ -250,13 +249,9 @@ impl MetasrvBuilder {
|
||||
let memory_region_keeper = Arc::new(MemoryRegionKeeper::default());
|
||||
let node_manager = node_manager.unwrap_or_else(|| {
|
||||
let datanode_client_channel_config = ChannelConfig::new()
|
||||
.timeout(Duration::from_millis(
|
||||
options.datanode.client_options.timeout_millis,
|
||||
))
|
||||
.connect_timeout(Duration::from_millis(
|
||||
options.datanode.client_options.connect_timeout_millis,
|
||||
))
|
||||
.tcp_nodelay(options.datanode.client_options.tcp_nodelay);
|
||||
.timeout(options.datanode.client.timeout)
|
||||
.connect_timeout(options.datanode.client.connect_timeout)
|
||||
.tcp_nodelay(options.datanode.client.tcp_nodelay);
|
||||
Arc::new(NodeClients::new(datanode_client_channel_config))
|
||||
});
|
||||
let cache_invalidator = Arc::new(MetasrvCacheInvalidator::new(
|
||||
|
||||
@@ -84,7 +84,7 @@ impl MailboxContext {
|
||||
) {
|
||||
let pusher_id = channel.pusher_id();
|
||||
let pusher = Pusher::new(tx, &RequestHeader::default());
|
||||
let _ = self.pushers.insert(pusher_id, pusher).await;
|
||||
let _ = self.pushers.insert(pusher_id.string_key(), pusher).await;
|
||||
}
|
||||
|
||||
pub fn mailbox(&self) -> &MailboxRef {
|
||||
|
||||
@@ -24,6 +24,7 @@ pub mod mock {
|
||||
use client::Client;
|
||||
use common_grpc::channel_manager::ChannelManager;
|
||||
use common_meta::peer::Peer;
|
||||
use common_runtime::runtime::BuilderBuild;
|
||||
use common_runtime::{Builder as RuntimeBuilder, Runtime};
|
||||
use servers::grpc::region_server::{RegionServerHandler, RegionServerRequestHandler};
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
@@ -12,29 +12,23 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use common_meta::peer::Peer;
|
||||
use snafu::ensure;
|
||||
|
||||
use super::weighted_choose::{WeightedChoose, WeightedItem};
|
||||
use super::weighted_choose::WeightedChoose;
|
||||
use crate::error;
|
||||
use crate::error::Result;
|
||||
use crate::metasrv::SelectTarget;
|
||||
use crate::selector::SelectorOptions;
|
||||
|
||||
/// According to the `opts`, choose peers from the `weight_array` through `weighted_choose`.
|
||||
pub fn choose_peers<W>(
|
||||
mut weight_array: Vec<WeightedItem<Peer>>,
|
||||
opts: &SelectorOptions,
|
||||
weighted_choose: &mut W,
|
||||
) -> Result<Vec<Peer>>
|
||||
pub fn choose_peers<W>(opts: &SelectorOptions, weighted_choose: &mut W) -> Result<Vec<Peer>>
|
||||
where
|
||||
W: WeightedChoose<Peer>,
|
||||
{
|
||||
let min_required_items = opts.min_required_items;
|
||||
ensure!(
|
||||
!weight_array.is_empty(),
|
||||
!weighted_choose.is_empty(),
|
||||
error::NoEnoughAvailableNodeSnafu {
|
||||
required: min_required_items,
|
||||
available: 0_usize,
|
||||
@@ -43,12 +37,11 @@ where
|
||||
);
|
||||
|
||||
if opts.allow_duplication {
|
||||
weighted_choose.set_weight_array(weight_array)?;
|
||||
(0..min_required_items)
|
||||
.map(|_| weighted_choose.choose_one())
|
||||
.collect::<Result<_>>()
|
||||
} else {
|
||||
let weight_array_len = weight_array.len();
|
||||
let weight_array_len = weighted_choose.len();
|
||||
|
||||
// When opts.allow_duplication is false, we need to check that the length of the weighted array is greater than
|
||||
// or equal to min_required_items, otherwise it may cause an infinite loop.
|
||||
@@ -61,33 +54,7 @@ where
|
||||
}
|
||||
);
|
||||
|
||||
if weight_array_len == min_required_items {
|
||||
return Ok(weight_array.into_iter().map(|item| item.item).collect());
|
||||
}
|
||||
|
||||
weighted_choose.set_weight_array(weight_array.clone())?;
|
||||
|
||||
// Assume min_required_items is 3, weight_array_len is 100, then we can choose 3 items from the weight array
|
||||
// and return. But assume min_required_items is 99, weight_array_len is 100. It's not cheap to choose 99 items
|
||||
// from the weight array. So we can reverse choose 1 item from the weight array, and return the remaining 99
|
||||
// items.
|
||||
if min_required_items * 2 > weight_array_len {
|
||||
let select_num = weight_array_len - min_required_items;
|
||||
let mut selected = HashSet::with_capacity(select_num);
|
||||
while selected.len() < select_num {
|
||||
let item = weighted_choose.reverse_choose_one()?;
|
||||
selected.insert(item);
|
||||
}
|
||||
weight_array.retain(|item| !selected.contains(&item.item));
|
||||
Ok(weight_array.into_iter().map(|item| item.item).collect())
|
||||
} else {
|
||||
let mut selected = HashSet::with_capacity(min_required_items);
|
||||
while selected.len() < min_required_items {
|
||||
let item = weighted_choose.choose_one()?;
|
||||
selected.insert(item);
|
||||
}
|
||||
Ok(selected.into_iter().collect())
|
||||
}
|
||||
weighted_choose.choose_multiple(min_required_items)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,7 +77,6 @@ mod tests {
|
||||
addr: "127.0.0.1:3001".to_string(),
|
||||
},
|
||||
weight: 1,
|
||||
reverse_weight: 1,
|
||||
},
|
||||
WeightedItem {
|
||||
item: Peer {
|
||||
@@ -118,7 +84,6 @@ mod tests {
|
||||
addr: "127.0.0.1:3001".to_string(),
|
||||
},
|
||||
weight: 1,
|
||||
reverse_weight: 1,
|
||||
},
|
||||
WeightedItem {
|
||||
item: Peer {
|
||||
@@ -126,7 +91,6 @@ mod tests {
|
||||
addr: "127.0.0.1:3001".to_string(),
|
||||
},
|
||||
weight: 1,
|
||||
reverse_weight: 1,
|
||||
},
|
||||
WeightedItem {
|
||||
item: Peer {
|
||||
@@ -134,7 +98,6 @@ mod tests {
|
||||
addr: "127.0.0.1:3001".to_string(),
|
||||
},
|
||||
weight: 1,
|
||||
reverse_weight: 1,
|
||||
},
|
||||
WeightedItem {
|
||||
item: Peer {
|
||||
@@ -142,7 +105,6 @@ mod tests {
|
||||
addr: "127.0.0.1:3001".to_string(),
|
||||
},
|
||||
weight: 1,
|
||||
reverse_weight: 1,
|
||||
},
|
||||
];
|
||||
|
||||
@@ -152,14 +114,11 @@ mod tests {
|
||||
allow_duplication: false,
|
||||
};
|
||||
|
||||
let selected_peers: HashSet<_> = choose_peers(
|
||||
weight_array.clone(),
|
||||
&opts,
|
||||
&mut RandomWeightedChoose::default(),
|
||||
)
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.collect();
|
||||
let selected_peers: HashSet<_> =
|
||||
choose_peers(&opts, &mut RandomWeightedChoose::new(weight_array.clone()))
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
assert_eq!(i, selected_peers.len());
|
||||
}
|
||||
@@ -169,11 +128,8 @@ mod tests {
|
||||
allow_duplication: false,
|
||||
};
|
||||
|
||||
let selected_result = choose_peers(
|
||||
weight_array.clone(),
|
||||
&opts,
|
||||
&mut RandomWeightedChoose::default(),
|
||||
);
|
||||
let selected_result =
|
||||
choose_peers(&opts, &mut RandomWeightedChoose::new(weight_array.clone()));
|
||||
assert!(selected_result.is_err());
|
||||
|
||||
for i in 1..=50 {
|
||||
@@ -182,12 +138,8 @@ mod tests {
|
||||
allow_duplication: true,
|
||||
};
|
||||
|
||||
let selected_peers = choose_peers(
|
||||
weight_array.clone(),
|
||||
&opts,
|
||||
&mut RandomWeightedChoose::default(),
|
||||
)
|
||||
.unwrap();
|
||||
let selected_peers =
|
||||
choose_peers(&opts, &mut RandomWeightedChoose::new(weight_array.clone())).unwrap();
|
||||
|
||||
assert_eq!(i, selected_peers.len());
|
||||
}
|
||||
|
||||
@@ -48,13 +48,12 @@ impl Selector for LeaseBasedSelector {
|
||||
addr: v.node_addr.clone(),
|
||||
},
|
||||
weight: 1,
|
||||
reverse_weight: 1,
|
||||
})
|
||||
.collect();
|
||||
|
||||
// 3. choose peers by weight_array.
|
||||
let weighted_choose = &mut RandomWeightedChoose::default();
|
||||
let selected = choose_peers(weight_array, &opts, weighted_choose)?;
|
||||
let mut weighted_choose = RandomWeightedChoose::new(weight_array);
|
||||
let selected = choose_peers(&opts, &mut weighted_choose)?;
|
||||
|
||||
Ok(selected)
|
||||
}
|
||||
|
||||
@@ -19,7 +19,6 @@ use common_meta::key::TableMetadataManager;
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::router::find_leaders;
|
||||
use common_telemetry::{debug, info};
|
||||
use parking_lot::RwLock;
|
||||
use snafu::ResultExt;
|
||||
use table::metadata::TableId;
|
||||
|
||||
@@ -29,36 +28,30 @@ use crate::lease;
|
||||
use crate::metasrv::SelectorContext;
|
||||
use crate::selector::common::choose_peers;
|
||||
use crate::selector::weight_compute::{RegionNumsBasedWeightCompute, WeightCompute};
|
||||
use crate::selector::weighted_choose::{RandomWeightedChoose, WeightedChoose};
|
||||
use crate::selector::weighted_choose::RandomWeightedChoose;
|
||||
use crate::selector::{Namespace, Selector, SelectorOptions};
|
||||
|
||||
pub struct LoadBasedSelector<W, C> {
|
||||
weighted_choose: RwLock<W>,
|
||||
pub struct LoadBasedSelector<C> {
|
||||
weight_compute: C,
|
||||
}
|
||||
|
||||
impl<W, C> LoadBasedSelector<W, C> {
|
||||
pub fn new(weighted_choose: W, weight_compute: C) -> Self {
|
||||
Self {
|
||||
weighted_choose: RwLock::new(weighted_choose),
|
||||
weight_compute,
|
||||
}
|
||||
impl<C> LoadBasedSelector<C> {
|
||||
pub fn new(weight_compute: C) -> Self {
|
||||
Self { weight_compute }
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for LoadBasedSelector<RandomWeightedChoose<Peer>, RegionNumsBasedWeightCompute> {
|
||||
impl Default for LoadBasedSelector<RegionNumsBasedWeightCompute> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
weighted_choose: RwLock::new(RandomWeightedChoose::default()),
|
||||
weight_compute: RegionNumsBasedWeightCompute,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<W, C> Selector for LoadBasedSelector<W, C>
|
||||
impl<C> Selector for LoadBasedSelector<C>
|
||||
where
|
||||
W: WeightedChoose<Peer>,
|
||||
C: WeightCompute<Source = HashMap<DatanodeStatKey, DatanodeStatValue>>,
|
||||
{
|
||||
type Context = SelectorContext;
|
||||
@@ -100,8 +93,8 @@ where
|
||||
let weight_array = self.weight_compute.compute(&stat_kvs);
|
||||
|
||||
// 5. choose peers by weight_array.
|
||||
let mut weighted_choose = self.weighted_choose.write();
|
||||
let selected = choose_peers(weight_array, &opts, &mut *weighted_choose)?;
|
||||
let mut weighted_choose = RandomWeightedChoose::new(weight_array);
|
||||
let selected = choose_peers(&opts, &mut weighted_choose)?;
|
||||
|
||||
debug!(
|
||||
"LoadBasedSelector select peers: {:?}, namespace: {}, opts: {:?}.",
|
||||
|
||||
@@ -85,7 +85,6 @@ impl WeightCompute for RegionNumsBasedWeightCompute {
|
||||
.map(|(peer, region_num)| WeightedItem {
|
||||
item: peer,
|
||||
weight: (max_weight - region_num + base_weight) as usize,
|
||||
reverse_weight: (region_num - min_weight + base_weight) as usize,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
@@ -181,10 +180,6 @@ mod tests {
|
||||
},
|
||||
4,
|
||||
);
|
||||
|
||||
for weight in weight_array.iter() {
|
||||
assert_eq!(weight.reverse_weight, *expected.get(&weight.item).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
fn mock_stat_1() -> Stat {
|
||||
@@ -198,9 +193,11 @@ mod tests {
|
||||
approximate_bytes: 1,
|
||||
engine: "mito2".to_string(),
|
||||
role: RegionRole::Leader,
|
||||
num_rows: 0,
|
||||
memtable_size: 0,
|
||||
manifest_size: 0,
|
||||
sst_size: 0,
|
||||
index_size: 0,
|
||||
}],
|
||||
..Default::default()
|
||||
}
|
||||
@@ -217,9 +214,11 @@ mod tests {
|
||||
approximate_bytes: 1,
|
||||
engine: "mito2".to_string(),
|
||||
role: RegionRole::Leader,
|
||||
num_rows: 0,
|
||||
memtable_size: 0,
|
||||
manifest_size: 0,
|
||||
sst_size: 0,
|
||||
index_size: 0,
|
||||
}],
|
||||
..Default::default()
|
||||
}
|
||||
@@ -236,9 +235,11 @@ mod tests {
|
||||
approximate_bytes: 1,
|
||||
engine: "mito2".to_string(),
|
||||
role: RegionRole::Leader,
|
||||
num_rows: 0,
|
||||
memtable_size: 0,
|
||||
manifest_size: 0,
|
||||
sst_size: 0,
|
||||
index_size: 0,
|
||||
}],
|
||||
..Default::default()
|
||||
}
|
||||
|
||||
@@ -12,41 +12,37 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use rand::distributions::WeightedIndex;
|
||||
use rand::prelude::Distribution;
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::thread_rng;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error;
|
||||
use crate::error::Result;
|
||||
|
||||
/// A common trait for weighted balance algorithm.
|
||||
pub trait WeightedChoose<Item>: Send + Sync {
|
||||
/// The method will re-set weight array.
|
||||
///
|
||||
/// Note:
|
||||
/// 1. make sure weight_array is not empty.
|
||||
/// 2. the total weight is greater than 0.
|
||||
///
|
||||
/// Otherwise an error will be returned.
|
||||
fn set_weight_array(&mut self, weight_array: Vec<WeightedItem<Item>>) -> Result<()>;
|
||||
|
||||
/// The method will choose one item.
|
||||
///
|
||||
/// If not set weight_array before, an error will be returned.
|
||||
fn choose_one(&mut self) -> Result<Item>;
|
||||
|
||||
/// The method will reverse choose one item.
|
||||
/// The method will choose multiple items.
|
||||
///
|
||||
/// If not set weight_array before, an error will be returned.
|
||||
fn reverse_choose_one(&mut self) -> Result<Item>;
|
||||
/// Returns less than `amount` items if the weight_array is not enough.
|
||||
fn choose_multiple(&mut self, amount: usize) -> Result<Vec<Item>>;
|
||||
|
||||
/// Returns the length of the weight_array.
|
||||
fn len(&self) -> usize;
|
||||
|
||||
/// Returns whether the weight_array is empty.
|
||||
fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
}
|
||||
|
||||
/// The struct represents a weighted item.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct WeightedItem<Item> {
|
||||
pub item: Item,
|
||||
pub weight: usize,
|
||||
pub reverse_weight: usize,
|
||||
}
|
||||
|
||||
/// A implementation of weighted balance: random weighted choose.
|
||||
@@ -64,16 +60,18 @@ pub struct WeightedItem<Item> {
|
||||
/// ```
|
||||
pub struct RandomWeightedChoose<Item> {
|
||||
items: Vec<WeightedItem<Item>>,
|
||||
weighted_index: Option<WeightedIndex<usize>>,
|
||||
reverse_weighted_index: Option<WeightedIndex<usize>>,
|
||||
}
|
||||
|
||||
impl<Item> RandomWeightedChoose<Item> {
|
||||
pub fn new(items: Vec<WeightedItem<Item>>) -> Self {
|
||||
Self { items }
|
||||
}
|
||||
}
|
||||
|
||||
impl<Item> Default for RandomWeightedChoose<Item> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
items: Vec::default(),
|
||||
weighted_index: None,
|
||||
reverse_weighted_index: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -82,48 +80,29 @@ impl<Item> WeightedChoose<Item> for RandomWeightedChoose<Item>
|
||||
where
|
||||
Item: Clone + Send + Sync,
|
||||
{
|
||||
fn set_weight_array(&mut self, weight_array: Vec<WeightedItem<Item>>) -> Result<()> {
|
||||
self.weighted_index = Some(
|
||||
WeightedIndex::new(weight_array.iter().map(|item| item.weight))
|
||||
.context(error::WeightArraySnafu)?,
|
||||
);
|
||||
|
||||
self.reverse_weighted_index = Some(
|
||||
WeightedIndex::new(weight_array.iter().map(|item| item.reverse_weight))
|
||||
.context(error::WeightArraySnafu)?,
|
||||
);
|
||||
|
||||
self.items = weight_array;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn choose_one(&mut self) -> Result<Item> {
|
||||
ensure!(
|
||||
!self.items.is_empty() && self.weighted_index.is_some(),
|
||||
error::NotSetWeightArraySnafu
|
||||
);
|
||||
|
||||
// unwrap safety: whether weighted_index is none has been checked before.
|
||||
let weighted_index = self.weighted_index.as_ref().unwrap();
|
||||
|
||||
Ok(self.items[weighted_index.sample(&mut thread_rng())]
|
||||
let item = self
|
||||
.items
|
||||
.choose_weighted(&mut thread_rng(), |item| item.weight as f64)
|
||||
.context(error::ChooseItemsSnafu)?
|
||||
.item
|
||||
.clone())
|
||||
.clone();
|
||||
Ok(item)
|
||||
}
|
||||
|
||||
fn reverse_choose_one(&mut self) -> Result<Item> {
|
||||
ensure!(
|
||||
!self.items.is_empty() && self.reverse_weighted_index.is_some(),
|
||||
error::NotSetWeightArraySnafu
|
||||
);
|
fn choose_multiple(&mut self, amount: usize) -> Result<Vec<Item>> {
Ok(self
.items
.choose_multiple_weighted(&mut thread_rng(), amount, |item| item.weight as f64)
.context(error::ChooseItemsSnafu)?
.cloned()
.map(|item| item.item)
.collect::<Vec<_>>())
}

// unwrap safety: whether reverse_weighted_index is none has been checked before.
let reverse_weighted_index = self.reverse_weighted_index.as_ref().unwrap();

Ok(self.items[reverse_weighted_index.sample(&mut thread_rng())]
.item
.clone())
fn len(&self) -> usize {
self.items.len()
}
}
|
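choose_multiple now delegates to rand's weighted sampling without replacement. A standalone sketch of that primitive, independent of the selector code (rand 0.8 with its default std feature assumed; the data is made up):

use rand::seq::SliceRandom;
use rand::thread_rng;

fn main() {
    // Heavier items are more likely to be picked, and no item is returned twice.
    let items = vec![("a", 5_usize), ("b", 3), ("c", 1)];
    let picked: Vec<&str> = items
        .choose_multiple_weighted(&mut thread_rng(), 2, |(_, w)| *w as f64)
        .expect("weights must be non-negative and finite")
        .map(|(name, _)| *name)
        .collect();
    assert_eq!(picked.len(), 2);
    println!("picked: {picked:?}");
}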
||||
|
||||
@@ -133,45 +112,22 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_random_weighted_choose() {
|
||||
let mut choose = RandomWeightedChoose::default();
|
||||
choose
|
||||
.set_weight_array(vec![
|
||||
WeightedItem {
|
||||
item: 1,
|
||||
weight: 100,
|
||||
reverse_weight: 0,
|
||||
},
|
||||
WeightedItem {
|
||||
item: 2,
|
||||
weight: 0,
|
||||
reverse_weight: 100,
|
||||
},
|
||||
])
|
||||
.unwrap();
|
||||
let mut choose = RandomWeightedChoose::new(vec![
|
||||
WeightedItem {
|
||||
item: 1,
|
||||
weight: 100,
|
||||
},
|
||||
WeightedItem { item: 2, weight: 0 },
|
||||
]);
|
||||
|
||||
for _ in 0..100 {
|
||||
let ret = choose.choose_one().unwrap();
|
||||
assert_eq!(1, ret);
|
||||
}
|
||||
|
||||
for _ in 0..100 {
|
||||
let ret = choose.reverse_choose_one().unwrap();
|
||||
assert_eq!(2, ret);
|
||||
let ret = choose.choose_multiple(3).unwrap();
|
||||
assert_eq!(vec![1, 2], ret);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_random_weighted_choose_should_panic() {
|
||||
let mut choose: RandomWeightedChoose<u32> = RandomWeightedChoose::default();
|
||||
choose.set_weight_array(vec![]).unwrap();
|
||||
let _ = choose.choose_one().unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_random_reverse_weighted_choose_should_panic() {
|
||||
let mut choose: RandomWeightedChoose<u32> = RandomWeightedChoose::default();
|
||||
choose.set_weight_array(vec![]).unwrap();
|
||||
let _ = choose.reverse_choose_one().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ use tonic::{Request, Response, Streaming};
|
||||
|
||||
use crate::error;
|
||||
use crate::error::Result;
|
||||
use crate::handler::{HeartbeatHandlerGroup, Pusher};
|
||||
use crate::handler::{HeartbeatHandlerGroup, Pusher, PusherId};
|
||||
use crate::metasrv::{Context, Metasrv};
|
||||
use crate::metrics::METRIC_META_HEARTBEAT_RECV;
|
||||
use crate::service::{GrpcResult, GrpcStream};
|
||||
@@ -52,7 +52,7 @@ impl heartbeat_server::Heartbeat for Metasrv {
|
||||
|
||||
let ctx = self.new_ctx();
|
||||
let _handle = common_runtime::spawn_global(async move {
|
||||
let mut pusher_key = None;
|
||||
let mut pusher_id = None;
|
||||
while let Some(msg) = in_stream.next().await {
|
||||
let mut is_not_leader = false;
|
||||
match msg {
|
||||
@@ -67,11 +67,11 @@ impl heartbeat_server::Heartbeat for Metasrv {
|
||||
break;
|
||||
};
|
||||
|
||||
if pusher_key.is_none() {
|
||||
pusher_key = register_pusher(&handler_group, header, tx.clone()).await;
|
||||
if pusher_id.is_none() {
|
||||
pusher_id = register_pusher(&handler_group, header, tx.clone()).await;
|
||||
}
|
||||
if let Some(k) = &pusher_key {
|
||||
METRIC_META_HEARTBEAT_RECV.with_label_values(&[k]);
|
||||
if let Some(k) = &pusher_id {
|
||||
METRIC_META_HEARTBEAT_RECV.with_label_values(&[&k.to_string()]);
|
||||
} else {
|
||||
METRIC_META_HEARTBEAT_RECV.with_label_values(&["none"]);
|
||||
}
|
||||
@@ -111,13 +111,10 @@ impl heartbeat_server::Heartbeat for Metasrv {
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
"Heartbeat stream closed: {:?}",
|
||||
pusher_key.as_ref().unwrap_or(&"unknown".to_string())
|
||||
);
|
||||
info!("Heartbeat stream closed: {pusher_id:?}");
|
||||
|
||||
if let Some(key) = pusher_key {
|
||||
let _ = handler_group.deregister_push(&key).await;
|
||||
if let Some(pusher_id) = pusher_id {
|
||||
let _ = handler_group.deregister_push(pusher_id).await;
|
||||
}
|
||||
});
|
||||
|
||||
@@ -176,13 +173,13 @@ async fn register_pusher(
|
||||
handler_group: &HeartbeatHandlerGroup,
|
||||
header: &RequestHeader,
|
||||
sender: Sender<std::result::Result<HeartbeatResponse, tonic::Status>>,
|
||||
) -> Option<String> {
|
||||
let role = header.role() as i32;
|
||||
let node_id = get_node_id(header);
|
||||
let key = format!("{}-{}", role, node_id);
|
||||
) -> Option<PusherId> {
|
||||
let role = header.role();
|
||||
let id = get_node_id(header);
|
||||
let pusher_id = PusherId::new(role, id);
|
||||
let pusher = Pusher::new(sender, header);
|
||||
handler_group.register_pusher(&key, pusher).await;
|
||||
Some(key)
|
||||
handler_group.register_pusher(pusher_id, pusher).await;
|
||||
Some(pusher_id)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -24,6 +24,7 @@ use futures::Future;
|
||||
use tokio::sync::oneshot;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::handler::PusherId;
|
||||
|
||||
pub type MailboxRef = Arc<dyn Mailbox>;
|
||||
|
||||
@@ -53,11 +54,11 @@ impl Display for Channel {
|
||||
}
|
||||
|
||||
impl Channel {
|
||||
pub(crate) fn pusher_id(&self) -> String {
|
||||
pub(crate) fn pusher_id(&self) -> PusherId {
|
||||
match self {
|
||||
Channel::Datanode(id) => format!("{}-{}", Role::Datanode as i32, id),
|
||||
Channel::Frontend(id) => format!("{}-{}", Role::Frontend as i32, id),
|
||||
Channel::Flownode(id) => format!("{}-{}", Role::Flownode as i32, id),
|
||||
Channel::Datanode(id) => PusherId::new(Role::Datanode, *id),
|
||||
Channel::Frontend(id) => PusherId::new(Role::Frontend, *id),
|
||||
Channel::Flownode(id) => PusherId::new(Role::Flownode, *id),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,18 +80,15 @@ impl CacheManager {
|
||||
CacheManagerBuilder::default()
|
||||
}
|
||||
|
||||
/// Gets cached [ParquetMetaData].
|
||||
/// Gets cached [ParquetMetaData] from in-memory cache first.
|
||||
/// If not found, tries to get it from write cache and fill the in-memory cache.
|
||||
pub async fn get_parquet_meta_data(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
file_id: FileId,
|
||||
) -> Option<Arc<ParquetMetaData>> {
|
||||
// Try to get metadata from sst meta cache
|
||||
let metadata = self.sst_meta_cache.as_ref().and_then(|sst_meta_cache| {
|
||||
let value = sst_meta_cache.get(&SstMetaKey(region_id, file_id));
|
||||
update_hit_miss(value, SST_META_TYPE)
|
||||
});
|
||||
|
||||
let metadata = self.get_parquet_meta_data_from_mem_cache(region_id, file_id);
|
||||
if metadata.is_some() {
|
||||
return metadata;
|
||||
}
|
||||
@@ -110,6 +107,20 @@ impl CacheManager {
|
||||
None
|
||||
}
|
||||
|
||||
/// Gets cached [ParquetMetaData] from in-memory cache.
|
||||
/// This method does not perform I/O.
|
||||
pub fn get_parquet_meta_data_from_mem_cache(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
file_id: FileId,
|
||||
) -> Option<Arc<ParquetMetaData>> {
|
||||
// Try to get metadata from sst meta cache
|
||||
self.sst_meta_cache.as_ref().and_then(|sst_meta_cache| {
|
||||
let value = sst_meta_cache.get(&SstMetaKey(region_id, file_id));
|
||||
update_hit_miss(value, SST_META_TYPE)
|
||||
})
|
||||
}
|
||||
|
||||
/// Puts [ParquetMetaData] into the cache.
|
||||
pub fn put_parquet_meta_data(
|
||||
&self,
|
||||
|
||||
src/mito2/src/cache/cache_size.rs (vendored, 13 lines changed)
@@ -127,16 +127,3 @@ fn parquet_offset_index_heap_size(offset_index: &ParquetOffsetIndex) -> usize {
|
||||
})
|
||||
.sum()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::cache::test_util::parquet_meta;
|
||||
|
||||
#[test]
|
||||
fn test_parquet_meta_size() {
|
||||
let metadata = parquet_meta();
|
||||
|
||||
assert_eq!(956, parquet_meta_size(&metadata));
|
||||
}
|
||||
}
|
||||
|
||||
src/mito2/src/cache/index.rs (vendored, 67 lines changed)
@@ -87,7 +87,16 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
|
||||
&mut self,
|
||||
dest: &mut Vec<u8>,
|
||||
) -> index::inverted_index::error::Result<usize> {
|
||||
self.inner.read_all(dest).await
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader read_all start, file_id: {}",
|
||||
self.file_id,
|
||||
);
|
||||
let res = self.inner.read_all(dest).await;
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader read_all end, file_id: {}",
|
||||
self.file_id,
|
||||
);
|
||||
res
|
||||
}
|
||||
|
||||
async fn seek_read(
|
||||
@@ -95,7 +104,20 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<Vec<u8>> {
|
||||
self.inner.seek_read(offset, size).await
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader seek_read start, file_id: {}, offset: {}, size: {}",
|
||||
self.file_id,
|
||||
offset,
|
||||
size,
|
||||
);
|
||||
let res = self.inner.seek_read(offset, size).await;
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader seek_read end, file_id: {}, offset: {}, size: {}",
|
||||
self.file_id,
|
||||
offset,
|
||||
size,
|
||||
);
|
||||
res
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> index::inverted_index::error::Result<Arc<InvertedIndexMetas>> {
|
||||
@@ -103,8 +125,16 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
|
||||
CACHE_HIT.with_label_values(&[INDEX_METADATA_TYPE]).inc();
|
||||
Ok(cached)
|
||||
} else {
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader get metadata start, file_id: {}",
|
||||
self.file_id,
|
||||
);
|
||||
let meta = self.inner.metadata().await?;
|
||||
self.cache.put_index_metadata(self.file_id, meta.clone());
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader get metadata end, file_id: {}",
|
||||
self.file_id,
|
||||
);
|
||||
CACHE_MISS.with_label_values(&[INDEX_METADATA_TYPE]).inc();
|
||||
Ok(meta)
|
||||
}
|
||||
@@ -115,9 +145,23 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<FstMap> {
|
||||
self.get_or_load(offset, size)
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader fst start, file_id: {}, offset: {}, size: {}",
|
||||
self.file_id,
|
||||
offset,
|
||||
size,
|
||||
);
|
||||
let res = self
|
||||
.get_or_load(offset, size)
|
||||
.await
|
||||
.and_then(|r| FstMap::new(r).context(DecodeFstSnafu))
|
||||
.and_then(|r| FstMap::new(r).context(DecodeFstSnafu));
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader fst end, file_id: {}, offset: {}, size: {}",
|
||||
self.file_id,
|
||||
offset,
|
||||
size,
|
||||
);
|
||||
res
|
||||
}
|
||||
|
||||
async fn bitmap(
|
||||
@@ -125,7 +169,20 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<BitVec> {
|
||||
self.get_or_load(offset, size).await.map(BitVec::from_vec)
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader bitmap start, file_id: {}, offset: {}, size: {}",
|
||||
self.file_id,
|
||||
offset,
|
||||
size,
|
||||
);
|
||||
let res = self.get_or_load(offset, size).await.map(BitVec::from_vec);
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader bitmap end, file_id: {}, offset: {}, size: {}",
|
||||
self.file_id,
|
||||
offset,
|
||||
size,
|
||||
);
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -134,7 +134,7 @@ pub async fn open_compaction_region(
|
||||
));
|
||||
|
||||
let file_purger = {
|
||||
let purge_scheduler = Arc::new(LocalScheduler::new(mito_config.max_background_jobs));
|
||||
let purge_scheduler = Arc::new(LocalScheduler::new(mito_config.max_background_purges));
|
||||
Arc::new(LocalFilePurger::new(
|
||||
purge_scheduler.clone(),
|
||||
access_layer.clone(),
|
||||
@@ -259,6 +259,7 @@ impl Compactor for DefaultCompactor {
|
||||
|
||||
let write_opts = WriteOptions {
|
||||
write_buffer_size: compaction_region.engine_config.sst_write_buffer_size,
|
||||
compression_method: compaction_region.engine_config.compression_method,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ use std::collections::hash_map::Entry;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::fmt::Debug;
|
||||
|
||||
use common_telemetry::{debug, info};
|
||||
use common_telemetry::{info, trace};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::timestamp_millis::BucketAligned;
|
||||
use common_time::Timestamp;
|
||||
@@ -114,7 +114,7 @@ impl TwcsPicker {
|
||||
// Files in window exceeds file num limit
|
||||
vec![enforce_file_num(&files.files, max_files)]
|
||||
} else {
|
||||
debug!("Skip building compaction output, active window: {:?}, current window: {}, max runs: {}, found runs: {}, ", active_window, *window, max_runs, found_runs);
|
||||
trace!("Skip building compaction output, active window: {:?}, current window: {}, max runs: {}, found runs: {}, ", active_window, *window, max_runs, found_runs);
|
||||
continue;
|
||||
};
|
||||
|
||||
@@ -297,6 +297,9 @@ fn assign_to_windows<'a>(
|
||||
let mut windows: HashMap<i64, Window> = HashMap::new();
|
||||
// Iterates all files and assign to time windows according to max timestamp
|
||||
for f in files {
|
||||
if f.compacting() {
|
||||
continue;
|
||||
}
|
||||
let (_, end) = f.time_range();
|
||||
let time_window = end
|
||||
.convert_to(TimeUnit::Second)
|
||||
|
||||
@@ -28,9 +28,6 @@ use crate::error::Result;
|
||||
use crate::memtable::MemtableConfig;
|
||||
use crate::sst::DEFAULT_WRITE_BUFFER_SIZE;
|
||||
|
||||
/// Default max running background job.
|
||||
const DEFAULT_MAX_BG_JOB: usize = 4;
|
||||
|
||||
const MULTIPART_UPLOAD_MINIMUM_SIZE: ReadableSize = ReadableSize::mb(5);
|
||||
/// Default channel size for parallel scan task.
|
||||
const DEFAULT_SCAN_CHANNEL_SIZE: usize = 32;
|
||||
@@ -69,8 +66,12 @@ pub struct MitoConfig {
|
||||
pub compress_manifest: bool,
|
||||
|
||||
// Background job configs:
|
||||
/// Max number of running background jobs (default 4).
|
||||
pub max_background_jobs: usize,
|
||||
/// Max number of running background flush jobs (default: 1/2 of cpu cores).
|
||||
pub max_background_flushes: usize,
|
||||
/// Max number of running background compaction jobs (default: 1/4 of cpu cores).
|
||||
pub max_background_compactions: usize,
|
||||
/// Max number of running background purge jobs (default: number of cpu cores).
|
||||
pub max_background_purges: usize,
|
||||
|
||||
// Flush configs:
|
||||
/// Interval to auto flush a region if it has not flushed yet (default 30 min).
|
||||
@@ -127,6 +128,20 @@ pub struct MitoConfig {
|
||||
/// To align with the old behavior, the default value is 0 (no restrictions).
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub min_compaction_interval: Duration,
|
||||
|
/// Skip wal
pub skip_wal: bool,
/// SST compression method.
pub compression_method: CompressionMethod,
}

#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum CompressionMethod {
#[default]
Zstd,
Lz4,
None,
}
|
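Because the new enum derives Deserialize with rename_all = "snake_case", the compression method can be selected from configuration as a plain lowercase string. A standalone sketch mirroring the enum above (the wrapper struct and TOML key below are illustrative, not the real mito2 config layout):

use serde::Deserialize;

#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
enum CompressionMethod {
    #[default]
    Zstd,
    Lz4,
    None,
}

#[derive(Deserialize)]
struct SstOptions {
    #[serde(default)]
    compression_method: CompressionMethod,
}

fn main() {
    let opts: SstOptions = toml::from_str("compression_method = 'lz4'").unwrap();
    assert_eq!(opts.compression_method, CompressionMethod::Lz4);

    // Omitting the key falls back to the default, zstd.
    let opts: SstOptions = toml::from_str("").unwrap();
    assert_eq!(opts.compression_method, CompressionMethod::Zstd);
}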
||||
|
||||
impl Default for MitoConfig {
|
||||
@@ -137,7 +152,9 @@ impl Default for MitoConfig {
|
||||
worker_request_batch_size: 64,
|
||||
manifest_checkpoint_distance: 10,
|
||||
compress_manifest: false,
|
||||
max_background_jobs: DEFAULT_MAX_BG_JOB,
|
||||
max_background_flushes: divide_num_cpus(2),
|
||||
max_background_compactions: divide_num_cpus(4),
|
||||
max_background_purges: common_config::utils::get_cpus(),
|
||||
auto_flush_interval: Duration::from_secs(30 * 60),
|
||||
global_write_buffer_size: ReadableSize::gb(1),
|
||||
global_write_buffer_reject_size: ReadableSize::gb(2),
|
||||
@@ -158,6 +175,8 @@ impl Default for MitoConfig {
|
||||
fulltext_index: FulltextIndexConfig::default(),
|
||||
memtable: MemtableConfig::default(),
|
||||
min_compaction_interval: Duration::from_secs(0),
|
||||
skip_wal: false,
|
||||
compression_method: CompressionMethod::Zstd,
|
||||
};
|
||||
|
||||
// Adjust buffer and cache size according to system memory if we can.
|
||||
@@ -185,9 +204,26 @@ impl MitoConfig {
|
||||
self.worker_channel_size = 1;
|
||||
}
|
||||
|
||||
if self.max_background_jobs == 0 {
|
||||
warn!("Sanitize max background jobs 0 to {}", DEFAULT_MAX_BG_JOB);
|
||||
self.max_background_jobs = DEFAULT_MAX_BG_JOB;
|
||||
if self.max_background_flushes == 0 {
|
||||
warn!(
|
||||
"Sanitize max background flushes 0 to {}",
|
||||
divide_num_cpus(2)
|
||||
);
|
||||
self.max_background_flushes = divide_num_cpus(2);
|
||||
}
|
||||
if self.max_background_compactions == 0 {
|
||||
warn!(
|
||||
"Sanitize max background compactions 0 to {}",
|
||||
divide_num_cpus(4)
|
||||
);
|
||||
self.max_background_compactions = divide_num_cpus(4);
|
||||
}
|
||||
if self.max_background_purges == 0 {
|
||||
warn!(
|
||||
"Sanitize max background purges 0 to {}",
|
||||
common_config::utils::get_cpus()
|
||||
);
|
||||
self.max_background_purges = common_config::utils::get_cpus();
|
||||
}
|
||||
|
||||
if self.global_write_buffer_reject_size <= self.global_write_buffer_size {
|
||||
|
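The sanitized defaults above are derived from the host's core count. A rough standalone sketch of such a helper (my own illustration, not the real divide_num_cpus implementation, whose exact rounding may differ):

// Roughly "1/Nth of the cores, but never zero".
fn divide_num_cpus(divisor: usize) -> usize {
    let cores = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(1);
    usize::max(cores / divisor, 1)
}

fn main() {
    println!("default max_background_flushes: {}", divide_num_cpus(2));
    println!("default max_background_compactions: {}", divide_num_cpus(4));
}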
||||
@@ -163,13 +163,13 @@ impl MitoEngine {
|
||||
}
|
||||
|
||||
/// Returns a region scanner to scan the region for `request`.
|
||||
async fn region_scanner(
|
||||
fn region_scanner(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
request: ScanRequest,
|
||||
) -> Result<RegionScannerRef> {
|
||||
let scanner = self.scanner(region_id, request)?;
|
||||
scanner.region_scanner().await
|
||||
scanner.region_scanner()
|
||||
}
|
||||
|
||||
/// Scans a region.
|
||||
@@ -527,7 +527,6 @@ impl RegionEngine for MitoEngine {
|
||||
request: ScanRequest,
|
||||
) -> Result<RegionScannerRef, BoxedError> {
|
||||
self.region_scanner(region_id, request)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
}
|
||||
|
||||
|
||||
@@ -580,7 +580,8 @@ async fn test_region_usage() {
|
||||
flush_region(&engine, region_id, None).await;
|
||||
|
||||
let region_stat = region.region_statistic();
|
||||
assert_eq!(region_stat.sst_size, 3010);
|
||||
assert_eq!(region_stat.sst_size, 2790);
|
||||
assert_eq!(region_stat.num_rows, 10);
|
||||
|
||||
// region total usage
|
||||
// Some memtables may share items.
|
||||
|
||||
@@ -272,7 +272,7 @@ async fn test_readonly_during_compaction() {
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
// Ensure there is only one background worker for purge task.
|
||||
max_background_jobs: 1,
|
||||
max_background_purges: 1,
|
||||
..Default::default()
|
||||
},
|
||||
None,
|
||||
@@ -310,7 +310,7 @@ async fn test_readonly_during_compaction() {
|
||||
listener.wake();
|
||||
|
||||
let notify = Arc::new(Notify::new());
|
||||
// We already sets max background jobs to 1, so we can submit a task to the
|
||||
// We already sets max background purges to 1, so we can submit a task to the
|
||||
// purge scheduler to ensure all purge tasks are finished.
|
||||
let job_notify = notify.clone();
|
||||
engine
|
||||
|
||||
@@ -259,3 +259,56 @@ async fn test_prune_memtable_complex_expr() {
|
||||
+-------+---------+---------------------+";
|
||||
assert_eq!(expected, batches.pretty_print().unwrap());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_mem_range_prune() {
|
||||
let mut env = TestEnv::new();
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Create(request))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
put_rows(
|
||||
&engine,
|
||||
region_id,
|
||||
Rows {
|
||||
schema: column_schemas.clone(),
|
||||
rows: build_rows(5, 8),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
// Starts scan and gets the memtable time range.
|
||||
let stream = engine
|
||||
.scan_to_stream(region_id, ScanRequest::default())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
put_rows(
|
||||
&engine,
|
||||
region_id,
|
||||
Rows {
|
||||
schema: column_schemas.clone(),
|
||||
rows: build_rows(10, 12),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
let batches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
let expected = "\
|
||||
+-------+---------+---------------------+
|
||||
| tag_0 | field_0 | ts |
|
||||
+-------+---------+---------------------+
|
||||
| 5 | 5.0 | 1970-01-01T00:00:05 |
|
||||
| 6 | 6.0 | 1970-01-01T00:00:06 |
|
||||
| 7 | 7.0 | 1970-01-01T00:00:07 |
|
||||
+-------+---------+---------------------+";
|
||||
assert_eq!(expected, batches.pretty_print().unwrap());
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff.