mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-25 23:49:58 +00:00
Compare commits
59 Commits
v0.11.0
...
chore/benc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9d3dc2d311 | ||
|
|
422d18da8b | ||
|
|
66f0581f5b | ||
|
|
c9ad8c7101 | ||
|
|
2107737db1 | ||
|
|
548e1988ab | ||
|
|
218236cc5b | ||
|
|
f04d380259 | ||
|
|
fa773cf480 | ||
|
|
9b4e8555e2 | ||
|
|
c6b7caa2ec | ||
|
|
58d6982c93 | ||
|
|
e662c241e6 | ||
|
|
266919c226 | ||
|
|
7d1bcc9d49 | ||
|
|
18e8c45384 | ||
|
|
c33cf59398 | ||
|
|
421088a868 | ||
|
|
d821dc5a3e | ||
|
|
bfc777e6ac | ||
|
|
8a5384697b | ||
|
|
d0245473a9 | ||
|
|
043d0bd7c2 | ||
|
|
acedff030b | ||
|
|
88f7075a2a | ||
|
|
54698325b6 | ||
|
|
5ffda7e971 | ||
|
|
f82af15eba | ||
|
|
9d7fea902e | ||
|
|
358d5e1d63 | ||
|
|
579059d99f | ||
|
|
53d55c0b6b | ||
|
|
bef6896280 | ||
|
|
4b4c6dbb66 | ||
|
|
0c302ba127 | ||
|
|
7139ba08c8 | ||
|
|
f3e0a31e5d | ||
|
|
36c82121fb | ||
|
|
716bb82d37 | ||
|
|
2bb450b09a | ||
|
|
e8e9526738 | ||
|
|
fee75a1fad | ||
|
|
b8a78b7838 | ||
|
|
2137c53274 | ||
|
|
03ad6e2a8d | ||
|
|
d53fbcb936 | ||
|
|
8c1959c580 | ||
|
|
e2a41ccaec | ||
|
|
a8012147ab | ||
|
|
60f8dbf7f0 | ||
|
|
9da2e17d0e | ||
|
|
1a8e77a480 | ||
|
|
e1e39993f7 | ||
|
|
a30d918df2 | ||
|
|
2c4ac76754 | ||
|
|
a6893aad42 | ||
|
|
d91517688a | ||
|
|
3d1b8c4fac | ||
|
|
7c69ca0502 |
@@ -18,6 +18,8 @@ runs:
|
||||
--set controller.replicaCount=${{ inputs.controller-replicas }} \
|
||||
--set controller.resources.requests.cpu=50m \
|
||||
--set controller.resources.requests.memory=128Mi \
|
||||
--set controller.resources.limits.cpu=2000m \
|
||||
--set controller.resources.limits.memory=2Gi \
|
||||
--set listeners.controller.protocol=PLAINTEXT \
|
||||
--set listeners.client.protocol=PLAINTEXT \
|
||||
--create-namespace \
|
||||
|
||||
1
.github/cargo-blacklist.txt
vendored
1
.github/cargo-blacklist.txt
vendored
@@ -1,2 +1,3 @@
|
||||
native-tls
|
||||
openssl
|
||||
aws-lc-sys
|
||||
|
||||
11
.github/workflows/develop.yml
vendored
11
.github/workflows/develop.yml
vendored
@@ -269,13 +269,6 @@ jobs:
|
||||
- name: Install cargo-gc-bin
|
||||
shell: bash
|
||||
run: cargo install cargo-gc-bin
|
||||
- name: Check aws-lc-sys will not build
|
||||
shell: bash
|
||||
run: |
|
||||
if cargo tree -i aws-lc-sys -e features | grep -q aws-lc-sys; then
|
||||
echo "Found aws-lc-sys, which has compilation problems on older gcc versions. Please replace it with ring until its building experience improves."
|
||||
exit 1
|
||||
fi
|
||||
- name: Build greptime bianry
|
||||
shell: bash
|
||||
# `cargo gc` will invoke `cargo build` with specified args
|
||||
@@ -330,8 +323,6 @@ jobs:
|
||||
uses: ./.github/actions/setup-kafka-cluster
|
||||
- name: Setup Etcd cluser
|
||||
uses: ./.github/actions/setup-etcd-cluster
|
||||
- name: Setup Postgres cluser
|
||||
uses: ./.github/actions/setup-postgres-cluster
|
||||
# Prepares for fuzz tests
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
@@ -481,8 +472,6 @@ jobs:
|
||||
uses: ./.github/actions/setup-kafka-cluster
|
||||
- name: Setup Etcd cluser
|
||||
uses: ./.github/actions/setup-etcd-cluster
|
||||
- name: Setup Postgres cluser
|
||||
uses: ./.github/actions/setup-postgres-cluster
|
||||
# Prepares for fuzz tests
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
|
||||
4
.github/workflows/nightly-build.yml
vendored
4
.github/workflows/nightly-build.yml
vendored
@@ -12,7 +12,7 @@ on:
|
||||
linux_amd64_runner:
|
||||
type: choice
|
||||
description: The runner uses to build linux-amd64 artifacts
|
||||
default: ec2-c6i.2xlarge-amd64
|
||||
default: ec2-c6i.4xlarge-amd64
|
||||
options:
|
||||
- ubuntu-20.04
|
||||
- ubuntu-20.04-8-cores
|
||||
@@ -27,7 +27,7 @@ on:
|
||||
linux_arm64_runner:
|
||||
type: choice
|
||||
description: The runner uses to build linux-arm64 artifacts
|
||||
default: ec2-c6g.2xlarge-arm64
|
||||
default: ec2-c6g.4xlarge-arm64
|
||||
options:
|
||||
- ec2-c6g.xlarge-arm64 # 4C8G
|
||||
- ec2-c6g.2xlarge-arm64 # 8C16G
|
||||
|
||||
11
.github/workflows/nightly-ci.yml
vendored
11
.github/workflows/nightly-ci.yml
vendored
@@ -114,6 +114,17 @@ jobs:
|
||||
GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
|
||||
UNITTEST_LOG_DIR: "__unittest_logs"
|
||||
|
||||
cleanbuild-linux-nix:
|
||||
runs-on: ubuntu-latest-8-cores
|
||||
timeout-minutes: 60
|
||||
needs: [coverage, fmt, clippy, check]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: cachix/install-nix-action@v27
|
||||
with:
|
||||
nix_path: nixpkgs=channel:nixos-unstable
|
||||
- run: nix-shell --pure --run "cargo build"
|
||||
|
||||
check-status:
|
||||
name: Check status
|
||||
needs: [sqlness-test, sqlness-windows, test-on-windows]
|
||||
|
||||
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@@ -91,7 +91,7 @@ env:
|
||||
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
|
||||
NIGHTLY_RELEASE_PREFIX: nightly
|
||||
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
|
||||
NEXT_RELEASE_VERSION: v0.11.0
|
||||
NEXT_RELEASE_VERSION: v0.12.0
|
||||
|
||||
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
|
||||
permissions:
|
||||
|
||||
6
.gitignore
vendored
6
.gitignore
vendored
@@ -47,6 +47,10 @@ benchmarks/data
|
||||
|
||||
venv/
|
||||
|
||||
# Fuzz tests
|
||||
# Fuzz tests
|
||||
tests-fuzz/artifacts/
|
||||
tests-fuzz/corpus/
|
||||
|
||||
# Nix
|
||||
.direnv
|
||||
.envrc
|
||||
|
||||
475
Cargo.lock
generated
475
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -68,7 +68,7 @@ members = [
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "0.11.0"
|
||||
version = "0.12.0"
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
@@ -180,6 +180,7 @@ sysinfo = "0.30"
|
||||
# on branch v0.44.x
|
||||
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "54a267ac89c09b11c0c88934690530807185d3e7", features = [
|
||||
"visitor",
|
||||
"serde",
|
||||
] }
|
||||
strum = { version = "0.25", features = ["derive"] }
|
||||
tempfile = "3"
|
||||
|
||||
@@ -13,11 +13,11 @@
|
||||
| Key | Type | Default | Descriptions |
|
||||
| --- | -----| ------- | ----------- |
|
||||
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
|
||||
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
|
||||
| `default_timezone` | String | Unset | The default timezone of the server. |
|
||||
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
|
||||
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
|
||||
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
|
||||
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
|
||||
| `runtime` | -- | -- | The runtime options. |
|
||||
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
|
||||
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
|
||||
@@ -61,9 +61,9 @@
|
||||
| `wal` | -- | -- | The WAL options. |
|
||||
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
|
||||
| `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.file_size` | String | `128MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
@@ -150,6 +150,7 @@
|
||||
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. |
|
||||
| `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
|
||||
| `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
|
||||
| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. |
|
||||
| `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. |
|
||||
| `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
|
||||
| `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
|
||||
@@ -286,12 +287,12 @@
|
||||
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
|
||||
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
|
||||
| `store_addrs` | Array | -- | Store server address default to etcd store. |
|
||||
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
|
||||
| `backend` | String | `EtcdStore` | The datastore for meta server. |
|
||||
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
|
||||
| `use_memory_store` | Bool | `false` | Store data in memory. |
|
||||
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. |
|
||||
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
|
||||
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
|
||||
| `backend` | String | `EtcdStore` | The datastore for meta server. |
|
||||
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
|
||||
| `runtime` | -- | -- | The runtime options. |
|
||||
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
|
||||
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
|
||||
@@ -356,7 +357,6 @@
|
||||
| `node_id` | Integer | Unset | The datanode identifier and should be unique in the cluster. |
|
||||
| `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.<br/>It will block the datanode start if it can't receive leases in the heartbeat from metasrv. |
|
||||
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
|
||||
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
|
||||
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
|
||||
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
|
||||
| `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. |
|
||||
@@ -364,6 +364,7 @@
|
||||
| `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. |
|
||||
| `rpc_max_recv_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_recv_message_size` instead. |
|
||||
| `rpc_max_send_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_send_message_size` instead. |
|
||||
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
|
||||
| `http` | -- | -- | The HTTP server options. |
|
||||
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
|
||||
| `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
|
||||
@@ -398,9 +399,9 @@
|
||||
| `wal` | -- | -- | The WAL options. |
|
||||
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
|
||||
| `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.file_size` | String | `128MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
@@ -475,6 +476,9 @@
|
||||
| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query<br/>- `auto`: automatically (default)<br/>- `disable`: never |
|
||||
| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `auto` | Memory threshold for performing an external sort during index creation.<br/>- `auto`: automatically determine the threshold based on the system memory size (default)<br/>- `unlimited`: no memory limit<br/>- `[size]` e.g. `64MB`: fixed memory threshold |
|
||||
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. |
|
||||
| `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
|
||||
| `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
|
||||
| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. |
|
||||
| `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. |
|
||||
| `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
|
||||
| `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
|
||||
|
||||
@@ -13,9 +13,6 @@ require_lease_before_startup = false
|
||||
## By default, it provides services after all regions have been initialized.
|
||||
init_regions_in_background = false
|
||||
|
||||
## Enable telemetry to collect anonymous usage data.
|
||||
enable_telemetry = true
|
||||
|
||||
## Parallelism of initializing regions.
|
||||
init_regions_parallelism = 16
|
||||
|
||||
@@ -42,6 +39,8 @@ rpc_max_recv_message_size = "512MB"
|
||||
## @toml2docs:none-default
|
||||
rpc_max_send_message_size = "512MB"
|
||||
|
||||
## Enable telemetry to collect anonymous usage data. Enabled by default.
|
||||
#+ enable_telemetry = true
|
||||
|
||||
## The HTTP server options.
|
||||
[http]
|
||||
@@ -143,15 +142,15 @@ dir = "/tmp/greptimedb/wal"
|
||||
|
||||
## The size of the WAL segment file.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
file_size = "256MB"
|
||||
file_size = "128MB"
|
||||
|
||||
## The threshold of the WAL size to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_threshold = "4GB"
|
||||
purge_threshold = "1GB"
|
||||
|
||||
## The interval to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_interval = "10m"
|
||||
purge_interval = "1m"
|
||||
|
||||
## The read batch size.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
@@ -544,6 +543,15 @@ mem_threshold_on_create = "auto"
|
||||
## Deprecated, use `region_engine.mito.index.aux_path` instead.
|
||||
intermediate_path = ""
|
||||
|
||||
## Cache size for inverted index metadata.
|
||||
metadata_cache_size = "64MiB"
|
||||
|
||||
## Cache size for inverted index content.
|
||||
content_cache_size = "128MiB"
|
||||
|
||||
## Page size for inverted index content cache.
|
||||
content_cache_page_size = "8MiB"
|
||||
|
||||
## The options for full-text index in Mito engine.
|
||||
[region_engine.mito.fulltext_index]
|
||||
|
||||
|
||||
@@ -10,6 +10,12 @@ server_addr = "127.0.0.1:3002"
|
||||
## Store server address default to etcd store.
|
||||
store_addrs = ["127.0.0.1:2379"]
|
||||
|
||||
## If it's not empty, the metasrv will store all data with this key prefix.
|
||||
store_key_prefix = ""
|
||||
|
||||
## The datastore for meta server.
|
||||
backend = "EtcdStore"
|
||||
|
||||
## Datanode selector type.
|
||||
## - `round_robin` (default value)
|
||||
## - `lease_based`
|
||||
@@ -20,20 +26,14 @@ selector = "round_robin"
|
||||
## Store data in memory.
|
||||
use_memory_store = false
|
||||
|
||||
## Whether to enable greptimedb telemetry.
|
||||
enable_telemetry = true
|
||||
|
||||
## If it's not empty, the metasrv will store all data with this key prefix.
|
||||
store_key_prefix = ""
|
||||
|
||||
## Whether to enable region failover.
|
||||
## This feature is only available on GreptimeDB running on cluster mode and
|
||||
## - Using Remote WAL
|
||||
## - Using shared storage (e.g., s3).
|
||||
enable_region_failover = false
|
||||
|
||||
## The datastore for meta server.
|
||||
backend = "EtcdStore"
|
||||
## Whether to enable greptimedb telemetry. Enabled by default.
|
||||
#+ enable_telemetry = true
|
||||
|
||||
## The runtime options.
|
||||
#+ [runtime]
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
## The running mode of the datanode. It can be `standalone` or `distributed`.
|
||||
mode = "standalone"
|
||||
|
||||
## Enable telemetry to collect anonymous usage data.
|
||||
enable_telemetry = true
|
||||
|
||||
## The default timezone of the server.
|
||||
## @toml2docs:none-default
|
||||
default_timezone = "UTC"
|
||||
@@ -18,6 +15,9 @@ init_regions_parallelism = 16
|
||||
## The maximum current queries allowed to be executed. Zero means unlimited.
|
||||
max_concurrent_queries = 0
|
||||
|
||||
## Enable telemetry to collect anonymous usage data. Enabled by default.
|
||||
#+ enable_telemetry = true
|
||||
|
||||
## The runtime options.
|
||||
#+ [runtime]
|
||||
## The number of threads to execute the runtime for global read operations.
|
||||
@@ -147,15 +147,15 @@ dir = "/tmp/greptimedb/wal"
|
||||
|
||||
## The size of the WAL segment file.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
file_size = "256MB"
|
||||
file_size = "128MB"
|
||||
|
||||
## The threshold of the WAL size to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_threshold = "4GB"
|
||||
purge_threshold = "1GB"
|
||||
|
||||
## The interval to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_interval = "10m"
|
||||
purge_interval = "1m"
|
||||
|
||||
## The read batch size.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
@@ -588,6 +588,9 @@ metadata_cache_size = "64MiB"
|
||||
## Cache size for inverted index content.
|
||||
content_cache_size = "128MiB"
|
||||
|
||||
## Page size for inverted index content cache.
|
||||
content_cache_page_size = "8MiB"
|
||||
|
||||
## The options for full-text index in Mito engine.
|
||||
[region_engine.mito.fulltext_index]
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,2 +1,3 @@
|
||||
[toolchain]
|
||||
channel = "nightly-2024-10-19"
|
||||
components = ["rust-analyzer"]
|
||||
|
||||
@@ -58,8 +58,10 @@ def main():
|
||||
if not check_snafu_in_files(branch_name, other_rust_files)
|
||||
]
|
||||
|
||||
for name in unused_snafu:
|
||||
print(name)
|
||||
if unused_snafu:
|
||||
print("Unused error variants:")
|
||||
for name in unused_snafu:
|
||||
print(name)
|
||||
|
||||
if unused_snafu:
|
||||
raise SystemExit(1)
|
||||
|
||||
27
shell.nix
Normal file
27
shell.nix
Normal file
@@ -0,0 +1,27 @@
|
||||
let
|
||||
nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-unstable";
|
||||
fenix = import (fetchTarball "https://github.com/nix-community/fenix/archive/main.tar.gz") {};
|
||||
pkgs = import nixpkgs { config = {}; overlays = []; };
|
||||
in
|
||||
|
||||
pkgs.mkShell rec {
|
||||
nativeBuildInputs = with pkgs; [
|
||||
pkg-config
|
||||
git
|
||||
clang
|
||||
gcc
|
||||
protobuf
|
||||
mold
|
||||
(fenix.fromToolchainFile {
|
||||
dir = ./.;
|
||||
})
|
||||
cargo-nextest
|
||||
taplo
|
||||
];
|
||||
|
||||
buildInputs = with pkgs; [
|
||||
libgit2
|
||||
];
|
||||
|
||||
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath buildInputs;
|
||||
}
|
||||
@@ -16,7 +16,7 @@ use std::collections::HashMap;
|
||||
|
||||
use datatypes::schema::{
|
||||
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, COMMENT_KEY,
|
||||
FULLTEXT_KEY, INVERTED_INDEX_KEY,
|
||||
FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY,
|
||||
};
|
||||
use greptime_proto::v1::Analyzer;
|
||||
use snafu::ResultExt;
|
||||
@@ -29,6 +29,8 @@ use crate::v1::{ColumnDef, ColumnOptions, SemanticType};
|
||||
const FULLTEXT_GRPC_KEY: &str = "fulltext";
|
||||
/// Key used to store inverted index options in gRPC column options.
|
||||
const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index";
|
||||
/// Key used to store skip index options in gRPC column options.
|
||||
const SKIPPING_INDEX_GRPC_KEY: &str = "skipping_index";
|
||||
|
||||
/// Tries to construct a `ColumnSchema` from the given `ColumnDef`.
|
||||
pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
|
||||
@@ -60,6 +62,9 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
|
||||
if let Some(inverted_index) = options.options.get(INVERTED_INDEX_GRPC_KEY) {
|
||||
metadata.insert(INVERTED_INDEX_KEY.to_string(), inverted_index.clone());
|
||||
}
|
||||
if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) {
|
||||
metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.clone());
|
||||
}
|
||||
}
|
||||
|
||||
ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable)
|
||||
@@ -84,6 +89,11 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<Column
|
||||
.options
|
||||
.insert(INVERTED_INDEX_GRPC_KEY.to_string(), inverted_index.clone());
|
||||
}
|
||||
if let Some(skipping_index) = column_schema.metadata().get(SKIPPING_INDEX_KEY) {
|
||||
options
|
||||
.options
|
||||
.insert(SKIPPING_INDEX_GRPC_KEY.to_string(), skipping_index.clone());
|
||||
}
|
||||
|
||||
(!options.options.is_empty()).then_some(options)
|
||||
}
|
||||
|
||||
1
src/cache/Cargo.toml
vendored
1
src/cache/Cargo.toml
vendored
@@ -11,4 +11,3 @@ common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
moka.workspace = true
|
||||
snafu.workspace = true
|
||||
substrait.workspace = true
|
||||
|
||||
@@ -18,7 +18,6 @@ async-stream.workspace = true
|
||||
async-trait = "0.1"
|
||||
bytes.workspace = true
|
||||
common-catalog.workspace = true
|
||||
common-config.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
@@ -58,7 +57,5 @@ catalog = { workspace = true, features = ["testing"] }
|
||||
chrono.workspace = true
|
||||
common-meta = { workspace = true, features = ["testing"] }
|
||||
common-query = { workspace = true, features = ["testing"] }
|
||||
common-test-util.workspace = true
|
||||
log-store.workspace = true
|
||||
object-store.workspace = true
|
||||
tokio.workspace = true
|
||||
|
||||
@@ -54,6 +54,10 @@ const INIT_CAPACITY: usize = 42;
|
||||
pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY";
|
||||
/// Time index constraint name
|
||||
pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX";
|
||||
/// Inverted index constraint name
|
||||
pub(crate) const INVERTED_INDEX_CONSTRAINT_NAME: &str = "INVERTED INDEX";
|
||||
/// Fulltext index constraint name
|
||||
pub(crate) const FULLTEXT_INDEX_CONSTRAINT_NAME: &str = "FULLTEXT INDEX";
|
||||
|
||||
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
|
||||
pub(super) struct InformationSchemaKeyColumnUsage {
|
||||
@@ -216,14 +220,13 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
|
||||
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let mut primary_constraints = vec![];
|
||||
|
||||
let table_info = table.table_info();
|
||||
let table_name = &table_info.name;
|
||||
let keys = &table_info.meta.primary_key_indices;
|
||||
let schema = table.schema();
|
||||
|
||||
for (idx, column) in schema.column_schemas().iter().enumerate() {
|
||||
let mut constraints = vec![];
|
||||
if column.is_time_index() {
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
@@ -236,30 +239,31 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
1, //always 1 for time index
|
||||
);
|
||||
}
|
||||
if keys.contains(&idx) {
|
||||
primary_constraints.push((
|
||||
catalog_name.clone(),
|
||||
schema_name.clone(),
|
||||
table_name.to_string(),
|
||||
column.name.clone(),
|
||||
));
|
||||
}
|
||||
// TODO(dimbtp): foreign key constraint not supported yet
|
||||
}
|
||||
if keys.contains(&idx) {
|
||||
constraints.push(PRI_CONSTRAINT_NAME);
|
||||
}
|
||||
if column.is_inverted_indexed() {
|
||||
constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);
|
||||
}
|
||||
|
||||
for (i, (catalog_name, schema_name, table_name, column_name)) in
|
||||
primary_constraints.into_iter().enumerate()
|
||||
{
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
PRI_CONSTRAINT_NAME,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
&column_name,
|
||||
i as u32 + 1,
|
||||
);
|
||||
if column.has_fulltext_index_key() {
|
||||
constraints.push(FULLTEXT_INDEX_CONSTRAINT_NAME);
|
||||
}
|
||||
|
||||
if !constraints.is_empty() {
|
||||
let aggregated_constraints = constraints.join(", ");
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
&aggregated_constraints,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
table_name,
|
||||
&column.name,
|
||||
idx as u32 + 1,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,6 @@ common-error.workspace = true
|
||||
common-grpc.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-options.workspace = true
|
||||
common-procedure.workspace = true
|
||||
common-query.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
@@ -61,5 +60,4 @@ client = { workspace = true, features = ["testing"] }
|
||||
common-test-util.workspace = true
|
||||
common-version.workspace = true
|
||||
serde.workspace = true
|
||||
temp-env = "0.3"
|
||||
tempfile.workspace = true
|
||||
|
||||
@@ -42,8 +42,6 @@ tonic.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
common-grpc-expr.workspace = true
|
||||
datanode.workspace = true
|
||||
derive-new = "0.5"
|
||||
tracing = "0.1"
|
||||
|
||||
[dev-dependencies.substrait_proto]
|
||||
|
||||
@@ -59,10 +59,6 @@ impl Instance {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn datanode_mut(&mut self) -> &mut Datanode {
|
||||
&mut self.datanode
|
||||
}
|
||||
|
||||
pub fn datanode(&self) -> &Datanode {
|
||||
&self.datanode
|
||||
}
|
||||
|
||||
@@ -63,10 +63,6 @@ impl Instance {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn flownode_mut(&mut self) -> &mut FlownodeInstance {
|
||||
&mut self.flownode
|
||||
}
|
||||
|
||||
pub fn flownode(&self) -> &FlownodeInstance {
|
||||
&self.flownode
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ common-macro.workspace = true
|
||||
futures.workspace = true
|
||||
paste = "1.0"
|
||||
pin-project.workspace = true
|
||||
rand.workspace = true
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
snafu.workspace = true
|
||||
tokio.workspace = true
|
||||
|
||||
@@ -36,6 +36,11 @@ pub struct Metadata {
|
||||
/// `RangeReader` reads a range of bytes from a source.
|
||||
#[async_trait]
|
||||
pub trait RangeReader: Send + Unpin {
|
||||
/// Sets the file size hint for the reader.
|
||||
///
|
||||
/// It's used to optimize the reading process by reducing the number of remote requests.
|
||||
fn with_file_size_hint(&mut self, file_size_hint: u64);
|
||||
|
||||
/// Returns the metadata of the source.
|
||||
async fn metadata(&mut self) -> io::Result<Metadata>;
|
||||
|
||||
@@ -70,6 +75,10 @@ pub trait RangeReader: Send + Unpin {
|
||||
|
||||
#[async_trait]
|
||||
impl<R: ?Sized + RangeReader> RangeReader for &mut R {
|
||||
fn with_file_size_hint(&mut self, file_size_hint: u64) {
|
||||
(*self).with_file_size_hint(file_size_hint)
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
(*self).metadata().await
|
||||
}
|
||||
@@ -186,15 +195,17 @@ impl<R: RangeReader + 'static> AsyncRead for AsyncReadAdapter<R> {
|
||||
|
||||
#[async_trait]
|
||||
impl RangeReader for Vec<u8> {
|
||||
fn with_file_size_hint(&mut self, _file_size_hint: u64) {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
Ok(Metadata {
|
||||
content_length: self.len() as u64,
|
||||
})
|
||||
}
|
||||
|
||||
async fn read(&mut self, mut range: Range<u64>) -> io::Result<Bytes> {
|
||||
range.end = range.end.min(self.len() as u64);
|
||||
|
||||
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
|
||||
let bytes = Bytes::copy_from_slice(&self[range.start as usize..range.end as usize]);
|
||||
Ok(bytes)
|
||||
}
|
||||
@@ -222,6 +233,10 @@ impl FileReader {
|
||||
|
||||
#[async_trait]
|
||||
impl RangeReader for FileReader {
|
||||
fn with_file_size_hint(&mut self, _file_size_hint: u64) {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
Ok(Metadata {
|
||||
content_length: self.content_length,
|
||||
|
||||
@@ -19,7 +19,7 @@ pub const GIB: u64 = MIB * BINARY_DATA_MAGNITUDE;
|
||||
pub const TIB: u64 = GIB * BINARY_DATA_MAGNITUDE;
|
||||
pub const PIB: u64 = TIB * BINARY_DATA_MAGNITUDE;
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd)]
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Default)]
|
||||
pub struct ReadableSize(pub u64);
|
||||
|
||||
impl ReadableSize {
|
||||
|
||||
@@ -8,10 +8,5 @@ license.workspace = true
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
snafu.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
chrono.workspace = true
|
||||
tokio.workspace = true
|
||||
|
||||
@@ -48,5 +48,4 @@ url = "2.3"
|
||||
[dev-dependencies]
|
||||
common-telemetry.workspace = true
|
||||
common-test-util.workspace = true
|
||||
dotenv.workspace = true
|
||||
uuid.workspace = true
|
||||
|
||||
@@ -27,7 +27,7 @@ pub fn build_fs_backend(root: &str) -> Result<ObjectStore> {
|
||||
DefaultLoggingInterceptor,
|
||||
))
|
||||
.layer(object_store::layers::TracingLayer)
|
||||
.layer(object_store::layers::PrometheusMetricsLayer::new(true))
|
||||
.layer(object_store::layers::build_prometheus_metrics_layer(true))
|
||||
.finish();
|
||||
Ok(object_store)
|
||||
}
|
||||
|
||||
@@ -89,7 +89,7 @@ pub fn build_s3_backend(
|
||||
DefaultLoggingInterceptor,
|
||||
))
|
||||
.layer(object_store::layers::TracingLayer)
|
||||
.layer(object_store::layers::PrometheusMetricsLayer::new(true))
|
||||
.layer(object_store::layers::build_prometheus_metrics_layer(true))
|
||||
.finish())
|
||||
}
|
||||
|
||||
|
||||
@@ -5,12 +5,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
api.workspace = true
|
||||
async-trait.workspace = true
|
||||
common-base.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-query.workspace = true
|
||||
session.workspace = true
|
||||
snafu.workspace = true
|
||||
sql.workspace = true
|
||||
|
||||
@@ -51,6 +51,5 @@ wkt = { version = "0.11", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
approx = "0.5"
|
||||
ron = "0.7"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
tokio.workspace = true
|
||||
|
||||
@@ -15,6 +15,8 @@
|
||||
mod convert;
|
||||
mod distance;
|
||||
pub(crate) mod impl_conv;
|
||||
mod scalar_add;
|
||||
mod scalar_mul;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -32,5 +34,9 @@ impl VectorFunction {
|
||||
registry.register(Arc::new(distance::CosDistanceFunction));
|
||||
registry.register(Arc::new(distance::DotProductFunction));
|
||||
registry.register(Arc::new(distance::L2SqDistanceFunction));
|
||||
|
||||
// scalar calculation
|
||||
registry.register(Arc::new(scalar_add::ScalarAddFunction));
|
||||
registry.register(Arc::new(scalar_mul::ScalarMulFunction));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -109,7 +109,6 @@ pub fn parse_veclit_from_strlit(s: &str) -> Result<Vec<f32>> {
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
/// Convert a vector literal to a binary literal.
|
||||
pub fn veclit_to_binlit(vec: &[f32]) -> Vec<u8> {
|
||||
if cfg!(target_endian = "little") {
|
||||
|
||||
173
src/common/function/src/scalars/vector/scalar_add.rs
Normal file
173
src/common/function/src/scalars/vector/scalar_add.rs
Normal file
@@ -0,0 +1,173 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::fmt::Display;
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result};
|
||||
use common_query::prelude::Signature;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
|
||||
use nalgebra::DVectorView;
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
use crate::helper;
|
||||
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit};
|
||||
|
||||
const NAME: &str = "vec_scalar_add";
|
||||
|
||||
/// Adds a scalar to each element of a vector.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```sql
|
||||
/// SELECT vec_to_string(vec_scalar_add(1, "[1, 2, 3]")) as result;
|
||||
///
|
||||
/// +---------+
|
||||
/// | result |
|
||||
/// +---------+
|
||||
/// | [2,3,4] |
|
||||
/// +---------+
|
||||
///
|
||||
/// -- Negative scalar to simulate subtraction
|
||||
/// SELECT vec_to_string(vec_scalar_add(-1, "[1, 2, 3]")) as result;
|
||||
///
|
||||
/// +---------+
|
||||
/// | result |
|
||||
/// +---------+
|
||||
/// | [0,1,2] |
|
||||
/// +---------+
|
||||
/// ```
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct ScalarAddFunction;
|
||||
|
||||
impl Function for ScalarAddFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::binary_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
helper::one_of_sigs2(
|
||||
vec![ConcreteDataType::float64_datatype()],
|
||||
vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::binary_datatype(),
|
||||
],
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 2,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly two, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
let arg0 = &columns[0];
|
||||
let arg1 = &columns[1];
|
||||
|
||||
let len = arg0.len();
|
||||
let mut result = BinaryVectorBuilder::with_capacity(len);
|
||||
if len == 0 {
|
||||
return Ok(result.to_vector());
|
||||
}
|
||||
|
||||
let arg1_const = as_veclit_if_const(arg1)?;
|
||||
|
||||
for i in 0..len {
|
||||
let arg0 = arg0.get(i).as_f64_lossy();
|
||||
let Some(arg0) = arg0 else {
|
||||
result.push_null();
|
||||
continue;
|
||||
};
|
||||
|
||||
let arg1 = match arg1_const.as_ref() {
|
||||
Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())),
|
||||
None => as_veclit(arg1.get_ref(i))?,
|
||||
};
|
||||
let Some(arg1) = arg1 else {
|
||||
result.push_null();
|
||||
continue;
|
||||
};
|
||||
|
||||
let vec = DVectorView::from_slice(&arg1, arg1.len());
|
||||
let vec_res = vec.add_scalar(arg0 as _);
|
||||
|
||||
let veclit = vec_res.as_slice();
|
||||
let binlit = veclit_to_binlit(veclit);
|
||||
result.push(Some(&binlit));
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ScalarAddFunction {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", NAME.to_ascii_uppercase())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::vectors::{Float32Vector, StringVector};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_scalar_add() {
|
||||
let func = ScalarAddFunction;
|
||||
|
||||
let input0 = Arc::new(Float32Vector::from(vec![
|
||||
Some(1.0),
|
||||
Some(-1.0),
|
||||
None,
|
||||
Some(3.0),
|
||||
]));
|
||||
let input1 = Arc::new(StringVector::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
Some("[4.0,5.0,6.0]".to_string()),
|
||||
Some("[7.0,8.0,9.0]".to_string()),
|
||||
None,
|
||||
]));
|
||||
|
||||
let result = func
|
||||
.eval(FunctionContext::default(), &[input0, input1])
|
||||
.unwrap();
|
||||
|
||||
let result = result.as_ref();
|
||||
assert_eq!(result.len(), 4);
|
||||
assert_eq!(
|
||||
result.get_ref(0).as_binary().unwrap(),
|
||||
Some(veclit_to_binlit(&[2.0, 3.0, 4.0]).as_slice())
|
||||
);
|
||||
assert_eq!(
|
||||
result.get_ref(1).as_binary().unwrap(),
|
||||
Some(veclit_to_binlit(&[3.0, 4.0, 5.0]).as_slice())
|
||||
);
|
||||
assert!(result.get_ref(2).is_null());
|
||||
assert!(result.get_ref(3).is_null());
|
||||
}
|
||||
}
|
||||
173
src/common/function/src/scalars/vector/scalar_mul.rs
Normal file
173
src/common/function/src/scalars/vector/scalar_mul.rs
Normal file
@@ -0,0 +1,173 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::fmt::Display;
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result};
|
||||
use common_query::prelude::Signature;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
|
||||
use nalgebra::DVectorView;
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
use crate::helper;
|
||||
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit};
|
||||
|
||||
const NAME: &str = "vec_scalar_mul";
|
||||
|
||||
/// Multiplies each element of a vector by a scalar.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```sql
|
||||
/// SELECT vec_to_string(vec_scalar_mul(2, "[1, 2, 3]")) as result;
|
||||
///
|
||||
/// +---------+
|
||||
/// | result |
|
||||
/// +---------+
|
||||
/// | [2,4,6] |
|
||||
/// +---------+
|
||||
///
|
||||
/// -- 1/scalar to simulate division
|
||||
/// SELECT vec_to_string(vec_scalar_mul(0.5, "[2, 4, 6]")) as result;
|
||||
///
|
||||
/// +---------+
|
||||
/// | result |
|
||||
/// +---------+
|
||||
/// | [1,2,3] |
|
||||
/// +---------+
|
||||
/// ```
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct ScalarMulFunction;
|
||||
|
||||
impl Function for ScalarMulFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::binary_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
helper::one_of_sigs2(
|
||||
vec![ConcreteDataType::float64_datatype()],
|
||||
vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::binary_datatype(),
|
||||
],
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 2,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly two, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
let arg0 = &columns[0];
|
||||
let arg1 = &columns[1];
|
||||
|
||||
let len = arg0.len();
|
||||
let mut result = BinaryVectorBuilder::with_capacity(len);
|
||||
if len == 0 {
|
||||
return Ok(result.to_vector());
|
||||
}
|
||||
|
||||
let arg1_const = as_veclit_if_const(arg1)?;
|
||||
|
||||
for i in 0..len {
|
||||
let arg0 = arg0.get(i).as_f64_lossy();
|
||||
let Some(arg0) = arg0 else {
|
||||
result.push_null();
|
||||
continue;
|
||||
};
|
||||
|
||||
let arg1 = match arg1_const.as_ref() {
|
||||
Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())),
|
||||
None => as_veclit(arg1.get_ref(i))?,
|
||||
};
|
||||
let Some(arg1) = arg1 else {
|
||||
result.push_null();
|
||||
continue;
|
||||
};
|
||||
|
||||
let vec = DVectorView::from_slice(&arg1, arg1.len());
|
||||
let vec_res = vec.scale(arg0 as _);
|
||||
|
||||
let veclit = vec_res.as_slice();
|
||||
let binlit = veclit_to_binlit(veclit);
|
||||
result.push(Some(&binlit));
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ScalarMulFunction {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", NAME.to_ascii_uppercase())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::vectors::{Float32Vector, StringVector};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_scalar_mul() {
|
||||
let func = ScalarMulFunction;
|
||||
|
||||
let input0 = Arc::new(Float32Vector::from(vec![
|
||||
Some(2.0),
|
||||
Some(-0.5),
|
||||
None,
|
||||
Some(3.0),
|
||||
]));
|
||||
let input1 = Arc::new(StringVector::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
Some("[8.0,10.0,12.0]".to_string()),
|
||||
Some("[7.0,8.0,9.0]".to_string()),
|
||||
None,
|
||||
]));
|
||||
|
||||
let result = func
|
||||
.eval(FunctionContext::default(), &[input0, input1])
|
||||
.unwrap();
|
||||
|
||||
let result = result.as_ref();
|
||||
assert_eq!(result.len(), 4);
|
||||
assert_eq!(
|
||||
result.get_ref(0).as_binary().unwrap(),
|
||||
Some(veclit_to_binlit(&[2.0, 4.0, 6.0]).as_slice())
|
||||
);
|
||||
assert_eq!(
|
||||
result.get_ref(1).as_binary().unwrap(),
|
||||
Some(veclit_to_binlit(&[-4.0, -5.0, -6.0]).as_slice())
|
||||
);
|
||||
assert!(result.get_ref(2).is_null());
|
||||
assert!(result.get_ref(3).is_null());
|
||||
}
|
||||
}
|
||||
@@ -49,14 +49,6 @@ impl TableRoute {
|
||||
TableRoute::Logical(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns [LogicalTableRouteValue] reference if it's [TableRoute::Logical]; Otherwise it returns [None].
|
||||
pub fn as_logical_table_route_ref(&self) -> Option<&Arc<LogicalTableRouteValue>> {
|
||||
match self {
|
||||
TableRoute::Physical(_) => None,
|
||||
TableRoute::Logical(table_route) => Some(table_route),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// [TableRouteCache] caches the [TableId] to [TableRoute] mapping.
|
||||
|
||||
@@ -290,28 +290,6 @@ impl TableRouteManager {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the [`PhysicalTableRouteValue`] in the first level,
|
||||
/// It won't follow the [`LogicalTableRouteValue`] to find the next level [`PhysicalTableRouteValue`].
|
||||
///
|
||||
/// Returns an error if the first level value is not a [`PhysicalTableRouteValue`].
|
||||
pub async fn try_get_physical_table_route(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
) -> Result<Option<PhysicalTableRouteValue>> {
|
||||
match self.storage.get(table_id).await? {
|
||||
Some(route) => {
|
||||
ensure!(
|
||||
route.is_physical(),
|
||||
UnexpectedLogicalRouteTableSnafu {
|
||||
err_msg: format!("{route:?} is a non-physical TableRouteValue.")
|
||||
}
|
||||
);
|
||||
Ok(Some(route.into_physical_table_route()))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the [TableId] recursively.
|
||||
///
|
||||
/// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if:
|
||||
@@ -569,37 +547,6 @@ impl TableRouteStorage {
|
||||
.transpose()
|
||||
}
|
||||
|
||||
/// Returns the physical `DeserializedValueWithBytes<TableRouteValue>` recursively.
|
||||
///
|
||||
/// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if:
|
||||
/// - the physical table(`logical_or_physical_table_id`) does not exist
|
||||
/// - the corresponding physical table of the logical table(`logical_or_physical_table_id`) does not exist.
|
||||
pub async fn get_physical_table_route_with_raw_bytes(
|
||||
&self,
|
||||
logical_or_physical_table_id: TableId,
|
||||
) -> Result<(TableId, DeserializedValueWithBytes<TableRouteValue>)> {
|
||||
let table_route = self
|
||||
.get_with_raw_bytes(logical_or_physical_table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: logical_or_physical_table_id,
|
||||
})?;
|
||||
|
||||
match table_route.get_inner_ref() {
|
||||
TableRouteValue::Physical(_) => Ok((logical_or_physical_table_id, table_route)),
|
||||
TableRouteValue::Logical(x) => {
|
||||
let physical_table_id = x.physical_table_id();
|
||||
let physical_table_route = self
|
||||
.get_with_raw_bytes(physical_table_id)
|
||||
.await?
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: physical_table_id,
|
||||
})?;
|
||||
Ok((physical_table_id, physical_table_route))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns batch of [`TableRouteValue`] that respects the order of `table_ids`.
|
||||
pub async fn batch_get(&self, table_ids: &[TableId]) -> Result<Vec<Option<TableRouteValue>>> {
|
||||
let mut table_routes = self.batch_get_inner(table_ids).await?;
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::info;
|
||||
use etcd_client::{
|
||||
Client, DeleteOptions, GetOptions, PutOptions, Txn, TxnOp, TxnOpResponse, TxnResponse,
|
||||
};
|
||||
@@ -55,6 +56,7 @@ impl EtcdStore {
|
||||
}
|
||||
|
||||
pub fn with_etcd_client(client: Client, max_txn_ops: usize) -> KvBackendRef {
|
||||
info!("Connected to etcd");
|
||||
Arc::new(Self {
|
||||
client,
|
||||
max_txn_ops,
|
||||
|
||||
@@ -89,39 +89,6 @@ pub fn convert_to_region_leader_map(region_routes: &[RegionRoute]) -> HashMap<Re
|
||||
.collect::<HashMap<_, _>>()
|
||||
}
|
||||
|
||||
/// Returns the HashMap<[RegionNumber], HashSet<DatanodeId>>
|
||||
pub fn convert_to_region_peer_map(
|
||||
region_routes: &[RegionRoute],
|
||||
) -> HashMap<RegionNumber, HashSet<u64>> {
|
||||
region_routes
|
||||
.iter()
|
||||
.map(|x| {
|
||||
let set = x
|
||||
.follower_peers
|
||||
.iter()
|
||||
.map(|p| p.id)
|
||||
.chain(x.leader_peer.as_ref().map(|p| p.id))
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
(x.region.id.region_number(), set)
|
||||
})
|
||||
.collect::<HashMap<_, _>>()
|
||||
}
|
||||
|
||||
/// Returns the HashMap<[RegionNumber], [LeaderState]>.
|
||||
pub fn convert_to_region_leader_state_map(
|
||||
region_routes: &[RegionRoute],
|
||||
) -> HashMap<RegionNumber, LeaderState> {
|
||||
region_routes
|
||||
.iter()
|
||||
.filter_map(|x| {
|
||||
x.leader_state
|
||||
.as_ref()
|
||||
.map(|state| (x.region.id.region_number(), *state))
|
||||
})
|
||||
.collect::<HashMap<_, _>>()
|
||||
}
|
||||
|
||||
pub fn find_region_leader(
|
||||
region_routes: &[RegionRoute],
|
||||
region_number: RegionNumber,
|
||||
@@ -147,19 +114,6 @@ pub fn find_leader_regions(region_routes: &[RegionRoute], datanode: &Peer) -> Ve
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn extract_all_peers(region_routes: &[RegionRoute]) -> Vec<Peer> {
|
||||
let mut peers = region_routes
|
||||
.iter()
|
||||
.flat_map(|x| x.leader_peer.iter().chain(x.follower_peers.iter()))
|
||||
.collect::<HashSet<_>>()
|
||||
.into_iter()
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
peers.sort_by_key(|x| x.id);
|
||||
|
||||
peers
|
||||
}
|
||||
|
||||
impl TableRoute {
|
||||
pub fn new(table: Table, region_routes: Vec<RegionRoute>) -> Self {
|
||||
let region_leaders = region_routes
|
||||
|
||||
@@ -544,7 +544,7 @@ mod tests {
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use futures_util::future::BoxFuture;
|
||||
use futures_util::FutureExt;
|
||||
use object_store::ObjectStore;
|
||||
use object_store::{EntryMode, ObjectStore};
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use super::*;
|
||||
@@ -578,7 +578,11 @@ mod tests {
|
||||
) {
|
||||
let dir = proc_path!(procedure_store, "{procedure_id}/");
|
||||
let lister = object_store.list(&dir).await.unwrap();
|
||||
let mut files_in_dir: Vec<_> = lister.into_iter().map(|de| de.name().to_string()).collect();
|
||||
let mut files_in_dir: Vec<_> = lister
|
||||
.into_iter()
|
||||
.filter(|x| x.metadata().mode() == EntryMode::FILE)
|
||||
.map(|de| de.name().to_string())
|
||||
.collect();
|
||||
files_in_dir.sort_unstable();
|
||||
assert_eq!(files, files_in_dir);
|
||||
}
|
||||
|
||||
@@ -26,7 +26,6 @@ use std::sync::Arc;
|
||||
|
||||
use adapter::RecordBatchMetrics;
|
||||
use arc_swap::ArcSwapOption;
|
||||
use datafusion::physical_plan::memory::MemoryStream;
|
||||
pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::arrow::compute::SortOptions;
|
||||
pub use datatypes::arrow::record_batch::RecordBatch as DfRecordBatch;
|
||||
@@ -170,19 +169,6 @@ impl RecordBatches {
|
||||
index: 0,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn into_df_stream(self) -> DfSendableRecordBatchStream {
|
||||
let df_record_batches = self
|
||||
.batches
|
||||
.into_iter()
|
||||
.map(|batch| batch.into_df_record_batch())
|
||||
.collect();
|
||||
// unwrap safety: `MemoryStream::try_new` won't fail
|
||||
Box::pin(
|
||||
MemoryStream::try_new(df_record_batches, self.schema.arrow_schema().clone(), None)
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoIterator for RecordBatches {
|
||||
|
||||
@@ -35,8 +35,6 @@ serde_json.workspace = true
|
||||
snafu.workspace = true
|
||||
tempfile.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-metrics = "0.3"
|
||||
tokio-metrics-collector = { git = "https://github.com/MichaelScofield/tokio-metrics-collector.git", rev = "89d692d5753d28564a7aac73c6ac5aba22243ba0" }
|
||||
tokio-util.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
@@ -29,10 +29,6 @@ pub fn format_utc_datetime(utc: &NaiveDateTime, pattern: &str) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn system_datetime_to_utc(local: &NaiveDateTime) -> LocalResult<NaiveDateTime> {
|
||||
datetime_to_utc(local, get_timezone(None))
|
||||
}
|
||||
|
||||
/// Cast a [`NaiveDateTime`] with the given timezone.
|
||||
pub fn datetime_to_utc(
|
||||
datetime: &NaiveDateTime,
|
||||
|
||||
@@ -49,9 +49,9 @@ impl Default for RaftEngineConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
dir: None,
|
||||
file_size: ReadableSize::mb(256),
|
||||
purge_threshold: ReadableSize::gb(4),
|
||||
purge_interval: Duration::from_secs(600),
|
||||
file_size: ReadableSize::mb(128),
|
||||
purge_threshold: ReadableSize::gb(1),
|
||||
purge_interval: Duration::from_secs(60),
|
||||
read_batch_size: 128,
|
||||
sync_write: false,
|
||||
enable_log_recycle: true,
|
||||
|
||||
@@ -193,6 +193,14 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build http client"))]
|
||||
BuildHttpClient {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: reqwest::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Missing required field: {}", name))]
|
||||
MissingRequiredField {
|
||||
name: String,
|
||||
@@ -406,9 +414,10 @@ impl ErrorExt for Error {
|
||||
| MissingKvBackend { .. }
|
||||
| TomlFormat { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
PayloadNotExist { .. } | Unexpected { .. } | WatchAsyncTaskChange { .. } => {
|
||||
StatusCode::Unexpected
|
||||
}
|
||||
PayloadNotExist { .. }
|
||||
| Unexpected { .. }
|
||||
| WatchAsyncTaskChange { .. }
|
||||
| BuildHttpClient { .. } => StatusCode::Unexpected,
|
||||
|
||||
AsyncTaskExecute { source, .. } => source.status_code(),
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ use object_store::{Access, Error, HttpClient, ObjectStore, ObjectStoreBuilder, O
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::config::{HttpClientConfig, ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE};
|
||||
use crate::error::{self, CreateDirSnafu, Result};
|
||||
use crate::error::{self, BuildHttpClientSnafu, CreateDirSnafu, Result};
|
||||
|
||||
pub(crate) async fn new_raw_object_store(
|
||||
store: &ObjectStoreConfig,
|
||||
@@ -236,7 +236,8 @@ pub(crate) fn build_http_client(config: &HttpClientConfig) -> Result<HttpClient>
|
||||
builder.timeout(config.timeout)
|
||||
};
|
||||
|
||||
HttpClient::build(http_builder).context(error::InitBackendSnafu)
|
||||
let client = http_builder.build().context(BuildHttpClientSnafu)?;
|
||||
Ok(HttpClient::with(client))
|
||||
}
|
||||
struct PrintDetailedError;
|
||||
|
||||
|
||||
@@ -370,6 +370,51 @@ impl ConcreteDataType {
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the datatype name in postgres type system
|
||||
pub fn postgres_datatype_name(&self) -> &'static str {
|
||||
match self {
|
||||
&ConcreteDataType::Null(_) => "UNKNOWN",
|
||||
&ConcreteDataType::Boolean(_) => "BOOL",
|
||||
&ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR",
|
||||
&ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2",
|
||||
&ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4",
|
||||
&ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8",
|
||||
&ConcreteDataType::Float32(_) => "FLOAT4",
|
||||
&ConcreteDataType::Float64(_) => "FLOAT8",
|
||||
&ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
|
||||
&ConcreteDataType::String(_) => "VARCHAR",
|
||||
&ConcreteDataType::Date(_) => "DATE",
|
||||
&ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
|
||||
&ConcreteDataType::Time(_) => "TIME",
|
||||
&ConcreteDataType::Interval(_) => "INTERVAL",
|
||||
&ConcreteDataType::Decimal128(_) => "NUMERIC",
|
||||
&ConcreteDataType::Json(_) => "JSON",
|
||||
ConcreteDataType::List(list) => match list.item_type() {
|
||||
&ConcreteDataType::Null(_) => "UNKNOWN",
|
||||
&ConcreteDataType::Boolean(_) => "_BOOL",
|
||||
&ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR",
|
||||
&ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2",
|
||||
&ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4",
|
||||
&ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8",
|
||||
&ConcreteDataType::Float32(_) => "_FLOAT4",
|
||||
&ConcreteDataType::Float64(_) => "_FLOAT8",
|
||||
&ConcreteDataType::Binary(_) => "_BYTEA",
|
||||
&ConcreteDataType::String(_) => "_VARCHAR",
|
||||
&ConcreteDataType::Date(_) => "_DATE",
|
||||
&ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
|
||||
&ConcreteDataType::Time(_) => "_TIME",
|
||||
&ConcreteDataType::Interval(_) => "_INTERVAL",
|
||||
&ConcreteDataType::Decimal128(_) => "_NUMERIC",
|
||||
&ConcreteDataType::Json(_) => "_JSON",
|
||||
&ConcreteDataType::Duration(_)
|
||||
| &ConcreteDataType::Dictionary(_)
|
||||
| &ConcreteDataType::Vector(_)
|
||||
| &ConcreteDataType::List(_) => "UNKNOWN",
|
||||
},
|
||||
&ConcreteDataType::Duration(_) | &ConcreteDataType::Dictionary(_) => "UNKNOWN",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&ConcreteDataType> for ConcreteDataType {
|
||||
|
||||
@@ -232,6 +232,12 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Invalid skipping index option: {}", msg))]
|
||||
InvalidSkippingIndexOption {
|
||||
msg: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
@@ -252,7 +258,8 @@ impl ErrorExt for Error {
|
||||
| InvalidPrecisionOrScale { .. }
|
||||
| InvalidJson { .. }
|
||||
| InvalidVector { .. }
|
||||
| InvalidFulltextOption { .. } => StatusCode::InvalidArguments,
|
||||
| InvalidFulltextOption { .. }
|
||||
| InvalidSkippingIndexOption { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
ValueExceedsPrecision { .. }
|
||||
| CastType { .. }
|
||||
|
||||
@@ -28,10 +28,11 @@ use snafu::{ensure, ResultExt};
|
||||
use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result};
|
||||
use crate::prelude::ConcreteDataType;
|
||||
pub use crate::schema::column_schema::{
|
||||
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata,
|
||||
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, SkippingIndexOptions,
|
||||
COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
|
||||
TIME_INDEX_KEY,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
|
||||
SKIPPING_INDEX_KEY, TIME_INDEX_KEY,
|
||||
};
|
||||
pub use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
pub use crate::schema::raw::RawSchema;
|
||||
|
||||
@@ -39,12 +39,20 @@ const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
|
||||
pub const FULLTEXT_KEY: &str = "greptime:fulltext";
|
||||
/// Key used to store whether the column has inverted index in arrow field's metadata.
|
||||
pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index";
|
||||
/// Key used to store skipping index options in arrow field's metadata.
|
||||
pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index";
|
||||
|
||||
/// Keys used in fulltext options
|
||||
pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive";
|
||||
|
||||
/// Keys used in SKIPPING index options
|
||||
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity";
|
||||
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type";
|
||||
|
||||
pub const DEFAULT_GRANULARITY: u32 = 10240;
|
||||
|
||||
/// Schema of a column, used as an immutable struct.
|
||||
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ColumnSchema {
|
||||
@@ -156,6 +164,10 @@ impl ColumnSchema {
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn has_fulltext_index_key(&self) -> bool {
|
||||
self.metadata.contains_key(FULLTEXT_KEY)
|
||||
}
|
||||
|
||||
pub fn has_inverted_index_key(&self) -> bool {
|
||||
self.metadata.contains_key(INVERTED_INDEX_KEY)
|
||||
}
|
||||
@@ -298,6 +310,34 @@ impl ColumnSchema {
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Retrieves the skipping index options for the column.
|
||||
pub fn skipping_index_options(&self) -> Result<Option<SkippingIndexOptions>> {
|
||||
match self.metadata.get(SKIPPING_INDEX_KEY) {
|
||||
None => Ok(None),
|
||||
Some(json) => {
|
||||
let options =
|
||||
serde_json::from_str(json).context(error::DeserializeSnafu { json })?;
|
||||
Ok(Some(options))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_skipping_options(mut self, options: SkippingIndexOptions) -> Result<Self> {
|
||||
self.metadata.insert(
|
||||
SKIPPING_INDEX_KEY.to_string(),
|
||||
serde_json::to_string(&options).context(error::SerializeSnafu)?,
|
||||
);
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
pub fn set_skipping_options(&mut self, options: &SkippingIndexOptions) -> Result<()> {
|
||||
self.metadata.insert(
|
||||
SKIPPING_INDEX_KEY.to_string(),
|
||||
serde_json::to_string(options).context(error::SerializeSnafu)?,
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Column extended type set in column schema's metadata.
|
||||
@@ -495,6 +535,76 @@ impl fmt::Display for FulltextAnalyzer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Skipping options for a column.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub struct SkippingIndexOptions {
|
||||
/// The granularity of the skip index.
|
||||
pub granularity: u32,
|
||||
/// The type of the skip index.
|
||||
#[serde(default)]
|
||||
pub index_type: SkipIndexType,
|
||||
}
|
||||
|
||||
impl fmt::Display for SkippingIndexOptions {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "granularity={}", self.granularity)?;
|
||||
write!(f, ", index_type={}", self.index_type)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Skip index types.
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
|
||||
pub enum SkipIndexType {
|
||||
#[default]
|
||||
BloomFilter,
|
||||
}
|
||||
|
||||
impl fmt::Display for SkipIndexType {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
SkipIndexType::BloomFilter => write!(f, "BLOOM"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<HashMap<String, String>> for SkippingIndexOptions {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(options: HashMap<String, String>) -> Result<Self> {
|
||||
// Parse granularity with default value 1
|
||||
let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) {
|
||||
Some(value) => value.parse::<u32>().map_err(|_| {
|
||||
error::InvalidSkippingIndexOptionSnafu {
|
||||
msg: format!("Invalid granularity: {value}, expected: positive integer"),
|
||||
}
|
||||
.build()
|
||||
})?,
|
||||
None => DEFAULT_GRANULARITY,
|
||||
};
|
||||
|
||||
// Parse index type with default value BloomFilter
|
||||
let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) {
|
||||
Some(typ) => match typ.to_ascii_uppercase().as_str() {
|
||||
"BLOOM" => SkipIndexType::BloomFilter,
|
||||
_ => {
|
||||
return error::InvalidSkippingIndexOptionSnafu {
|
||||
msg: format!("Invalid index type: {typ}, expected: 'BLOOM'"),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
},
|
||||
None => SkipIndexType::default(),
|
||||
};
|
||||
|
||||
Ok(SkippingIndexOptions {
|
||||
granularity,
|
||||
index_type,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -38,5 +38,4 @@ tokio.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
api.workspace = true
|
||||
common-procedure-test.workspace = true
|
||||
common-test-util.workspace = true
|
||||
|
||||
@@ -46,7 +46,7 @@ impl FileRegionManifest {
|
||||
pub async fn store(&self, region_dir: &str, object_store: &ObjectStore) -> Result<()> {
|
||||
let path = ®ion_manifest_path(region_dir);
|
||||
let exist = object_store
|
||||
.is_exist(path)
|
||||
.exists(path)
|
||||
.await
|
||||
.context(CheckObjectSnafu { path })?;
|
||||
ensure!(!exist, ManifestExistsSnafu { path });
|
||||
|
||||
@@ -130,7 +130,7 @@ mod tests {
|
||||
assert_eq!(region.metadata.primary_key, vec![1]);
|
||||
|
||||
assert!(object_store
|
||||
.is_exist("create_region_dir/manifest/_file_manifest")
|
||||
.exists("create_region_dir/manifest/_file_manifest")
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
@@ -198,13 +198,13 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
assert!(object_store
|
||||
.is_exist("drop_region_dir/manifest/_file_manifest")
|
||||
.exists("drop_region_dir/manifest/_file_manifest")
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
FileRegion::drop(®ion, &object_store).await.unwrap();
|
||||
assert!(!object_store
|
||||
.is_exist("drop_region_dir/manifest/_file_manifest")
|
||||
.exists("drop_region_dir/manifest/_file_manifest")
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
|
||||
@@ -47,7 +47,6 @@ hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "m
|
||||
itertools.workspace = true
|
||||
lazy_static.workspace = true
|
||||
meta-client.workspace = true
|
||||
minstant = "0.1.7"
|
||||
nom = "7.1.3"
|
||||
num-traits = "0.2"
|
||||
operator.workspace = true
|
||||
|
||||
@@ -206,28 +206,6 @@ impl DiffRequest {
|
||||
}
|
||||
}
|
||||
|
||||
/// iterate through the diff row and form continuous diff row with same diff type
|
||||
pub fn diff_row_to_request(rows: Vec<DiffRow>) -> Vec<DiffRequest> {
|
||||
let mut reqs = Vec::new();
|
||||
for (row, ts, diff) in rows {
|
||||
let last = reqs.last_mut();
|
||||
match (last, diff) {
|
||||
(Some(DiffRequest::Insert(rows)), 1) => {
|
||||
rows.push((row, ts));
|
||||
}
|
||||
(Some(DiffRequest::Insert(_)), -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])),
|
||||
(Some(DiffRequest::Delete(rows)), -1) => {
|
||||
rows.push((row, ts));
|
||||
}
|
||||
(Some(DiffRequest::Delete(_)), 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])),
|
||||
(None, 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])),
|
||||
(None, -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
reqs
|
||||
}
|
||||
|
||||
pub fn batches_to_rows_req(batches: Vec<Batch>) -> Result<Vec<DiffRequest>, Error> {
|
||||
let mut reqs = Vec::new();
|
||||
for batch in batches {
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
//! Source and Sink for the dataflow
|
||||
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use common_telemetry::{debug, trace};
|
||||
use hydroflow::scheduled::graph_ext::GraphExt;
|
||||
@@ -28,7 +28,7 @@ use crate::compute::types::{Arranged, Collection, CollectionBundle, Toff};
|
||||
use crate::error::{Error, PlanSnafu};
|
||||
use crate::expr::error::InternalSnafu;
|
||||
use crate::expr::{Batch, EvalError};
|
||||
use crate::repr::{DiffRow, Row, BROADCAST_CAP};
|
||||
use crate::repr::{DiffRow, Row};
|
||||
|
||||
#[allow(clippy::mutable_key_type)]
|
||||
impl Context<'_, '_> {
|
||||
@@ -242,44 +242,4 @@ impl Context<'_, '_> {
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Render a sink which send updates to broadcast channel, have internal buffer in case broadcast channel is full
|
||||
pub fn render_sink(&mut self, bundle: CollectionBundle, sender: broadcast::Sender<DiffRow>) {
|
||||
let CollectionBundle {
|
||||
collection,
|
||||
arranged: _,
|
||||
} = bundle;
|
||||
let mut buf = VecDeque::with_capacity(1000);
|
||||
|
||||
let schd = self.compute_state.get_scheduler();
|
||||
let inner_schd = schd.clone();
|
||||
let now = self.compute_state.current_time_ref();
|
||||
|
||||
let sink = self
|
||||
.df
|
||||
.add_subgraph_sink("Sink", collection.into_inner(), move |_ctx, recv| {
|
||||
let data = recv.take_inner();
|
||||
buf.extend(data.into_iter().flat_map(|i| i.into_iter()));
|
||||
if sender.len() >= BROADCAST_CAP {
|
||||
return;
|
||||
} else {
|
||||
while let Some(row) = buf.pop_front() {
|
||||
// if the sender is full, stop sending
|
||||
if sender.len() >= BROADCAST_CAP {
|
||||
break;
|
||||
}
|
||||
// TODO(discord9): handling tokio broadcast error
|
||||
let _ = sender.send(row);
|
||||
}
|
||||
}
|
||||
|
||||
// if buffer is not empty, schedule the next run at next tick
|
||||
// so the buffer can be drained as soon as possible
|
||||
if !buf.is_empty() {
|
||||
inner_schd.schedule_at(*now.borrow() + 1);
|
||||
}
|
||||
});
|
||||
|
||||
schd.set_cur_subgraph(sink);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -82,22 +82,6 @@ impl Arranged {
|
||||
writer: self.writer.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Copy the full arrangement, including the future and the current updates.
|
||||
///
|
||||
/// Internally `Rc-ed` so it's cheap to copy
|
||||
pub fn try_copy_full(&self) -> Option<Self> {
|
||||
self.arrangement
|
||||
.clone_full_arrange()
|
||||
.map(|arrangement| Arranged {
|
||||
arrangement,
|
||||
readers: self.readers.clone(),
|
||||
writer: self.writer.clone(),
|
||||
})
|
||||
}
|
||||
pub fn add_reader(&self, id: SubgraphId) {
|
||||
self.readers.borrow_mut().push(id)
|
||||
}
|
||||
}
|
||||
|
||||
/// A bundle of the various ways a collection can be represented.
|
||||
|
||||
@@ -21,11 +21,6 @@ use datafusion_common::DataFusionError;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use snafu::{Location, Snafu};
|
||||
|
||||
fn is_send_sync() {
|
||||
fn check<T: Send + Sync>() {}
|
||||
check::<EvalError>();
|
||||
}
|
||||
|
||||
/// EvalError is about errors happen on columnar evaluation
|
||||
///
|
||||
/// TODO(discord9): add detailed location of column/operator (instead of code) to errors to help identify the related column
|
||||
|
||||
@@ -359,14 +359,6 @@ impl MapFilterProject {
|
||||
)
|
||||
}
|
||||
|
||||
/// Convert the `MapFilterProject` into a staged evaluation plan.
|
||||
///
|
||||
/// The main behavior is extract temporal predicates, which cannot be evaluated
|
||||
/// using the standard machinery.
|
||||
pub fn into_plan(self) -> Result<MfpPlan, Error> {
|
||||
MfpPlan::create_from(self)
|
||||
}
|
||||
|
||||
/// Lists input columns whose values are used in outputs.
|
||||
///
|
||||
/// It is entirely appropriate to determine the demand of an instance
|
||||
@@ -602,26 +594,6 @@ impl SafeMfpPlan {
|
||||
}
|
||||
}
|
||||
|
||||
/// A version of `evaluate` which produces an iterator over `Datum`
|
||||
/// as output.
|
||||
///
|
||||
/// This version can be useful when one wants to capture the resulting
|
||||
/// datums without packing and then unpacking a row.
|
||||
#[inline(always)]
|
||||
pub fn evaluate_iter<'a>(
|
||||
&'a self,
|
||||
datums: &'a mut Vec<Value>,
|
||||
) -> Result<Option<impl Iterator<Item = Value> + 'a>, EvalError> {
|
||||
let passed_predicates = self.evaluate_inner(datums)?;
|
||||
if !passed_predicates {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some(
|
||||
self.mfp.projection.iter().map(move |i| datums[*i].clone()),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Populates `values` with `self.expressions` and tests `self.predicates`.
|
||||
///
|
||||
/// This does not apply `self.projection`, which is up to the calling method.
|
||||
|
||||
@@ -18,10 +18,8 @@
|
||||
mod join;
|
||||
mod reduce;
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::expr::{GlobalId, Id, LocalId, MapFilterProject, SafeMfpPlan, TypedExpr};
|
||||
use crate::expr::{Id, LocalId, MapFilterProject, SafeMfpPlan, TypedExpr};
|
||||
use crate::plan::join::JoinPlan;
|
||||
pub(crate) use crate::plan::reduce::{AccumulablePlan, AggrWithIndex, KeyValPlan, ReducePlan};
|
||||
use crate::repr::{DiffRow, RelationDesc};
|
||||
@@ -186,48 +184,6 @@ pub enum Plan {
|
||||
},
|
||||
}
|
||||
|
||||
impl Plan {
|
||||
/// Find all the used collection in the plan
|
||||
pub fn find_used_collection(&self) -> BTreeSet<GlobalId> {
|
||||
fn recur_find_use(plan: &Plan, used: &mut BTreeSet<GlobalId>) {
|
||||
match plan {
|
||||
Plan::Get { id } => {
|
||||
match id {
|
||||
Id::Local(_) => (),
|
||||
Id::Global(g) => {
|
||||
used.insert(*g);
|
||||
}
|
||||
};
|
||||
}
|
||||
Plan::Let { value, body, .. } => {
|
||||
recur_find_use(&value.plan, used);
|
||||
recur_find_use(&body.plan, used);
|
||||
}
|
||||
Plan::Mfp { input, .. } => {
|
||||
recur_find_use(&input.plan, used);
|
||||
}
|
||||
Plan::Reduce { input, .. } => {
|
||||
recur_find_use(&input.plan, used);
|
||||
}
|
||||
Plan::Join { inputs, .. } => {
|
||||
for input in inputs {
|
||||
recur_find_use(&input.plan, used);
|
||||
}
|
||||
}
|
||||
Plan::Union { inputs, .. } => {
|
||||
for input in inputs {
|
||||
recur_find_use(&input.plan, used);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
let mut ret = Default::default();
|
||||
recur_find_use(self, &mut ret);
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
impl Plan {
|
||||
pub fn with_types(self, schema: RelationDesc) -> TypedPlan {
|
||||
TypedPlan { schema, plan: self }
|
||||
|
||||
@@ -46,14 +46,6 @@ impl Key {
|
||||
self.column_indices.push(col);
|
||||
}
|
||||
|
||||
/// Add columns to Key
|
||||
pub fn add_cols<I>(&mut self, cols: I)
|
||||
where
|
||||
I: IntoIterator<Item = usize>,
|
||||
{
|
||||
self.column_indices.extend(cols);
|
||||
}
|
||||
|
||||
/// Remove a column from Key
|
||||
pub fn remove_col(&mut self, col: usize) {
|
||||
self.column_indices.retain(|&r| r != col);
|
||||
|
||||
@@ -25,7 +25,6 @@ common-catalog.workspace = true
|
||||
common-config.workspace = true
|
||||
common-datasource.workspace = true
|
||||
common-error.workspace = true
|
||||
common-frontend.workspace = true
|
||||
common-function.workspace = true
|
||||
common-grpc.workspace = true
|
||||
common-macro.workspace = true
|
||||
@@ -71,7 +70,6 @@ common-test-util.workspace = true
|
||||
datanode.workspace = true
|
||||
datatypes.workspace = true
|
||||
futures = "0.3"
|
||||
meta-srv = { workspace = true, features = ["mock"] }
|
||||
serde_json.workspace = true
|
||||
strfmt = "0.2"
|
||||
tower.workspace = true
|
||||
|
||||
@@ -19,14 +19,16 @@ use async_trait::async_trait;
|
||||
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
|
||||
use client::Output;
|
||||
use common_error::ext::BoxedError;
|
||||
use pipeline::pipeline_operator::PipelineOperator;
|
||||
use pipeline::{GreptimeTransformer, Pipeline, PipelineInfo, PipelineVersion};
|
||||
use servers::error::{
|
||||
AuthSnafu, Error as ServerError, ExecuteGrpcRequestSnafu, PipelineSnafu, Result as ServerResult,
|
||||
};
|
||||
use servers::interceptor::{LogIngestInterceptor, LogIngestInterceptorRef};
|
||||
use servers::query_handler::PipelineHandler;
|
||||
use session::context::QueryContextRef;
|
||||
use session::context::{QueryContext, QueryContextRef};
|
||||
use snafu::ResultExt;
|
||||
use table::Table;
|
||||
|
||||
use crate::instance::Instance;
|
||||
|
||||
@@ -84,6 +86,22 @@ impl PipelineHandler for Instance {
|
||||
.await
|
||||
.context(PipelineSnafu)
|
||||
}
|
||||
|
||||
async fn get_table(
|
||||
&self,
|
||||
table: &str,
|
||||
query_ctx: &QueryContext,
|
||||
) -> std::result::Result<Option<Arc<Table>>, catalog::error::Error> {
|
||||
let catalog = query_ctx.current_catalog();
|
||||
let schema = query_ctx.current_schema();
|
||||
self.catalog_manager
|
||||
.table(catalog, &schema, table, None)
|
||||
.await
|
||||
}
|
||||
|
||||
fn build_pipeline(&self, pipeline: &str) -> ServerResult<Pipeline<GreptimeTransformer>> {
|
||||
PipelineOperator::build_pipeline(pipeline).context(PipelineSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
|
||||
@@ -17,6 +17,7 @@ common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-runtime.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
fastbloom = "0.8"
|
||||
fst.workspace = true
|
||||
futures.workspace = true
|
||||
greptime-proto.workspace = true
|
||||
@@ -26,6 +27,7 @@ prost.workspace = true
|
||||
regex.workspace = true
|
||||
regex-automata.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
snafu.workspace = true
|
||||
tantivy = { version = "0.22", features = ["zstd-compression"] }
|
||||
tantivy-jieba = "0.11.0"
|
||||
|
||||
53
src/index/src/bloom_filter.rs
Normal file
53
src/index/src/bloom_filter.rs
Normal file
@@ -0,0 +1,53 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub mod creator;
|
||||
mod error;
|
||||
|
||||
pub type Bytes = Vec<u8>;
|
||||
pub type BytesRef<'a> = &'a [u8];
|
||||
|
||||
/// The Meta information of the bloom filter stored in the file.
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
pub struct BloomFilterMeta {
|
||||
/// The number of rows per segment.
|
||||
pub rows_per_segment: usize,
|
||||
|
||||
/// The number of segments.
|
||||
pub seg_count: usize,
|
||||
|
||||
/// The number of total rows.
|
||||
pub row_count: usize,
|
||||
|
||||
/// The size of the bloom filter excluding the meta information.
|
||||
pub bloom_filter_segments_size: usize,
|
||||
|
||||
/// Offset and size of bloom filters in the file.
|
||||
pub bloom_filter_segments: Vec<BloomFilterSegmentLocation>,
|
||||
}
|
||||
|
||||
/// The location of the bloom filter segment in the file.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct BloomFilterSegmentLocation {
|
||||
/// The offset of the bloom filter segment in the file.
|
||||
pub offset: u64,
|
||||
|
||||
/// The size of the bloom filter segment in the file.
|
||||
pub size: u64,
|
||||
|
||||
/// The number of elements in the bloom filter segment.
|
||||
pub elem_count: usize,
|
||||
}
|
||||
294
src/index/src/bloom_filter/creator.rs
Normal file
294
src/index/src/bloom_filter/creator.rs
Normal file
@@ -0,0 +1,294 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use fastbloom::BloomFilter;
|
||||
use futures::{AsyncWrite, AsyncWriteExt};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use super::error::{IoSnafu, SerdeJsonSnafu};
|
||||
use crate::bloom_filter::error::Result;
|
||||
use crate::bloom_filter::{BloomFilterMeta, BloomFilterSegmentLocation, Bytes};
|
||||
|
||||
/// The seed used for the Bloom filter.
|
||||
const SEED: u128 = 42;
|
||||
|
||||
/// The false positive rate of the Bloom filter.
|
||||
const FALSE_POSITIVE_RATE: f64 = 0.01;
|
||||
|
||||
/// `BloomFilterCreator` is responsible for creating and managing bloom filters
|
||||
/// for a set of elements. It divides the rows into segments and creates
|
||||
/// bloom filters for each segment.
|
||||
///
|
||||
/// # Format
|
||||
///
|
||||
/// The bloom filter creator writes the following format to the writer:
|
||||
///
|
||||
/// ```text
|
||||
/// +--------------------+--------------------+-----+----------------------+----------------------+
|
||||
/// | Bloom filter 0 | Bloom filter 1 | ... | BloomFilterMeta | Meta size |
|
||||
/// +--------------------+--------------------+-----+----------------------+----------------------+
|
||||
/// |<- bytes (size 0) ->|<- bytes (size 1) ->| ... |<- json (meta size) ->|<- u32 LE (4 bytes) ->|
|
||||
/// ```
|
||||
///
|
||||
pub struct BloomFilterCreator {
|
||||
/// The number of rows per segment set by the user.
|
||||
rows_per_segment: usize,
|
||||
|
||||
/// The number of rows added to the bloom filter so far.
|
||||
accumulated_row_count: usize,
|
||||
|
||||
/// A set of distinct elements in the current segment.
|
||||
cur_seg_distinct_elems: HashSet<Bytes>,
|
||||
|
||||
/// The memory usage of the current segment's distinct elements.
|
||||
cur_seg_distinct_elems_mem_usage: usize,
|
||||
|
||||
/// Storage for finalized Bloom filters.
|
||||
finalized_bloom_filters: FinalizedBloomFilterStorage,
|
||||
}
|
||||
|
||||
impl BloomFilterCreator {
|
||||
/// Creates a new `BloomFilterCreator` with the specified number of rows per segment.
|
||||
///
|
||||
/// # PANICS
|
||||
///
|
||||
/// `rows_per_segment` <= 0
|
||||
pub fn new(rows_per_segment: usize) -> Self {
|
||||
assert!(
|
||||
rows_per_segment > 0,
|
||||
"rows_per_segment must be greater than 0"
|
||||
);
|
||||
|
||||
Self {
|
||||
rows_per_segment,
|
||||
accumulated_row_count: 0,
|
||||
cur_seg_distinct_elems: HashSet::default(),
|
||||
cur_seg_distinct_elems_mem_usage: 0,
|
||||
finalized_bloom_filters: FinalizedBloomFilterStorage::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds a row of elements to the bloom filter. If the number of accumulated rows
|
||||
/// reaches `rows_per_segment`, it finalizes the current segment.
|
||||
pub fn push_row_elems(&mut self, elems: impl IntoIterator<Item = Bytes>) {
|
||||
self.accumulated_row_count += 1;
|
||||
for elem in elems.into_iter() {
|
||||
let len = elem.len();
|
||||
let is_new = self.cur_seg_distinct_elems.insert(elem);
|
||||
if is_new {
|
||||
self.cur_seg_distinct_elems_mem_usage += len;
|
||||
}
|
||||
}
|
||||
|
||||
if self.accumulated_row_count % self.rows_per_segment == 0 {
|
||||
self.finalize_segment();
|
||||
}
|
||||
}
|
||||
|
||||
/// Finalizes any remaining segments and writes the bloom filters and metadata to the provided writer.
|
||||
pub async fn finish(&mut self, mut writer: impl AsyncWrite + Unpin) -> Result<()> {
|
||||
if !self.cur_seg_distinct_elems.is_empty() {
|
||||
self.finalize_segment();
|
||||
}
|
||||
|
||||
let mut meta = BloomFilterMeta {
|
||||
rows_per_segment: self.rows_per_segment,
|
||||
seg_count: self.finalized_bloom_filters.len(),
|
||||
row_count: self.accumulated_row_count,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut buf = Vec::new();
|
||||
for segment in self.finalized_bloom_filters.drain() {
|
||||
let slice = segment.bloom_filter.as_slice();
|
||||
buf.clear();
|
||||
write_u64_slice(&mut buf, slice);
|
||||
writer.write_all(&buf).await.context(IoSnafu)?;
|
||||
|
||||
let size = buf.len();
|
||||
meta.bloom_filter_segments.push(BloomFilterSegmentLocation {
|
||||
offset: meta.bloom_filter_segments_size as _,
|
||||
size: size as _,
|
||||
elem_count: segment.element_count,
|
||||
});
|
||||
meta.bloom_filter_segments_size += size;
|
||||
}
|
||||
|
||||
let meta_bytes = serde_json::to_vec(&meta).context(SerdeJsonSnafu)?;
|
||||
writer.write_all(&meta_bytes).await.context(IoSnafu)?;
|
||||
|
||||
let meta_size = meta_bytes.len() as u32;
|
||||
writer
|
||||
.write_all(&meta_size.to_le_bytes())
|
||||
.await
|
||||
.context(IoSnafu)?;
|
||||
writer.flush().await.unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the memory usage of the creating bloom filter.
|
||||
pub fn memory_usage(&self) -> usize {
|
||||
self.cur_seg_distinct_elems_mem_usage + self.finalized_bloom_filters.memory_usage()
|
||||
}
|
||||
|
||||
fn finalize_segment(&mut self) {
|
||||
let elem_count = self.cur_seg_distinct_elems.len();
|
||||
self.finalized_bloom_filters
|
||||
.add(self.cur_seg_distinct_elems.drain(), elem_count);
|
||||
self.cur_seg_distinct_elems_mem_usage = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Storage for finalized Bloom filters.
|
||||
///
|
||||
/// TODO(zhongzc): Add support for storing intermediate bloom filters on disk to control memory usage.
|
||||
#[derive(Debug, Default)]
|
||||
struct FinalizedBloomFilterStorage {
|
||||
/// Bloom filters that are stored in memory.
|
||||
in_memory: Vec<FinalizedBloomFilterSegment>,
|
||||
}
|
||||
|
||||
impl FinalizedBloomFilterStorage {
|
||||
fn memory_usage(&self) -> usize {
|
||||
self.in_memory.iter().map(|s| s.size).sum()
|
||||
}
|
||||
|
||||
/// Adds a new finalized Bloom filter to the storage.
|
||||
///
|
||||
/// TODO(zhongzc): Add support for flushing to disk.
|
||||
fn add(&mut self, elems: impl IntoIterator<Item = Bytes>, elem_count: usize) {
|
||||
let mut bf = BloomFilter::with_false_pos(FALSE_POSITIVE_RATE)
|
||||
.seed(&SEED)
|
||||
.expected_items(elem_count);
|
||||
for elem in elems.into_iter() {
|
||||
bf.insert(&elem);
|
||||
}
|
||||
|
||||
let cbf = FinalizedBloomFilterSegment::new(bf, elem_count);
|
||||
self.in_memory.push(cbf);
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.in_memory.len()
|
||||
}
|
||||
|
||||
fn drain(&mut self) -> impl Iterator<Item = FinalizedBloomFilterSegment> + '_ {
|
||||
self.in_memory.drain(..)
|
||||
}
|
||||
}
|
||||
|
||||
/// A finalized Bloom filter segment.
|
||||
#[derive(Debug)]
|
||||
struct FinalizedBloomFilterSegment {
|
||||
/// The underlying Bloom filter.
|
||||
bloom_filter: BloomFilter,
|
||||
|
||||
/// The number of elements in the Bloom filter.
|
||||
element_count: usize,
|
||||
|
||||
/// The occupied memory size of the Bloom filter.
|
||||
size: usize,
|
||||
}
|
||||
|
||||
impl FinalizedBloomFilterSegment {
|
||||
fn new(bloom_filter: BloomFilter, elem_count: usize) -> Self {
|
||||
let memory_usage = std::mem::size_of_val(bloom_filter.as_slice());
|
||||
Self {
|
||||
bloom_filter,
|
||||
element_count: elem_count,
|
||||
size: memory_usage,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends every `u64` in `slice` to `buf`, each encoded as 8 little-endian bytes.
///
/// Existing contents of `buf` are preserved; capacity for the new bytes is
/// reserved up front.
fn write_u64_slice(buf: &mut Vec<u8>, slice: &[u64]) {
    buf.reserve(std::mem::size_of_val(slice));
    buf.extend(slice.iter().flat_map(|word| word.to_le_bytes()));
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use futures::io::Cursor;
|
||||
|
||||
use super::*;
|
||||
|
||||
fn u64_vec_from_bytes(bytes: &[u8]) -> Vec<u64> {
|
||||
bytes
|
||||
.chunks_exact(std::mem::size_of::<u64>())
|
||||
.map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_bloom_filter_creator() {
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
let mut creator = BloomFilterCreator::new(2);
|
||||
|
||||
creator.push_row_elems(vec![b"a".to_vec(), b"b".to_vec()]);
|
||||
assert!(creator.cur_seg_distinct_elems_mem_usage > 0);
|
||||
assert!(creator.memory_usage() > 0);
|
||||
|
||||
creator.push_row_elems(vec![b"c".to_vec(), b"d".to_vec()]);
|
||||
// Finalize the first segment
|
||||
assert!(creator.cur_seg_distinct_elems_mem_usage == 0);
|
||||
assert!(creator.memory_usage() > 0);
|
||||
|
||||
creator.push_row_elems(vec![b"e".to_vec(), b"f".to_vec()]);
|
||||
assert!(creator.cur_seg_distinct_elems_mem_usage > 0);
|
||||
assert!(creator.memory_usage() > 0);
|
||||
|
||||
creator.finish(&mut writer).await.unwrap();
|
||||
|
||||
let bytes = writer.into_inner();
|
||||
let total_size = bytes.len();
|
||||
let meta_size_offset = total_size - 4;
|
||||
let meta_size = u32::from_le_bytes((&bytes[meta_size_offset..]).try_into().unwrap());
|
||||
|
||||
let meta_bytes = &bytes[total_size - meta_size as usize - 4..total_size - 4];
|
||||
let meta: BloomFilterMeta = serde_json::from_slice(meta_bytes).unwrap();
|
||||
|
||||
assert_eq!(meta.rows_per_segment, 2);
|
||||
assert_eq!(meta.seg_count, 2);
|
||||
assert_eq!(meta.row_count, 3);
|
||||
assert_eq!(
|
||||
meta.bloom_filter_segments_size + meta_bytes.len() + 4,
|
||||
total_size
|
||||
);
|
||||
|
||||
let mut bfs = Vec::new();
|
||||
for segment in meta.bloom_filter_segments {
|
||||
let bloom_filter_bytes =
|
||||
&bytes[segment.offset as usize..(segment.offset + segment.size) as usize];
|
||||
let v = u64_vec_from_bytes(bloom_filter_bytes);
|
||||
let bloom_filter = BloomFilter::from_vec(v)
|
||||
.seed(&SEED)
|
||||
.expected_items(segment.elem_count);
|
||||
bfs.push(bloom_filter);
|
||||
}
|
||||
|
||||
assert_eq!(bfs.len(), 2);
|
||||
assert!(bfs[0].contains(&b"a"));
|
||||
assert!(bfs[0].contains(&b"b"));
|
||||
assert!(bfs[0].contains(&b"c"));
|
||||
assert!(bfs[0].contains(&b"d"));
|
||||
assert!(bfs[1].contains(&b"e"));
|
||||
assert!(bfs[1].contains(&b"f"));
|
||||
}
|
||||
}
|
||||
66
src/index/src/bloom_filter/error.rs
Normal file
66
src/index/src/bloom_filter/error.rs
Normal file
@@ -0,0 +1,66 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_macro::stack_trace_debug;
|
||||
use snafu::{Location, Snafu};
|
||||
|
||||
#[derive(Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
#[stack_trace_debug]
|
||||
pub enum Error {
|
||||
#[snafu(display("IO error"))]
|
||||
Io {
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to serde json"))]
|
||||
SerdeJson {
|
||||
#[snafu(source)]
|
||||
error: serde_json::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("External error"))]
|
||||
External {
|
||||
source: BoxedError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
use Error::*;
|
||||
|
||||
match self {
|
||||
Io { .. } | Self::SerdeJson { .. } => StatusCode::Unexpected,
|
||||
|
||||
External { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -26,14 +26,6 @@ use crate::inverted_index::search::predicate::Predicate;
|
||||
#[snafu(visibility(pub))]
|
||||
#[stack_trace_debug]
|
||||
pub enum Error {
|
||||
#[snafu(display("Failed to seek"))]
|
||||
Seek {
|
||||
#[snafu(source)]
|
||||
error: IoError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to read"))]
|
||||
Read {
|
||||
#[snafu(source)]
|
||||
@@ -76,6 +68,18 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Blob size too small"))]
|
||||
BlobSizeTooSmall {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid footer payload size"))]
|
||||
InvalidFooterPayloadSize {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Unexpected inverted index footer payload size, max: {max_payload_size}, actual: {actual_payload_size}"))]
|
||||
UnexpectedFooterPayloadSize {
|
||||
max_payload_size: u64,
|
||||
@@ -215,8 +219,7 @@ impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
use Error::*;
|
||||
match self {
|
||||
Seek { .. }
|
||||
| Read { .. }
|
||||
Read { .. }
|
||||
| Write { .. }
|
||||
| Flush { .. }
|
||||
| Close { .. }
|
||||
@@ -229,7 +232,9 @@ impl ErrorExt for Error {
|
||||
| KeysApplierUnexpectedPredicates { .. }
|
||||
| CommonIo { .. }
|
||||
| UnknownIntermediateCodecMagic { .. }
|
||||
| FstCompile { .. } => StatusCode::Unexpected,
|
||||
| FstCompile { .. }
|
||||
| InvalidFooterPayloadSize { .. }
|
||||
| BlobSizeTooSmall { .. } => StatusCode::Unexpected,
|
||||
|
||||
ParseRegex { .. }
|
||||
| ParseDFA { .. }
|
||||
|
||||
@@ -12,9 +12,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use common_base::BitVec;
|
||||
use greptime_proto::v1::index::InvertedIndexMetas;
|
||||
use snafu::ResultExt;
|
||||
@@ -30,23 +32,23 @@ mod footer;
|
||||
#[mockall::automock]
|
||||
#[async_trait]
|
||||
pub trait InvertedIndexReader: Send {
|
||||
/// Reads all data to dest.
|
||||
async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize>;
|
||||
|
||||
/// Seeks to given offset and reads data with exact size as provided.
|
||||
async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>>;
|
||||
async fn range_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>>;
|
||||
|
||||
/// Reads the bytes in the given ranges.
|
||||
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>>;
|
||||
|
||||
/// Retrieves metadata of all inverted indices stored within the blob.
|
||||
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>>;
|
||||
|
||||
/// Retrieves the finite state transducer (FST) map from the given offset and size.
|
||||
async fn fst(&mut self, offset: u64, size: u32) -> Result<FstMap> {
|
||||
let fst_data = self.seek_read(offset, size).await?;
|
||||
let fst_data = self.range_read(offset, size).await?;
|
||||
FstMap::new(fst_data).context(DecodeFstSnafu)
|
||||
}
|
||||
|
||||
/// Retrieves the bitmap from the given offset and size.
|
||||
async fn bitmap(&mut self, offset: u64, size: u32) -> Result<BitVec> {
|
||||
self.seek_read(offset, size).await.map(BitVec::from_vec)
|
||||
self.range_read(offset, size).await.map(BitVec::from_vec)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,18 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use common_base::range_read::RangeReader;
|
||||
use greptime_proto::v1::index::InvertedIndexMetas;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use super::footer::DEFAULT_PREFETCH_SIZE;
|
||||
use crate::inverted_index::error::{CommonIoSnafu, Result, UnexpectedBlobSizeSnafu};
|
||||
use crate::inverted_index::format::reader::footer::InvertedIndeFooterReader;
|
||||
use crate::inverted_index::format::reader::footer::InvertedIndexFooterReader;
|
||||
use crate::inverted_index::format::reader::InvertedIndexReader;
|
||||
use crate::inverted_index::format::MIN_BLOB_SIZE;
|
||||
|
||||
@@ -49,16 +52,7 @@ impl<R> InvertedIndexBlobReader<R> {
|
||||
|
||||
#[async_trait]
|
||||
impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
|
||||
async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize> {
|
||||
let metadata = self.source.metadata().await.context(CommonIoSnafu)?;
|
||||
self.source
|
||||
.read_into(0..metadata.content_length, dest)
|
||||
.await
|
||||
.context(CommonIoSnafu)?;
|
||||
Ok(metadata.content_length as usize)
|
||||
}
|
||||
|
||||
async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
|
||||
async fn range_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
|
||||
let buf = self
|
||||
.source
|
||||
.read(offset..offset + size as u64)
|
||||
@@ -67,12 +61,17 @@ impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
|
||||
Ok(buf.into())
|
||||
}
|
||||
|
||||
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
|
||||
self.source.read_vec(ranges).await.context(CommonIoSnafu)
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>> {
|
||||
let metadata = self.source.metadata().await.context(CommonIoSnafu)?;
|
||||
let blob_size = metadata.content_length;
|
||||
Self::validate_blob_size(blob_size)?;
|
||||
|
||||
let mut footer_reader = InvertedIndeFooterReader::new(&mut self.source, blob_size);
|
||||
let mut footer_reader = InvertedIndexFooterReader::new(&mut self.source, blob_size)
|
||||
.with_prefetch_size(DEFAULT_PREFETCH_SIZE);
|
||||
footer_reader.metadata().await.map(Arc::new)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,53 +18,88 @@ use prost::Message;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::inverted_index::error::{
|
||||
CommonIoSnafu, DecodeProtoSnafu, Result, UnexpectedFooterPayloadSizeSnafu,
|
||||
UnexpectedOffsetSizeSnafu, UnexpectedZeroSegmentRowCountSnafu,
|
||||
BlobSizeTooSmallSnafu, CommonIoSnafu, DecodeProtoSnafu, InvalidFooterPayloadSizeSnafu, Result,
|
||||
UnexpectedFooterPayloadSizeSnafu, UnexpectedOffsetSizeSnafu,
|
||||
UnexpectedZeroSegmentRowCountSnafu,
|
||||
};
|
||||
use crate::inverted_index::format::FOOTER_PAYLOAD_SIZE_SIZE;
|
||||
|
||||
/// InvertedIndeFooterReader is for reading the footer section of the blob.
|
||||
pub struct InvertedIndeFooterReader<R> {
|
||||
pub const DEFAULT_PREFETCH_SIZE: u64 = 1024; // 1KiB
|
||||
|
||||
/// InvertedIndexFooterReader is for reading the footer section of the blob.
|
||||
pub struct InvertedIndexFooterReader<R> {
|
||||
source: R,
|
||||
blob_size: u64,
|
||||
prefetch_size: Option<u64>,
|
||||
}
|
||||
|
||||
impl<R> InvertedIndeFooterReader<R> {
|
||||
impl<R> InvertedIndexFooterReader<R> {
|
||||
pub fn new(source: R, blob_size: u64) -> Self {
|
||||
Self { source, blob_size }
|
||||
Self {
|
||||
source,
|
||||
blob_size,
|
||||
prefetch_size: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the prefetch size for the footer reader.
|
||||
pub fn with_prefetch_size(mut self, prefetch_size: u64) -> Self {
|
||||
self.prefetch_size = Some(prefetch_size.max(FOOTER_PAYLOAD_SIZE_SIZE));
|
||||
self
|
||||
}
|
||||
|
||||
pub fn prefetch_size(&self) -> u64 {
|
||||
self.prefetch_size.unwrap_or(FOOTER_PAYLOAD_SIZE_SIZE)
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: RangeReader> InvertedIndeFooterReader<R> {
|
||||
impl<R: RangeReader> InvertedIndexFooterReader<R> {
|
||||
pub async fn metadata(&mut self) -> Result<InvertedIndexMetas> {
|
||||
let payload_size = self.read_payload_size().await?;
|
||||
let metas = self.read_payload(payload_size).await?;
|
||||
Ok(metas)
|
||||
}
|
||||
ensure!(
|
||||
self.blob_size >= FOOTER_PAYLOAD_SIZE_SIZE,
|
||||
BlobSizeTooSmallSnafu
|
||||
);
|
||||
|
||||
async fn read_payload_size(&mut self) -> Result<u64> {
|
||||
let mut size_buf = [0u8; FOOTER_PAYLOAD_SIZE_SIZE as usize];
|
||||
let end = self.blob_size;
|
||||
let start = end - FOOTER_PAYLOAD_SIZE_SIZE;
|
||||
self.source
|
||||
.read_into(start..end, &mut &mut size_buf[..])
|
||||
let footer_start = self.blob_size.saturating_sub(self.prefetch_size());
|
||||
let suffix = self
|
||||
.source
|
||||
.read(footer_start..self.blob_size)
|
||||
.await
|
||||
.context(CommonIoSnafu)?;
|
||||
let suffix_len = suffix.len();
|
||||
let length = u32::from_le_bytes(Self::read_tailing_four_bytes(&suffix)?) as u64;
|
||||
self.validate_payload_size(length)?;
|
||||
|
||||
let payload_size = u32::from_le_bytes(size_buf) as u64;
|
||||
self.validate_payload_size(payload_size)?;
|
||||
let footer_size = FOOTER_PAYLOAD_SIZE_SIZE;
|
||||
|
||||
Ok(payload_size)
|
||||
// Did not fetch the entire file metadata in the initial read, need to make a second request.
|
||||
if length > suffix_len as u64 - footer_size {
|
||||
let metadata_start = self.blob_size - length - footer_size;
|
||||
let meta = self
|
||||
.source
|
||||
.read(metadata_start..self.blob_size - footer_size)
|
||||
.await
|
||||
.context(CommonIoSnafu)?;
|
||||
self.parse_payload(&meta, length)
|
||||
} else {
|
||||
let metadata_start = self.blob_size - length - footer_size - footer_start;
|
||||
let meta = &suffix[metadata_start as usize..suffix_len - footer_size as usize];
|
||||
self.parse_payload(meta, length)
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_payload(&mut self, payload_size: u64) -> Result<InvertedIndexMetas> {
|
||||
let end = self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE;
|
||||
let start = end - payload_size;
|
||||
let bytes = self.source.read(start..end).await.context(CommonIoSnafu)?;
|
||||
fn read_tailing_four_bytes(suffix: &[u8]) -> Result<[u8; 4]> {
|
||||
let suffix_len = suffix.len();
|
||||
ensure!(suffix_len >= 4, InvalidFooterPayloadSizeSnafu);
|
||||
let mut bytes = [0; 4];
|
||||
bytes.copy_from_slice(&suffix[suffix_len - 4..suffix_len]);
|
||||
|
||||
let metas = InvertedIndexMetas::decode(&*bytes).context(DecodeProtoSnafu)?;
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
fn parse_payload(&mut self, bytes: &[u8], payload_size: u64) -> Result<InvertedIndexMetas> {
|
||||
let metas = InvertedIndexMetas::decode(bytes).context(DecodeProtoSnafu)?;
|
||||
self.validate_metas(&metas, payload_size)?;
|
||||
|
||||
Ok(metas)
|
||||
}
|
||||
|
||||
@@ -113,9 +148,12 @@ impl<R: RangeReader> InvertedIndeFooterReader<R> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use prost::Message;
|
||||
|
||||
use super::*;
|
||||
use crate::inverted_index::error::Error;
|
||||
|
||||
fn create_test_payload(meta: InvertedIndexMeta) -> Vec<u8> {
|
||||
let mut metas = InvertedIndexMetas {
|
||||
@@ -141,14 +179,18 @@ mod tests {
|
||||
|
||||
let mut payload_buf = create_test_payload(meta);
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
|
||||
|
||||
let payload_size = reader.read_payload_size().await.unwrap();
|
||||
let metas = reader.read_payload(payload_size).await.unwrap();
|
||||
for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] {
|
||||
let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size);
|
||||
if prefetch > 0 {
|
||||
reader = reader.with_prefetch_size(prefetch);
|
||||
}
|
||||
|
||||
assert_eq!(metas.metas.len(), 1);
|
||||
let index_meta = &metas.metas.get("test").unwrap();
|
||||
assert_eq!(index_meta.name, "test");
|
||||
let metas = reader.metadata().await.unwrap();
|
||||
assert_eq!(metas.metas.len(), 1);
|
||||
let index_meta = &metas.metas.get("test").unwrap();
|
||||
assert_eq!(index_meta.name, "test");
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -157,14 +199,20 @@ mod tests {
|
||||
name: "test".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut payload_buf = create_test_payload(meta);
|
||||
payload_buf.push(0xff); // Add an extra byte to corrupt the footer
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
|
||||
|
||||
let payload_size_result = reader.read_payload_size().await;
|
||||
assert!(payload_size_result.is_err());
|
||||
for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] {
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size);
|
||||
if prefetch > 0 {
|
||||
reader = reader.with_prefetch_size(prefetch);
|
||||
}
|
||||
|
||||
let result = reader.metadata().await;
|
||||
assert_matches!(result, Err(Error::UnexpectedFooterPayloadSize { .. }));
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -178,10 +226,15 @@ mod tests {
|
||||
|
||||
let mut payload_buf = create_test_payload(meta);
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
|
||||
|
||||
let payload_size = reader.read_payload_size().await.unwrap();
|
||||
let payload_result = reader.read_payload(payload_size).await;
|
||||
assert!(payload_result.is_err());
|
||||
for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] {
|
||||
let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size);
|
||||
if prefetch > 0 {
|
||||
reader = reader.with_prefetch_size(prefetch);
|
||||
}
|
||||
|
||||
let result = reader.metadata().await;
|
||||
assert_matches!(result, Err(Error::UnexpectedOffsetSize { .. }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(iter_partition_in_place)]
|
||||
#![feature(assert_matches)]
|
||||
|
||||
pub mod bloom_filter;
|
||||
pub mod fulltext_index;
|
||||
pub mod inverted_index;
|
||||
|
||||
@@ -206,43 +206,41 @@ pub async fn metasrv_builder(
|
||||
plugins: Plugins,
|
||||
kv_backend: Option<KvBackendRef>,
|
||||
) -> Result<MetasrvBuilder> {
|
||||
let (kv_backend, election) = match (kv_backend, &opts.backend) {
|
||||
let (mut kv_backend, election) = match (kv_backend, &opts.backend) {
|
||||
(Some(kv_backend), _) => (kv_backend, None),
|
||||
(None, BackendImpl::MemoryStore) => (Arc::new(MemoryKvBackend::new()) as _, None),
|
||||
(None, BackendImpl::EtcdStore) => {
|
||||
let etcd_client = create_etcd_client(opts).await?;
|
||||
let kv_backend = {
|
||||
let etcd_backend =
|
||||
EtcdStore::with_etcd_client(etcd_client.clone(), opts.max_txn_ops);
|
||||
if !opts.store_key_prefix.is_empty() {
|
||||
Arc::new(ChrootKvBackend::new(
|
||||
opts.store_key_prefix.clone().into_bytes(),
|
||||
etcd_backend,
|
||||
))
|
||||
} else {
|
||||
etcd_backend
|
||||
}
|
||||
};
|
||||
(
|
||||
kv_backend,
|
||||
Some(
|
||||
EtcdElection::with_etcd_client(
|
||||
&opts.server_addr,
|
||||
etcd_client.clone(),
|
||||
opts.store_key_prefix.clone(),
|
||||
)
|
||||
.await?,
|
||||
),
|
||||
let kv_backend = EtcdStore::with_etcd_client(etcd_client.clone(), opts.max_txn_ops);
|
||||
let election = EtcdElection::with_etcd_client(
|
||||
&opts.server_addr,
|
||||
etcd_client,
|
||||
opts.store_key_prefix.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
(kv_backend, Some(election))
|
||||
}
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
(None, BackendImpl::PostgresStore) => {
|
||||
let pg_client = create_postgres_client(opts).await?;
|
||||
let kv_backend = PgStore::with_pg_client(pg_client).await.unwrap();
|
||||
// TODO(jeremy, weny): implement election for postgres
|
||||
(kv_backend, None)
|
||||
}
|
||||
};
|
||||
|
||||
if !opts.store_key_prefix.is_empty() {
|
||||
info!(
|
||||
"using chroot kv backend with prefix: {prefix}",
|
||||
prefix = opts.store_key_prefix
|
||||
);
|
||||
kv_backend = Arc::new(ChrootKvBackend::new(
|
||||
opts.store_key_prefix.clone().into_bytes(),
|
||||
kv_backend,
|
||||
))
|
||||
}
|
||||
|
||||
let in_memory = Arc::new(MemoryKvBackend::new()) as ResettableKvBackendRef;
|
||||
|
||||
let selector = match opts.selector {
|
||||
|
||||
@@ -204,10 +204,6 @@ impl Context {
|
||||
pub fn reset_in_memory(&self) {
|
||||
self.in_memory.reset();
|
||||
}
|
||||
|
||||
pub fn reset_leader_cached_kv_backend(&self) {
|
||||
self.leader_cached_kv_backend.reset();
|
||||
}
|
||||
}
|
||||
|
||||
/// The value of the leader. It is used to store the leader's address.
|
||||
@@ -470,6 +466,10 @@ impl Metasrv {
|
||||
});
|
||||
}
|
||||
} else {
|
||||
warn!(
|
||||
"Ensure only one instance of Metasrv is running, as there is no election service."
|
||||
);
|
||||
|
||||
if let Err(e) = self.wal_options_allocator.start().await {
|
||||
error!(e; "Failed to start wal options allocator");
|
||||
}
|
||||
|
||||
@@ -52,11 +52,6 @@ pub async fn mock_with_etcdstore(addr: &str) -> MockInfo {
|
||||
mock(Default::default(), kv_backend, None, None, None).await
|
||||
}
|
||||
|
||||
pub async fn mock_with_memstore_and_selector(selector: SelectorRef) -> MockInfo {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::new());
|
||||
mock(Default::default(), kv_backend, Some(selector), None, None).await
|
||||
}
|
||||
|
||||
pub async fn mock(
|
||||
opts: MetasrvOptions,
|
||||
kv_backend: KvBackendRef,
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub(crate) mod close_downgraded_region;
|
||||
pub(crate) mod downgrade_leader_region;
|
||||
pub(crate) mod manager;
|
||||
pub(crate) mod migration_abort;
|
||||
@@ -43,6 +44,7 @@ use common_procedure::error::{
|
||||
Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
|
||||
};
|
||||
use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status, StringKey};
|
||||
use common_telemetry::info;
|
||||
use manager::RegionMigrationProcedureGuard;
|
||||
pub use manager::{
|
||||
RegionMigrationManagerRef, RegionMigrationProcedureTask, RegionMigrationProcedureTracker,
|
||||
@@ -91,7 +93,9 @@ impl PersistentContext {
|
||||
let lock_key = vec![
|
||||
CatalogLock::Read(&self.catalog).into(),
|
||||
SchemaLock::read(&self.catalog, &self.schema).into(),
|
||||
TableLock::Read(region_id.table_id()).into(),
|
||||
// The optimistic updating of table route is not working very well,
|
||||
// so we need to use the write lock here.
|
||||
TableLock::Write(region_id.table_id()).into(),
|
||||
RegionLock::Write(region_id).into(),
|
||||
];
|
||||
|
||||
@@ -253,7 +257,7 @@ impl Context {
|
||||
.await
|
||||
.context(error::TableMetadataManagerSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::RetryLaterWithSourceSnafu {
|
||||
.with_context(|_| error::RetryLaterWithSourceSnafu {
|
||||
reason: format!("Failed to get TableRoute: {table_id}"),
|
||||
})?
|
||||
.context(error::TableRouteNotFoundSnafu { table_id })?;
|
||||
@@ -317,7 +321,7 @@ impl Context {
|
||||
.await
|
||||
.context(error::TableMetadataManagerSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::RetryLaterWithSourceSnafu {
|
||||
.with_context(|_| error::RetryLaterWithSourceSnafu {
|
||||
reason: format!("Failed to get TableInfo: {table_id}"),
|
||||
})?
|
||||
.context(error::TableInfoNotFoundSnafu { table_id })?;
|
||||
@@ -350,7 +354,7 @@ impl Context {
|
||||
.await
|
||||
.context(error::TableMetadataManagerSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::RetryLaterWithSourceSnafu {
|
||||
.with_context(|_| error::RetryLaterWithSourceSnafu {
|
||||
reason: format!("Failed to get DatanodeTable: ({datanode_id},{table_id})"),
|
||||
})?
|
||||
.context(error::DatanodeTableNotFoundSnafu {
|
||||
@@ -364,12 +368,6 @@ impl Context {
|
||||
Ok(datanode_value.as_ref().unwrap())
|
||||
}
|
||||
|
||||
/// Removes the `table_info` of [VolatileContext], returns true if any.
|
||||
pub fn remove_table_info_value(&mut self) -> bool {
|
||||
let value = self.volatile_ctx.table_info.take();
|
||||
value.is_some()
|
||||
}
|
||||
|
||||
/// Returns the [RegionId].
|
||||
pub fn region_id(&self) -> RegionId {
|
||||
self.persistent_ctx.region_id
|
||||
@@ -474,6 +472,48 @@ impl RegionMigrationProcedure {
|
||||
_guard: guard,
|
||||
})
|
||||
}
|
||||
|
||||
async fn rollback_inner(&mut self) -> Result<()> {
|
||||
let _timer = METRIC_META_REGION_MIGRATION_EXECUTE
|
||||
.with_label_values(&["rollback"])
|
||||
.start_timer();
|
||||
|
||||
let table_id = self.context.region_id().table_id();
|
||||
let region_id = self.context.region_id();
|
||||
self.context.remove_table_route_value();
|
||||
let table_metadata_manager = self.context.table_metadata_manager.clone();
|
||||
let table_route = self.context.get_table_route_value().await?;
|
||||
|
||||
// Safety: It must be a physical table route.
|
||||
let downgraded = table_route
|
||||
.region_routes()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.filter(|route| route.region.id == region_id)
|
||||
.any(|route| route.is_leader_downgrading());
|
||||
|
||||
if downgraded {
|
||||
info!("Rollbacking downgraded region leader table route, region: {region_id}");
|
||||
table_metadata_manager
|
||||
.update_leader_region_status(table_id, table_route, |route| {
|
||||
if route.region.id == region_id {
|
||||
Some(None)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.await
|
||||
.context(error::TableMetadataManagerSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.with_context(|_| error::RetryLaterWithSourceSnafu {
|
||||
reason: format!("Failed to update the table route during the rollback downgraded leader region: {region_id}"),
|
||||
})?;
|
||||
}
|
||||
|
||||
self.context.register_failure_detectors().await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -482,6 +522,16 @@ impl Procedure for RegionMigrationProcedure {
|
||||
Self::TYPE_NAME
|
||||
}
|
||||
|
||||
async fn rollback(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<()> {
|
||||
self.rollback_inner()
|
||||
.await
|
||||
.map_err(ProcedureError::external)
|
||||
}
|
||||
|
||||
fn rollback_supported(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
|
||||
let state = &mut self.state;
|
||||
|
||||
@@ -707,6 +757,12 @@ mod tests {
|
||||
Assertion::simple(assert_update_metadata_upgrade, assert_no_persist),
|
||||
),
|
||||
// UpdateMetadata::Upgrade
|
||||
Step::next(
|
||||
"Should be the close downgraded region",
|
||||
None,
|
||||
Assertion::simple(assert_close_downgraded_region, assert_no_persist),
|
||||
),
|
||||
// CloseDowngradedRegion
|
||||
Step::next(
|
||||
"Should be the region migration end",
|
||||
None,
|
||||
@@ -1077,6 +1133,12 @@ mod tests {
|
||||
Assertion::simple(assert_update_metadata_upgrade, assert_no_persist),
|
||||
),
|
||||
// UpdateMetadata::Upgrade
|
||||
Step::next(
|
||||
"Should be the close downgraded region",
|
||||
None,
|
||||
Assertion::simple(assert_close_downgraded_region, assert_no_persist),
|
||||
),
|
||||
// CloseDowngradedRegion
|
||||
Step::next(
|
||||
"Should be the region migration end",
|
||||
None,
|
||||
|
||||
@@ -0,0 +1,138 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::meta::MailboxMessage;
|
||||
use common_meta::distributed_time_constants::MAILBOX_RTT_SECS;
|
||||
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
|
||||
use common_meta::key::datanode_table::RegionInfo;
|
||||
use common_meta::RegionIdent;
|
||||
use common_procedure::Status;
|
||||
use common_telemetry::{info, warn};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::handler::HeartbeatMailbox;
|
||||
use crate::procedure::region_migration::migration_end::RegionMigrationEnd;
|
||||
use crate::procedure::region_migration::{Context, State};
|
||||
use crate::service::mailbox::Channel;
|
||||
|
||||
const CLOSE_DOWNGRADED_REGION_TIMEOUT: Duration = Duration::from_secs(MAILBOX_RTT_SECS);
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct CloseDowngradedRegion;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[typetag::serde]
|
||||
impl State for CloseDowngradedRegion {
|
||||
async fn next(&mut self, ctx: &mut Context) -> Result<(Box<dyn State>, Status)> {
|
||||
if let Err(err) = self.close_downgraded_leader_region(ctx).await {
|
||||
let downgrade_leader_datanode = &ctx.persistent_ctx.from_peer;
|
||||
let region_id = ctx.region_id();
|
||||
warn!(err; "Failed to close downgraded leader region: {region_id} on datanode {:?}", downgrade_leader_datanode);
|
||||
}
|
||||
|
||||
Ok((Box::new(RegionMigrationEnd), Status::done()))
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl CloseDowngradedRegion {
|
||||
/// Builds close region instruction.
|
||||
///
|
||||
/// Abort(non-retry):
|
||||
/// - Datanode Table is not found.
|
||||
async fn build_close_region_instruction(&self, ctx: &mut Context) -> Result<Instruction> {
|
||||
let pc = &ctx.persistent_ctx;
|
||||
let downgrade_leader_datanode_id = pc.from_peer.id;
|
||||
let cluster_id = pc.cluster_id;
|
||||
let table_id = pc.region_id.table_id();
|
||||
let region_number = pc.region_id.region_number();
|
||||
let datanode_table_value = ctx.get_from_peer_datanode_table_value().await?;
|
||||
|
||||
let RegionInfo { engine, .. } = datanode_table_value.region_info.clone();
|
||||
|
||||
Ok(Instruction::CloseRegion(RegionIdent {
|
||||
cluster_id,
|
||||
datanode_id: downgrade_leader_datanode_id,
|
||||
table_id,
|
||||
region_number,
|
||||
engine,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Closes the downgraded leader region.
|
||||
async fn close_downgraded_leader_region(&self, ctx: &mut Context) -> Result<()> {
|
||||
let close_instruction = self.build_close_region_instruction(ctx).await?;
|
||||
let region_id = ctx.region_id();
|
||||
let pc = &ctx.persistent_ctx;
|
||||
let downgrade_leader_datanode = &pc.from_peer;
|
||||
let msg = MailboxMessage::json_message(
|
||||
&format!("Close downgraded region: {}", region_id),
|
||||
&format!("Meta@{}", ctx.server_addr()),
|
||||
&format!(
|
||||
"Datanode-{}@{}",
|
||||
downgrade_leader_datanode.id, downgrade_leader_datanode.addr
|
||||
),
|
||||
common_time::util::current_time_millis(),
|
||||
&close_instruction,
|
||||
)
|
||||
.with_context(|_| error::SerializeToJsonSnafu {
|
||||
input: close_instruction.to_string(),
|
||||
})?;
|
||||
|
||||
let ch = Channel::Datanode(downgrade_leader_datanode.id);
|
||||
let receiver = ctx
|
||||
.mailbox
|
||||
.send(&ch, msg, CLOSE_DOWNGRADED_REGION_TIMEOUT)
|
||||
.await?;
|
||||
|
||||
match receiver.await? {
|
||||
Ok(msg) => {
|
||||
let reply = HeartbeatMailbox::json_reply(&msg)?;
|
||||
info!(
|
||||
"Received close downgraded leade region reply: {:?}, region: {}",
|
||||
reply, region_id
|
||||
);
|
||||
let InstructionReply::CloseRegion(SimpleReply { result, error }) = reply else {
|
||||
return error::UnexpectedInstructionReplySnafu {
|
||||
mailbox_message: msg.to_string(),
|
||||
reason: "expect close region reply",
|
||||
}
|
||||
.fail();
|
||||
};
|
||||
|
||||
if result {
|
||||
Ok(())
|
||||
} else {
|
||||
error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"Failed to close downgraded leader region: {region_id} on datanode {:?}, error: {error:?}",
|
||||
downgrade_leader_datanode,
|
||||
),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -21,11 +21,11 @@ use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use super::migration_abort::RegionMigrationAbort;
|
||||
use super::migration_end::RegionMigrationEnd;
|
||||
use super::open_candidate_region::OpenCandidateRegion;
|
||||
use super::update_metadata::UpdateMetadata;
|
||||
use crate::error::{self, Result};
|
||||
use crate::procedure::region_migration::migration_abort::RegionMigrationAbort;
|
||||
use crate::procedure::region_migration::migration_end::RegionMigrationEnd;
|
||||
use crate::procedure::region_migration::open_candidate_region::OpenCandidateRegion;
|
||||
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
|
||||
use crate::procedure::region_migration::{Context, State};
|
||||
|
||||
/// The behaviors:
|
||||
|
||||
@@ -25,9 +25,9 @@ use common_telemetry::info;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use super::update_metadata::UpdateMetadata;
|
||||
use crate::error::{self, Result};
|
||||
use crate::handler::HeartbeatMailbox;
|
||||
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
|
||||
use crate::procedure::region_migration::{Context, State};
|
||||
use crate::service::mailbox::Channel;
|
||||
|
||||
@@ -145,7 +145,10 @@ impl OpenCandidateRegion {
|
||||
match receiver.await? {
|
||||
Ok(msg) => {
|
||||
let reply = HeartbeatMailbox::json_reply(&msg)?;
|
||||
info!("Received open region reply: {:?}", reply);
|
||||
info!(
|
||||
"Received open region reply: {:?}, region: {}",
|
||||
reply, region_id
|
||||
);
|
||||
let InstructionReply::OpenRegion(SimpleReply { result, error }) = reply else {
|
||||
return error::UnexpectedInstructionReplySnafu {
|
||||
mailbox_message: msg.to_string(),
|
||||
|
||||
@@ -44,19 +44,21 @@ use store_api::storage::RegionId;
|
||||
use table::metadata::RawTableInfo;
|
||||
use tokio::sync::mpsc::{Receiver, Sender};
|
||||
|
||||
use super::manager::RegionMigrationProcedureTracker;
|
||||
use super::migration_abort::RegionMigrationAbort;
|
||||
use super::upgrade_candidate_region::UpgradeCandidateRegion;
|
||||
use super::{Context, ContextFactory, DefaultContextFactory, State, VolatileContext};
|
||||
use crate::cache_invalidator::MetasrvCacheInvalidator;
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::handler::{HeartbeatMailbox, Pusher, Pushers};
|
||||
use crate::metasrv::MetasrvInfo;
|
||||
use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion;
|
||||
use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion;
|
||||
use crate::procedure::region_migration::manager::RegionMigrationProcedureTracker;
|
||||
use crate::procedure::region_migration::migration_abort::RegionMigrationAbort;
|
||||
use crate::procedure::region_migration::migration_end::RegionMigrationEnd;
|
||||
use crate::procedure::region_migration::open_candidate_region::OpenCandidateRegion;
|
||||
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
|
||||
use crate::procedure::region_migration::PersistentContext;
|
||||
use crate::procedure::region_migration::upgrade_candidate_region::UpgradeCandidateRegion;
|
||||
use crate::procedure::region_migration::{
|
||||
Context, ContextFactory, DefaultContextFactory, PersistentContext, State, VolatileContext,
|
||||
};
|
||||
use crate::service::mailbox::{Channel, MailboxRef};
|
||||
|
||||
pub type MockHeartbeatReceiver = Receiver<std::result::Result<HeartbeatResponse, tonic::Status>>;
|
||||
@@ -569,6 +571,14 @@ pub(crate) fn assert_region_migration_end(next: &dyn State) {
|
||||
let _ = next.as_any().downcast_ref::<RegionMigrationEnd>().unwrap();
|
||||
}
|
||||
|
||||
/// Asserts the [State] should be [CloseDowngradedRegion].
|
||||
pub(crate) fn assert_close_downgraded_region(next: &dyn State) {
|
||||
let _ = next
|
||||
.as_any()
|
||||
.downcast_ref::<CloseDowngradedRegion>()
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// Asserts the [State] should be [RegionMigrationAbort].
|
||||
pub(crate) fn assert_region_migration_abort(next: &dyn State) {
|
||||
let _ = next
|
||||
|
||||
@@ -22,10 +22,10 @@ use common_procedure::Status;
|
||||
use common_telemetry::warn;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::migration_abort::RegionMigrationAbort;
|
||||
use super::migration_end::RegionMigrationEnd;
|
||||
use crate::error::Result;
|
||||
use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion;
|
||||
use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion;
|
||||
use crate::procedure::region_migration::migration_abort::RegionMigrationAbort;
|
||||
use crate::procedure::region_migration::{Context, State};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
@@ -58,7 +58,7 @@ impl State for UpdateMetadata {
|
||||
if let Err(err) = ctx.invalidate_table_cache().await {
|
||||
warn!("Failed to broadcast the invalidate table cache message during the upgrade candidate, error: {err:?}");
|
||||
};
|
||||
Ok((Box::new(RegionMigrationEnd), Status::done()))
|
||||
Ok((Box::new(CloseDowngradedRegion), Status::executing(false)))
|
||||
}
|
||||
UpdateMetadata::Rollback => {
|
||||
self.rollback_downgraded_region(ctx).await?;
|
||||
|
||||
@@ -195,7 +195,7 @@ mod tests {
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::procedure::region_migration::migration_end::RegionMigrationEnd;
|
||||
use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion;
|
||||
use crate::procedure::region_migration::test_util::{self, TestingEnv};
|
||||
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
|
||||
use crate::procedure::region_migration::{ContextFactory, PersistentContext, State};
|
||||
@@ -443,7 +443,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_next_migration_end_state() {
|
||||
async fn test_next_close_downgraded_region_state() {
|
||||
let mut state = Box::new(UpdateMetadata::Upgrade);
|
||||
let env = TestingEnv::new();
|
||||
let persistent_context = new_persistent_context();
|
||||
@@ -471,7 +471,10 @@ mod tests {
|
||||
|
||||
let (next, _) = state.next(&mut ctx).await.unwrap();
|
||||
|
||||
let _ = next.as_any().downcast_ref::<RegionMigrationEnd>().unwrap();
|
||||
let _ = next
|
||||
.as_any()
|
||||
.downcast_ref::<CloseDowngradedRegion>()
|
||||
.unwrap();
|
||||
|
||||
let table_route = table_metadata_manager
|
||||
.table_route_manager()
|
||||
|
||||
@@ -23,9 +23,9 @@ use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use tokio::time::{sleep, Instant};
|
||||
|
||||
use super::update_metadata::UpdateMetadata;
|
||||
use crate::error::{self, Result};
|
||||
use crate::handler::HeartbeatMailbox;
|
||||
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
|
||||
use crate::procedure::region_migration::{Context, State};
|
||||
use crate::service::mailbox::Channel;
|
||||
|
||||
@@ -155,7 +155,7 @@ impl UpgradeCandidateRegion {
|
||||
exists,
|
||||
error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"Expected region {} doesn't exist on datanode {:?}",
|
||||
"Candidate region {} doesn't exist on datanode {:?}",
|
||||
region_id, candidate
|
||||
)
|
||||
}
|
||||
|
||||
@@ -210,7 +210,6 @@ impl RegionEngine for MetricEngine {
|
||||
for x in [
|
||||
utils::to_metadata_region_id(region_id),
|
||||
utils::to_data_region_id(region_id),
|
||||
region_id,
|
||||
] {
|
||||
if let Err(e) = self.inner.mito.set_region_role(x, role)
|
||||
&& e.status_code() != StatusCode::RegionNotFound
|
||||
@@ -226,6 +225,13 @@ impl RegionEngine for MetricEngine {
|
||||
region_id: RegionId,
|
||||
region_role_state: SettableRegionRoleState,
|
||||
) -> std::result::Result<SetRegionRoleStateResponse, BoxedError> {
|
||||
self.inner
|
||||
.mito
|
||||
.set_region_role_state_gracefully(
|
||||
utils::to_metadata_region_id(region_id),
|
||||
region_role_state,
|
||||
)
|
||||
.await?;
|
||||
self.inner
|
||||
.mito
|
||||
.set_region_role_state_gracefully(region_id, region_role_state)
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_telemetry::debug;
|
||||
use snafu::ResultExt;
|
||||
use store_api::region_engine::RegionEngine;
|
||||
use store_api::region_request::{AffectedRows, RegionCatchupRequest, RegionRequest};
|
||||
@@ -35,6 +36,7 @@ impl MetricEngineInner {
|
||||
}
|
||||
let metadata_region_id = utils::to_metadata_region_id(region_id);
|
||||
// TODO(weny): improve the catchup, we can read the wal entries only once.
|
||||
debug!("Catchup metadata region {metadata_region_id}");
|
||||
self.mito
|
||||
.handle_request(
|
||||
metadata_region_id,
|
||||
@@ -48,6 +50,7 @@ impl MetricEngineInner {
|
||||
.context(MitoCatchupOperationSnafu)?;
|
||||
|
||||
let data_region_id = utils::to_data_region_id(region_id);
|
||||
debug!("Catchup data region {data_region_id}");
|
||||
self.mito
|
||||
.handle_request(
|
||||
data_region_id,
|
||||
|
||||
@@ -313,12 +313,12 @@ mod test {
|
||||
let region_dir = "test_metric_region";
|
||||
// assert metadata region's dir
|
||||
let metadata_region_dir = join_dir(region_dir, METADATA_REGION_SUBDIR);
|
||||
let exist = object_store.is_exist(&metadata_region_dir).await.unwrap();
|
||||
let exist = object_store.exists(&metadata_region_dir).await.unwrap();
|
||||
assert!(exist);
|
||||
|
||||
// assert data region's dir
|
||||
let data_region_dir = join_dir(region_dir, DATA_REGION_SUBDIR);
|
||||
let exist = object_store.is_exist(&data_region_dir).await.unwrap();
|
||||
let exist = object_store.exists(&data_region_dir).await.unwrap();
|
||||
assert!(exist);
|
||||
|
||||
// check mito engine
|
||||
|
||||
@@ -17,6 +17,7 @@ aquamarine.workspace = true
|
||||
async-channel = "1.9"
|
||||
async-stream.workspace = true
|
||||
async-trait = "0.1"
|
||||
bytemuck.workspace = true
|
||||
bytes.workspace = true
|
||||
common-base.workspace = true
|
||||
common-config.workspace = true
|
||||
@@ -76,7 +77,6 @@ uuid.workspace = true
|
||||
[dev-dependencies]
|
||||
common-function.workspace = true
|
||||
common-meta = { workspace = true, features = ["testing"] }
|
||||
common-procedure-test.workspace = true
|
||||
common-test-util.workspace = true
|
||||
criterion = "0.4"
|
||||
dotenv.workspace = true
|
||||
|
||||
@@ -32,6 +32,7 @@ use moka::notification::RemovalCause;
|
||||
use moka::sync::Cache;
|
||||
use parquet::column::page::Page;
|
||||
use parquet::file::metadata::ParquetMetaData;
|
||||
use puffin::puffin_manager::cache::{PuffinMetadataCache, PuffinMetadataCacheRef};
|
||||
use store_api::storage::{ConcreteDataType, RegionId, TimeSeriesRowSelector};
|
||||
|
||||
use crate::cache::cache_size::parquet_meta_size;
|
||||
@@ -68,6 +69,8 @@ pub struct CacheManager {
|
||||
write_cache: Option<WriteCacheRef>,
|
||||
/// Cache for inverted index.
|
||||
index_cache: Option<InvertedIndexCacheRef>,
|
||||
/// Puffin metadata cache.
|
||||
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
|
||||
/// Cache for time series selectors.
|
||||
selector_result_cache: Option<SelectorResultCache>,
|
||||
}
|
||||
@@ -217,6 +220,10 @@ impl CacheManager {
|
||||
pub(crate) fn index_cache(&self) -> Option<&InvertedIndexCacheRef> {
|
||||
self.index_cache.as_ref()
|
||||
}
|
||||
|
||||
/// Returns a reference to the puffin metadata cache, or `None` if this manager has none.
pub(crate) fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> {
|
||||
self.puffin_metadata_cache.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
/// Increases selector cache miss metrics.
|
||||
@@ -237,6 +244,8 @@ pub struct CacheManagerBuilder {
|
||||
page_cache_size: u64,
|
||||
index_metadata_size: u64,
|
||||
index_content_size: u64,
|
||||
index_content_page_size: u64,
|
||||
puffin_metadata_size: u64,
|
||||
write_cache: Option<WriteCacheRef>,
|
||||
selector_result_cache_size: u64,
|
||||
}
|
||||
@@ -278,6 +287,18 @@ impl CacheManagerBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets page size for index content.
|
||||
pub fn index_content_page_size(mut self, bytes: u64) -> Self {
|
||||
self.index_content_page_size = bytes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets cache size for puffin metadata.
|
||||
pub fn puffin_metadata_size(mut self, bytes: u64) -> Self {
|
||||
self.puffin_metadata_size = bytes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets selector result cache size.
|
||||
pub fn selector_result_cache_size(mut self, bytes: u64) -> Self {
|
||||
self.selector_result_cache_size = bytes;
|
||||
@@ -338,8 +359,13 @@ impl CacheManagerBuilder {
|
||||
})
|
||||
.build()
|
||||
});
|
||||
let inverted_index_cache =
|
||||
InvertedIndexCache::new(self.index_metadata_size, self.index_content_size);
|
||||
let inverted_index_cache = InvertedIndexCache::new(
|
||||
self.index_metadata_size,
|
||||
self.index_content_size,
|
||||
self.index_content_page_size,
|
||||
);
|
||||
let puffin_metadata_cache =
|
||||
PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES);
|
||||
let selector_result_cache = (self.selector_result_cache_size != 0).then(|| {
|
||||
Cache::builder()
|
||||
.max_capacity(self.selector_result_cache_size)
|
||||
@@ -361,6 +387,7 @@ impl CacheManagerBuilder {
|
||||
page_cache,
|
||||
write_cache: self.write_cache,
|
||||
index_cache: Some(Arc::new(inverted_index_cache)),
|
||||
puffin_metadata_cache: Some(Arc::new(puffin_metadata_cache)),
|
||||
selector_result_cache,
|
||||
}
|
||||
}
|
||||
|
||||
4
src/mito2/src/cache/file_cache.rs
vendored
4
src/mito2/src/cache/file_cache.rs
vendored
@@ -286,7 +286,7 @@ impl FileCache {
|
||||
}
|
||||
|
||||
async fn get_reader(&self, file_path: &str) -> object_store::Result<Option<Reader>> {
|
||||
if self.local_store.is_exist(file_path).await? {
|
||||
if self.local_store.exists(file_path).await? {
|
||||
Ok(Some(self.local_store.reader(file_path).await?))
|
||||
} else {
|
||||
Ok(None)
|
||||
@@ -480,7 +480,7 @@ mod tests {
|
||||
cache.memory_index.run_pending_tasks().await;
|
||||
|
||||
// The file also not exists.
|
||||
assert!(!local_store.is_exist(&file_path).await.unwrap());
|
||||
assert!(!local_store.exists(&file_path).await.unwrap());
|
||||
assert_eq!(0, cache.memory_index.weighted_size());
|
||||
}
|
||||
|
||||
|
||||
378
src/mito2/src/cache/index.rs
vendored
378
src/mito2/src/cache/index.rs
vendored
@@ -12,14 +12,17 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::index::InvertedIndexMetas;
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use common_base::BitVec;
|
||||
use index::inverted_index::error::DecodeFstSnafu;
|
||||
use index::inverted_index::format::reader::InvertedIndexReader;
|
||||
use index::inverted_index::FstMap;
|
||||
use object_store::Buffer;
|
||||
use prost::Message;
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -34,14 +37,16 @@ const INDEX_CONTENT_TYPE: &str = "index_content";
|
||||
/// Inverted index blob reader with cache.
///
/// Wraps an inner reader `R` and consults an [`InvertedIndexCacheRef`] for index content.
|
||||
pub struct CachedInvertedIndexBlobReader<R> {
|
||||
/// File id used to build the cache keys of this reader.
file_id: FileId,
|
||||
/// Size of the index file; passed to `prune_size` to bound the last page that is read.
file_size: u64,
|
||||
/// The wrapped reader that serves the requests this reader delegates to it.
inner: R,
|
||||
/// Shared inverted index cache.
cache: InvertedIndexCacheRef,
|
||||
}
|
||||
|
||||
impl<R> CachedInvertedIndexBlobReader<R> {
|
||||
pub fn new(file_id: FileId, inner: R, cache: InvertedIndexCacheRef) -> Self {
|
||||
pub fn new(file_id: FileId, file_size: u64, inner: R, cache: InvertedIndexCacheRef) -> Self {
|
||||
Self {
|
||||
file_id,
|
||||
file_size,
|
||||
inner,
|
||||
cache,
|
||||
}
|
||||
@@ -59,43 +64,71 @@ where
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<Vec<u8>> {
|
||||
let range = offset as usize..(offset + size as u64) as usize;
|
||||
if let Some(cached) = self.cache.get_index(IndexKey {
|
||||
file_id: self.file_id,
|
||||
}) {
|
||||
CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
|
||||
Ok(cached[range].to_vec())
|
||||
} else {
|
||||
let mut all_data = Vec::with_capacity(1024 * 1024);
|
||||
self.inner.read_all(&mut all_data).await?;
|
||||
let result = all_data[range].to_vec();
|
||||
self.cache.put_index(
|
||||
IndexKey {
|
||||
file_id: self.file_id,
|
||||
},
|
||||
Arc::new(all_data),
|
||||
);
|
||||
CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
|
||||
Ok(result)
|
||||
let keys =
|
||||
IndexDataPageKey::generate_page_keys(self.file_id, offset, size, self.cache.page_size);
|
||||
// Size is 0, return empty data.
|
||||
if keys.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
let mut data = Vec::with_capacity(keys.len());
|
||||
data.resize(keys.len(), Bytes::new());
|
||||
let mut cache_miss_range = vec![];
|
||||
let mut cache_miss_idx = vec![];
|
||||
let last_index = keys.len() - 1;
|
||||
// TODO: Avoid copy as much as possible.
|
||||
for (i, index) in keys.iter().enumerate() {
|
||||
match self.cache.get_index(index) {
|
||||
Some(page) => {
|
||||
CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
|
||||
data[i] = page;
|
||||
}
|
||||
None => {
|
||||
CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
|
||||
let base_offset = index.page_id * self.cache.page_size;
|
||||
let pruned_size = if i == last_index {
|
||||
prune_size(&keys, self.file_size, self.cache.page_size)
|
||||
} else {
|
||||
self.cache.page_size
|
||||
};
|
||||
cache_miss_range.push(base_offset..base_offset + pruned_size);
|
||||
cache_miss_idx.push(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
if !cache_miss_range.is_empty() {
|
||||
let pages = self.inner.read_vec(&cache_miss_range).await?;
|
||||
for (i, page) in cache_miss_idx.into_iter().zip(pages.into_iter()) {
|
||||
let key = keys[i].clone();
|
||||
data[i] = page.clone();
|
||||
self.cache.put_index(key, page.clone());
|
||||
}
|
||||
}
|
||||
let buffer = Buffer::from_iter(data.into_iter());
|
||||
Ok(buffer
|
||||
.slice(IndexDataPageKey::calculate_range(
|
||||
offset,
|
||||
size,
|
||||
self.cache.page_size,
|
||||
))
|
||||
.to_vec())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobReader<R> {
|
||||
async fn read_all(
|
||||
&mut self,
|
||||
dest: &mut Vec<u8>,
|
||||
) -> index::inverted_index::error::Result<usize> {
|
||||
self.inner.read_all(dest).await
|
||||
}
|
||||
|
||||
async fn seek_read(
|
||||
async fn range_read(
|
||||
&mut self,
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<Vec<u8>> {
|
||||
self.inner.seek_read(offset, size).await
|
||||
self.inner.range_read(offset, size).await
|
||||
}
|
||||
|
||||
async fn read_vec(
|
||||
&mut self,
|
||||
ranges: &[Range<u64>],
|
||||
) -> index::inverted_index::error::Result<Vec<Bytes>> {
|
||||
self.inner.read_vec(ranges).await
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> index::inverted_index::error::Result<Arc<InvertedIndexMetas>> {
|
||||
@@ -130,22 +163,69 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct IndexKey {
|
||||
pub struct IndexMetadataKey {
|
||||
file_id: FileId,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct IndexDataPageKey {
|
||||
file_id: FileId,
|
||||
page_id: u64,
|
||||
}
|
||||
|
||||
impl IndexDataPageKey {
|
||||
/// Converts an offset to a page ID based on the page size.
|
||||
fn calculate_page_id(offset: u64, page_size: u64) -> u64 {
|
||||
offset / page_size
|
||||
}
|
||||
|
||||
/// Calculates the total number of pages that a given size spans, starting from a specific offset.
|
||||
fn calculate_page_count(offset: u64, size: u32, page_size: u64) -> u32 {
|
||||
let start_page = Self::calculate_page_id(offset, page_size);
|
||||
let end_page = Self::calculate_page_id(offset + (size as u64) - 1, page_size);
|
||||
(end_page + 1 - start_page) as u32
|
||||
}
|
||||
|
||||
/// Calculates the byte range for data retrieval based on the specified offset and size.
|
||||
///
|
||||
/// This function determines the starting and ending byte positions required for reading data.
|
||||
/// For example, with an offset of 5000 and a size of 5000, using a PAGE_SIZE of 4096,
|
||||
/// the resulting byte range will be 904..5904. This indicates that:
|
||||
/// - The reader will first access fixed-size pages [4096, 8192) and [8192, 12288).
|
||||
/// - To read the range [5000..10000), it only needs to fetch bytes within the range [904, 5904) across two pages.
|
||||
fn calculate_range(offset: u64, size: u32, page_size: u64) -> Range<usize> {
|
||||
let start = (offset % page_size) as usize;
|
||||
let end = start + size as usize;
|
||||
start..end
|
||||
}
|
||||
|
||||
/// Generates a vector of IndexKey instances for the pages that a given offset and size span.
|
||||
fn generate_page_keys(file_id: FileId, offset: u64, size: u32, page_size: u64) -> Vec<Self> {
|
||||
let start_page = Self::calculate_page_id(offset, page_size);
|
||||
let total_pages = Self::calculate_page_count(offset, size, page_size);
|
||||
(0..total_pages)
|
||||
.map(|i| Self {
|
||||
file_id,
|
||||
page_id: start_page + i as u64,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
pub type InvertedIndexCacheRef = Arc<InvertedIndexCache>;
|
||||
|
||||
pub struct InvertedIndexCache {
|
||||
/// Cache for inverted index metadata
|
||||
index_metadata: moka::sync::Cache<IndexKey, Arc<InvertedIndexMetas>>,
|
||||
index_metadata: moka::sync::Cache<IndexMetadataKey, Arc<InvertedIndexMetas>>,
|
||||
/// Cache for inverted index content.
|
||||
index: moka::sync::Cache<IndexKey, Arc<Vec<u8>>>,
|
||||
index: moka::sync::Cache<IndexDataPageKey, Bytes>,
|
||||
// Page size for index content.
|
||||
page_size: u64,
|
||||
}
|
||||
|
||||
impl InvertedIndexCache {
|
||||
/// Creates `InvertedIndexCache` with provided `index_metadata_cap` and `index_content_cap`.
|
||||
pub fn new(index_metadata_cap: u64, index_content_cap: u64) -> Self {
|
||||
pub fn new(index_metadata_cap: u64, index_content_cap: u64, page_size: u64) -> Self {
|
||||
common_telemetry::debug!("Building InvertedIndexCache with metadata size: {index_metadata_cap}, content size: {index_content_cap}");
|
||||
let index_metadata = moka::sync::CacheBuilder::new(index_metadata_cap)
|
||||
.name("inverted_index_metadata")
|
||||
@@ -170,29 +250,29 @@ impl InvertedIndexCache {
|
||||
Self {
|
||||
index_metadata,
|
||||
index: index_cache,
|
||||
page_size,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl InvertedIndexCache {
|
||||
pub fn get_index_metadata(&self, file_id: FileId) -> Option<Arc<InvertedIndexMetas>> {
|
||||
self.index_metadata.get(&IndexKey { file_id })
|
||||
self.index_metadata.get(&IndexMetadataKey { file_id })
|
||||
}
|
||||
|
||||
pub fn put_index_metadata(&self, file_id: FileId, metadata: Arc<InvertedIndexMetas>) {
|
||||
let key = IndexKey { file_id };
|
||||
let key = IndexMetadataKey { file_id };
|
||||
CACHE_BYTES
|
||||
.with_label_values(&[INDEX_METADATA_TYPE])
|
||||
.add(index_metadata_weight(&key, &metadata).into());
|
||||
self.index_metadata.insert(key, metadata)
|
||||
}
|
||||
|
||||
// todo(hl): align index file content to pages with size like 4096 bytes.
|
||||
pub fn get_index(&self, key: IndexKey) -> Option<Arc<Vec<u8>>> {
|
||||
self.index.get(&key)
|
||||
pub fn get_index(&self, key: &IndexDataPageKey) -> Option<Bytes> {
|
||||
self.index.get(key)
|
||||
}
|
||||
|
||||
pub fn put_index(&self, key: IndexKey, value: Arc<Vec<u8>>) {
|
||||
pub fn put_index(&self, key: IndexDataPageKey, value: Bytes) {
|
||||
CACHE_BYTES
|
||||
.with_label_values(&[INDEX_CONTENT_TYPE])
|
||||
.add(index_content_weight(&key, &value).into());
|
||||
@@ -201,11 +281,229 @@ impl InvertedIndexCache {
|
||||
}
|
||||
|
||||
/// Calculates weight for index metadata.
|
||||
fn index_metadata_weight(k: &IndexKey, v: &Arc<InvertedIndexMetas>) -> u32 {
|
||||
fn index_metadata_weight(k: &IndexMetadataKey, v: &Arc<InvertedIndexMetas>) -> u32 {
|
||||
(k.file_id.as_bytes().len() + v.encoded_len()) as u32
|
||||
}
|
||||
|
||||
/// Calculates weight for index content.
|
||||
fn index_content_weight(k: &IndexKey, v: &Arc<Vec<u8>>) -> u32 {
|
||||
fn index_content_weight(k: &IndexDataPageKey, v: &Bytes) -> u32 {
|
||||
(k.file_id.as_bytes().len() + v.len()) as u32
|
||||
}
|
||||
|
||||
/// Prunes the size of the last page based on the indexes.
|
||||
/// We have following cases:
|
||||
/// 1. The rest file size is less than the page size, read to the end of the file.
|
||||
/// 2. Otherwise, read the page size.
|
||||
fn prune_size(indexes: &[IndexDataPageKey], file_size: u64, page_size: u64) -> u64 {
|
||||
let last_page_start = indexes.last().map(|i| i.page_id * page_size).unwrap_or(0);
|
||||
page_size.min(file_size - last_page_start)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
use common_base::BitVec;
|
||||
use futures::stream;
|
||||
use index::inverted_index::format::reader::{InvertedIndexBlobReader, InvertedIndexReader};
|
||||
use index::inverted_index::format::writer::{InvertedIndexBlobWriter, InvertedIndexWriter};
|
||||
use index::inverted_index::Bytes;
|
||||
use prometheus::register_int_counter_vec;
|
||||
use rand::{Rng, RngCore};
|
||||
|
||||
use super::*;
|
||||
use crate::sst::index::store::InstrumentedStore;
|
||||
use crate::test_util::TestEnv;
|
||||
|
||||
// Repeat times for following little fuzz tests.
|
||||
const FUZZ_REPEAT_TIMES: usize = 100;
|
||||
|
||||
// Fuzz test for the page arithmetic of `IndexDataPageKey`.
#[test]
fn fuzz_index_calculation() {
    // Random bytes standing in for the content of an index file.
    let mut rng = rand::thread_rng();
    let mut data = vec![0u8; 1024 * 1024];
    rng.fill_bytes(&mut data);
    let file_id = FileId::random();

    for _ in 0..FUZZ_REPEAT_TIMES {
        let offset = rng.gen_range(0..data.len() as u64);
        let size = rng.gen_range(0..data.len() as u32 - offset as u32);
        let page_size: usize = rng.gen_range(1..1024);

        let indexes =
            IndexDataPageKey::generate_page_keys(file_id, offset, size, page_size as u64);
        let page_num = indexes.len();

        // Concatenate every page the keys point at; the last page may be cut short by the
        // end of the data.
        let mut read = Vec::with_capacity(size as usize);
        for key in &indexes {
            let start = key.page_id as usize * page_size;
            let end = data.len().min(start + page_size);
            read.extend_from_slice(&data[start..end]);
        }

        // Slicing the concatenated pages must give back exactly the requested bytes.
        let range = IndexDataPageKey::calculate_range(offset, size, page_size as u64);
        let read = read[range.clone()].to_vec();
        let expected_range = offset as usize..(offset + size as u64) as usize;
        if read != data.get(expected_range).unwrap() {
            panic!(
                "fuzz_read_index failed, offset: {}, size: {}, page_size: {}\nread len: {}, expected len: {}\nrange: {:?}, page num: {}",
                offset, size, page_size, read.len(), size as usize,
                range,
                page_num
            );
        }
    }
}
|
||||
|
||||
fn unpack(fst_value: u64) -> [u32; 2] {
|
||||
bytemuck::cast::<u64, [u32; 2]>(fst_value)
|
||||
}
|
||||
|
||||
async fn create_inverted_index_blob() -> Vec<u8> {
|
||||
let mut blob = Vec::new();
|
||||
let mut writer = InvertedIndexBlobWriter::new(&mut blob);
|
||||
writer
|
||||
.add_index(
|
||||
"tag0".to_string(),
|
||||
BitVec::from_slice(&[0b0000_0001, 0b0000_0000]),
|
||||
Box::new(stream::iter(vec![
|
||||
Ok((Bytes::from("a"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
Ok((Bytes::from("b"), BitVec::from_slice(&[0b0010_0000]))),
|
||||
Ok((Bytes::from("c"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
])),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
writer
|
||||
.add_index(
|
||||
"tag1".to_string(),
|
||||
BitVec::from_slice(&[0b0000_0001, 0b0000_0000]),
|
||||
Box::new(stream::iter(vec![
|
||||
Ok((Bytes::from("x"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
Ok((Bytes::from("y"), BitVec::from_slice(&[0b0010_0000]))),
|
||||
Ok((Bytes::from("z"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
])),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
writer
|
||||
.finish(8, NonZeroUsize::new(1).unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
blob
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_inverted_index_cache() {
|
||||
let blob = create_inverted_index_blob().await;
|
||||
|
||||
// Init a test range reader in local fs.
|
||||
let mut env = TestEnv::new();
|
||||
let file_size = blob.len() as u64;
|
||||
let store = env.init_object_store_manager();
|
||||
let temp_path = "data";
|
||||
store.write(temp_path, blob).await.unwrap();
|
||||
let store = InstrumentedStore::new(store);
|
||||
let metric =
|
||||
register_int_counter_vec!("test_bytes", "a counter for test", &["test"]).unwrap();
|
||||
let counter = metric.with_label_values(&["test"]);
|
||||
let range_reader = store
|
||||
.range_reader("data", &counter, &counter)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let reader = InvertedIndexBlobReader::new(range_reader);
|
||||
let mut cached_reader = CachedInvertedIndexBlobReader::new(
|
||||
FileId::random(),
|
||||
file_size,
|
||||
reader,
|
||||
Arc::new(InvertedIndexCache::new(8192, 8192, 50)),
|
||||
);
|
||||
let metadata = cached_reader.metadata().await.unwrap();
|
||||
assert_eq!(metadata.total_row_count, 8);
|
||||
assert_eq!(metadata.segment_row_count, 1);
|
||||
assert_eq!(metadata.metas.len(), 2);
|
||||
// tag0
|
||||
let tag0 = metadata.metas.get("tag0").unwrap();
|
||||
let stats0 = tag0.stats.as_ref().unwrap();
|
||||
assert_eq!(stats0.distinct_count, 3);
|
||||
assert_eq!(stats0.null_count, 1);
|
||||
assert_eq!(stats0.min_value, Bytes::from("a"));
|
||||
assert_eq!(stats0.max_value, Bytes::from("c"));
|
||||
let fst0 = cached_reader
|
||||
.fst(
|
||||
tag0.base_offset + tag0.relative_fst_offset as u64,
|
||||
tag0.fst_size,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(fst0.len(), 3);
|
||||
let [offset, size] = unpack(fst0.get(b"a").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
let [offset, size] = unpack(fst0.get(b"b").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
|
||||
let [offset, size] = unpack(fst0.get(b"c").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
|
||||
// tag1
|
||||
let tag1 = metadata.metas.get("tag1").unwrap();
|
||||
let stats1 = tag1.stats.as_ref().unwrap();
|
||||
assert_eq!(stats1.distinct_count, 3);
|
||||
assert_eq!(stats1.null_count, 1);
|
||||
assert_eq!(stats1.min_value, Bytes::from("x"));
|
||||
assert_eq!(stats1.max_value, Bytes::from("z"));
|
||||
let fst1 = cached_reader
|
||||
.fst(
|
||||
tag1.base_offset + tag1.relative_fst_offset as u64,
|
||||
tag1.fst_size,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(fst1.len(), 3);
|
||||
let [offset, size] = unpack(fst1.get(b"x").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
let [offset, size] = unpack(fst1.get(b"y").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
|
||||
let [offset, size] = unpack(fst1.get(b"z").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
|
||||
// fuzz test
|
||||
let mut rng = rand::thread_rng();
|
||||
for _ in 0..FUZZ_REPEAT_TIMES {
|
||||
let offset = rng.gen_range(0..file_size);
|
||||
let size = rng.gen_range(0..file_size as u32 - offset as u32);
|
||||
let expected = cached_reader.range_read(offset, size).await.unwrap();
|
||||
let read = cached_reader.get_or_load(offset, size).await.unwrap();
|
||||
assert_eq!(read, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
2
src/mito2/src/cache/write_cache.rs
vendored
2
src/mito2/src/cache/write_cache.rs
vendored
@@ -501,7 +501,7 @@ mod tests {
|
||||
|
||||
// Read metadata from write cache
|
||||
let builder = ParquetReaderBuilder::new(data_home, handle.clone(), mock_store.clone())
|
||||
.cache(cache_manager.clone());
|
||||
.cache(Some(cache_manager.clone()));
|
||||
let reader = builder.build().await.unwrap();
|
||||
|
||||
// Check parquet metadata
|
||||
|
||||
@@ -44,7 +44,7 @@ use tokio::sync::mpsc::{self, Sender};
|
||||
|
||||
use crate::access_layer::AccessLayerRef;
|
||||
use crate::cache::CacheManagerRef;
|
||||
use crate::compaction::compactor::{CompactionRegion, DefaultCompactor};
|
||||
use crate::compaction::compactor::{CompactionRegion, CompactionVersion, DefaultCompactor};
|
||||
use crate::compaction::picker::{new_picker, CompactionTask};
|
||||
use crate::compaction::task::CompactionTaskImpl;
|
||||
use crate::config::MitoConfig;
|
||||
@@ -53,13 +53,13 @@ use crate::error::{
|
||||
RegionTruncatedSnafu, RemoteCompactionSnafu, Result, TimeRangePredicateOverflowSnafu,
|
||||
TimeoutSnafu,
|
||||
};
|
||||
use crate::metrics::COMPACTION_STAGE_ELAPSED;
|
||||
use crate::metrics::{COMPACTION_STAGE_ELAPSED, INFLIGHT_COMPACTION_COUNT};
|
||||
use crate::read::projection::ProjectionMapper;
|
||||
use crate::read::scan_region::ScanInput;
|
||||
use crate::read::seq_scan::SeqScan;
|
||||
use crate::read::BoxedBatchReader;
|
||||
use crate::region::options::MergeMode;
|
||||
use crate::region::version::{VersionControlRef, VersionRef};
|
||||
use crate::region::version::VersionControlRef;
|
||||
use crate::region::ManifestContextRef;
|
||||
use crate::request::{OptionOutputTx, OutputTx, WorkerRequest};
|
||||
use crate::schedule::remote_job_scheduler::{
|
||||
@@ -73,7 +73,7 @@ use crate::worker::WorkerListener;
|
||||
/// Region compaction request.
|
||||
pub struct CompactionRequest {
|
||||
pub(crate) engine_config: Arc<MitoConfig>,
|
||||
pub(crate) current_version: VersionRef,
|
||||
pub(crate) current_version: CompactionVersion,
|
||||
pub(crate) access_layer: AccessLayerRef,
|
||||
/// Sender to send notification to the region worker.
|
||||
pub(crate) request_sender: mpsc::Sender<WorkerRequest>,
|
||||
@@ -271,11 +271,11 @@ impl CompactionScheduler {
|
||||
current_version.options.ttl,
|
||||
&schema_metadata_manager,
|
||||
)
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
warn!(e; "Failed to get ttl for region: {}", region_id);
|
||||
TimeToLive::default()
|
||||
});
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
warn!(e; "Failed to get ttl for region: {}", region_id);
|
||||
TimeToLive::default()
|
||||
});
|
||||
|
||||
debug!(
|
||||
"Pick compaction strategy {:?} for region: {}, ttl: {:?}",
|
||||
@@ -340,6 +340,7 @@ impl CompactionScheduler {
|
||||
"Scheduled remote compaction job {} for region {}",
|
||||
job_id, region_id
|
||||
);
|
||||
INFLIGHT_COMPACTION_COUNT.inc();
|
||||
return Ok(());
|
||||
}
|
||||
Err(e) => {
|
||||
@@ -350,7 +351,7 @@ impl CompactionScheduler {
|
||||
job_id: None,
|
||||
reason: e.reason,
|
||||
}
|
||||
.fail();
|
||||
.fail();
|
||||
}
|
||||
|
||||
error!(e; "Failed to schedule remote compaction job for region {}, fallback to local compaction", region_id);
|
||||
@@ -384,7 +385,9 @@ impl CompactionScheduler {
|
||||
// Submit the compaction task.
|
||||
self.scheduler
|
||||
.schedule(Box::pin(async move {
|
||||
INFLIGHT_COMPACTION_COUNT.inc();
|
||||
local_compaction_task.run().await;
|
||||
INFLIGHT_COMPACTION_COUNT.dec();
|
||||
}))
|
||||
.map_err(|e| {
|
||||
error!(e; "Failed to submit compaction request for region {}", region_id);
|
||||
@@ -519,7 +522,7 @@ impl CompactionStatus {
|
||||
listener: WorkerListener,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
) -> CompactionRequest {
|
||||
let current_version = self.version_control.current().version;
|
||||
let current_version = CompactionVersion::from(self.version_control.current().version);
|
||||
let start_time = Instant::now();
|
||||
let mut req = CompactionRequest {
|
||||
engine_config,
|
||||
@@ -570,7 +573,6 @@ pub struct SerializedCompactionOutput {
|
||||
struct CompactionSstReaderBuilder<'a> {
|
||||
metadata: RegionMetadataRef,
|
||||
sst_layer: AccessLayerRef,
|
||||
cache: CacheManagerRef,
|
||||
inputs: &'a [FileHandle],
|
||||
append_mode: bool,
|
||||
filter_deleted: bool,
|
||||
@@ -584,7 +586,7 @@ impl<'a> CompactionSstReaderBuilder<'a> {
|
||||
let mut scan_input = ScanInput::new(self.sst_layer, ProjectionMapper::all(&self.metadata)?)
|
||||
.with_files(self.inputs.to_vec())
|
||||
.with_append_mode(self.append_mode)
|
||||
.with_cache(self.cache)
|
||||
.with_cache(None)
|
||||
.with_filter_deleted(self.filter_deleted)
|
||||
// We ignore file not found error during compaction.
|
||||
.with_ignore_file_not_found(true)
|
||||
|
||||
@@ -35,12 +35,10 @@ use crate::error::{EmptyRegionDirSnafu, JoinSnafu, ObjectStoreNotFoundSnafu, Res
|
||||
use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
|
||||
use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
|
||||
use crate::manifest::storage::manifest_compress_type;
|
||||
use crate::memtable::time_partition::TimePartitions;
|
||||
use crate::memtable::MemtableBuilderProvider;
|
||||
use crate::read::Source;
|
||||
use crate::region::opener::new_manifest_dir;
|
||||
use crate::region::options::RegionOptions;
|
||||
use crate::region::version::{VersionBuilder, VersionRef};
|
||||
use crate::region::version::VersionRef;
|
||||
use crate::region::{ManifestContext, RegionLeaderState, RegionRoleState};
|
||||
use crate::schedule::scheduler::LocalScheduler;
|
||||
use crate::sst::file::{FileMeta, IndexType};
|
||||
@@ -48,6 +46,34 @@ use crate::sst::file_purger::LocalFilePurger;
|
||||
use crate::sst::index::intermediate::IntermediateManager;
|
||||
use crate::sst::index::puffin_manager::PuffinManagerFactory;
|
||||
use crate::sst::parquet::WriteOptions;
|
||||
use crate::sst::version::{SstVersion, SstVersionRef};
|
||||
|
||||
/// Region version for compaction that does not hold memtables.
|
||||
#[derive(Clone)]
|
||||
pub struct CompactionVersion {
|
||||
/// Metadata of the region.
|
||||
///
|
||||
/// Altering metadata isn't frequent, storing metadata in Arc to allow sharing
|
||||
/// metadata and reuse metadata when creating a new `Version`.
|
||||
pub(crate) metadata: RegionMetadataRef,
|
||||
/// Options of the region.
|
||||
pub(crate) options: RegionOptions,
|
||||
/// SSTs of the region.
|
||||
pub(crate) ssts: SstVersionRef,
|
||||
/// Inferred compaction time window.
|
||||
pub(crate) compaction_time_window: Option<Duration>,
|
||||
}
|
||||
|
||||
impl From<VersionRef> for CompactionVersion {
|
||||
fn from(value: VersionRef) -> Self {
|
||||
Self {
|
||||
metadata: value.metadata.clone(),
|
||||
options: value.options.clone(),
|
||||
ssts: value.ssts.clone(),
|
||||
compaction_time_window: value.compaction_time_window,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// CompactionRegion represents a region that needs to be compacted.
|
||||
/// It's the subset of MitoRegion.
|
||||
@@ -62,7 +88,7 @@ pub struct CompactionRegion {
|
||||
pub(crate) cache_manager: CacheManagerRef,
|
||||
pub(crate) access_layer: AccessLayerRef,
|
||||
pub(crate) manifest_ctx: Arc<ManifestContext>,
|
||||
pub(crate) current_version: VersionRef,
|
||||
pub(crate) current_version: CompactionVersion,
|
||||
pub(crate) file_purger: Option<Arc<LocalFilePurger>>,
|
||||
pub(crate) ttl: Option<TimeToLive>,
|
||||
}
|
||||
@@ -147,30 +173,14 @@ pub async fn open_compaction_region(
|
||||
};
|
||||
|
||||
let current_version = {
|
||||
let memtable_builder = MemtableBuilderProvider::new(None, Arc::new(mito_config.clone()))
|
||||
.builder_for_options(
|
||||
req.region_options.memtable.as_ref(),
|
||||
req.region_options.need_dedup(),
|
||||
req.region_options.merge_mode(),
|
||||
);
|
||||
|
||||
// Initial memtable id is 0.
|
||||
let mutable = Arc::new(TimePartitions::new(
|
||||
region_metadata.clone(),
|
||||
memtable_builder.clone(),
|
||||
0,
|
||||
req.region_options.compaction.time_window(),
|
||||
));
|
||||
|
||||
let version = VersionBuilder::new(region_metadata.clone(), mutable)
|
||||
.add_files(file_purger.clone(), manifest.files.values().cloned())
|
||||
.flushed_entry_id(manifest.flushed_entry_id)
|
||||
.flushed_sequence(manifest.flushed_sequence)
|
||||
.truncated_entry_id(manifest.truncated_entry_id)
|
||||
.compaction_time_window(manifest.compaction_time_window)
|
||||
.options(req.region_options.clone())
|
||||
.build();
|
||||
Arc::new(version)
|
||||
let mut ssts = SstVersion::new();
|
||||
ssts.add_files(file_purger.clone(), manifest.files.values().cloned());
|
||||
CompactionVersion {
|
||||
metadata: region_metadata.clone(),
|
||||
options: req.region_options.clone(),
|
||||
ssts: Arc::new(ssts),
|
||||
compaction_time_window: manifest.compaction_time_window,
|
||||
}
|
||||
};
|
||||
|
||||
let ttl = find_ttl(
|
||||
@@ -296,7 +306,6 @@ impl Compactor for DefaultCompactor {
|
||||
let reader = CompactionSstReaderBuilder {
|
||||
metadata: region_metadata.clone(),
|
||||
sst_layer: sst_layer.clone(),
|
||||
cache: cache_manager.clone(),
|
||||
inputs: &output.inputs,
|
||||
append_mode,
|
||||
filter_deleted: output.filter_deleted,
|
||||
|
||||
@@ -23,10 +23,9 @@ use common_time::Timestamp;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::compaction::buckets::infer_time_bucket;
|
||||
use crate::compaction::compactor::CompactionRegion;
|
||||
use crate::compaction::compactor::{CompactionRegion, CompactionVersion};
|
||||
use crate::compaction::picker::{Picker, PickerOutput};
|
||||
use crate::compaction::{get_expired_ssts, CompactionOutput};
|
||||
use crate::region::version::VersionRef;
|
||||
use crate::sst::file::{FileHandle, FileId};
|
||||
|
||||
/// Compaction picker that splits the time range of all involved files to windows, and merges
|
||||
@@ -48,7 +47,11 @@ impl WindowedCompactionPicker {
|
||||
// use persisted window. If persist window is not present, we check the time window
|
||||
// provided while creating table. If all of those are absent, we infer the window
|
||||
// from files in level0.
|
||||
fn calculate_time_window(&self, region_id: RegionId, current_version: &VersionRef) -> i64 {
|
||||
fn calculate_time_window(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
current_version: &CompactionVersion,
|
||||
) -> i64 {
|
||||
self.compaction_time_window_seconds
|
||||
.or(current_version
|
||||
.compaction_time_window
|
||||
@@ -67,7 +70,7 @@ impl WindowedCompactionPicker {
|
||||
fn pick_inner(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
current_version: &VersionRef,
|
||||
current_version: &CompactionVersion,
|
||||
current_time: Timestamp,
|
||||
) -> (Vec<CompactionOutput>, Vec<FileHandle>, i64) {
|
||||
let time_window = self.calculate_time_window(region_id, current_version);
|
||||
@@ -205,28 +208,19 @@ mod tests {
|
||||
use common_time::Timestamp;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::compaction::compactor::CompactionVersion;
|
||||
use crate::compaction::window::{file_time_bucket_span, WindowedCompactionPicker};
|
||||
use crate::memtable::partition_tree::{PartitionTreeConfig, PartitionTreeMemtableBuilder};
|
||||
use crate::memtable::time_partition::TimePartitions;
|
||||
use crate::memtable::version::MemtableVersion;
|
||||
use crate::region::options::RegionOptions;
|
||||
use crate::region::version::{Version, VersionRef};
|
||||
use crate::sst::file::{FileId, FileMeta, Level};
|
||||
use crate::sst::version::SstVersion;
|
||||
use crate::test_util::memtable_util::metadata_for_test;
|
||||
use crate::test_util::NoopFilePurger;
|
||||
|
||||
fn build_version(files: &[(FileId, i64, i64, Level)], ttl: Option<Duration>) -> VersionRef {
|
||||
fn build_version(
|
||||
files: &[(FileId, i64, i64, Level)],
|
||||
ttl: Option<Duration>,
|
||||
) -> CompactionVersion {
|
||||
let metadata = metadata_for_test();
|
||||
let memtables = Arc::new(MemtableVersion::new(Arc::new(TimePartitions::new(
|
||||
metadata.clone(),
|
||||
Arc::new(PartitionTreeMemtableBuilder::new(
|
||||
PartitionTreeConfig::default(),
|
||||
None,
|
||||
)),
|
||||
0,
|
||||
None,
|
||||
))));
|
||||
let file_purger_ref = Arc::new(NoopFilePurger);
|
||||
|
||||
let mut ssts = SstVersion::new();
|
||||
@@ -244,14 +238,9 @@ mod tests {
|
||||
}),
|
||||
);
|
||||
|
||||
Arc::new(Version {
|
||||
CompactionVersion {
|
||||
metadata,
|
||||
memtables,
|
||||
ssts: Arc::new(ssts),
|
||||
flushed_entry_id: 0,
|
||||
flushed_sequence: 0,
|
||||
truncated_entry_id: None,
|
||||
compaction_time_window: None,
|
||||
options: RegionOptions {
|
||||
ttl: ttl.map(|t| t.into()),
|
||||
compaction: Default::default(),
|
||||
@@ -262,7 +251,8 @@ mod tests {
|
||||
memtable: None,
|
||||
merge_mode: None,
|
||||
},
|
||||
})
|
||||
compaction_time_window: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -304,6 +304,9 @@ pub struct IndexConfig {
|
||||
|
||||
/// Write buffer size for creating the index.
|
||||
pub write_buffer_size: ReadableSize,
|
||||
|
||||
/// Cache size for metadata of puffin files. Setting it to 0 to disable the cache.
|
||||
pub metadata_cache_size: ReadableSize,
|
||||
}
|
||||
|
||||
impl Default for IndexConfig {
|
||||
@@ -312,6 +315,7 @@ impl Default for IndexConfig {
|
||||
aux_path: String::new(),
|
||||
staging_size: ReadableSize::gb(2),
|
||||
write_buffer_size: ReadableSize::mb(8),
|
||||
metadata_cache_size: ReadableSize::mb(64),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -412,6 +416,8 @@ pub struct InvertedIndexConfig {
|
||||
pub metadata_cache_size: ReadableSize,
|
||||
/// Cache size for inverted index content. Setting it to 0 to disable the cache.
|
||||
pub content_cache_size: ReadableSize,
|
||||
/// Page size for inverted index content.
|
||||
pub content_cache_page_size: ReadableSize,
|
||||
}
|
||||
|
||||
impl InvertedIndexConfig {
|
||||
@@ -437,6 +443,7 @@ impl Default for InvertedIndexConfig {
|
||||
intermediate_path: String::new(),
|
||||
metadata_cache_size: ReadableSize::mb(64),
|
||||
content_cache_size: ReadableSize::mb(128),
|
||||
content_cache_page_size: ReadableSize::mb(8),
|
||||
};
|
||||
|
||||
if let Some(sys_memory) = common_config::utils::get_sys_total_memory() {
|
||||
|
||||
@@ -424,12 +424,16 @@ impl EngineInner {
|
||||
// Get cache.
|
||||
let cache_manager = self.workers.cache_manager();
|
||||
|
||||
let scan_region =
|
||||
ScanRegion::new(version, region.access_layer.clone(), request, cache_manager)
|
||||
.with_parallel_scan_channel_size(self.config.parallel_scan_channel_size)
|
||||
.with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled())
|
||||
.with_ignore_fulltext_index(self.config.fulltext_index.apply_on_query.disabled())
|
||||
.with_start_time(query_start);
|
||||
let scan_region = ScanRegion::new(
|
||||
version,
|
||||
region.access_layer.clone(),
|
||||
request,
|
||||
Some(cache_manager),
|
||||
)
|
||||
.with_parallel_scan_channel_size(self.config.parallel_scan_channel_size)
|
||||
.with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled())
|
||||
.with_ignore_fulltext_index(self.config.fulltext_index.apply_on_query.disabled())
|
||||
.with_start_time(query_start);
|
||||
|
||||
Ok(scan_region)
|
||||
}
|
||||
|
||||
@@ -192,12 +192,12 @@ async fn test_engine_create_with_custom_store() {
|
||||
assert!(object_store_manager
|
||||
.find("Gcs")
|
||||
.unwrap()
|
||||
.is_exist(region_dir)
|
||||
.exists(region_dir)
|
||||
.await
|
||||
.unwrap());
|
||||
assert!(!object_store_manager
|
||||
.default_object_store()
|
||||
.is_exist(region_dir)
|
||||
.exists(region_dir)
|
||||
.await
|
||||
.unwrap());
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user