Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2025-12-25 07:30:02 +00:00)

Compare commits: windows_pd...v0.11.1 (61 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 17d75c767c |  |
|  | a1ed450c0c |  |
|  | ea4ce9d1e3 |  |
|  | 1f7d9666b7 |  |
|  | 9f1a0d78b2 |  |
|  | ed8e418716 |  |
|  | 9e7121c1bb |  |
|  | 94a49ed4f0 |  |
|  | f5e743379f |  |
|  | 6735e5867e |  |
|  | 925525726b |  |
|  | 6427682a9a |  |
|  | 55b0022676 |  |
|  | 2d84cc8d87 |  |
|  | c030705b17 |  |
|  | 443c600bd0 |  |
|  | 39cadfe10b |  |
|  | 9b5e4e80f7 |  |
|  | 041a276b66 |  |
|  | 614a25ddc5 |  |
|  | 4337e20010 |  |
|  | 65c52cc698 |  |
|  | 50f31fd681 |  |
|  | b5af5aaf8d |  |
|  | 27693c7f1e |  |
|  | a59fef9ffb |  |
|  | bcecd8ce52 |  |
|  | ffdcb8c1ac |  |
|  | 554121ad79 |  |
|  | 43c12b4f2c |  |
|  | 7aa8c28fe4 |  |
|  | 34fbe7739e |  |
|  | 06d7bd99dd |  |
|  | b71d842615 |  |
|  | 7f71693b8e |  |
|  | 615ea1a171 |  |
|  | 4e725d259d |  |
|  | dc2252eb6d |  |
|  | 6d4cc2e070 |  |
|  | 6066ce2c4a |  |
|  | b90d8f7dbd |  |
|  | fdccf4ff84 |  |
|  | 8b1484c064 |  |
|  | 576e20ac78 |  |
|  | 10b3e3da0f |  |
|  | 4a3ef2d718 |  |
|  | 65eabb2a05 |  |
|  | bc5a57f51f |  |
|  | f24b9d8814 |  |
|  | dd4d0a88ce |  |
|  | 3d2096fe9d |  |
|  | 35715bb710 |  |
|  | 08a3befa67 |  |
|  | ca1758d4e7 |  |
|  | 42bf818167 |  |
|  | 2c9b117224 |  |
|  | 3edf2317e1 |  |
|  | 85d72a3cd0 |  |
|  | 928172bd82 |  |
|  | e9f5bddeff |  |
|  | 486755d795 |  |
@@ -54,7 +54,7 @@ runs:
PROFILE_TARGET: ${{ inputs.cargo-profile == 'dev' && 'debug' || inputs.cargo-profile }}
with:
artifacts-dir: ${{ inputs.artifacts-dir }}
target-files: ./target/$PROFILE_TARGET/greptime
target-file: ./target/$PROFILE_TARGET/greptime
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}

@@ -72,6 +72,6 @@ runs:
if: ${{ inputs.build-android-artifacts == 'true' }}
with:
artifacts-dir: ${{ inputs.artifacts-dir }}
target-files: ./target/aarch64-linux-android/release/greptime
target-file: ./target/aarch64-linux-android/release/greptime
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}

@@ -90,5 +90,5 @@ runs:
uses: ./.github/actions/upload-artifacts
with:
artifacts-dir: ${{ inputs.artifacts-dir }}
target-files: target/${{ inputs.arch }}/${{ inputs.cargo-profile }}/greptime
target-file: target/${{ inputs.arch }}/${{ inputs.cargo-profile }}/greptime
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}

@@ -76,5 +76,5 @@ runs:
uses: ./.github/actions/upload-artifacts
with:
artifacts-dir: ${{ inputs.artifacts-dir }}
target-files: target/${{ inputs.arch }}/${{ inputs.cargo-profile }}/greptime,target/${{ inputs.arch }}/${{ inputs.cargo-profile }}/greptime.pdb
target-file: target/${{ inputs.arch }}/${{ inputs.cargo-profile }}/greptime
version: ${{ inputs.version }}
@@ -5,7 +5,7 @@ meta:
[datanode]
[datanode.client]
timeout = "120s"
timeout = "60s"
datanode:
configData: |-
[runtime]

@@ -21,7 +21,7 @@ frontend:
global_rt_size = 4

[meta_client]
ddl_timeout = "120s"
ddl_timeout = "60s"
objectStorage:
s3:
bucket: default

@@ -5,7 +5,7 @@ meta:
[datanode]
[datanode.client]
timeout = "120s"
timeout = "60s"
datanode:
configData: |-
[runtime]

@@ -17,7 +17,7 @@ frontend:
global_rt_size = 4

[meta_client]
ddl_timeout = "120s"
ddl_timeout = "60s"
objectStorage:
s3:
bucket: default

@@ -11,7 +11,7 @@ meta:
[datanode]
[datanode.client]
timeout = "120s"
timeout = "60s"
datanode:
configData: |-
[runtime]

@@ -28,7 +28,7 @@ frontend:
global_rt_size = 4

[meta_client]
ddl_timeout = "120s"
ddl_timeout = "60s"
objectStorage:
s3:
bucket: default
14  .github/actions/upload-artifacts/action.yml (vendored)

@@ -4,8 +4,8 @@ inputs:
artifacts-dir:
description: Directory to store artifacts
required: true
target-files:
description: The multiple target files to upload, separated by comma
target-file:
description: The path of the target artifact
required: false
version:
description: Version of the artifact

@@ -18,16 +18,12 @@ runs:
using: composite
steps:
- name: Create artifacts directory
if: ${{ inputs.target-files != '' }}
if: ${{ inputs.target-file != '' }}
working-directory: ${{ inputs.working-dir }}
shell: bash
run: |
set -e
mkdir -p ${{ inputs.artifacts-dir }}
IFS=',' read -ra FILES <<< "${{ inputs.target-files }}"
for file in "${FILES[@]}"; do
cp "$file" ${{ inputs.artifacts-dir }}/
done
mkdir -p ${{ inputs.artifacts-dir }} && \
cp ${{ inputs.target-file }} ${{ inputs.artifacts-dir }}

# The compressed artifacts will use the following layout:
# greptime-linux-amd64-pyo3-v0.3.0sha256sum
2  .github/workflows/dev-build.yml (vendored)

@@ -29,7 +29,7 @@ on:
linux_arm64_runner:
type: choice
description: The runner uses to build linux-arm64 artifacts
default: ec2-c6g.4xlarge-arm64
default: ec2-c6g.8xlarge-arm64
options:
- ec2-c6g.xlarge-arm64 # 4C8G
- ec2-c6g.2xlarge-arm64 # 8C16G

2  .github/workflows/nightly-build.yml (vendored)

@@ -27,7 +27,7 @@ on:
linux_arm64_runner:
type: choice
description: The runner uses to build linux-arm64 artifacts
default: ec2-c6g.4xlarge-arm64
default: ec2-c6g.8xlarge-arm64
options:
- ec2-c6g.xlarge-arm64 # 4C8G
- ec2-c6g.2xlarge-arm64 # 8C16G
1  .github/workflows/nightly-ci.yml (vendored)

@@ -117,6 +117,7 @@ jobs:
cleanbuild-linux-nix:
runs-on: ubuntu-latest-8-cores
timeout-minutes: 60
needs: [coverage, fmt, clippy, check]
steps:
- uses: actions/checkout@v4
- uses: cachix/install-nix-action@v27

2  .github/workflows/release.yml (vendored)

@@ -91,7 +91,7 @@ env:
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.12.0
NEXT_RELEASE_VERSION: v0.11.0

# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
186  Cargo.lock (generated)
@@ -188,7 +188,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"
|
||||
|
||||
[[package]]
|
||||
name = "api"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-decimal",
|
||||
@@ -773,7 +773,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "auth"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -896,6 +896,18 @@ dependencies = [
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backon"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d67782c3f868daa71d3533538e98a8e13713231969def7536e8039606fc46bf0"
|
||||
dependencies = [
|
||||
"fastrand",
|
||||
"futures-core",
|
||||
"pin-project",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backon"
|
||||
version = "1.2.0"
|
||||
@@ -1314,7 +1326,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cache"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"catalog",
|
||||
"common-error",
|
||||
@@ -1348,7 +1360,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
||||
|
||||
[[package]]
|
||||
name = "catalog"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow",
|
||||
@@ -1684,7 +1696,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
|
||||
|
||||
[[package]]
|
||||
name = "cli"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"auth",
|
||||
@@ -1727,7 +1739,7 @@ dependencies = [
|
||||
"session",
|
||||
"snafu 0.8.5",
|
||||
"store-api",
|
||||
"substrait 0.12.0",
|
||||
"substrait 0.11.1",
|
||||
"table",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
@@ -1736,7 +1748,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "client"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -1763,7 +1775,7 @@ dependencies = [
|
||||
"rand",
|
||||
"serde_json",
|
||||
"snafu 0.8.5",
|
||||
"substrait 0.12.0",
|
||||
"substrait 0.11.1",
|
||||
"substrait 0.37.3",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
@@ -1804,7 +1816,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cmd"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"auth",
|
||||
@@ -1864,7 +1876,7 @@ dependencies = [
|
||||
"similar-asserts",
|
||||
"snafu 0.8.5",
|
||||
"store-api",
|
||||
"substrait 0.12.0",
|
||||
"substrait 0.11.1",
|
||||
"table",
|
||||
"temp-env",
|
||||
"tempfile",
|
||||
@@ -1916,7 +1928,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
|
||||
|
||||
[[package]]
|
||||
name = "common-base"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"anymap2",
|
||||
"async-trait",
|
||||
@@ -1938,11 +1950,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-catalog"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
|
||||
[[package]]
|
||||
name = "common-config"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-error",
|
||||
@@ -1965,7 +1977,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-datasource"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-schema",
|
||||
@@ -2001,7 +2013,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-decimal"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"bigdecimal 0.4.5",
|
||||
"common-error",
|
||||
@@ -2014,7 +2026,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-error"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"snafu 0.8.5",
|
||||
"strum 0.25.0",
|
||||
@@ -2023,7 +2035,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-frontend"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-error",
|
||||
@@ -2033,7 +2045,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-function"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"approx 0.5.1",
|
||||
@@ -2077,7 +2089,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-greptimedb-telemetry"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-runtime",
|
||||
@@ -2094,7 +2106,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-grpc"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -2120,7 +2132,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-grpc-expr"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"common-base",
|
||||
@@ -2139,7 +2151,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-macro"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"common-query",
|
||||
@@ -2153,7 +2165,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-mem-prof"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"common-error",
|
||||
"common-macro",
|
||||
@@ -2166,7 +2178,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-meta"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"anymap2",
|
||||
"api",
|
||||
@@ -2223,7 +2235,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-options"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"common-grpc",
|
||||
"humantime-serde",
|
||||
@@ -2232,11 +2244,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-plugins"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
|
||||
[[package]]
|
||||
name = "common-pprof"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"common-error",
|
||||
"common-macro",
|
||||
@@ -2248,11 +2260,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-procedure"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
"backon",
|
||||
"backon 1.2.0",
|
||||
"common-base",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
@@ -2275,7 +2287,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-procedure-test"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-procedure",
|
||||
@@ -2283,7 +2295,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-query"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -2309,7 +2321,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-recordbatch"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"common-error",
|
||||
@@ -2328,7 +2340,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-runtime"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"clap 4.5.19",
|
||||
@@ -2356,7 +2368,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-telemetry"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"backtrace",
|
||||
@@ -2384,7 +2396,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-test-util"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"client",
|
||||
"common-query",
|
||||
@@ -2396,7 +2408,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-time"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"chrono",
|
||||
@@ -2414,7 +2426,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-version"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"build-data",
|
||||
"const_format",
|
||||
@@ -2424,7 +2436,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-wal"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-error",
|
||||
@@ -3223,7 +3235,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datanode"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -3274,7 +3286,7 @@ dependencies = [
|
||||
"session",
|
||||
"snafu 0.8.5",
|
||||
"store-api",
|
||||
"substrait 0.12.0",
|
||||
"substrait 0.11.1",
|
||||
"table",
|
||||
"tokio",
|
||||
"toml 0.8.19",
|
||||
@@ -3283,7 +3295,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datatypes"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -3907,7 +3919,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "file-engine"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -4023,7 +4035,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
|
||||
|
||||
[[package]]
|
||||
name = "flow"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow",
|
||||
@@ -4081,7 +4093,7 @@ dependencies = [
|
||||
"snafu 0.8.5",
|
||||
"store-api",
|
||||
"strum 0.25.0",
|
||||
"substrait 0.12.0",
|
||||
"substrait 0.11.1",
|
||||
"table",
|
||||
"tokio",
|
||||
"tonic 0.11.0",
|
||||
@@ -4119,7 +4131,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"
|
||||
|
||||
[[package]]
|
||||
name = "frontend"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -4152,7 +4164,6 @@ dependencies = [
|
||||
"futures",
|
||||
"humantime-serde",
|
||||
"lazy_static",
|
||||
"log-query",
|
||||
"log-store",
|
||||
"meta-client",
|
||||
"opentelemetry-proto 0.5.0",
|
||||
@@ -5269,7 +5280,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "index"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"asynchronous-codec",
|
||||
@@ -6118,19 +6129,18 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
|
||||
|
||||
[[package]]
|
||||
name = "log-query"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"serde",
|
||||
"snafu 0.8.5",
|
||||
"table",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "log-store"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@@ -6474,7 +6484,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meta-client"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -6501,7 +6511,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meta-srv"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -6580,7 +6590,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "metric-engine"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -6674,7 +6684,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mito2"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -7411,7 +7421,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "object-store"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
@@ -7471,12 +7481,13 @@ checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9"
|
||||
|
||||
[[package]]
|
||||
name = "opendal"
|
||||
version = "0.50.2"
|
||||
source = "git+https://github.com/GreptimeTeam/opendal.git?rev=c82605177f2feec83e49dcaa537c505639d94024#c82605177f2feec83e49dcaa537c505639d94024"
|
||||
version = "0.49.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b04d09b9822c2f75a1d2fc513a2c1279c70e91e7407936fffdf6a6976ec530a"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"backon",
|
||||
"backon 0.4.4",
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"chrono",
|
||||
@@ -7489,7 +7500,6 @@ dependencies = [
|
||||
"md-5",
|
||||
"once_cell",
|
||||
"percent-encoding",
|
||||
"prometheus",
|
||||
"quick-xml 0.36.2",
|
||||
"reqsign",
|
||||
"reqwest",
|
||||
@@ -7664,7 +7674,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "operator"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"api",
|
||||
@@ -7712,7 +7722,7 @@ dependencies = [
|
||||
"sql",
|
||||
"sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)",
|
||||
"store-api",
|
||||
"substrait 0.12.0",
|
||||
"substrait 0.11.1",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
@@ -7962,7 +7972,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "partition"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -8161,7 +8171,7 @@ dependencies = [
|
||||
"rand",
|
||||
"ring 0.17.8",
|
||||
"rust_decimal",
|
||||
"thiserror 2.0.6",
|
||||
"thiserror 2.0.4",
|
||||
"tokio",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-util",
|
||||
@@ -8248,7 +8258,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "pipeline"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"api",
|
||||
@@ -8410,7 +8420,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "plugins"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"auth",
|
||||
"clap 4.5.19",
|
||||
@@ -8698,7 +8708,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "promql"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"async-trait",
|
||||
@@ -8933,7 +8943,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "puffin"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"async-compression 0.4.13",
|
||||
"async-trait",
|
||||
@@ -9058,7 +9068,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "query"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"api",
|
||||
@@ -9099,7 +9109,6 @@ dependencies = [
|
||||
"humantime",
|
||||
"itertools 0.10.5",
|
||||
"lazy_static",
|
||||
"log-query",
|
||||
"meter-core",
|
||||
"meter-macros",
|
||||
"num",
|
||||
@@ -9122,7 +9131,7 @@ dependencies = [
|
||||
"sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)",
|
||||
"statrs",
|
||||
"store-api",
|
||||
"substrait 0.12.0",
|
||||
"substrait 0.11.1",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
@@ -9506,9 +9515,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "reqsign"
|
||||
version = "0.16.1"
|
||||
version = "0.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb0075a66c8bfbf4cc8b70dca166e722e1f55a3ea9250ecbb85f4d92a5f64149"
|
||||
checksum = "03dd4ba7c3901dd43e6b8c7446a760d45bc1ea4301002e1a6fa48f97c3a796fa"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -10606,7 +10615,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "script"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -10898,7 +10907,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "servers"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"api",
|
||||
@@ -10954,7 +10963,6 @@ dependencies = [
|
||||
"json5",
|
||||
"jsonb",
|
||||
"lazy_static",
|
||||
"log-query",
|
||||
"loki-api",
|
||||
"mime_guess",
|
||||
"mysql_async",
|
||||
@@ -11010,7 +11018,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "session"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -11364,7 +11372,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sql"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"chrono",
|
||||
@@ -11428,7 +11436,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlness-runner"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"clap 4.5.19",
|
||||
@@ -11646,7 +11654,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "store-api"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -11808,7 +11816,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "substrait"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
@@ -12007,7 +12015,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "table"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -12284,7 +12292,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
|
||||
|
||||
[[package]]
|
||||
name = "tests-fuzz"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"async-trait",
|
||||
@@ -12327,7 +12335,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tests-integration"
|
||||
version = "0.12.0"
|
||||
version = "0.11.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -12391,7 +12399,7 @@ dependencies = [
|
||||
"sql",
|
||||
"sqlx",
|
||||
"store-api",
|
||||
"substrait 0.12.0",
|
||||
"substrait 0.11.1",
|
||||
"table",
|
||||
"tempfile",
|
||||
"time",
|
||||
@@ -12437,11 +12445,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "2.0.6"
|
||||
version = "2.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47"
|
||||
checksum = "2f49a1853cf82743e3b7950f77e0f4d622ca36cf4317cba00c767838bac8d490"
|
||||
dependencies = [
|
||||
"thiserror-impl 2.0.6",
|
||||
"thiserror-impl 2.0.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -12457,9 +12465,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "2.0.6"
|
||||
version = "2.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312"
|
||||
checksum = "8381894bb3efe0c4acac3ded651301ceee58a15d47c2e34885ed1908ad667061"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
||||
@@ -68,7 +68,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.12.0"
version = "0.11.1"
edition = "2021"
license = "Apache-2.0"

@@ -238,7 +238,6 @@ file-engine = { path = "src/file-engine" }
flow = { path = "src/flow" }
frontend = { path = "src/frontend", default-features = false }
index = { path = "src/index" }
log-query = { path = "src/log-query" }
log-store = { path = "src/log-store" }
meta-client = { path = "src/meta-client" }
meta-srv = { path = "src/meta-srv" }
18  README.md

@@ -70,23 +70,23 @@ Our core developers have been building time-series data platforms for years. Bas

* **Unified Processing of Metrics, Logs, and Events**

GreptimeDB unifies time series data processing by treating all data - whether metrics, logs, or events - as timestamped events with context. Users can analyze this data using either [SQL](https://docs.greptime.com/user-guide/query-data/sql) or [PromQL](https://docs.greptime.com/user-guide/query-data/promql) and leverage stream processing ([Flow](https://docs.greptime.com/user-guide/flow-computation/overview)) to enable continuous aggregation. [Read more](https://docs.greptime.com/user-guide/concepts/data-model).
GreptimeDB unifies time series data processing by treating all data - whether metrics, logs, or events - as timestamped events with context. Users can analyze this data using either [SQL](https://docs.greptime.com/user-guide/query-data/sql) or [PromQL](https://docs.greptime.com/user-guide/query-data/promql) and leverage stream processing ([Flow](https://docs.greptime.com/user-guide/continuous-aggregation/overview)) to enable continuous aggregation. [Read more](https://docs.greptime.com/user-guide/concepts/data-model).

* **Cloud-native Distributed Database**

Built for [Kubernetes](https://docs.greptime.com/user-guide/deployments/deploy-on-kubernetes/greptimedb-operator-management). GreptimeDB achieves seamless scalability with its [cloud-native architecture](https://docs.greptime.com/user-guide/concepts/architecture) of separated compute and storage, built on object storage (AWS S3, Azure Blob Storage, etc.) while enabling cross-cloud deployment through a unified data access layer.

* **Performance and Cost-effective**

Written in pure Rust for superior performance and reliability. GreptimeDB features a distributed query engine with intelligent indexing to handle high cardinality data efficiently. Its optimized columnar storage achieves 50x cost efficiency on cloud object storage through advanced compression. [Benchmark reports](https://www.greptime.com/blogs/2024-09-09-report-summary).

* **Cloud-Edge Collaboration**

GreptimeDB seamlessly operates across cloud and edge (ARM/Android/Linux), providing consistent APIs and control plane for unified data management and efficient synchronization. [Learn how to run on Android](https://docs.greptime.com/user-guide/deployments/run-on-android/).

* **Multi-protocol Ingestion, SQL & PromQL Ready**

Widely adopted database protocols and APIs, including MySQL, PostgreSQL, InfluxDB, OpenTelemetry, Loki and Prometheus, etc. Effortless Adoption & Seamless Migration. [Supported Protocols Overview](https://docs.greptime.com/user-guide/protocols/overview).

For more detailed info please read [Why GreptimeDB](https://docs.greptime.com/user-guide/concepts/why-greptimedb).

@@ -138,7 +138,7 @@ Check the prerequisite:

* [Rust toolchain](https://www.rust-lang.org/tools/install) (nightly)
* [Protobuf compiler](https://grpc.io/docs/protoc-installation/) (>= 3.15)
* Python toolchain (optional): Required only if built with PyO3 backend. More details for compiling with PyO3 can be found in its [documentation](https://pyo3.rs/v0.18.1/building_and_distribution#configuring-the-python-version).
* Python toolchain (optional): Required only if built with PyO3 backend. More detail for compiling with PyO3 can be found in its [documentation](https://pyo3.rs/v0.18.1/building_and_distribution#configuring-the-python-version).

Build GreptimeDB binary:

@@ -154,10 +154,6 @@ cargo run -- standalone start

## Tools & Extensions

### Kubernetes

- [GreptimeDB Operator](https://github.com/GrepTimeTeam/greptimedb-operator)

### Dashboard

- [The dashboard UI for GreptimeDB](https://github.com/GreptimeTeam/dashboard)

@@ -177,7 +173,7 @@ Our official Grafana dashboard for monitoring GreptimeDB is available at [grafan

## Project Status

GreptimeDB is currently in Beta. We are targeting GA (General Availability) with v1.0 release by Early 2025.

While in Beta, GreptimeDB is already:
@@ -25,7 +25,6 @@ pub enum PermissionReq<'a> {
GrpcRequest(&'a Request),
SqlStatement(&'a Statement),
PromQuery,
LogQuery,
Opentsdb,
LineProtocol,
PromStoreWrite,
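This hunk removes the `LogQuery` variant from `PermissionReq` (log-query support is dropped elsewhere in this compare as well). A minimal, hypothetical sketch of how such a request-kind enum is typically consumed by a permission check; the simplified enum and the `is_allowed` function are illustrations only, not the repository's actual API:

```rust
// Simplified stand-in for the enum in the hunk above; the real variants carry
// request/statement payloads and a lifetime parameter.
#[allow(dead_code)]
enum PermissionReq {
    GrpcRequest,
    SqlStatement,
    PromQuery,
    LogQuery,
    Opentsdb,
    LineProtocol,
    PromStoreWrite,
}

// Hypothetical policy: gate log queries behind a flag, allow everything else.
fn is_allowed(req: &PermissionReq, allow_log_query: bool) -> bool {
    match req {
        PermissionReq::LogQuery => allow_log_query,
        _ => true,
    }
}

fn main() {
    assert!(is_allowed(&PermissionReq::PromQuery, false));
    assert!(!is_allowed(&PermissionReq::LogQuery, false));
}
```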
@@ -38,7 +38,7 @@ pub fn new_table_cache(
) -> TableCache {
let init = init_factory(table_info_cache, table_name_cache);

CacheContainer::new(name, cache, Box::new(invalidator), init, filter)
CacheContainer::new(name, cache, Box::new(invalidator), init, Box::new(filter))
}

fn init_factory(
@@ -27,7 +27,7 @@ pub fn build_fs_backend(root: &str) -> Result<ObjectStore> {
DefaultLoggingInterceptor,
))
.layer(object_store::layers::TracingLayer)
.layer(object_store::layers::build_prometheus_metrics_layer(true))
.layer(object_store::layers::PrometheusMetricsLayer::new(true))
.finish();
Ok(object_store)
}

@@ -89,7 +89,7 @@ pub fn build_s3_backend(
DefaultLoggingInterceptor,
))
.layer(object_store::layers::TracingLayer)
.layer(object_store::layers::build_prometheus_metrics_layer(true))
.layer(object_store::layers::PrometheusMetricsLayer::new(true))
.finish())
}
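Both hunks only swap how the Prometheus metrics layer is constructed (a free helper vs. the layer type's constructor) while keeping the same layered-store shape. A rough, self-contained sketch of that decorator-style layering, using made-up types rather than the object_store crate's real API:

```rust
// Illustrative only: a tiny accessor trait plus one wrapping layer.
trait Access {
    fn read(&self, path: &str) -> Vec<u8>;
}

struct FsBackend;
impl Access for FsBackend {
    fn read(&self, _path: &str) -> Vec<u8> {
        Vec::new()
    }
}

// A layer wraps an accessor and adds behavior (tracing, metrics, ...).
struct MetricsLayer<A> {
    inner: A,
    record_path_labels: bool, // mirrors the boolean passed in the hunks above
}
impl<A: Access> Access for MetricsLayer<A> {
    fn read(&self, path: &str) -> Vec<u8> {
        // A real layer would record a counter/histogram here;
        // `record_path_labels` would control label cardinality.
        let _ = self.record_path_labels;
        self.inner.read(path)
    }
}

fn main() {
    let store = MetricsLayer { inner: FsBackend, record_path_labels: true };
    let _ = store.read("manifest/_file_manifest");
}
```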
@@ -26,4 +26,3 @@ pub mod function_registry;
pub mod handlers;
pub mod helper;
pub mod state;
pub mod utils;
@@ -204,10 +204,20 @@ impl PatternAst {
fn convert_literal(column: &str, pattern: &str) -> Expr {
logical_expr::col(column).like(logical_expr::lit(format!(
"%{}%",
crate::utils::escape_like_pattern(pattern)
Self::escape_pattern(pattern)
)))
}

fn escape_pattern(pattern: &str) -> String {
pattern
.chars()
.flat_map(|c| match c {
'\\' | '%' | '_' => vec!['\\', c],
_ => vec![c],
})
.collect::<String>()
}

/// Transform this AST with preset rules to make it correct.
fn transform_ast(self) -> Result<Self> {
self.transform_up(Self::collapse_binary_branch_fn)
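The escaping logic itself is shown in full above (and in the deleted `like_utils.rs` below), so it can be exercised standalone. A small, self-contained reproduction with the expected outputs taken from the original tests:

```rust
/// Escape `\`, `%` and `_` so they match literally in a SQL LIKE pattern
/// (same logic as `escape_pattern` / `escape_like_pattern` in the diff).
fn escape_like_pattern(pattern: &str) -> String {
    pattern
        .chars()
        .flat_map(|c| match c {
            '\\' | '%' | '_' => vec!['\\', c],
            _ => vec![c],
        })
        .collect::<String>()
}

fn main() {
    // A literal search for `100%_some\path` becomes a LIKE pattern with the
    // wildcards and the backslash escaped.
    assert_eq!(escape_like_pattern("100%_some\\path"), "100\\%\\_some\\\\path");
    // The caller then wraps it in `%...%` for a contains-style match.
    let like = format!("%{}%", escape_like_pattern("50%_off"));
    assert_eq!(like, "%50\\%\\_off%");
}
```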
@@ -1,58 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/// Escapes special characters in the provided pattern string for `LIKE`.
///
/// Specifically, it prefixes the backslash (`\`), percent (`%`), and underscore (`_`)
/// characters with an additional backslash to ensure they are treated literally.
///
/// # Examples
///
/// ```rust
/// let escaped = escape_pattern("100%_some\\path");
/// assert_eq!(escaped, "100\\%\\_some\\\\path");
/// ```
pub fn escape_like_pattern(pattern: &str) -> String {
pattern
.chars()
.flat_map(|c| match c {
'\\' | '%' | '_' => vec!['\\', c],
_ => vec![c],
})
.collect::<String>()
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_escape_like_pattern() {
assert_eq!(
escape_like_pattern("100%_some\\path"),
"100\\%\\_some\\\\path"
);
assert_eq!(escape_like_pattern(""), "");
assert_eq!(escape_like_pattern("hello"), "hello");
assert_eq!(escape_like_pattern("\\%_"), "\\\\\\%\\_");
assert_eq!(escape_like_pattern("%%__\\\\"), "\\%\\%\\_\\_\\\\\\\\");
assert_eq!(escape_like_pattern("abc123"), "abc123");
assert_eq!(escape_like_pattern("%_\\"), "\\%\\_\\\\");
assert_eq!(
escape_like_pattern("%%__\\\\another%string"),
"\\%\\%\\_\\_\\\\\\\\another\\%string"
);
assert_eq!(escape_like_pattern("foo%bar_"), "foo\\%bar\\_");
assert_eq!(escape_like_pattern("\\_\\%"), "\\\\\\_\\\\\\%");
}
}
44  src/common/meta/src/cache/container.rs (vendored)

@@ -43,7 +43,7 @@ pub struct CacheContainer<K, V, CacheToken> {
cache: Cache<K, V>,
invalidator: Invalidator<K, V, CacheToken>,
initializer: Initializer<K, V>,
token_filter: fn(&CacheToken) -> bool,
token_filter: TokenFilter<CacheToken>,
}

impl<K, V, CacheToken> CacheContainer<K, V, CacheToken>

@@ -58,7 +58,7 @@ where
cache: Cache<K, V>,
invalidator: Invalidator<K, V, CacheToken>,
initializer: Initializer<K, V>,
token_filter: fn(&CacheToken) -> bool,
token_filter: TokenFilter<CacheToken>,
) -> Self {
Self {
name,
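One side of these hunks stores the token filter as a plain function pointer, the other as a `TokenFilter<CacheToken>` that callers box (`Box::new(filter)` in the `new_*_cache` hunks). The alias definition is not shown in this compare, so the sketch below assumes a boxed-closure form; it only illustrates the capture-capability difference between the two shapes:

```rust
// Assumed shape of the alias (not shown in the diff): a boxed predicate.
type TokenFilter<T> = Box<dyn Fn(&T) -> bool + Send + Sync>;

// A plain `fn` pointer cannot capture environment...
fn always_true(_: &String) -> bool {
    true
}

fn main() {
    let fn_filter: fn(&String) -> bool = always_true;

    // ...while a boxed closure can, e.g. filtering cache tokens by a runtime prefix.
    let prefix = "table/".to_string();
    let boxed_filter: TokenFilter<String> = Box::new(move |token| token.starts_with(&prefix));

    assert!(fn_filter(&"anything".to_string()));
    assert!(boxed_filter(&"table/1024".to_string()));
    assert!(!boxed_filter(&"schema/public".to_string()));
}
```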
@@ -206,13 +206,10 @@ mod tests {
|
||||
name: &'a str,
|
||||
}
|
||||
|
||||
fn always_true_filter(_: &String) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_get() {
|
||||
let cache: Cache<NameKey, String> = CacheBuilder::new(128).build();
|
||||
let filter: TokenFilter<String> = Box::new(|_| true);
|
||||
let counter = Arc::new(AtomicI32::new(0));
|
||||
let moved_counter = counter.clone();
|
||||
let init: Initializer<NameKey, String> = Arc::new(move |_| {
|
||||
@@ -222,13 +219,7 @@ mod tests {
|
||||
let invalidator: Invalidator<NameKey, String, String> =
|
||||
Box::new(|_, _| Box::pin(async { Ok(()) }));
|
||||
|
||||
let adv_cache = CacheContainer::new(
|
||||
"test".to_string(),
|
||||
cache,
|
||||
invalidator,
|
||||
init,
|
||||
always_true_filter,
|
||||
);
|
||||
let adv_cache = CacheContainer::new("test".to_string(), cache, invalidator, init, filter);
|
||||
let key = NameKey { name: "key" };
|
||||
let value = adv_cache.get(key).await.unwrap().unwrap();
|
||||
assert_eq!(value, "hi");
|
||||
@@ -242,6 +233,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_get_by_ref() {
|
||||
let cache: Cache<String, String> = CacheBuilder::new(128).build();
|
||||
let filter: TokenFilter<String> = Box::new(|_| true);
|
||||
let counter = Arc::new(AtomicI32::new(0));
|
||||
let moved_counter = counter.clone();
|
||||
let init: Initializer<String, String> = Arc::new(move |_| {
|
||||
@@ -251,13 +243,7 @@ mod tests {
|
||||
let invalidator: Invalidator<String, String, String> =
|
||||
Box::new(|_, _| Box::pin(async { Ok(()) }));
|
||||
|
||||
let adv_cache = CacheContainer::new(
|
||||
"test".to_string(),
|
||||
cache,
|
||||
invalidator,
|
||||
init,
|
||||
always_true_filter,
|
||||
);
|
||||
let adv_cache = CacheContainer::new("test".to_string(), cache, invalidator, init, filter);
|
||||
let value = adv_cache.get_by_ref("foo").await.unwrap().unwrap();
|
||||
assert_eq!(value, "hi");
|
||||
let value = adv_cache.get_by_ref("foo").await.unwrap().unwrap();
|
||||
@@ -271,18 +257,13 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_get_value_not_exits() {
|
||||
let cache: Cache<String, String> = CacheBuilder::new(128).build();
|
||||
let filter: TokenFilter<String> = Box::new(|_| true);
|
||||
let init: Initializer<String, String> =
|
||||
Arc::new(move |_| Box::pin(async { error::ValueNotExistSnafu {}.fail() }));
|
||||
let invalidator: Invalidator<String, String, String> =
|
||||
Box::new(|_, _| Box::pin(async { Ok(()) }));
|
||||
|
||||
let adv_cache = CacheContainer::new(
|
||||
"test".to_string(),
|
||||
cache,
|
||||
invalidator,
|
||||
init,
|
||||
always_true_filter,
|
||||
);
|
||||
let adv_cache = CacheContainer::new("test".to_string(), cache, invalidator, init, filter);
|
||||
let value = adv_cache.get_by_ref("foo").await.unwrap();
|
||||
assert!(value.is_none());
|
||||
}
|
||||
@@ -290,6 +271,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_invalidate() {
|
||||
let cache: Cache<String, String> = CacheBuilder::new(128).build();
|
||||
let filter: TokenFilter<String> = Box::new(|_| true);
|
||||
let counter = Arc::new(AtomicI32::new(0));
|
||||
let moved_counter = counter.clone();
|
||||
let init: Initializer<String, String> = Arc::new(move |_| {
|
||||
@@ -303,13 +285,7 @@ mod tests {
|
||||
})
|
||||
});
|
||||
|
||||
let adv_cache = CacheContainer::new(
|
||||
"test".to_string(),
|
||||
cache,
|
||||
invalidator,
|
||||
init,
|
||||
always_true_filter,
|
||||
);
|
||||
let adv_cache = CacheContainer::new("test".to_string(), cache, invalidator, init, filter);
|
||||
let value = adv_cache.get_by_ref("foo").await.unwrap().unwrap();
|
||||
assert_eq!(value, "hi");
|
||||
let value = adv_cache.get_by_ref("foo").await.unwrap().unwrap();
|
||||
|
||||
@@ -45,7 +45,7 @@ pub fn new_table_flownode_set_cache(
|
||||
let table_flow_manager = Arc::new(TableFlowManager::new(kv_backend));
|
||||
let init = init_factory(table_flow_manager);
|
||||
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, filter)
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, Box::new(filter))
|
||||
}
|
||||
|
||||
fn init_factory(table_flow_manager: TableFlowManagerRef) -> Initializer<TableId, FlownodeSet> {
|
||||
|
||||
22
src/common/meta/src/cache/registry.rs
vendored
22
src/common/meta/src/cache/registry.rs
vendored
@@ -151,15 +151,12 @@ mod tests {
|
||||
use crate::cache::*;
|
||||
use crate::instruction::CacheIdent;
|
||||
|
||||
fn always_true_filter(_: &CacheIdent) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn test_cache(
|
||||
name: &str,
|
||||
invalidator: Invalidator<String, String, CacheIdent>,
|
||||
) -> CacheContainer<String, String, CacheIdent> {
|
||||
let cache: Cache<String, String> = CacheBuilder::new(128).build();
|
||||
let filter: TokenFilter<CacheIdent> = Box::new(|_| true);
|
||||
let counter = Arc::new(AtomicI32::new(0));
|
||||
let moved_counter = counter.clone();
|
||||
let init: Initializer<String, String> = Arc::new(move |_| {
|
||||
@@ -167,13 +164,7 @@ mod tests {
|
||||
Box::pin(async { Ok(Some("hi".to_string())) })
|
||||
});
|
||||
|
||||
CacheContainer::new(
|
||||
name.to_string(),
|
||||
cache,
|
||||
invalidator,
|
||||
init,
|
||||
always_true_filter,
|
||||
)
|
||||
CacheContainer::new(name.to_string(), cache, invalidator, init, filter)
|
||||
}
|
||||
|
||||
fn test_i32_cache(
|
||||
@@ -181,6 +172,7 @@ mod tests {
|
||||
invalidator: Invalidator<i32, String, CacheIdent>,
|
||||
) -> CacheContainer<i32, String, CacheIdent> {
|
||||
let cache: Cache<i32, String> = CacheBuilder::new(128).build();
|
||||
let filter: TokenFilter<CacheIdent> = Box::new(|_| true);
|
||||
let counter = Arc::new(AtomicI32::new(0));
|
||||
let moved_counter = counter.clone();
|
||||
let init: Initializer<i32, String> = Arc::new(move |_| {
|
||||
@@ -188,13 +180,7 @@ mod tests {
|
||||
Box::pin(async { Ok(Some("foo".to_string())) })
|
||||
});
|
||||
|
||||
CacheContainer::new(
|
||||
name.to_string(),
|
||||
cache,
|
||||
invalidator,
|
||||
init,
|
||||
always_true_filter,
|
||||
)
|
||||
CacheContainer::new(name.to_string(), cache, invalidator, init, filter)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
2
src/common/meta/src/cache/table/schema.rs
vendored
2
src/common/meta/src/cache/table/schema.rs
vendored
@@ -36,7 +36,7 @@ pub fn new_schema_cache(
|
||||
let schema_manager = SchemaManager::new(kv_backend.clone());
|
||||
let init = init_factory(schema_manager);
|
||||
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, filter)
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, Box::new(filter))
|
||||
}
|
||||
|
||||
fn init_factory(schema_manager: SchemaManager) -> Initializer<SchemaName, Arc<SchemaNameValue>> {
|
||||
|
||||
@@ -41,7 +41,7 @@ pub fn new_table_info_cache(
|
||||
let table_info_manager = Arc::new(TableInfoManager::new(kv_backend));
|
||||
let init = init_factory(table_info_manager);
|
||||
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, filter)
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, Box::new(filter))
|
||||
}
|
||||
|
||||
fn init_factory(table_info_manager: TableInfoManagerRef) -> Initializer<TableId, Arc<TableInfo>> {
|
||||
|
||||
@@ -41,7 +41,7 @@ pub fn new_table_name_cache(
|
||||
let table_name_manager = Arc::new(TableNameManager::new(kv_backend));
|
||||
let init = init_factory(table_name_manager);
|
||||
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, filter)
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, Box::new(filter))
|
||||
}
|
||||
|
||||
fn init_factory(table_name_manager: TableNameManagerRef) -> Initializer<TableName, TableId> {
|
||||
|
||||
@@ -65,7 +65,7 @@ pub fn new_table_route_cache(
|
||||
let table_info_manager = Arc::new(TableRouteManager::new(kv_backend));
|
||||
let init = init_factory(table_info_manager);
|
||||
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, filter)
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, Box::new(filter))
|
||||
}
|
||||
|
||||
fn init_factory(
|
||||
|
||||
@@ -40,7 +40,7 @@ pub fn new_table_schema_cache(
|
||||
let table_info_manager = TableInfoManager::new(kv_backend);
|
||||
let init = init_factory(table_info_manager);
|
||||
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, filter)
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, Box::new(filter))
|
||||
}
|
||||
|
||||
fn init_factory(table_info_manager: TableInfoManager) -> Initializer<TableId, Arc<SchemaName>> {
|
||||
|
||||
2
src/common/meta/src/cache/table/view_info.rs
vendored
2
src/common/meta/src/cache/table/view_info.rs
vendored
@@ -40,7 +40,7 @@ pub fn new_view_info_cache(
|
||||
let view_info_manager = Arc::new(ViewInfoManager::new(kv_backend));
|
||||
let init = init_factory(view_info_manager);
|
||||
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, filter)
|
||||
CacheContainer::new(name, cache, Box::new(invalidator), init, Box::new(filter))
|
||||
}
|
||||
|
||||
fn init_factory(view_info_manager: ViewInfoManagerRef) -> Initializer<TableId, Arc<ViewInfoValue>> {
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Display;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_catalog::consts::DEFAULT_CATALOG_NAME;
|
||||
use futures::stream::BoxStream;
|
||||
@@ -145,7 +146,7 @@ impl CatalogManager {
|
||||
self.kv_backend.clone(),
|
||||
req,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
catalog_decoder,
|
||||
Arc::new(catalog_decoder),
|
||||
)
|
||||
.into_stream();
|
||||
|
||||
@@ -155,8 +156,6 @@ impl CatalogManager {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::*;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
use std::sync::Arc;
|
||||
|
||||
use futures::stream::BoxStream;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -165,7 +166,7 @@ impl DatanodeTableManager {
|
||||
self.kv_backend.clone(),
|
||||
req,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
datanode_table_value_decoder,
|
||||
Arc::new(datanode_table_value_decoder),
|
||||
)
|
||||
.into_stream();
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use futures::stream::BoxStream;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
@@ -199,7 +201,7 @@ impl FlowNameManager {
|
||||
self.kv_backend.clone(),
|
||||
req,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
flow_name_decoder,
|
||||
Arc::new(flow_name_decoder),
|
||||
)
|
||||
.into_stream();
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use futures::stream::BoxStream;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
@@ -177,7 +179,7 @@ impl FlowRouteManager {
|
||||
self.kv_backend.clone(),
|
||||
req,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
flow_route_decoder,
|
||||
Arc::new(flow_route_decoder),
|
||||
)
|
||||
.into_stream();
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use futures::stream::BoxStream;
|
||||
use futures::TryStreamExt;
|
||||
use lazy_static::lazy_static;
|
||||
@@ -177,7 +179,7 @@ impl FlownodeFlowManager {
|
||||
self.kv_backend.clone(),
|
||||
req,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
flownode_flow_key_decoder,
|
||||
Arc::new(flownode_flow_key_decoder),
|
||||
)
|
||||
.into_stream();
|
||||
|
||||
|
||||
@@ -206,7 +206,7 @@ impl TableFlowManager {
|
||||
self.kv_backend.clone(),
|
||||
req,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
table_flow_decoder,
|
||||
Arc::new(table_flow_decoder),
|
||||
)
|
||||
.into_stream();
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_time::DatabaseTimeToLive;
|
||||
@@ -282,7 +283,7 @@ impl SchemaManager {
|
||||
self.kv_backend.clone(),
|
||||
req,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
schema_decoder,
|
||||
Arc::new(schema_decoder),
|
||||
)
|
||||
.into_stream();
|
||||
|
||||
@@ -307,7 +308,6 @@ impl<'a> From<&'a SchemaName> for SchemaNameKey<'a> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -269,7 +269,7 @@ impl TableNameManager {
|
||||
self.kv_backend.clone(),
|
||||
req,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
table_decoder,
|
||||
Arc::new(table_decoder),
|
||||
)
|
||||
.into_stream();
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use async_stream::try_stream;
use common_telemetry::debug;
use futures::Stream;

@@ -146,7 +148,7 @@ impl PaginationStreamFactory {
}

pub struct PaginationStream<T> {
decoder_fn: fn(KeyValue) -> Result<T>,
decoder_fn: Arc<KeyValueDecoderFn<T>>,
factory: PaginationStreamFactory,
}

@@ -156,7 +158,7 @@ impl<T> PaginationStream<T> {
kv: KvBackendRef,
req: RangeRequest,
page_size: usize,
decoder_fn: fn(KeyValue) -> Result<T>,
decoder_fn: Arc<KeyValueDecoderFn<T>>,
) -> Self {
Self {
decoder_fn,

@@ -189,7 +191,6 @@ mod tests {
use std::assert_matches::assert_matches;
use std::collections::BTreeMap;
use std::sync::Arc;

use futures::TryStreamExt;

@@ -249,7 +250,7 @@ mod tests {
..Default::default()
},
DEFAULT_PAGE_SIZE,
decoder,
Arc::new(decoder),
)
.into_stream();
let kv = stream.try_collect::<Vec<_>>().await.unwrap();

@@ -289,7 +290,7 @@ mod tests {
..Default::default()
},
2,
decoder,
Arc::new(decoder),
);
let kv = stream
.into_stream()
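These hunks switch `PaginationStream`'s decoder between a plain `fn(KeyValue) -> Result<T>` pointer and an `Arc<KeyValueDecoderFn<T>>`, with callers wrapping the decoder in `Arc::new(...)`. The alias is not spelled out in this compare, so the sketch assumes it is a shareable decoding closure; it only shows why the `Arc` form is the more flexible one (cheap clones, captured state):

```rust
use std::sync::Arc;

// Stand-ins for the real key/value and error types (assumptions for the sketch).
struct KeyValue {
    key: Vec<u8>,
    value: Vec<u8>,
}
type Result<T> = std::result::Result<T, String>;

// Assumed shape of the alias: a reference-counted decoding closure.
type KeyValueDecoderFn<T> = dyn Fn(KeyValue) -> Result<T> + Send + Sync;

struct PaginationStream<T> {
    decoder_fn: Arc<KeyValueDecoderFn<T>>,
}

impl<T> PaginationStream<T> {
    fn new(decoder_fn: Arc<KeyValueDecoderFn<T>>) -> Self {
        Self { decoder_fn }
    }
}

fn main() {
    // The decoder can capture state and be cloned cheaply into each page fetch.
    let suffix = b"-decoded".to_vec();
    let decoder: Arc<KeyValueDecoderFn<String>> = Arc::new(move |kv: KeyValue| {
        let mut out = kv.key;
        out.extend_from_slice(&suffix);
        String::from_utf8(out).map_err(|e| e.to_string())
    });
    let stream = PaginationStream::new(decoder.clone());
    let kv = KeyValue { key: b"k1".to_vec(), value: Vec::new() };
    let decoded = (stream.decoder_fn.as_ref())(kv).unwrap();
    assert_eq!(decoded, "k1-decoded");
}
```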
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_procedure::error::{DeleteStatesSnafu, ListStateSnafu, PutStateSnafu};

@@ -169,7 +171,7 @@ impl StateStore for KvStateStore {
self.kv_backend.clone(),
req,
self.max_num_per_range_request.unwrap_or_default(),
decode_kv,
Arc::new(decode_kv),
)
.into_stream();

@@ -544,7 +544,7 @@ mod tests {
use common_test_util::temp_dir::create_temp_dir;
use futures_util::future::BoxFuture;
use futures_util::FutureExt;
use object_store::{EntryMode, ObjectStore};
use object_store::ObjectStore;
use tokio::sync::mpsc;

use super::*;

@@ -578,11 +578,7 @@ mod tests {
) {
let dir = proc_path!(procedure_store, "{procedure_id}/");
let lister = object_store.list(&dir).await.unwrap();
let mut files_in_dir: Vec<_> = lister
.into_iter()
.filter(|x| x.metadata().mode() == EntryMode::FILE)
.map(|de| de.name().to_string())
.collect();
let mut files_in_dir: Vec<_> = lister.into_iter().map(|de| de.name().to_string()).collect();
files_in_dir.sort_unstable();
assert_eq!(files, files_in_dir);
}
@@ -193,14 +193,6 @@ pub enum Error {
location: Location,
},

#[snafu(display("Failed to build http client"))]
BuildHttpClient {
#[snafu(implicit)]
location: Location,
#[snafu(source)]
error: reqwest::Error,
},

#[snafu(display("Missing required field: {}", name))]
MissingRequiredField {
name: String,

@@ -414,10 +406,9 @@ impl ErrorExt for Error {
| MissingKvBackend { .. }
| TomlFormat { .. } => StatusCode::InvalidArguments,

PayloadNotExist { .. }
| Unexpected { .. }
| WatchAsyncTaskChange { .. }
| BuildHttpClient { .. } => StatusCode::Unexpected,
PayloadNotExist { .. } | Unexpected { .. } | WatchAsyncTaskChange { .. } => {
StatusCode::Unexpected
}

AsyncTaskExecute { source, .. } => source.status_code(),

@@ -32,7 +32,7 @@ use object_store::{Access, Error, HttpClient, ObjectStore, ObjectStoreBuilder, O
use snafu::prelude::*;

use crate::config::{HttpClientConfig, ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE};
use crate::error::{self, BuildHttpClientSnafu, CreateDirSnafu, Result};
use crate::error::{self, CreateDirSnafu, Result};

pub(crate) async fn new_raw_object_store(
store: &ObjectStoreConfig,

@@ -236,8 +236,7 @@ pub(crate) fn build_http_client(config: &HttpClientConfig) -> Result<HttpClient>
builder.timeout(config.timeout)
};

let client = http_builder.build().context(BuildHttpClientSnafu)?;
Ok(HttpClient::with(client))
HttpClient::build(http_builder).context(error::InitBackendSnafu)
}
struct PrintDetailedError;
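One side of these hunks defines a dedicated `BuildHttpClient` error variant and attaches the `reqwest` build error to it via `.context(BuildHttpClientSnafu)`. A self-contained sketch of that snafu pattern; the variant and selector names mirror the diff, while `make_client` and the parsed value are invented for illustration:

```rust
use snafu::{ResultExt, Snafu};

#[derive(Debug, Snafu)]
enum Error {
    // Mirrors the variant in the hunk: the underlying builder error becomes `source`.
    #[snafu(display("Failed to build http client"))]
    BuildHttpClient { source: std::num::ParseIntError },
}

type Result<T> = std::result::Result<T, Error>;

// Invented stand-in for the real `build_http_client(config)`: any fallible
// builder call can be wrapped with `.context(...)` to produce the variant above.
fn make_client(timeout_secs: &str) -> Result<u64> {
    let secs: u64 = timeout_secs.parse().context(BuildHttpClientSnafu)?;
    Ok(secs)
}

fn main() {
    assert!(make_client("30").is_ok());
    // A bad config surfaces as the domain error with the original cause attached.
    let err = make_client("not-a-number").unwrap_err();
    println!("{err}");
}
```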
@@ -46,7 +46,7 @@ impl FileRegionManifest {
pub async fn store(&self, region_dir: &str, object_store: &ObjectStore) -> Result<()> {
let path = &region_manifest_path(region_dir);
let exist = object_store
.exists(path)
.is_exist(path)
.await
.context(CheckObjectSnafu { path })?;
ensure!(!exist, ManifestExistsSnafu { path });

@@ -130,7 +130,7 @@ mod tests {
assert_eq!(region.metadata.primary_key, vec![1]);

assert!(object_store
.exists("create_region_dir/manifest/_file_manifest")
.is_exist("create_region_dir/manifest/_file_manifest")
.await
.unwrap());

@@ -198,13 +198,13 @@ mod tests {
.unwrap();

assert!(object_store
.exists("drop_region_dir/manifest/_file_manifest")
.is_exist("drop_region_dir/manifest/_file_manifest")
.await
.unwrap());

FileRegion::drop(&region, &object_store).await.unwrap();
assert!(!object_store
.exists("drop_region_dir/manifest/_file_manifest")
.is_exist("drop_region_dir/manifest/_file_manifest")
.await
.unwrap());
@@ -41,7 +41,6 @@ datafusion-expr.workspace = true
datanode.workspace = true
humantime-serde.workspace = true
lazy_static.workspace = true
log-query.workspace = true
log-store.workspace = true
meta-client.workspace = true
opentelemetry-proto.workspace = true

@@ -16,7 +16,6 @@ pub mod builder;
mod grpc;
mod influxdb;
mod log_handler;
mod logs;
mod opentsdb;
mod otlp;
mod prom_store;
@@ -65,8 +64,8 @@ use servers::prometheus_handler::PrometheusHandler;
use servers::query_handler::grpc::GrpcQueryHandler;
use servers::query_handler::sql::SqlQueryHandler;
use servers::query_handler::{
InfluxdbLineProtocolHandler, LogQueryHandler, OpenTelemetryProtocolHandler,
OpentsdbProtocolHandler, PipelineHandler, PromStoreProtocolHandler, ScriptHandler,
InfluxdbLineProtocolHandler, OpenTelemetryProtocolHandler, OpentsdbProtocolHandler,
PipelineHandler, PromStoreProtocolHandler, ScriptHandler,
};
use servers::server::ServerHandlers;
use session::context::QueryContextRef;
@@ -100,7 +99,6 @@ pub trait FrontendInstance:
+ ScriptHandler
+ PrometheusHandler
+ PipelineHandler
+ LogQueryHandler
+ Send
+ Sync
+ 'static

@@ -1,67 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
|
||||
use client::Output;
|
||||
use common_error::ext::BoxedError;
|
||||
use log_query::LogQuery;
|
||||
use server_error::Result as ServerResult;
|
||||
use servers::error::{self as server_error, AuthSnafu, ExecuteQuerySnafu};
|
||||
use servers::interceptor::{LogQueryInterceptor, LogQueryInterceptorRef};
|
||||
use servers::query_handler::LogQueryHandler;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::ResultExt;
|
||||
use tonic::async_trait;
|
||||
|
||||
use super::Instance;
|
||||
|
||||
#[async_trait]
|
||||
impl LogQueryHandler for Instance {
|
||||
async fn query(&self, mut request: LogQuery, ctx: QueryContextRef) -> ServerResult<Output> {
|
||||
let interceptor = self
|
||||
.plugins
|
||||
.get::<LogQueryInterceptorRef<server_error::Error>>();
|
||||
|
||||
self.plugins
|
||||
.get::<PermissionCheckerRef>()
|
||||
.as_ref()
|
||||
.check_permission(ctx.current_user(), PermissionReq::LogQuery)
|
||||
.context(AuthSnafu)?;
|
||||
|
||||
interceptor.as_ref().pre_query(&request, ctx.clone())?;
|
||||
|
||||
request
|
||||
.time_filter
|
||||
.canonicalize()
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteQuerySnafu)?;
|
||||
|
||||
let plan = self
|
||||
.query_engine
|
||||
.planner()
|
||||
.plan_logs_query(request, ctx.clone())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteQuerySnafu)?;
|
||||
|
||||
let output = self
|
||||
.statement_executor
|
||||
.exec_plan(plan, ctx.clone())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteQuerySnafu)?;
|
||||
|
||||
Ok(interceptor.as_ref().post_query(output, ctx.clone())?)
|
||||
}
|
||||
}
|
||||
@@ -87,7 +87,6 @@ where
let ingest_interceptor = self.plugins.get::<LogIngestInterceptorRef<ServerError>>();
builder =
builder.with_log_ingest_handler(self.instance.clone(), validator, ingest_interceptor);
builder = builder.with_logs_handler(self.instance.clone());

if let Some(user_provider) = self.plugins.get::<UserProviderRef>() {
builder = builder.with_user_provider(user_provider);

@@ -15,15 +15,11 @@
use serde::{Deserialize, Serialize};

pub mod creator;
pub mod error;
pub mod reader;
mod error;

pub type Bytes = Vec<u8>;
pub type BytesRef<'a> = &'a [u8];

/// The seed used for the Bloom filter.
pub const SEED: u128 = 42;

/// The Meta information of the bloom filter stored in the file.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BloomFilterMeta {

@@ -12,23 +12,21 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod finalize_segment;
|
||||
mod intermediate_codec;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use finalize_segment::FinalizedBloomFilterStorage;
|
||||
use futures::{AsyncWrite, AsyncWriteExt, StreamExt};
|
||||
use fastbloom::BloomFilter;
|
||||
use futures::{AsyncWrite, AsyncWriteExt};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::bloom_filter::error::{IoSnafu, Result, SerdeJsonSnafu};
|
||||
use crate::bloom_filter::{BloomFilterMeta, BloomFilterSegmentLocation, Bytes, SEED};
|
||||
use crate::external_provider::ExternalTempFileProvider;
|
||||
use super::error::{IoSnafu, SerdeJsonSnafu};
|
||||
use crate::bloom_filter::error::Result;
|
||||
use crate::bloom_filter::{BloomFilterMeta, BloomFilterSegmentLocation, Bytes};
|
||||
|
||||
/// The seed used for the Bloom filter.
|
||||
const SEED: u128 = 42;
|
||||
|
||||
/// The false positive rate of the Bloom filter.
|
||||
pub const FALSE_POSITIVE_RATE: f64 = 0.01;
|
||||
const FALSE_POSITIVE_RATE: f64 = 0.01;
|
||||
|
||||
/// `BloomFilterCreator` is responsible for creating and managing bloom filters
|
||||
/// for a set of elements. It divides the rows into segments and creates
|
||||
@@ -60,9 +58,6 @@ pub struct BloomFilterCreator {
|
||||
|
||||
/// Storage for finalized Bloom filters.
|
||||
finalized_bloom_filters: FinalizedBloomFilterStorage,
|
||||
|
||||
/// Global memory usage of the bloom filter creator.
|
||||
global_memory_usage: Arc<AtomicUsize>,
|
||||
}
|
||||
|
||||
impl BloomFilterCreator {
|
||||
@@ -71,12 +66,7 @@ impl BloomFilterCreator {
|
||||
/// # PANICS
|
||||
///
|
||||
/// `rows_per_segment` <= 0
|
||||
pub fn new(
|
||||
rows_per_segment: usize,
|
||||
intermediate_provider: Box<dyn ExternalTempFileProvider>,
|
||||
global_memory_usage: Arc<AtomicUsize>,
|
||||
global_memory_usage_threshold: Option<usize>,
|
||||
) -> Self {
|
||||
pub fn new(rows_per_segment: usize) -> Self {
|
||||
assert!(
|
||||
rows_per_segment > 0,
|
||||
"rows_per_segment must be greater than 0"
|
||||
@@ -87,67 +77,54 @@ impl BloomFilterCreator {
|
||||
accumulated_row_count: 0,
|
||||
cur_seg_distinct_elems: HashSet::default(),
|
||||
cur_seg_distinct_elems_mem_usage: 0,
|
||||
global_memory_usage: global_memory_usage.clone(),
|
||||
finalized_bloom_filters: FinalizedBloomFilterStorage::new(
|
||||
intermediate_provider,
|
||||
global_memory_usage,
|
||||
global_memory_usage_threshold,
|
||||
),
|
||||
finalized_bloom_filters: FinalizedBloomFilterStorage::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds a row of elements to the bloom filter. If the number of accumulated rows
|
||||
/// reaches `rows_per_segment`, it finalizes the current segment.
|
||||
pub async fn push_row_elems(&mut self, elems: impl IntoIterator<Item = Bytes>) -> Result<()> {
|
||||
pub fn push_row_elems(&mut self, elems: impl IntoIterator<Item = Bytes>) {
|
||||
self.accumulated_row_count += 1;
|
||||
|
||||
let mut mem_diff = 0;
|
||||
for elem in elems.into_iter() {
|
||||
let len = elem.len();
|
||||
let is_new = self.cur_seg_distinct_elems.insert(elem);
|
||||
if is_new {
|
||||
mem_diff += len;
|
||||
self.cur_seg_distinct_elems_mem_usage += len;
|
||||
}
|
||||
}
|
||||
self.cur_seg_distinct_elems_mem_usage += mem_diff;
|
||||
self.global_memory_usage
|
||||
.fetch_add(mem_diff, Ordering::Relaxed);
|
||||
|
||||
if self.accumulated_row_count % self.rows_per_segment == 0 {
|
||||
self.finalize_segment().await?;
|
||||
self.finalize_segment();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Finalizes any remaining segments and writes the bloom filters and metadata to the provided writer.
|
||||
pub async fn finish(&mut self, mut writer: impl AsyncWrite + Unpin) -> Result<()> {
|
||||
if !self.cur_seg_distinct_elems.is_empty() {
|
||||
self.finalize_segment().await?;
|
||||
self.finalize_segment();
|
||||
}
|
||||
|
||||
let mut meta = BloomFilterMeta {
|
||||
rows_per_segment: self.rows_per_segment,
|
||||
seg_count: self.finalized_bloom_filters.len(),
|
||||
row_count: self.accumulated_row_count,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut segs = self.finalized_bloom_filters.drain().await?;
|
||||
while let Some(segment) = segs.next().await {
|
||||
let segment = segment?;
|
||||
writer
|
||||
.write_all(&segment.bloom_filter_bytes)
|
||||
.await
|
||||
.context(IoSnafu)?;
|
||||
let mut buf = Vec::new();
|
||||
for segment in self.finalized_bloom_filters.drain() {
|
||||
let slice = segment.bloom_filter.as_slice();
|
||||
buf.clear();
|
||||
write_u64_slice(&mut buf, slice);
|
||||
writer.write_all(&buf).await.context(IoSnafu)?;
|
||||
|
||||
let size = segment.bloom_filter_bytes.len();
|
||||
let size = buf.len();
|
||||
meta.bloom_filter_segments.push(BloomFilterSegmentLocation {
|
||||
offset: meta.bloom_filter_segments_size as _,
|
||||
size: size as _,
|
||||
elem_count: segment.element_count,
|
||||
});
|
||||
meta.bloom_filter_segments_size += size;
|
||||
meta.seg_count += 1;
|
||||
}
|
||||
|
||||
let meta_bytes = serde_json::to_vec(&meta).context(SerdeJsonSnafu)?;
|
||||
@@ -168,29 +145,91 @@ impl BloomFilterCreator {
|
||||
self.cur_seg_distinct_elems_mem_usage + self.finalized_bloom_filters.memory_usage()
|
||||
}
|
||||
|
||||
async fn finalize_segment(&mut self) -> Result<()> {
|
||||
fn finalize_segment(&mut self) {
|
||||
let elem_count = self.cur_seg_distinct_elems.len();
|
||||
self.finalized_bloom_filters
|
||||
.add(self.cur_seg_distinct_elems.drain(), elem_count)
|
||||
.await?;
|
||||
|
||||
self.global_memory_usage
|
||||
.fetch_sub(self.cur_seg_distinct_elems_mem_usage, Ordering::Relaxed);
|
||||
.add(self.cur_seg_distinct_elems.drain(), elem_count);
|
||||
self.cur_seg_distinct_elems_mem_usage = 0;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Storage for finalized Bloom filters.
|
||||
///
|
||||
/// TODO(zhongzc): Add support for storing intermediate bloom filters on disk to control memory usage.
|
||||
#[derive(Debug, Default)]
|
||||
struct FinalizedBloomFilterStorage {
|
||||
/// Bloom filters that are stored in memory.
|
||||
in_memory: Vec<FinalizedBloomFilterSegment>,
|
||||
}
|
||||
|
||||
impl FinalizedBloomFilterStorage {
|
||||
fn memory_usage(&self) -> usize {
|
||||
self.in_memory.iter().map(|s| s.size).sum()
|
||||
}
|
||||
|
||||
/// Adds a new finalized Bloom filter to the storage.
|
||||
///
|
||||
/// TODO(zhongzc): Add support for flushing to disk.
|
||||
fn add(&mut self, elems: impl IntoIterator<Item = Bytes>, elem_count: usize) {
|
||||
let mut bf = BloomFilter::with_false_pos(FALSE_POSITIVE_RATE)
|
||||
.seed(&SEED)
|
||||
.expected_items(elem_count);
|
||||
for elem in elems.into_iter() {
|
||||
bf.insert(&elem);
|
||||
}
|
||||
|
||||
let cbf = FinalizedBloomFilterSegment::new(bf, elem_count);
|
||||
self.in_memory.push(cbf);
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.in_memory.len()
|
||||
}
|
||||
|
||||
fn drain(&mut self) -> impl Iterator<Item = FinalizedBloomFilterSegment> + '_ {
|
||||
self.in_memory.drain(..)
|
||||
}
|
||||
}
|
||||
|
||||
/// A finalized Bloom filter segment.
|
||||
#[derive(Debug)]
|
||||
struct FinalizedBloomFilterSegment {
|
||||
/// The underlying Bloom filter.
|
||||
bloom_filter: BloomFilter,
|
||||
|
||||
/// The number of elements in the Bloom filter.
|
||||
element_count: usize,
|
||||
|
||||
/// The occupied memory size of the Bloom filter.
|
||||
size: usize,
|
||||
}
|
||||
|
||||
impl FinalizedBloomFilterSegment {
|
||||
fn new(bloom_filter: BloomFilter, elem_count: usize) -> Self {
|
||||
let memory_usage = std::mem::size_of_val(bloom_filter.as_slice());
|
||||
Self {
|
||||
bloom_filter,
|
||||
element_count: elem_count,
|
||||
size: memory_usage,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes a slice of `u64` to the buffer in little-endian order.
|
||||
fn write_u64_slice(buf: &mut Vec<u8>, slice: &[u64]) {
|
||||
buf.reserve(std::mem::size_of_val(slice));
|
||||
for &x in slice {
|
||||
buf.extend_from_slice(&x.to_le_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use fastbloom::BloomFilter;
|
||||
use futures::io::Cursor;
|
||||
|
||||
use super::*;
|
||||
use crate::external_provider::MockExternalTempFileProvider;
|
||||
|
||||
/// Converts a slice of bytes to a vector of `u64`.
|
||||
pub fn u64_vec_from_bytes(bytes: &[u8]) -> Vec<u64> {
|
||||
fn u64_vec_from_bytes(bytes: &[u8]) -> Vec<u64> {
|
||||
bytes
|
||||
.chunks_exact(std::mem::size_of::<u64>())
|
||||
.map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap()))
|
||||
@@ -200,32 +239,18 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_bloom_filter_creator() {
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
let mut creator = BloomFilterCreator::new(
|
||||
2,
|
||||
Box::new(MockExternalTempFileProvider::new()),
|
||||
Arc::new(AtomicUsize::new(0)),
|
||||
None,
|
||||
);
|
||||
let mut creator = BloomFilterCreator::new(2);
|
||||
|
||||
creator
|
||||
.push_row_elems(vec![b"a".to_vec(), b"b".to_vec()])
|
||||
.await
|
||||
.unwrap();
|
||||
creator.push_row_elems(vec![b"a".to_vec(), b"b".to_vec()]);
|
||||
assert!(creator.cur_seg_distinct_elems_mem_usage > 0);
|
||||
assert!(creator.memory_usage() > 0);
|
||||
|
||||
creator
|
||||
.push_row_elems(vec![b"c".to_vec(), b"d".to_vec()])
|
||||
.await
|
||||
.unwrap();
|
||||
creator.push_row_elems(vec![b"c".to_vec(), b"d".to_vec()]);
|
||||
// Finalize the first segment
|
||||
assert_eq!(creator.cur_seg_distinct_elems_mem_usage, 0);
|
||||
assert!(creator.cur_seg_distinct_elems_mem_usage == 0);
|
||||
assert!(creator.memory_usage() > 0);
|
||||
|
||||
creator
|
||||
.push_row_elems(vec![b"e".to_vec(), b"f".to_vec()])
|
||||
.await
|
||||
.unwrap();
|
||||
creator.push_row_elems(vec![b"e".to_vec(), b"f".to_vec()]);
|
||||
assert!(creator.cur_seg_distinct_elems_mem_usage > 0);
|
||||
assert!(creator.memory_usage() > 0);
|
||||
|
||||
|
||||
@@ -1,293 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::pin::Pin;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use asynchronous_codec::{FramedRead, FramedWrite};
|
||||
use fastbloom::BloomFilter;
|
||||
use futures::stream::StreamExt;
|
||||
use futures::{stream, AsyncWriteExt, Stream};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use super::intermediate_codec::IntermediateBloomFilterCodecV1;
|
||||
use crate::bloom_filter::creator::{FALSE_POSITIVE_RATE, SEED};
|
||||
use crate::bloom_filter::error::{IntermediateSnafu, IoSnafu, Result};
|
||||
use crate::bloom_filter::Bytes;
|
||||
use crate::external_provider::ExternalTempFileProvider;
|
||||
|
||||
/// The minimum memory usage threshold for flushing in-memory Bloom filters to disk.
|
||||
const MIN_MEMORY_USAGE_THRESHOLD: usize = 1024 * 1024; // 1MB
|
||||
|
||||
/// Storage for finalized Bloom filters.
|
||||
pub struct FinalizedBloomFilterStorage {
|
||||
/// Bloom filters that are stored in memory.
|
||||
in_memory: Vec<FinalizedBloomFilterSegment>,
|
||||
|
||||
/// Used to generate unique file IDs for intermediate Bloom filters.
|
||||
intermediate_file_id_counter: usize,
|
||||
|
||||
/// Prefix for intermediate Bloom filter files.
|
||||
intermediate_prefix: String,
|
||||
|
||||
/// The provider for intermediate Bloom filter files.
|
||||
intermediate_provider: Box<dyn ExternalTempFileProvider>,
|
||||
|
||||
/// The memory usage of the in-memory Bloom filters.
|
||||
memory_usage: usize,
|
||||
|
||||
/// The global memory usage provided by the user to track the
|
||||
/// total memory usage of the creating Bloom filters.
|
||||
global_memory_usage: Arc<AtomicUsize>,
|
||||
|
||||
/// The threshold of the global memory usage of the creating Bloom filters.
|
||||
global_memory_usage_threshold: Option<usize>,
|
||||
}
|
||||
|
||||
impl FinalizedBloomFilterStorage {
|
||||
/// Creates a new `FinalizedBloomFilterStorage`.
|
||||
pub fn new(
|
||||
intermediate_provider: Box<dyn ExternalTempFileProvider>,
|
||||
global_memory_usage: Arc<AtomicUsize>,
|
||||
global_memory_usage_threshold: Option<usize>,
|
||||
) -> Self {
|
||||
let external_prefix = format!("intm-bloom-filters-{}", uuid::Uuid::new_v4());
|
||||
Self {
|
||||
in_memory: Vec::new(),
|
||||
intermediate_file_id_counter: 0,
|
||||
intermediate_prefix: external_prefix,
|
||||
intermediate_provider,
|
||||
memory_usage: 0,
|
||||
global_memory_usage,
|
||||
global_memory_usage_threshold,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the memory usage of the storage.
|
||||
pub fn memory_usage(&self) -> usize {
|
||||
self.memory_usage
|
||||
}
|
||||
|
||||
/// Adds a new finalized Bloom filter to the storage.
|
||||
///
|
||||
/// If the memory usage exceeds the threshold, flushes the in-memory Bloom filters to disk.
|
||||
pub async fn add(
|
||||
&mut self,
|
||||
elems: impl IntoIterator<Item = Bytes>,
|
||||
element_count: usize,
|
||||
) -> Result<()> {
|
||||
let mut bf = BloomFilter::with_false_pos(FALSE_POSITIVE_RATE)
|
||||
.seed(&SEED)
|
||||
.expected_items(element_count);
|
||||
for elem in elems.into_iter() {
|
||||
bf.insert(&elem);
|
||||
}
|
||||
|
||||
let fbf = FinalizedBloomFilterSegment::from(bf, element_count);
|
||||
|
||||
// Update memory usage.
|
||||
let memory_diff = fbf.bloom_filter_bytes.len();
|
||||
self.memory_usage += memory_diff;
|
||||
self.global_memory_usage
|
||||
.fetch_add(memory_diff, Ordering::Relaxed);
|
||||
|
||||
// Add the finalized Bloom filter to the in-memory storage.
|
||||
self.in_memory.push(fbf);
|
||||
|
||||
// Flush to disk if necessary.
|
||||
|
||||
// Do not flush if memory usage is too low.
|
||||
if self.memory_usage < MIN_MEMORY_USAGE_THRESHOLD {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Check if the global memory usage exceeds the threshold and flush to disk if necessary.
|
||||
if let Some(threshold) = self.global_memory_usage_threshold {
|
||||
let global = self.global_memory_usage.load(Ordering::Relaxed);
|
||||
|
||||
if global > threshold {
|
||||
self.flush_in_memory_to_disk().await?;
|
||||
|
||||
self.global_memory_usage
|
||||
.fetch_sub(self.memory_usage, Ordering::Relaxed);
|
||||
self.memory_usage = 0;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Drains the storage and returns a stream of finalized Bloom filter segments.
|
||||
pub async fn drain(
|
||||
&mut self,
|
||||
) -> Result<Pin<Box<dyn Stream<Item = Result<FinalizedBloomFilterSegment>> + '_>>> {
|
||||
// FAST PATH: memory only
|
||||
if self.intermediate_file_id_counter == 0 {
|
||||
return Ok(Box::pin(stream::iter(self.in_memory.drain(..).map(Ok))));
|
||||
}
|
||||
|
||||
// SLOW PATH: memory + disk
|
||||
let mut on_disk = self
|
||||
.intermediate_provider
|
||||
.read_all(&self.intermediate_prefix)
|
||||
.await
|
||||
.context(IntermediateSnafu)?;
|
||||
on_disk.sort_unstable_by(|x, y| x.0.cmp(&y.0));
|
||||
|
||||
let streams = on_disk
|
||||
.into_iter()
|
||||
.map(|(_, reader)| FramedRead::new(reader, IntermediateBloomFilterCodecV1::default()));
|
||||
|
||||
let in_memory_stream = stream::iter(self.in_memory.drain(..)).map(Ok);
|
||||
Ok(Box::pin(
|
||||
stream::iter(streams).flatten().chain(in_memory_stream),
|
||||
))
|
||||
}
|
||||
|
||||
/// Flushes the in-memory Bloom filters to disk.
|
||||
async fn flush_in_memory_to_disk(&mut self) -> Result<()> {
|
||||
let file_id = self.intermediate_file_id_counter;
|
||||
self.intermediate_file_id_counter += 1;
|
||||
|
||||
let file_id = format!("{:08}", file_id);
|
||||
let mut writer = self
|
||||
.intermediate_provider
|
||||
.create(&self.intermediate_prefix, &file_id)
|
||||
.await
|
||||
.context(IntermediateSnafu)?;
|
||||
|
||||
let fw = FramedWrite::new(&mut writer, IntermediateBloomFilterCodecV1::default());
|
||||
// `forward()` will flush and close the writer when the stream ends
|
||||
if let Err(e) = stream::iter(self.in_memory.drain(..).map(Ok))
|
||||
.forward(fw)
|
||||
.await
|
||||
{
|
||||
writer.close().await.context(IoSnafu)?;
|
||||
writer.flush().await.context(IoSnafu)?;
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// A finalized Bloom filter segment.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct FinalizedBloomFilterSegment {
|
||||
/// The underlying Bloom filter bytes.
|
||||
pub bloom_filter_bytes: Vec<u8>,
|
||||
|
||||
/// The number of elements in the Bloom filter.
|
||||
pub element_count: usize,
|
||||
}
|
||||
|
||||
impl FinalizedBloomFilterSegment {
|
||||
fn from(bf: BloomFilter, elem_count: usize) -> Self {
|
||||
let bf_slice = bf.as_slice();
|
||||
let mut bloom_filter_bytes = Vec::with_capacity(std::mem::size_of_val(bf_slice));
|
||||
for &x in bf_slice {
|
||||
bloom_filter_bytes.extend_from_slice(&x.to_le_bytes());
|
||||
}
|
||||
|
||||
Self {
|
||||
bloom_filter_bytes,
|
||||
element_count: elem_count,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Mutex;
|
||||
|
||||
use futures::AsyncRead;
|
||||
use tokio::io::duplex;
|
||||
use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt};
|
||||
|
||||
use super::*;
|
||||
use crate::bloom_filter::creator::tests::u64_vec_from_bytes;
|
||||
use crate::external_provider::MockExternalTempFileProvider;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_finalized_bloom_filter_storage() {
|
||||
let mut mock_provider = MockExternalTempFileProvider::new();
|
||||
|
||||
let mock_files: Arc<Mutex<HashMap<String, Box<dyn AsyncRead + Unpin + Send>>>> =
|
||||
Arc::new(Mutex::new(HashMap::new()));
|
||||
|
||||
mock_provider.expect_create().returning({
|
||||
let files = Arc::clone(&mock_files);
|
||||
move |file_group, file_id| {
|
||||
assert!(file_group.starts_with("intm-bloom-filters-"));
|
||||
let mut files = files.lock().unwrap();
|
||||
let (writer, reader) = duplex(2 * 1024 * 1024);
|
||||
files.insert(file_id.to_string(), Box::new(reader.compat()));
|
||||
Ok(Box::new(writer.compat_write()))
|
||||
}
|
||||
});
|
||||
|
||||
mock_provider.expect_read_all().returning({
|
||||
let files = Arc::clone(&mock_files);
|
||||
move |file_group| {
|
||||
assert!(file_group.starts_with("intm-bloom-filters-"));
|
||||
let mut files = files.lock().unwrap();
|
||||
Ok(files.drain().collect::<Vec<_>>())
|
||||
}
|
||||
});
|
||||
|
||||
let global_memory_usage = Arc::new(AtomicUsize::new(0));
|
||||
let global_memory_usage_threshold = Some(1024 * 1024); // 1MB
|
||||
let provider = Box::new(mock_provider);
|
||||
let mut storage = FinalizedBloomFilterStorage::new(
|
||||
provider,
|
||||
global_memory_usage.clone(),
|
||||
global_memory_usage_threshold,
|
||||
);
|
||||
|
||||
let elem_count = 2000;
|
||||
let batch = 1000;
|
||||
|
||||
for i in 0..batch {
|
||||
let elems = (elem_count * i..elem_count * (i + 1)).map(|x| x.to_string().into_bytes());
|
||||
storage.add(elems, elem_count).await.unwrap();
|
||||
}
|
||||
|
||||
// Flush happens.
|
||||
assert!(storage.intermediate_file_id_counter > 0);
|
||||
|
||||
// Drain the storage.
|
||||
let mut stream = storage.drain().await.unwrap();
|
||||
|
||||
let mut i = 0;
|
||||
while let Some(segment) = stream.next().await {
|
||||
let segment = segment.unwrap();
|
||||
assert_eq!(segment.element_count, elem_count);
|
||||
|
||||
let v = u64_vec_from_bytes(&segment.bloom_filter_bytes);
|
||||
|
||||
// Check the correctness of the Bloom filter.
|
||||
let bf = BloomFilter::from_vec(v)
|
||||
.seed(&SEED)
|
||||
.expected_items(segment.element_count);
|
||||
for elem in (elem_count * i..elem_count * (i + 1)).map(|x| x.to_string().into_bytes()) {
|
||||
assert!(bf.contains(&elem));
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
assert_eq!(i, batch);
|
||||
}
|
||||
}
|
||||
@@ -1,248 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use asynchronous_codec::{BytesMut, Decoder, Encoder};
|
||||
use bytes::{Buf, BufMut};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::bloom_filter::creator::finalize_segment::FinalizedBloomFilterSegment;
|
||||
use crate::bloom_filter::error::{Error, InvalidIntermediateMagicSnafu, IoSnafu, Result};
|
||||
|
||||
/// The magic number for the codec version 1 of the intermediate bloom filter.
|
||||
const CODEC_V1_MAGIC: &[u8; 4] = b"bi01";
|
||||
|
||||
/// Codec of the intermediate finalized bloom filter segment.
|
||||
///
|
||||
/// # Format
|
||||
///
|
||||
/// [ magic ][ elem count ][ size ][ bloom filter ][ elem count ][ size ][ bloom filter ]...
|
||||
/// [4] [8] [8] [size] [8] [8] [size]
|
||||
#[derive(Debug, Default)]
|
||||
pub struct IntermediateBloomFilterCodecV1 {
|
||||
handled_header_magic: bool,
|
||||
}
|
||||
|
||||
impl Encoder for IntermediateBloomFilterCodecV1 {
|
||||
type Item<'a> = FinalizedBloomFilterSegment;
|
||||
type Error = Error;
|
||||
|
||||
fn encode(&mut self, item: FinalizedBloomFilterSegment, dst: &mut BytesMut) -> Result<()> {
|
||||
if !self.handled_header_magic {
|
||||
dst.extend_from_slice(CODEC_V1_MAGIC);
|
||||
self.handled_header_magic = true;
|
||||
}
|
||||
|
||||
let segment_bytes = item.bloom_filter_bytes;
|
||||
let elem_count = item.element_count;
|
||||
|
||||
dst.reserve(2 * std::mem::size_of::<u64>() + segment_bytes.len());
|
||||
dst.put_u64_le(elem_count as u64);
|
||||
dst.put_u64_le(segment_bytes.len() as u64);
|
||||
dst.extend_from_slice(&segment_bytes);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Decoder for IntermediateBloomFilterCodecV1 {
|
||||
type Item = FinalizedBloomFilterSegment;
|
||||
type Error = Error;
|
||||
|
||||
fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>> {
|
||||
if !self.handled_header_magic {
|
||||
let m_len = CODEC_V1_MAGIC.len();
|
||||
if src.remaining() < m_len {
|
||||
return Ok(None);
|
||||
}
|
||||
let magic_bytes = &src[..m_len];
|
||||
ensure!(
|
||||
magic_bytes == CODEC_V1_MAGIC,
|
||||
InvalidIntermediateMagicSnafu {
|
||||
invalid: magic_bytes,
|
||||
}
|
||||
);
|
||||
self.handled_header_magic = true;
|
||||
src.advance(m_len);
|
||||
}
|
||||
|
||||
let s = &src[..];
|
||||
|
||||
let u64_size = std::mem::size_of::<u64>();
|
||||
let n_size = u64_size * 2;
|
||||
if s.len() < n_size {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let element_count = u64::from_le_bytes(s[0..u64_size].try_into().unwrap()) as usize;
|
||||
let segment_size = u64::from_le_bytes(s[u64_size..n_size].try_into().unwrap()) as usize;
|
||||
|
||||
if s.len() < n_size + segment_size {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let bloom_filter_bytes = s[n_size..n_size + segment_size].to_vec();
|
||||
src.advance(n_size + segment_size);
|
||||
Ok(Some(FinalizedBloomFilterSegment {
|
||||
element_count,
|
||||
bloom_filter_bytes,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
/// Required for [`Encoder`] and [`Decoder`] implementations.
|
||||
impl From<std::io::Error> for Error {
|
||||
fn from(error: std::io::Error) -> Self {
|
||||
Err::<(), std::io::Error>(error)
|
||||
.context(IoSnafu)
|
||||
.unwrap_err()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use asynchronous_codec::{FramedRead, FramedWrite};
|
||||
use futures::io::Cursor;
|
||||
use futures::{SinkExt, StreamExt};
|
||||
|
||||
use super::*;
|
||||
use crate::bloom_filter::creator::finalize_segment::FinalizedBloomFilterSegment;
|
||||
|
||||
#[test]
|
||||
fn test_intermediate_bloom_filter_codec_v1_basic() {
|
||||
let mut encoder = IntermediateBloomFilterCodecV1::default();
|
||||
let mut buf = BytesMut::new();
|
||||
|
||||
let item1 = FinalizedBloomFilterSegment {
|
||||
element_count: 2,
|
||||
bloom_filter_bytes: vec![1, 2, 3, 4],
|
||||
};
|
||||
let item2 = FinalizedBloomFilterSegment {
|
||||
element_count: 3,
|
||||
bloom_filter_bytes: vec![5, 6, 7, 8],
|
||||
};
|
||||
let item3 = FinalizedBloomFilterSegment {
|
||||
element_count: 4,
|
||||
bloom_filter_bytes: vec![9, 10, 11, 12],
|
||||
};
|
||||
|
||||
encoder.encode(item1.clone(), &mut buf).unwrap();
|
||||
encoder.encode(item2.clone(), &mut buf).unwrap();
|
||||
encoder.encode(item3.clone(), &mut buf).unwrap();
|
||||
|
||||
let mut buf = buf.freeze().try_into_mut().unwrap();
|
||||
|
||||
let mut decoder = IntermediateBloomFilterCodecV1::default();
|
||||
let decoded_item1 = decoder.decode(&mut buf).unwrap().unwrap();
|
||||
let decoded_item2 = decoder.decode(&mut buf).unwrap().unwrap();
|
||||
let decoded_item3 = decoder.decode(&mut buf).unwrap().unwrap();
|
||||
|
||||
assert_eq!(item1, decoded_item1);
|
||||
assert_eq!(item2, decoded_item2);
|
||||
assert_eq!(item3, decoded_item3);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_intermediate_bloom_filter_codec_v1_frame_read_write() {
|
||||
let item1 = FinalizedBloomFilterSegment {
|
||||
element_count: 2,
|
||||
bloom_filter_bytes: vec![1, 2, 3, 4],
|
||||
};
|
||||
let item2 = FinalizedBloomFilterSegment {
|
||||
element_count: 3,
|
||||
bloom_filter_bytes: vec![5, 6, 7, 8],
|
||||
};
|
||||
let item3 = FinalizedBloomFilterSegment {
|
||||
element_count: 4,
|
||||
bloom_filter_bytes: vec![9, 10, 11, 12],
|
||||
};
|
||||
|
||||
let mut bytes = Cursor::new(vec![]);
|
||||
|
||||
let mut writer = FramedWrite::new(&mut bytes, IntermediateBloomFilterCodecV1::default());
|
||||
writer.send(item1.clone()).await.unwrap();
|
||||
writer.send(item2.clone()).await.unwrap();
|
||||
writer.send(item3.clone()).await.unwrap();
|
||||
writer.flush().await.unwrap();
|
||||
writer.close().await.unwrap();
|
||||
|
||||
let bytes = bytes.into_inner();
|
||||
let mut reader =
|
||||
FramedRead::new(bytes.as_slice(), IntermediateBloomFilterCodecV1::default());
|
||||
let decoded_item1 = reader.next().await.unwrap().unwrap();
|
||||
let decoded_item2 = reader.next().await.unwrap().unwrap();
|
||||
let decoded_item3 = reader.next().await.unwrap().unwrap();
|
||||
assert!(reader.next().await.is_none());
|
||||
|
||||
assert_eq!(item1, decoded_item1);
|
||||
assert_eq!(item2, decoded_item2);
|
||||
assert_eq!(item3, decoded_item3);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_intermediate_bloom_filter_codec_v1_frame_read_write_only_magic() {
|
||||
let bytes = CODEC_V1_MAGIC.to_vec();
|
||||
let mut reader =
|
||||
FramedRead::new(bytes.as_slice(), IntermediateBloomFilterCodecV1::default());
|
||||
assert!(reader.next().await.is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_intermediate_bloom_filter_codec_v1_frame_read_write_partial_magic() {
|
||||
let bytes = CODEC_V1_MAGIC[..3].to_vec();
|
||||
let mut reader =
|
||||
FramedRead::new(bytes.as_slice(), IntermediateBloomFilterCodecV1::default());
|
||||
let e = reader.next().await.unwrap();
|
||||
assert!(e.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_intermediate_bloom_filter_codec_v1_frame_read_write_partial_item() {
|
||||
let mut bytes = vec![];
|
||||
bytes.extend_from_slice(CODEC_V1_MAGIC);
|
||||
bytes.extend_from_slice(&2u64.to_le_bytes());
|
||||
bytes.extend_from_slice(&4u64.to_le_bytes());
|
||||
|
||||
let mut reader =
|
||||
FramedRead::new(bytes.as_slice(), IntermediateBloomFilterCodecV1::default());
|
||||
let e = reader.next().await.unwrap();
|
||||
assert!(e.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_intermediate_bloom_filter_codec_v1_frame_read_write_corrupted_magic() {
|
||||
let mut bytes = vec![];
|
||||
bytes.extend_from_slice(b"bi02");
|
||||
bytes.extend_from_slice(&2u64.to_le_bytes());
|
||||
bytes.extend_from_slice(&4u64.to_le_bytes());
|
||||
bytes.extend_from_slice(&[1, 2, 3, 4]);
|
||||
|
||||
let mut reader =
|
||||
FramedRead::new(bytes.as_slice(), IntermediateBloomFilterCodecV1::default());
|
||||
let e = reader.next().await.unwrap();
|
||||
assert!(e.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_intermediate_bloom_filter_codec_v1_frame_read_write_corrupted_length() {
|
||||
let mut bytes = vec![];
|
||||
bytes.extend_from_slice(CODEC_V1_MAGIC);
|
||||
bytes.extend_from_slice(&2u64.to_le_bytes());
|
||||
bytes.extend_from_slice(&4u64.to_le_bytes());
|
||||
bytes.extend_from_slice(&[1, 2, 3]);
|
||||
|
||||
let mut reader =
|
||||
FramedRead::new(bytes.as_slice(), IntermediateBloomFilterCodecV1::default());
|
||||
let e = reader.next().await.unwrap();
|
||||
assert!(e.is_err());
|
||||
}
|
||||
}
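The format comment above fixes the byte layout of IntermediateBloomFilterCodecV1: a 4-byte `bi01` magic written once, then one `[u64 LE element count][u64 LE payload size][bloom filter bytes]` frame per segment. A minimal sketch, assuming only `std`, that assembles the magic plus one frame by hand the same way `encode()` does; the payload bytes are illustrative, not real filter data:

// Sketch only: mirrors the layout produced by IntermediateBloomFilterCodecV1::encode
// for the first frame of a stream. Not part of the diff above.
fn encode_first_frame(element_count: u64, bloom_filter_bytes: &[u8]) -> Vec<u8> {
    let mut out = Vec::new();
    out.extend_from_slice(b"bi01"); // CODEC_V1_MAGIC, emitted only before the first frame
    out.extend_from_slice(&element_count.to_le_bytes()); // element count, u64 little-endian
    out.extend_from_slice(&(bloom_filter_bytes.len() as u64).to_le_bytes()); // payload size
    out.extend_from_slice(bloom_filter_bytes); // serialized bloom filter
    out
}

For example, encode_first_frame(2, &[1, 2, 3, 4]) reproduces the prefix that the round-trip tests above expect for item1.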
|
||||
@@ -39,43 +39,6 @@ pub enum Error {
location: Location,
},

#[snafu(display("Failed to deserialize json"))]
DeserializeJson {
#[snafu(source)]
error: serde_json::Error,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Intermediate error"))]
Intermediate {
source: crate::error::Error,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("File size too small for bloom filter"))]
FileSizeTooSmall {
size: u64,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Unexpected bloom filter meta size"))]
UnexpectedMetaSize {
max_meta_size: u64,
actual_meta_size: u64,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Invalid intermediate magic"))]
InvalidIntermediateMagic {
invalid: Vec<u8>,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("External error"))]
External {
source: BoxedError,
@@ -89,14 +52,8 @@ impl ErrorExt for Error {
use Error::*;

match self {
Io { .. }
| SerdeJson { .. }
| FileSizeTooSmall { .. }
| UnexpectedMetaSize { .. }
| DeserializeJson { .. }
| InvalidIntermediateMagic { .. } => StatusCode::Unexpected,
Io { .. } | Self::SerdeJson { .. } => StatusCode::Unexpected,

Intermediate { source, .. } => source.status_code(),
External { source, .. } => source.status_code(),
}
}

@@ -1,265 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::ops::Range;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use common_base::range_read::RangeReader;
|
||||
use fastbloom::BloomFilter;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::bloom_filter::error::{
|
||||
DeserializeJsonSnafu, FileSizeTooSmallSnafu, IoSnafu, Result, UnexpectedMetaSizeSnafu,
|
||||
};
|
||||
use crate::bloom_filter::{BloomFilterMeta, BloomFilterSegmentLocation, SEED};
|
||||
|
||||
/// Minimum size of the bloom filter, which is the size of the length of the bloom filter.
|
||||
const BLOOM_META_LEN_SIZE: u64 = 4;
|
||||
|
||||
/// Default prefetch size of bloom filter meta.
|
||||
pub const DEFAULT_PREFETCH_SIZE: u64 = 1024; // 1KiB
|
||||
|
||||
/// `BloomFilterReader` reads the bloom filter from the file.
|
||||
#[async_trait]
|
||||
pub trait BloomFilterReader {
|
||||
/// Reads range of bytes from the file.
|
||||
async fn range_read(&mut self, offset: u64, size: u32) -> Result<Bytes>;
|
||||
|
||||
/// Reads bunch of ranges from the file.
|
||||
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>>;
|
||||
|
||||
/// Reads the meta information of the bloom filter.
|
||||
async fn metadata(&mut self) -> Result<BloomFilterMeta>;
|
||||
|
||||
/// Reads a bloom filter with the given location.
|
||||
async fn bloom_filter(&mut self, loc: &BloomFilterSegmentLocation) -> Result<BloomFilter> {
|
||||
let bytes = self.range_read(loc.offset, loc.size as _).await?;
|
||||
let vec = bytes
|
||||
.chunks_exact(std::mem::size_of::<u64>())
|
||||
.map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap()))
|
||||
.collect();
|
||||
let bm = BloomFilter::from_vec(vec)
|
||||
.seed(&SEED)
|
||||
.expected_items(loc.elem_count);
|
||||
Ok(bm)
|
||||
}
|
||||
}
|
||||
|
||||
/// `BloomFilterReaderImpl` reads the bloom filter from the file.
|
||||
pub struct BloomFilterReaderImpl<R: RangeReader> {
|
||||
/// The underlying reader.
|
||||
reader: R,
|
||||
}
|
||||
|
||||
impl<R: RangeReader> BloomFilterReaderImpl<R> {
|
||||
/// Creates a new `BloomFilterReaderImpl` with the given reader.
|
||||
pub fn new(reader: R) -> Self {
|
||||
Self { reader }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<R: RangeReader> BloomFilterReader for BloomFilterReaderImpl<R> {
|
||||
async fn range_read(&mut self, offset: u64, size: u32) -> Result<Bytes> {
|
||||
self.reader
|
||||
.read(offset..offset + size as u64)
|
||||
.await
|
||||
.context(IoSnafu)
|
||||
}
|
||||
|
||||
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
|
||||
self.reader.read_vec(ranges).await.context(IoSnafu)
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> Result<BloomFilterMeta> {
|
||||
let metadata = self.reader.metadata().await.context(IoSnafu)?;
|
||||
let file_size = metadata.content_length;
|
||||
|
||||
let mut meta_reader =
|
||||
BloomFilterMetaReader::new(&mut self.reader, file_size, Some(DEFAULT_PREFETCH_SIZE));
|
||||
meta_reader.metadata().await
|
||||
}
|
||||
}
|
||||
|
||||
/// `BloomFilterMetaReader` reads the metadata of the bloom filter.
|
||||
struct BloomFilterMetaReader<R: RangeReader> {
|
||||
reader: R,
|
||||
file_size: u64,
|
||||
prefetch_size: u64,
|
||||
}
|
||||
|
||||
impl<R: RangeReader> BloomFilterMetaReader<R> {
|
||||
pub fn new(reader: R, file_size: u64, prefetch_size: Option<u64>) -> Self {
|
||||
Self {
|
||||
reader,
|
||||
file_size,
|
||||
prefetch_size: prefetch_size
|
||||
.unwrap_or(BLOOM_META_LEN_SIZE)
|
||||
.max(BLOOM_META_LEN_SIZE),
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads the metadata of the bloom filter.
|
||||
///
|
||||
/// It will first prefetch some bytes from the end of the file,
|
||||
/// then parse the metadata from the prefetch bytes.
|
||||
pub async fn metadata(&mut self) -> Result<BloomFilterMeta> {
|
||||
ensure!(
|
||||
self.file_size >= BLOOM_META_LEN_SIZE,
|
||||
FileSizeTooSmallSnafu {
|
||||
size: self.file_size,
|
||||
}
|
||||
);
|
||||
|
||||
let meta_start = self.file_size.saturating_sub(self.prefetch_size);
|
||||
let suffix = self
|
||||
.reader
|
||||
.read(meta_start..self.file_size)
|
||||
.await
|
||||
.context(IoSnafu)?;
|
||||
let suffix_len = suffix.len();
|
||||
let length = u32::from_le_bytes(Self::read_tailing_four_bytes(&suffix)?) as u64;
|
||||
self.validate_meta_size(length)?;
|
||||
|
||||
if length > suffix_len as u64 - BLOOM_META_LEN_SIZE {
|
||||
let metadata_start = self.file_size - length - BLOOM_META_LEN_SIZE;
|
||||
let meta = self
|
||||
.reader
|
||||
.read(metadata_start..self.file_size - BLOOM_META_LEN_SIZE)
|
||||
.await
|
||||
.context(IoSnafu)?;
|
||||
serde_json::from_slice(&meta).context(DeserializeJsonSnafu)
|
||||
} else {
|
||||
let metadata_start = self.file_size - length - BLOOM_META_LEN_SIZE - meta_start;
|
||||
let meta = &suffix[metadata_start as usize..suffix_len - BLOOM_META_LEN_SIZE as usize];
|
||||
serde_json::from_slice(meta).context(DeserializeJsonSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
fn read_tailing_four_bytes(suffix: &[u8]) -> Result<[u8; 4]> {
|
||||
let suffix_len = suffix.len();
|
||||
ensure!(
|
||||
suffix_len >= 4,
|
||||
FileSizeTooSmallSnafu {
|
||||
size: suffix_len as u64
|
||||
}
|
||||
);
|
||||
let mut bytes = [0; 4];
|
||||
bytes.copy_from_slice(&suffix[suffix_len - 4..suffix_len]);
|
||||
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
fn validate_meta_size(&self, length: u64) -> Result<()> {
|
||||
let max_meta_size = self.file_size - BLOOM_META_LEN_SIZE;
|
||||
ensure!(
|
||||
length <= max_meta_size,
|
||||
UnexpectedMetaSizeSnafu {
|
||||
max_meta_size,
|
||||
actual_meta_size: length,
|
||||
}
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::Arc;
|
||||
|
||||
use futures::io::Cursor;
|
||||
|
||||
use super::*;
|
||||
use crate::bloom_filter::creator::BloomFilterCreator;
|
||||
use crate::external_provider::MockExternalTempFileProvider;
|
||||
|
||||
async fn mock_bloom_filter_bytes() -> Vec<u8> {
|
||||
let mut writer = Cursor::new(vec![]);
|
||||
let mut creator = BloomFilterCreator::new(
|
||||
2,
|
||||
Box::new(MockExternalTempFileProvider::new()),
|
||||
Arc::new(AtomicUsize::new(0)),
|
||||
None,
|
||||
);
|
||||
|
||||
creator
|
||||
.push_row_elems(vec![b"a".to_vec(), b"b".to_vec()])
|
||||
.await
|
||||
.unwrap();
|
||||
creator
|
||||
.push_row_elems(vec![b"c".to_vec(), b"d".to_vec()])
|
||||
.await
|
||||
.unwrap();
|
||||
creator
|
||||
.push_row_elems(vec![b"e".to_vec(), b"f".to_vec()])
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
creator.finish(&mut writer).await.unwrap();
|
||||
|
||||
writer.into_inner()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_bloom_filter_meta_reader() {
|
||||
let bytes = mock_bloom_filter_bytes().await;
|
||||
let file_size = bytes.len() as u64;
|
||||
|
||||
for prefetch in [0u64, file_size / 2, file_size, file_size + 10] {
|
||||
let mut reader =
|
||||
BloomFilterMetaReader::new(bytes.clone(), file_size as _, Some(prefetch));
|
||||
let meta = reader.metadata().await.unwrap();
|
||||
|
||||
assert_eq!(meta.rows_per_segment, 2);
|
||||
assert_eq!(meta.seg_count, 2);
|
||||
assert_eq!(meta.row_count, 3);
|
||||
assert_eq!(meta.bloom_filter_segments.len(), 2);
|
||||
|
||||
assert_eq!(meta.bloom_filter_segments[0].offset, 0);
|
||||
assert_eq!(meta.bloom_filter_segments[0].elem_count, 4);
|
||||
assert_eq!(
|
||||
meta.bloom_filter_segments[1].offset,
|
||||
meta.bloom_filter_segments[0].size
|
||||
);
|
||||
assert_eq!(meta.bloom_filter_segments[1].elem_count, 2);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_bloom_filter_reader() {
|
||||
let bytes = mock_bloom_filter_bytes().await;
|
||||
|
||||
let mut reader = BloomFilterReaderImpl::new(bytes);
|
||||
let meta = reader.metadata().await.unwrap();
|
||||
|
||||
assert_eq!(meta.bloom_filter_segments.len(), 2);
|
||||
let bf = reader
|
||||
.bloom_filter(&meta.bloom_filter_segments[0])
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(bf.contains(&b"a"));
|
||||
assert!(bf.contains(&b"b"));
|
||||
assert!(bf.contains(&b"c"));
|
||||
assert!(bf.contains(&b"d"));
|
||||
|
||||
let bf = reader
|
||||
.bloom_filter(&meta.bloom_filter_segments[1])
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(bf.contains(&b"e"));
|
||||
assert!(bf.contains(&b"f"));
|
||||
}
|
||||
}
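As BloomFilterMetaReader above implies, a finished bloom filter file ends with the JSON-serialized BloomFilterMeta followed by its length as a 4-byte little-endian integer (BLOOM_META_LEN_SIZE). A minimal sketch of that tail parsing over a fully loaded byte slice, which sidesteps the prefetch logic the real reader uses; it assumes the creator appends the u32 length suffix, which the truncated finish() hunk does not show explicitly:

// Sketch only: in-memory restatement of the tail parsing done by BloomFilterMetaReader.
fn parse_meta_from_tail(file: &[u8]) -> Option<BloomFilterMeta> {
    const META_LEN_SIZE: usize = 4; // BLOOM_META_LEN_SIZE above
    if file.len() < META_LEN_SIZE {
        return None;
    }
    let len_bytes: [u8; 4] = file[file.len() - META_LEN_SIZE..].try_into().ok()?;
    let meta_len = u32::from_le_bytes(len_bytes) as usize;
    let meta_start = file.len().checked_sub(META_LEN_SIZE + meta_len)?;
    serde_json::from_slice(&file[meta_start..file.len() - META_LEN_SIZE]).ok()
}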
|
||||
@@ -1,48 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_macro::stack_trace_debug;
|
||||
use snafu::{Location, Snafu};
|
||||
|
||||
#[derive(Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
#[stack_trace_debug]
|
||||
pub enum Error {
|
||||
#[snafu(display("External error"))]
|
||||
External {
|
||||
source: BoxedError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
use Error::*;
|
||||
|
||||
match self {
|
||||
External { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod external_provider;
pub mod external_sort;
mod intermediate_rw;
mod merge_stream;

@@ -15,24 +15,25 @@
use async_trait::async_trait;
use futures::{AsyncRead, AsyncWrite};

use crate::error::Error;
use crate::inverted_index::error::Result;

pub type Writer = Box<dyn AsyncWrite + Unpin + Send>;
pub type Reader = Box<dyn AsyncRead + Unpin + Send>;

/// Trait for managing intermediate files to control memory usage for a particular index.
/// Trait for managing intermediate files during external sorting for a particular index.
#[mockall::automock]
#[async_trait]
pub trait ExternalTempFileProvider: Send + Sync {
/// Creates and opens a new intermediate file associated with a specific `file_group` for writing.
/// Creates and opens a new intermediate file associated with a specific index for writing.
/// The implementation should ensure that the file does not already exist.
///
/// - `file_group`: a unique identifier for the group of files
/// - `index_name`: the name of the index for which the file will be associated
/// - `file_id`: a unique identifier for the new file
async fn create(&self, file_group: &str, file_id: &str) -> Result<Writer, Error>;
async fn create(
&self,
index_name: &str,
file_id: &str,
) -> Result<Box<dyn AsyncWrite + Unpin + Send>>;

/// Retrieves all intermediate files and their associated file identifiers for a specific `file_group`.
/// Retrieves all intermediate files associated with a specific index for an external sorting operation.
///
/// `file_group` is a unique identifier for the group of files.
async fn read_all(&self, file_group: &str) -> Result<Vec<(String, Reader)>, Error>;
/// `index_name`: the name of the index to retrieve intermediate files for
async fn read_all(&self, index_name: &str) -> Result<Vec<Box<dyn AsyncRead + Unpin + Send>>>;
}
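The trait above is how the external sorter and the bloom filter creator spill intermediate data: every flush asks `create` for a writer under a fresh file id, and the final merge calls `read_all` to get all spilled files back. A minimal sketch of that round trip against the `index_name`-based signatures shown above (the "test" index name, the file id, and the payload are illustrative; the error type is assumed to be the crate's Result alias):

// Sketch only: one spill-and-reload cycle through an ExternalTempFileProvider.
// Not part of the diff; names and payload are placeholders.
use futures::{AsyncReadExt, AsyncWriteExt};

async fn spill_and_reload(provider: &dyn ExternalTempFileProvider) -> Result<Vec<Vec<u8>>> {
    let mut writer = provider.create("test", "000000000000").await?;
    writer.write_all(b"serialized intermediate rows").await?; // placeholder payload
    writer.close().await?;

    // Later, every spilled file is read back and fed into the merge.
    let mut contents = Vec::new();
    for mut reader in provider.read_all("test").await? {
        let mut buf = Vec::new();
        reader.read_to_end(&mut buf).await?;
        contents.push(buf);
    }
    Ok(contents)
}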
@@ -23,16 +23,15 @@ use async_trait::async_trait;
|
||||
use common_base::BitVec;
|
||||
use common_telemetry::{debug, error};
|
||||
use futures::stream;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::external_provider::ExternalTempFileProvider;
|
||||
use crate::inverted_index::create::sort::external_provider::ExternalTempFileProvider;
|
||||
use crate::inverted_index::create::sort::intermediate_rw::{
|
||||
IntermediateReader, IntermediateWriter,
|
||||
};
|
||||
use crate::inverted_index::create::sort::merge_stream::MergeSortedStream;
|
||||
use crate::inverted_index::create::sort::{SortOutput, SortedStream, Sorter};
|
||||
use crate::inverted_index::create::sort_create::SorterFactory;
|
||||
use crate::inverted_index::error::{IntermediateSnafu, Result};
|
||||
use crate::inverted_index::error::Result;
|
||||
use crate::inverted_index::{Bytes, BytesRef};
|
||||
|
||||
/// `ExternalSorter` manages the sorting of data using both in-memory structures and external files.
|
||||
@@ -108,11 +107,7 @@ impl Sorter for ExternalSorter {
|
||||
/// Finalizes the sorting operation, merging data from both in-memory buffer and external files
|
||||
/// into a sorted stream
|
||||
async fn output(&mut self) -> Result<SortOutput> {
|
||||
let readers = self
|
||||
.temp_file_provider
|
||||
.read_all(&self.index_name)
|
||||
.await
|
||||
.context(IntermediateSnafu)?;
|
||||
let readers = self.temp_file_provider.read_all(&self.index_name).await?;
|
||||
|
||||
// TODO(zhongzc): k-way merge instead of 2-way merge
|
||||
|
||||
@@ -127,7 +122,7 @@ impl Sorter for ExternalSorter {
|
||||
Ok((value, bitmap))
|
||||
}),
|
||||
)));
|
||||
for (_, reader) in readers {
|
||||
for reader in readers {
|
||||
tree_nodes.push_back(IntermediateReader::new(reader).into_stream().await?);
|
||||
}
|
||||
|
||||
@@ -246,11 +241,7 @@ impl ExternalSorter {
|
||||
|
||||
let file_id = &format!("{:012}", self.total_row_count);
|
||||
let index_name = &self.index_name;
|
||||
let writer = self
|
||||
.temp_file_provider
|
||||
.create(index_name, file_id)
|
||||
.await
|
||||
.context(IntermediateSnafu)?;
|
||||
let writer = self.temp_file_provider.create(index_name, file_id).await?;
|
||||
|
||||
let values = mem::take(&mut self.values_buffer);
|
||||
self.global_memory_usage
|
||||
@@ -311,7 +302,7 @@ mod tests {
|
||||
use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt};
|
||||
|
||||
use super::*;
|
||||
use crate::external_provider::MockExternalTempFileProvider;
|
||||
use crate::inverted_index::create::sort::external_provider::MockExternalTempFileProvider;
|
||||
|
||||
async fn test_external_sorter(
|
||||
current_memory_usage_threshold: Option<usize>,
|
||||
@@ -341,7 +332,7 @@ mod tests {
|
||||
move |index_name| {
|
||||
assert_eq!(index_name, "test");
|
||||
let mut files = files.lock().unwrap();
|
||||
Ok(files.drain().collect::<Vec<_>>())
|
||||
Ok(files.drain().map(|f| f.1).collect::<Vec<_>>())
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -213,13 +213,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Intermediate error"))]
Intermediate {
source: crate::error::Error,
#[snafu(implicit)]
location: Location,
},
}

impl ErrorExt for Error {
@@ -252,7 +245,6 @@ impl ErrorExt for Error {
| InconsistentRowCount { .. }
| IndexNotFound { .. } => StatusCode::InvalidArguments,

Intermediate { source, .. } => source.status_code(),
External { source, .. } => source.status_code(),
}
}

@@ -31,21 +31,12 @@ mod footer;
/// InvertedIndexReader defines an asynchronous reader of inverted index data
#[mockall::automock]
#[async_trait]
pub trait InvertedIndexReader: Send + Sync {
pub trait InvertedIndexReader: Send {
/// Seeks to given offset and reads data with exact size as provided.
async fn range_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>>;

/// Reads the bytes in the given ranges.
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
let mut result = Vec::with_capacity(ranges.len());
for range in ranges {
let data = self
.range_read(range.start, (range.end - range.start) as u32)
.await?;
result.push(Bytes::from(data));
}
Ok(result)
}
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>>;

/// Retrieves metadata of all inverted indices stored within the blob.
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>>;

@@ -51,7 +51,7 @@ impl<R> InvertedIndexBlobReader<R> {
}

#[async_trait]
impl<R: RangeReader + Sync> InvertedIndexReader for InvertedIndexBlobReader<R> {
impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
async fn range_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
let buf = self
.source

@@ -16,7 +16,5 @@
#![feature(assert_matches)]

pub mod bloom_filter;
pub mod error;
pub mod external_provider;
pub mod fulltext_index;
pub mod inverted_index;

@@ -11,6 +11,5 @@ workspace = true
chrono.workspace = true
common-error.workspace = true
common-macro.workspace = true
serde.workspace = true
snafu.workspace = true
table.workspace = true

@@ -15,7 +15,6 @@
use std::any::Any;

use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use snafu::Snafu;

@@ -42,15 +41,6 @@ impl ErrorExt for Error {
fn as_any(&self) -> &dyn Any {
self
}

fn status_code(&self) -> StatusCode {
match self {
Error::InvalidTimeFilter { .. }
| Error::InvalidDateFormat { .. }
| Error::InvalidSpanFormat { .. }
| Error::EndBeforeStart { .. } => StatusCode::InvalidArguments,
}
}
}

pub type Result<T> = std::result::Result<T, Error>;

@@ -13,7 +13,6 @@
// limitations under the License.

use chrono::{DateTime, Datelike, Duration, NaiveDate, NaiveTime, TimeZone, Utc};
use serde::{Deserialize, Serialize};
use table::table_name::TableName;

use crate::error::{
@@ -22,10 +21,9 @@ use crate::error::{
};

/// GreptimeDB's log query request.
#[derive(Debug, Serialize, Deserialize)]
pub struct LogQuery {
    /// A fully qualified table name to query logs from.
    pub table: TableName,
    pub table_name: TableName,
    /// Specifies the time range for the log query. See [`TimeFilter`] for more details.
    pub time_filter: TimeFilter,
    /// Columns with filters to query.
@@ -36,18 +34,6 @@ pub struct LogQuery {
    pub context: Context,
}

impl Default for LogQuery {
    fn default() -> Self {
        Self {
            table: TableName::new("", "", ""),
            time_filter: Default::default(),
            columns: vec![],
            limit: None,
            context: Default::default(),
        }
    }
}

/// Represents a time range for log query.
///
/// This struct allows various formats to express a time range from the user side
@@ -72,7 +58,7 @@ impl Default for LogQuery {
///
/// This struct doesn't require a timezone to be presented. When the timezone is not
/// provided, it will fill the default timezone with the same rules akin to other queries.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[derive(Debug, Clone)]
pub struct TimeFilter {
    pub start: Option<String>,
    pub end: Option<String>,
@@ -83,7 +69,8 @@ impl TimeFilter {
    /// Validate and canonicalize the time filter.
    ///
    /// This function will try to fill the missing fields and convert all dates to timestamps
    #[allow(unused_assignments)] // false positive
    // false positive
    #[allow(unused_assignments)]
    pub fn canonicalize(&mut self) -> Result<()> {
        let mut start_dt = None;
        let mut end_dt = None;
@@ -222,7 +209,6 @@ impl TimeFilter {
}

/// Represents a column with filters to query.
#[derive(Debug, Serialize, Deserialize)]
pub struct ColumnFilters {
    /// Case-sensitive column name to query.
    pub column_name: String,
@@ -230,7 +216,6 @@ pub struct ColumnFilters {
    pub filters: Vec<ContentFilter>,
}

#[derive(Debug, Serialize, Deserialize)]
pub enum ContentFilter {
    /// Only match the exact content.
    ///
@@ -249,16 +234,13 @@ pub enum ContentFilter {
    Compound(Vec<ContentFilter>, BinaryOperator),
}

#[derive(Debug, Serialize, Deserialize)]
pub enum BinaryOperator {
    And,
    Or,
}

/// Controls how many adjacent lines to return.
#[derive(Debug, Default, Serialize, Deserialize)]
pub enum Context {
    #[default]
    None,
    /// Specify the number of lines before and after the matched line separately.
    Lines(usize, usize),

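The `ColumnFilters`/`ContentFilter` types above combine per-column filters with a `BinaryOperator`. A small construction sketch follows; the `Contains` variant is our placeholder for whatever match variants the enum actually defines (only `Compound` is visible in this hunk):

// Editor's sketch, not part of the diff. `ContentFilter::Contains` is a
// hypothetical variant used only for illustration.
let filter = ColumnFilters {
    column_name: "message".to_string(),
    filters: vec![ContentFilter::Compound(
        vec![
            ContentFilter::Contains("error".to_string()),
            ContentFilter::Contains("timeout".to_string()),
        ],
        BinaryOperator::And,
    )],
};
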
@@ -326,8 +326,8 @@ impl ClusterInfo for MetaClient {
|
||||
let cluster_kv_backend = Arc::new(self.cluster_client()?);
|
||||
let range_prefix = DatanodeStatKey::key_prefix_with_cluster_id(self.id.0);
|
||||
let req = RangeRequest::new().with_prefix(range_prefix);
|
||||
let stream =
|
||||
PaginationStream::new(cluster_kv_backend, req, 256, decode_stats).into_stream();
|
||||
let stream = PaginationStream::new(cluster_kv_backend, req, 256, Arc::new(decode_stats))
|
||||
.into_stream();
|
||||
let mut datanode_stats = stream
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
@@ -994,7 +994,8 @@ mod tests {
|
||||
|
||||
let req = RangeRequest::new().with_prefix(b"__prefix/");
|
||||
let stream =
|
||||
PaginationStream::new(Arc::new(cluster_client), req, 10, mock_decoder).into_stream();
|
||||
PaginationStream::new(Arc::new(cluster_client), req, 10, Arc::new(mock_decoder))
|
||||
.into_stream();
|
||||
|
||||
let res = stream.try_collect::<Vec<_>>().await.unwrap();
|
||||
assert_eq!(10, res.len());
|
||||
|
||||
@@ -102,7 +102,7 @@ impl LeaderCachedKvBackend {
|
||||
self.store.clone(),
|
||||
RangeRequest::new().with_prefix(prefix.as_bytes()),
|
||||
DEFAULT_PAGE_SIZE,
|
||||
Ok,
|
||||
Arc::new(Ok),
|
||||
)
|
||||
.into_stream();
@@ -313,12 +313,12 @@ mod test {
|
||||
let region_dir = "test_metric_region";
|
||||
// assert metadata region's dir
|
||||
let metadata_region_dir = join_dir(region_dir, METADATA_REGION_SUBDIR);
|
||||
let exist = object_store.exists(&metadata_region_dir).await.unwrap();
|
||||
let exist = object_store.is_exist(&metadata_region_dir).await.unwrap();
|
||||
assert!(exist);
|
||||
|
||||
// assert data region's dir
|
||||
let data_region_dir = join_dir(region_dir, DATA_REGION_SUBDIR);
|
||||
let exist = object_store.exists(&data_region_dir).await.unwrap();
|
||||
let exist = object_store.is_exist(&data_region_dir).await.unwrap();
|
||||
assert!(exist);
|
||||
|
||||
// check mito engine
|
||||
|
||||
@@ -37,7 +37,7 @@ use store_api::storage::{ConcreteDataType, RegionId, TimeSeriesRowSelector};
|
||||
|
||||
use crate::cache::cache_size::parquet_meta_size;
|
||||
use crate::cache::file_cache::{FileType, IndexKey};
|
||||
use crate::cache::index::inverted_index::{InvertedIndexCache, InvertedIndexCacheRef};
|
||||
use crate::cache::index::{InvertedIndexCache, InvertedIndexCacheRef};
|
||||
use crate::cache::write_cache::WriteCacheRef;
|
||||
use crate::metrics::{CACHE_BYTES, CACHE_EVICTION, CACHE_HIT, CACHE_MISS};
|
||||
use crate::read::Batch;
src/mito2/src/cache/file_cache.rs
@@ -286,7 +286,7 @@ impl FileCache {
|
||||
}
|
||||
|
||||
async fn get_reader(&self, file_path: &str) -> object_store::Result<Option<Reader>> {
|
||||
if self.local_store.exists(file_path).await? {
|
||||
if self.local_store.is_exist(file_path).await? {
|
||||
Ok(Some(self.local_store.reader(file_path).await?))
|
||||
} else {
|
||||
Ok(None)
|
||||
@@ -480,7 +480,7 @@ mod tests {
|
||||
cache.memory_index.run_pending_tasks().await;
|
||||
|
||||
// The file also does not exist.
|
||||
assert!(!local_store.exists(&file_path).await.unwrap());
|
||||
assert!(!local_store.is_exist(&file_path).await.unwrap());
|
||||
assert_eq!(0, cache.memory_index.weighted_size());
|
||||
}
|
||||
|
||||
|
||||
src/mito2/src/cache/index.rs
@@ -12,29 +12,168 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod inverted_index;
|
||||
|
||||
use std::future::Future;
|
||||
use std::hash::Hash;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::index::InvertedIndexMetas;
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use common_base::BitVec;
|
||||
use index::inverted_index::error::DecodeFstSnafu;
|
||||
use index::inverted_index::format::reader::InvertedIndexReader;
|
||||
use index::inverted_index::FstMap;
|
||||
use object_store::Buffer;
|
||||
use prost::Message;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::metrics::{CACHE_BYTES, CACHE_HIT, CACHE_MISS};
|
||||
use crate::sst::file::FileId;
|
||||
|
||||
/// Metrics for index metadata.
|
||||
const INDEX_METADATA_TYPE: &str = "index_metadata";
|
||||
/// Metrics for index content.
|
||||
const INDEX_CONTENT_TYPE: &str = "index_content";
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub struct PageKey {
|
||||
/// Inverted index blob reader with cache.
|
||||
pub struct CachedInvertedIndexBlobReader<R> {
|
||||
file_id: FileId,
|
||||
file_size: u64,
|
||||
inner: R,
|
||||
cache: InvertedIndexCacheRef,
|
||||
}
|
||||
|
||||
impl<R> CachedInvertedIndexBlobReader<R> {
|
||||
pub fn new(file_id: FileId, file_size: u64, inner: R, cache: InvertedIndexCacheRef) -> Self {
|
||||
Self {
|
||||
file_id,
|
||||
file_size,
|
||||
inner,
|
||||
cache,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> CachedInvertedIndexBlobReader<R>
|
||||
where
|
||||
R: InvertedIndexReader,
|
||||
{
|
||||
/// Gets given range of index data from cache, and loads from source if the file
|
||||
/// is not already cached.
|
||||
async fn get_or_load(
|
||||
&mut self,
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<Vec<u8>> {
|
||||
let keys =
|
||||
IndexDataPageKey::generate_page_keys(self.file_id, offset, size, self.cache.page_size);
|
||||
// Size is 0, return empty data.
|
||||
if keys.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
let mut data = Vec::with_capacity(keys.len());
|
||||
data.resize(keys.len(), Bytes::new());
|
||||
let mut cache_miss_range = vec![];
|
||||
let mut cache_miss_idx = vec![];
|
||||
let last_index = keys.len() - 1;
|
||||
// TODO: Avoid copy as much as possible.
|
||||
for (i, index) in keys.iter().enumerate() {
|
||||
match self.cache.get_index(index) {
|
||||
Some(page) => {
|
||||
CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
|
||||
data[i] = page;
|
||||
}
|
||||
None => {
|
||||
CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
|
||||
let base_offset = index.page_id * self.cache.page_size;
|
||||
let pruned_size = if i == last_index {
|
||||
prune_size(&keys, self.file_size, self.cache.page_size)
|
||||
} else {
|
||||
self.cache.page_size
|
||||
};
|
||||
cache_miss_range.push(base_offset..base_offset + pruned_size);
|
||||
cache_miss_idx.push(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
if !cache_miss_range.is_empty() {
|
||||
let pages = self.inner.read_vec(&cache_miss_range).await?;
|
||||
for (i, page) in cache_miss_idx.into_iter().zip(pages.into_iter()) {
|
||||
let key = keys[i].clone();
|
||||
data[i] = page.clone();
|
||||
self.cache.put_index(key, page.clone());
|
||||
}
|
||||
}
|
||||
let buffer = Buffer::from_iter(data.into_iter());
|
||||
Ok(buffer
|
||||
.slice(IndexDataPageKey::calculate_range(
|
||||
offset,
|
||||
size,
|
||||
self.cache.page_size,
|
||||
))
|
||||
.to_vec())
|
||||
}
|
||||
}
|
||||
|
||||
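To summarize the read path implemented in `get_or_load` above: the request is split into page-granularity keys, pages already cached count as hits, missing pages are fetched in a single `read_vec` call against the inner reader and inserted back, and the caller's exact byte window is finally sliced out of the concatenated pages (editor's summary, not part of the diff):

// Editor's note on the get_or_load flow above:
//   1. keys   = pages covering [offset, offset + size)
//   2. hits   -> taken from the moka cache, counted as CACHE_HIT
//   3. misses -> fetched in one inner.read_vec(ranges) call, then cached
//   4. result = concat(pages)[offset % page_size .. offset % page_size + size]
// The last page's read range is clamped to the file size (see prune_size further down).
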
#[async_trait]
|
||||
impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobReader<R> {
|
||||
async fn range_read(
|
||||
&mut self,
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<Vec<u8>> {
|
||||
self.inner.range_read(offset, size).await
|
||||
}
|
||||
|
||||
async fn read_vec(
|
||||
&mut self,
|
||||
ranges: &[Range<u64>],
|
||||
) -> index::inverted_index::error::Result<Vec<Bytes>> {
|
||||
self.inner.read_vec(ranges).await
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> index::inverted_index::error::Result<Arc<InvertedIndexMetas>> {
|
||||
if let Some(cached) = self.cache.get_index_metadata(self.file_id) {
|
||||
CACHE_HIT.with_label_values(&[INDEX_METADATA_TYPE]).inc();
|
||||
Ok(cached)
|
||||
} else {
|
||||
let meta = self.inner.metadata().await?;
|
||||
self.cache.put_index_metadata(self.file_id, meta.clone());
|
||||
CACHE_MISS.with_label_values(&[INDEX_METADATA_TYPE]).inc();
|
||||
Ok(meta)
|
||||
}
|
||||
}
|
||||
|
||||
async fn fst(
|
||||
&mut self,
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<FstMap> {
|
||||
self.get_or_load(offset, size)
|
||||
.await
|
||||
.and_then(|r| FstMap::new(r).context(DecodeFstSnafu))
|
||||
}
|
||||
|
||||
async fn bitmap(
|
||||
&mut self,
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<BitVec> {
|
||||
self.get_or_load(offset, size).await.map(BitVec::from_vec)
|
||||
}
|
||||
}
|
||||
|
||||
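Note that `fst` and `bitmap` above both route through `get_or_load`, so FST maps and bitmaps are served from the same page cache. The tests further down rely on an FST value packing a (bitmap offset, bitmap size) pair into one `u64`; a worked example of the `unpack` helper used there (editor's sketch; the little-endian field order is an assumption consistent with how the test destructures the result):

// Editor's sketch; mirrors the `unpack` helper in the tests below.
fn unpack(fst_value: u64) -> [u32; 2] {
    bytemuck::cast::<u64, [u32; 2]>(fst_value)
}

fn main() {
    // Low 32 bits -> offset, high 32 bits -> size (on little-endian targets).
    let packed: u64 = (7u64 << 32) | 42;
    let [offset, size] = unpack(packed);
    assert_eq!((offset, size), (42, 7));
}
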
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct IndexMetadataKey {
|
||||
file_id: FileId,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct IndexDataPageKey {
|
||||
file_id: FileId,
|
||||
page_id: u64,
|
||||
}
|
||||
|
||||
impl PageKey {
impl IndexDataPageKey {
    /// Converts an offset to a page ID based on the page size.
    fn calculate_page_id(offset: u64, page_size: u64) -> u64 {
        offset / page_size
@@ -60,60 +199,49 @@ impl PageKey {
        start..end
    }

    /// Generates a iterator of `IndexKey` for the pages that a given offset and size span.
    fn generate_page_keys(offset: u64, size: u32, page_size: u64) -> impl Iterator<Item = Self> {
    /// Generates a vector of IndexKey instances for the pages that a given offset and size span.
    fn generate_page_keys(file_id: FileId, offset: u64, size: u32, page_size: u64) -> Vec<Self> {
        let start_page = Self::calculate_page_id(offset, page_size);
        let total_pages = Self::calculate_page_count(offset, size, page_size);
        (0..total_pages).map(move |i| Self {
            page_id: start_page + i as u64,
        })
        (0..total_pages)
            .map(|i| Self {
                file_id,
                page_id: start_page + i as u64,
            })
            .collect()
    }
}

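As a quick sanity check of the page math above (editor's worked example with illustrative constants): an access at offset 3000 of 500 bytes with a 1024-byte page touches pages 2 and 3, and the requested window starts at byte 3000 % 1024 = 952 of the concatenated pages:

// Editor's sketch of the page arithmetic; not part of the diff.
// Assumes size > 0 (the cache code returns early on empty reads).
fn page_span(offset: u64, size: u32, page_size: u64) -> (u64, u64) {
    let first = offset / page_size;                    // calculate_page_id
    let last = (offset + size as u64 - 1) / page_size; // page holding the last byte
    (first, last - first + 1)                          // (start page, page count)
}

fn main() {
    assert_eq!(page_span(3000, 500, 1024), (2, 2));
    // Window inside the concatenated pages: 952..1452, i.e.
    // offset % page_size .. offset % page_size + size, as in calculate_range.
    assert_eq!(3000 % 1024, 952);
}
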
/// Cache for index metadata and content.
|
||||
pub struct IndexCache<K, M> {
|
||||
/// Cache for index metadata
|
||||
index_metadata: moka::sync::Cache<K, Arc<M>>,
|
||||
/// Cache for index content.
|
||||
index: moka::sync::Cache<(K, PageKey), Bytes>,
|
||||
pub type InvertedIndexCacheRef = Arc<InvertedIndexCache>;
|
||||
|
||||
pub struct InvertedIndexCache {
|
||||
/// Cache for inverted index metadata
|
||||
index_metadata: moka::sync::Cache<IndexMetadataKey, Arc<InvertedIndexMetas>>,
|
||||
/// Cache for inverted index content.
|
||||
index: moka::sync::Cache<IndexDataPageKey, Bytes>,
|
||||
// Page size for index content.
|
||||
page_size: u64,
|
||||
|
||||
/// Weighter for metadata.
|
||||
weight_of_metadata: fn(&K, &Arc<M>) -> u32,
|
||||
/// Weighter for content.
|
||||
weight_of_content: fn(&(K, PageKey), &Bytes) -> u32,
|
||||
}
|
||||
|
||||
impl<K, M> IndexCache<K, M>
|
||||
where
|
||||
K: Hash + Eq + Send + Sync + 'static,
|
||||
M: Send + Sync + 'static,
|
||||
{
|
||||
pub fn new_with_weighter(
|
||||
index_metadata_cap: u64,
|
||||
index_content_cap: u64,
|
||||
page_size: u64,
|
||||
index_type: &'static str,
|
||||
weight_of_metadata: fn(&K, &Arc<M>) -> u32,
|
||||
weight_of_content: fn(&(K, PageKey), &Bytes) -> u32,
|
||||
) -> Self {
|
||||
common_telemetry::debug!("Building IndexCache with metadata size: {index_metadata_cap}, content size: {index_content_cap}, page size: {page_size}, index type: {index_type}");
|
||||
impl InvertedIndexCache {
|
||||
/// Creates `InvertedIndexCache` with provided `index_metadata_cap` and `index_content_cap`.
|
||||
pub fn new(index_metadata_cap: u64, index_content_cap: u64, page_size: u64) -> Self {
|
||||
common_telemetry::debug!("Building InvertedIndexCache with metadata size: {index_metadata_cap}, content size: {index_content_cap}");
|
||||
let index_metadata = moka::sync::CacheBuilder::new(index_metadata_cap)
|
||||
.name(&format!("index_metadata_{}", index_type))
|
||||
.weigher(weight_of_metadata)
|
||||
.eviction_listener(move |k, v, _cause| {
|
||||
let size = weight_of_metadata(&k, &v);
|
||||
.name("inverted_index_metadata")
|
||||
.weigher(index_metadata_weight)
|
||||
.eviction_listener(|k, v, _cause| {
|
||||
let size = index_metadata_weight(&k, &v);
|
||||
CACHE_BYTES
|
||||
.with_label_values(&[INDEX_METADATA_TYPE])
|
||||
.sub(size.into());
|
||||
})
|
||||
.build();
|
||||
let index_cache = moka::sync::CacheBuilder::new(index_content_cap)
|
||||
.name(&format!("index_content_{}", index_type))
|
||||
.weigher(weight_of_content)
|
||||
.eviction_listener(move |k, v, _cause| {
|
||||
let size = weight_of_content(&k, &v);
|
||||
.name("inverted_index_content")
|
||||
.weigher(index_content_weight)
|
||||
.eviction_listener(|k, v, _cause| {
|
||||
let size = index_content_weight(&k, &v);
|
||||
CACHE_BYTES
|
||||
.with_label_values(&[INDEX_CONTENT_TYPE])
|
||||
.sub(size.into());
|
||||
@@ -123,109 +251,259 @@ where
|
||||
index_metadata,
|
||||
index: index_cache,
|
||||
page_size,
|
||||
weight_of_content,
|
||||
weight_of_metadata,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<K, M> IndexCache<K, M>
|
||||
where
|
||||
K: Hash + Eq + Clone + Copy + Send + Sync + 'static,
|
||||
M: Send + Sync + 'static,
|
||||
{
|
||||
pub fn get_metadata(&self, key: K) -> Option<Arc<M>> {
|
||||
self.index_metadata.get(&key)
|
||||
impl InvertedIndexCache {
|
||||
pub fn get_index_metadata(&self, file_id: FileId) -> Option<Arc<InvertedIndexMetas>> {
|
||||
self.index_metadata.get(&IndexMetadataKey { file_id })
|
||||
}
|
||||
|
||||
pub fn put_metadata(&self, key: K, metadata: Arc<M>) {
|
||||
pub fn put_index_metadata(&self, file_id: FileId, metadata: Arc<InvertedIndexMetas>) {
|
||||
let key = IndexMetadataKey { file_id };
|
||||
CACHE_BYTES
|
||||
.with_label_values(&[INDEX_METADATA_TYPE])
|
||||
.add((self.weight_of_metadata)(&key, &metadata).into());
|
||||
.add(index_metadata_weight(&key, &metadata).into());
|
||||
self.index_metadata.insert(key, metadata)
|
||||
}
|
||||
|
||||
/// Gets given range of index data from cache, and loads from source if the file
|
||||
/// is not already cached.
|
||||
async fn get_or_load<F, Fut, E>(
|
||||
&self,
|
||||
key: K,
|
||||
file_size: u64,
|
||||
offset: u64,
|
||||
size: u32,
|
||||
load: F,
|
||||
) -> Result<Vec<u8>, E>
|
||||
where
|
||||
F: FnOnce(Vec<Range<u64>>) -> Fut,
|
||||
Fut: Future<Output = Result<Vec<Bytes>, E>>,
|
||||
E: std::error::Error,
|
||||
{
|
||||
let page_keys =
|
||||
PageKey::generate_page_keys(offset, size, self.page_size).collect::<Vec<_>>();
|
||||
// Size is 0, return empty data.
|
||||
if page_keys.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
let mut data = Vec::with_capacity(page_keys.len());
|
||||
data.resize(page_keys.len(), Bytes::new());
|
||||
let mut cache_miss_range = vec![];
|
||||
let mut cache_miss_idx = vec![];
|
||||
let last_index = page_keys.len() - 1;
|
||||
// TODO: Avoid copy as much as possible.
|
||||
for (i, page_key) in page_keys.iter().enumerate() {
|
||||
match self.get_page(key, *page_key) {
|
||||
Some(page) => {
|
||||
CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
|
||||
data[i] = page;
|
||||
}
|
||||
None => {
|
||||
CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
|
||||
let base_offset = page_key.page_id * self.page_size;
|
||||
let pruned_size = if i == last_index {
|
||||
prune_size(page_keys.iter(), file_size, self.page_size)
|
||||
} else {
|
||||
self.page_size
|
||||
};
|
||||
cache_miss_range.push(base_offset..base_offset + pruned_size);
|
||||
cache_miss_idx.push(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
if !cache_miss_range.is_empty() {
|
||||
let pages = load(cache_miss_range).await?;
|
||||
for (i, page) in cache_miss_idx.into_iter().zip(pages.into_iter()) {
|
||||
let page_key = page_keys[i];
|
||||
data[i] = page.clone();
|
||||
self.put_page(key, page_key, page.clone());
|
||||
}
|
||||
}
|
||||
let buffer = Buffer::from_iter(data.into_iter());
|
||||
Ok(buffer
|
||||
.slice(PageKey::calculate_range(offset, size, self.page_size))
|
||||
.to_vec())
|
||||
pub fn get_index(&self, key: &IndexDataPageKey) -> Option<Bytes> {
|
||||
self.index.get(key)
|
||||
}
|
||||
|
||||
fn get_page(&self, key: K, page_key: PageKey) -> Option<Bytes> {
|
||||
self.index.get(&(key, page_key))
|
||||
}
|
||||
|
||||
fn put_page(&self, key: K, page_key: PageKey, value: Bytes) {
|
||||
pub fn put_index(&self, key: IndexDataPageKey, value: Bytes) {
|
||||
CACHE_BYTES
|
||||
.with_label_values(&[INDEX_CONTENT_TYPE])
|
||||
.add((self.weight_of_content)(&(key, page_key), &value).into());
|
||||
self.index.insert((key, page_key), value);
|
||||
.add(index_content_weight(&key, &value).into());
|
||||
self.index.insert(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculates weight for index metadata.
fn index_metadata_weight(k: &IndexMetadataKey, v: &Arc<InvertedIndexMetas>) -> u32 {
    (k.file_id.as_bytes().len() + v.encoded_len()) as u32
}

/// Calculates weight for index content.
fn index_content_weight(k: &IndexDataPageKey, v: &Bytes) -> u32 {
    (k.file_id.as_bytes().len() + v.len()) as u32
}

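The two weighters above size cache entries in bytes (key length plus encoded metadata or page payload), which makes `index_metadata_cap` and `index_content_cap` byte budgets rather than entry counts (editor's note; the 16-byte figure assumes `FileId` is backed by a UUID, as `FileId::as_bytes()` suggests):

// Editor's note, not part of the diff:
//   entry weight ≈ key bytes + payload bytes
//   e.g. index_content_weight(&key, &page) == 16 + page.len() for a UUID-backed FileId.
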
/// Prunes the size of the last page based on the indexes.
/// We have following cases:
/// 1. The rest file size is less than the page size, read to the end of the file.
/// 2. Otherwise, read the page size.
fn prune_size<'a>(
    indexes: impl Iterator<Item = &'a PageKey>,
    file_size: u64,
    page_size: u64,
) -> u64 {
fn prune_size(indexes: &[IndexDataPageKey], file_size: u64, page_size: u64) -> u64 {
    let last_page_start = indexes.last().map(|i| i.page_id * page_size).unwrap_or(0);
    page_size.min(file_size - last_page_start)
}

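`prune_size` only matters for the final page of a read: if the file ends inside that page, the read range is shortened so it never runs past EOF. A worked example (editor's sketch, not part of the diff):

// Editor's sketch illustrating prune_size.
fn main() {
    let page_size = 1024u64;
    let file_size = 4500u64;
    let last_page_start = 4 * page_size; // last key covers bytes 4096..
    // Only 404 bytes remain in the file, so the last read is 404, not 1024.
    assert_eq!(page_size.min(file_size - last_page_start), 404);
}
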
#[cfg(test)]
|
||||
mod test {
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
use common_base::BitVec;
|
||||
use futures::stream;
|
||||
use index::inverted_index::format::reader::{InvertedIndexBlobReader, InvertedIndexReader};
|
||||
use index::inverted_index::format::writer::{InvertedIndexBlobWriter, InvertedIndexWriter};
|
||||
use index::inverted_index::Bytes;
|
||||
use prometheus::register_int_counter_vec;
|
||||
use rand::{Rng, RngCore};
|
||||
|
||||
use super::*;
|
||||
use crate::sst::index::store::InstrumentedStore;
|
||||
use crate::test_util::TestEnv;
|
||||
|
||||
// Repeat times for following little fuzz tests.
|
||||
const FUZZ_REPEAT_TIMES: usize = 100;
|
||||
|
||||
// Fuzz test for index data page key
|
||||
#[test]
|
||||
fn fuzz_index_calculation() {
|
||||
// randomly generate a large u8 array
|
||||
let mut rng = rand::thread_rng();
|
||||
let mut data = vec![0u8; 1024 * 1024];
|
||||
rng.fill_bytes(&mut data);
|
||||
let file_id = FileId::random();
|
||||
|
||||
for _ in 0..FUZZ_REPEAT_TIMES {
|
||||
let offset = rng.gen_range(0..data.len() as u64);
|
||||
let size = rng.gen_range(0..data.len() as u32 - offset as u32);
|
||||
let page_size: usize = rng.gen_range(1..1024);
|
||||
|
||||
let indexes =
|
||||
IndexDataPageKey::generate_page_keys(file_id, offset, size, page_size as u64);
|
||||
let page_num = indexes.len();
|
||||
let mut read = Vec::with_capacity(size as usize);
|
||||
for key in indexes.into_iter() {
|
||||
let start = key.page_id as usize * page_size;
|
||||
let page = if start + page_size < data.len() {
|
||||
&data[start..start + page_size]
|
||||
} else {
|
||||
&data[start..]
|
||||
};
|
||||
read.extend_from_slice(page);
|
||||
}
|
||||
let expected_range = offset as usize..(offset + size as u64 as u64) as usize;
|
||||
let read =
|
||||
read[IndexDataPageKey::calculate_range(offset, size, page_size as u64)].to_vec();
|
||||
if read != data.get(expected_range).unwrap() {
|
||||
panic!(
|
||||
"fuzz_read_index failed, offset: {}, size: {}, page_size: {}\nread len: {}, expected len: {}\nrange: {:?}, page num: {}",
|
||||
offset, size, page_size, read.len(), size as usize,
|
||||
IndexDataPageKey::calculate_range(offset, size, page_size as u64),
|
||||
page_num
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn unpack(fst_value: u64) -> [u32; 2] {
|
||||
bytemuck::cast::<u64, [u32; 2]>(fst_value)
|
||||
}
|
||||
|
||||
async fn create_inverted_index_blob() -> Vec<u8> {
|
||||
let mut blob = Vec::new();
|
||||
let mut writer = InvertedIndexBlobWriter::new(&mut blob);
|
||||
writer
|
||||
.add_index(
|
||||
"tag0".to_string(),
|
||||
BitVec::from_slice(&[0b0000_0001, 0b0000_0000]),
|
||||
Box::new(stream::iter(vec![
|
||||
Ok((Bytes::from("a"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
Ok((Bytes::from("b"), BitVec::from_slice(&[0b0010_0000]))),
|
||||
Ok((Bytes::from("c"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
])),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
writer
|
||||
.add_index(
|
||||
"tag1".to_string(),
|
||||
BitVec::from_slice(&[0b0000_0001, 0b0000_0000]),
|
||||
Box::new(stream::iter(vec![
|
||||
Ok((Bytes::from("x"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
Ok((Bytes::from("y"), BitVec::from_slice(&[0b0010_0000]))),
|
||||
Ok((Bytes::from("z"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
])),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
writer
|
||||
.finish(8, NonZeroUsize::new(1).unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
blob
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_inverted_index_cache() {
|
||||
let blob = create_inverted_index_blob().await;
|
||||
|
||||
// Init a test range reader in local fs.
|
||||
let mut env = TestEnv::new();
|
||||
let file_size = blob.len() as u64;
|
||||
let store = env.init_object_store_manager();
|
||||
let temp_path = "data";
|
||||
store.write(temp_path, blob).await.unwrap();
|
||||
let store = InstrumentedStore::new(store);
|
||||
let metric =
|
||||
register_int_counter_vec!("test_bytes", "a counter for test", &["test"]).unwrap();
|
||||
let counter = metric.with_label_values(&["test"]);
|
||||
let range_reader = store
|
||||
.range_reader("data", &counter, &counter)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let reader = InvertedIndexBlobReader::new(range_reader);
|
||||
let mut cached_reader = CachedInvertedIndexBlobReader::new(
|
||||
FileId::random(),
|
||||
file_size,
|
||||
reader,
|
||||
Arc::new(InvertedIndexCache::new(8192, 8192, 50)),
|
||||
);
|
||||
let metadata = cached_reader.metadata().await.unwrap();
|
||||
assert_eq!(metadata.total_row_count, 8);
|
||||
assert_eq!(metadata.segment_row_count, 1);
|
||||
assert_eq!(metadata.metas.len(), 2);
|
||||
// tag0
|
||||
let tag0 = metadata.metas.get("tag0").unwrap();
|
||||
let stats0 = tag0.stats.as_ref().unwrap();
|
||||
assert_eq!(stats0.distinct_count, 3);
|
||||
assert_eq!(stats0.null_count, 1);
|
||||
assert_eq!(stats0.min_value, Bytes::from("a"));
|
||||
assert_eq!(stats0.max_value, Bytes::from("c"));
|
||||
let fst0 = cached_reader
|
||||
.fst(
|
||||
tag0.base_offset + tag0.relative_fst_offset as u64,
|
||||
tag0.fst_size,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(fst0.len(), 3);
|
||||
let [offset, size] = unpack(fst0.get(b"a").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
let [offset, size] = unpack(fst0.get(b"b").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
|
||||
let [offset, size] = unpack(fst0.get(b"c").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
|
||||
// tag1
|
||||
let tag1 = metadata.metas.get("tag1").unwrap();
|
||||
let stats1 = tag1.stats.as_ref().unwrap();
|
||||
assert_eq!(stats1.distinct_count, 3);
|
||||
assert_eq!(stats1.null_count, 1);
|
||||
assert_eq!(stats1.min_value, Bytes::from("x"));
|
||||
assert_eq!(stats1.max_value, Bytes::from("z"));
|
||||
let fst1 = cached_reader
|
||||
.fst(
|
||||
tag1.base_offset + tag1.relative_fst_offset as u64,
|
||||
tag1.fst_size,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(fst1.len(), 3);
|
||||
let [offset, size] = unpack(fst1.get(b"x").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
let [offset, size] = unpack(fst1.get(b"y").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
|
||||
let [offset, size] = unpack(fst1.get(b"z").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
|
||||
// fuzz test
|
||||
let mut rng = rand::thread_rng();
|
||||
for _ in 0..FUZZ_REPEAT_TIMES {
|
||||
let offset = rng.gen_range(0..file_size);
|
||||
let size = rng.gen_range(0..file_size as u32 - offset as u32);
|
||||
let expected = cached_reader.range_read(offset, size).await.unwrap();
|
||||
let read = cached_reader.get_or_load(offset, size).await.unwrap();
|
||||
assert_eq!(read, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
src/mito2/src/cache/index/inverted_index.rs
@@ -1,322 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::index::InvertedIndexMetas;
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use index::inverted_index::error::Result;
|
||||
use index::inverted_index::format::reader::InvertedIndexReader;
|
||||
use prost::Message;
|
||||
|
||||
use crate::cache::index::{IndexCache, PageKey, INDEX_METADATA_TYPE};
|
||||
use crate::metrics::{CACHE_HIT, CACHE_MISS};
|
||||
use crate::sst::file::FileId;
|
||||
|
||||
const INDEX_TYPE_INVERTED_INDEX: &str = "inverted_index";
|
||||
|
||||
/// Cache for inverted index.
|
||||
pub type InvertedIndexCache = IndexCache<FileId, InvertedIndexMetas>;
|
||||
pub type InvertedIndexCacheRef = Arc<InvertedIndexCache>;
|
||||
|
||||
impl InvertedIndexCache {
|
||||
/// Creates a new inverted index cache.
|
||||
pub fn new(index_metadata_cap: u64, index_content_cap: u64, page_size: u64) -> Self {
|
||||
Self::new_with_weighter(
|
||||
index_metadata_cap,
|
||||
index_content_cap,
|
||||
page_size,
|
||||
INDEX_TYPE_INVERTED_INDEX,
|
||||
inverted_index_metadata_weight,
|
||||
inverted_index_content_weight,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculates weight for inverted index metadata.
|
||||
fn inverted_index_metadata_weight(k: &FileId, v: &Arc<InvertedIndexMetas>) -> u32 {
|
||||
(k.as_bytes().len() + v.encoded_len()) as u32
|
||||
}
|
||||
|
||||
/// Calculates weight for inverted index content.
|
||||
fn inverted_index_content_weight((k, _): &(FileId, PageKey), v: &Bytes) -> u32 {
|
||||
(k.as_bytes().len() + v.len()) as u32
|
||||
}
|
||||
|
||||
/// Inverted index blob reader with cache.
|
||||
pub struct CachedInvertedIndexBlobReader<R> {
|
||||
file_id: FileId,
|
||||
file_size: u64,
|
||||
inner: R,
|
||||
cache: InvertedIndexCacheRef,
|
||||
}
|
||||
|
||||
impl<R> CachedInvertedIndexBlobReader<R> {
|
||||
/// Creates a new inverted index blob reader with cache.
|
||||
pub fn new(file_id: FileId, file_size: u64, inner: R, cache: InvertedIndexCacheRef) -> Self {
|
||||
Self {
|
||||
file_id,
|
||||
file_size,
|
||||
inner,
|
||||
cache,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobReader<R> {
|
||||
async fn range_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
|
||||
let inner = &mut self.inner;
|
||||
self.cache
|
||||
.get_or_load(
|
||||
self.file_id,
|
||||
self.file_size,
|
||||
offset,
|
||||
size,
|
||||
move |ranges| async move { inner.read_vec(&ranges).await },
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>> {
|
||||
if let Some(cached) = self.cache.get_metadata(self.file_id) {
|
||||
CACHE_HIT.with_label_values(&[INDEX_METADATA_TYPE]).inc();
|
||||
Ok(cached)
|
||||
} else {
|
||||
let meta = self.inner.metadata().await?;
|
||||
self.cache.put_metadata(self.file_id, meta.clone());
|
||||
CACHE_MISS.with_label_values(&[INDEX_METADATA_TYPE]).inc();
|
||||
Ok(meta)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
use common_base::BitVec;
|
||||
use futures::stream;
|
||||
use index::inverted_index::format::reader::{InvertedIndexBlobReader, InvertedIndexReader};
|
||||
use index::inverted_index::format::writer::{InvertedIndexBlobWriter, InvertedIndexWriter};
|
||||
use index::inverted_index::Bytes;
|
||||
use prometheus::register_int_counter_vec;
|
||||
use rand::{Rng, RngCore};
|
||||
|
||||
use super::*;
|
||||
use crate::sst::index::store::InstrumentedStore;
|
||||
use crate::test_util::TestEnv;
|
||||
|
||||
// Repeat times for following little fuzz tests.
|
||||
const FUZZ_REPEAT_TIMES: usize = 100;
|
||||
|
||||
// Fuzz test for index data page key
|
||||
#[test]
|
||||
fn fuzz_index_calculation() {
|
||||
// randomly generate a large u8 array
|
||||
let mut rng = rand::thread_rng();
|
||||
let mut data = vec![0u8; 1024 * 1024];
|
||||
rng.fill_bytes(&mut data);
|
||||
|
||||
for _ in 0..FUZZ_REPEAT_TIMES {
|
||||
let offset = rng.gen_range(0..data.len() as u64);
|
||||
let size = rng.gen_range(0..data.len() as u32 - offset as u32);
|
||||
let page_size: usize = rng.gen_range(1..1024);
|
||||
|
||||
let indexes =
|
||||
PageKey::generate_page_keys(offset, size, page_size as u64).collect::<Vec<_>>();
|
||||
let page_num = indexes.len();
|
||||
let mut read = Vec::with_capacity(size as usize);
|
||||
for key in indexes.into_iter() {
|
||||
let start = key.page_id as usize * page_size;
|
||||
let page = if start + page_size < data.len() {
|
||||
&data[start..start + page_size]
|
||||
} else {
|
||||
&data[start..]
|
||||
};
|
||||
read.extend_from_slice(page);
|
||||
}
|
||||
let expected_range = offset as usize..(offset + size as u64 as u64) as usize;
|
||||
let read = read[PageKey::calculate_range(offset, size, page_size as u64)].to_vec();
|
||||
if read != data.get(expected_range).unwrap() {
|
||||
panic!(
|
||||
"fuzz_read_index failed, offset: {}, size: {}, page_size: {}\nread len: {}, expected len: {}\nrange: {:?}, page num: {}",
|
||||
offset, size, page_size, read.len(), size as usize,
|
||||
PageKey::calculate_range(offset, size, page_size as u64),
|
||||
page_num
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn unpack(fst_value: u64) -> [u32; 2] {
|
||||
bytemuck::cast::<u64, [u32; 2]>(fst_value)
|
||||
}
|
||||
|
||||
async fn create_inverted_index_blob() -> Vec<u8> {
|
||||
let mut blob = Vec::new();
|
||||
let mut writer = InvertedIndexBlobWriter::new(&mut blob);
|
||||
writer
|
||||
.add_index(
|
||||
"tag0".to_string(),
|
||||
BitVec::from_slice(&[0b0000_0001, 0b0000_0000]),
|
||||
Box::new(stream::iter(vec![
|
||||
Ok((Bytes::from("a"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
Ok((Bytes::from("b"), BitVec::from_slice(&[0b0010_0000]))),
|
||||
Ok((Bytes::from("c"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
])),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
writer
|
||||
.add_index(
|
||||
"tag1".to_string(),
|
||||
BitVec::from_slice(&[0b0000_0001, 0b0000_0000]),
|
||||
Box::new(stream::iter(vec![
|
||||
Ok((Bytes::from("x"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
Ok((Bytes::from("y"), BitVec::from_slice(&[0b0010_0000]))),
|
||||
Ok((Bytes::from("z"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
])),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
writer
|
||||
.finish(8, NonZeroUsize::new(1).unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
blob
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_inverted_index_cache() {
|
||||
let blob = create_inverted_index_blob().await;
|
||||
|
||||
// Init a test range reader in local fs.
|
||||
let mut env = TestEnv::new();
|
||||
let file_size = blob.len() as u64;
|
||||
let store = env.init_object_store_manager();
|
||||
let temp_path = "data";
|
||||
store.write(temp_path, blob).await.unwrap();
|
||||
let store = InstrumentedStore::new(store);
|
||||
let metric =
|
||||
register_int_counter_vec!("test_bytes", "a counter for test", &["test"]).unwrap();
|
||||
let counter = metric.with_label_values(&["test"]);
|
||||
let range_reader = store
|
||||
.range_reader("data", &counter, &counter)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let reader = InvertedIndexBlobReader::new(range_reader);
|
||||
let mut cached_reader = CachedInvertedIndexBlobReader::new(
|
||||
FileId::random(),
|
||||
file_size,
|
||||
reader,
|
||||
Arc::new(InvertedIndexCache::new(8192, 8192, 50)),
|
||||
);
|
||||
let metadata = cached_reader.metadata().await.unwrap();
|
||||
assert_eq!(metadata.total_row_count, 8);
|
||||
assert_eq!(metadata.segment_row_count, 1);
|
||||
assert_eq!(metadata.metas.len(), 2);
|
||||
// tag0
|
||||
let tag0 = metadata.metas.get("tag0").unwrap();
|
||||
let stats0 = tag0.stats.as_ref().unwrap();
|
||||
assert_eq!(stats0.distinct_count, 3);
|
||||
assert_eq!(stats0.null_count, 1);
|
||||
assert_eq!(stats0.min_value, Bytes::from("a"));
|
||||
assert_eq!(stats0.max_value, Bytes::from("c"));
|
||||
let fst0 = cached_reader
|
||||
.fst(
|
||||
tag0.base_offset + tag0.relative_fst_offset as u64,
|
||||
tag0.fst_size,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(fst0.len(), 3);
|
||||
let [offset, size] = unpack(fst0.get(b"a").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
let [offset, size] = unpack(fst0.get(b"b").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
|
||||
let [offset, size] = unpack(fst0.get(b"c").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
|
||||
// tag1
|
||||
let tag1 = metadata.metas.get("tag1").unwrap();
|
||||
let stats1 = tag1.stats.as_ref().unwrap();
|
||||
assert_eq!(stats1.distinct_count, 3);
|
||||
assert_eq!(stats1.null_count, 1);
|
||||
assert_eq!(stats1.min_value, Bytes::from("x"));
|
||||
assert_eq!(stats1.max_value, Bytes::from("z"));
|
||||
let fst1 = cached_reader
|
||||
.fst(
|
||||
tag1.base_offset + tag1.relative_fst_offset as u64,
|
||||
tag1.fst_size,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(fst1.len(), 3);
|
||||
let [offset, size] = unpack(fst1.get(b"x").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
let [offset, size] = unpack(fst1.get(b"y").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
|
||||
let [offset, size] = unpack(fst1.get(b"z").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
|
||||
// fuzz test
|
||||
let mut rng = rand::thread_rng();
|
||||
for _ in 0..FUZZ_REPEAT_TIMES {
|
||||
let offset = rng.gen_range(0..file_size);
|
||||
let size = rng.gen_range(0..file_size as u32 - offset as u32);
|
||||
let expected = cached_reader.range_read(offset, size).await.unwrap();
|
||||
let inner = &mut cached_reader.inner;
|
||||
let read = cached_reader
|
||||
.cache
|
||||
.get_or_load(
|
||||
cached_reader.file_id,
|
||||
file_size,
|
||||
offset,
|
||||
size,
|
||||
|ranges| async move { inner.read_vec(&ranges).await },
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(read, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -192,12 +192,12 @@ async fn test_engine_create_with_custom_store() {
|
||||
assert!(object_store_manager
|
||||
.find("Gcs")
|
||||
.unwrap()
|
||||
.exists(region_dir)
|
||||
.is_exist(region_dir)
|
||||
.await
|
||||
.unwrap());
|
||||
assert!(!object_store_manager
|
||||
.default_object_store()
|
||||
.exists(region_dir)
|
||||
.is_exist(region_dir)
|
||||
.await
|
||||
.unwrap());
|
||||
}
|
||||
|
||||
@@ -71,7 +71,7 @@ async fn test_engine_drop_region() {
|
||||
assert!(!env
|
||||
.get_object_store()
|
||||
.unwrap()
|
||||
.exists(&join_path(®ion_dir, DROPPING_MARKER_FILE))
|
||||
.is_exist(&join_path(®ion_dir, DROPPING_MARKER_FILE))
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
@@ -93,7 +93,7 @@ async fn test_engine_drop_region() {
|
||||
listener.wait().await;
|
||||
|
||||
let object_store = env.get_object_store().unwrap();
|
||||
assert!(!object_store.exists(®ion_dir).await.unwrap());
|
||||
assert!(!object_store.is_exist(®ion_dir).await.unwrap());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -167,13 +167,13 @@ async fn test_engine_drop_region_for_custom_store() {
|
||||
assert!(object_store_manager
|
||||
.find("Gcs")
|
||||
.unwrap()
|
||||
.exists(&custom_region_dir)
|
||||
.is_exist(&custom_region_dir)
|
||||
.await
|
||||
.unwrap());
|
||||
assert!(object_store_manager
|
||||
.find("default")
|
||||
.unwrap()
|
||||
.exists(&global_region_dir)
|
||||
.is_exist(&global_region_dir)
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
@@ -190,13 +190,13 @@ async fn test_engine_drop_region_for_custom_store() {
|
||||
assert!(!object_store_manager
|
||||
.find("Gcs")
|
||||
.unwrap()
|
||||
.exists(&custom_region_dir)
|
||||
.is_exist(&custom_region_dir)
|
||||
.await
|
||||
.unwrap());
|
||||
assert!(object_store_manager
|
||||
.find("default")
|
||||
.unwrap()
|
||||
.exists(&global_region_dir)
|
||||
.is_exist(&global_region_dir)
|
||||
.await
|
||||
.unwrap());
|
||||
}
|
||||
|
||||
@@ -228,13 +228,13 @@ async fn test_engine_region_open_with_custom_store() {
|
||||
let object_store_manager = env.get_object_store_manager().unwrap();
|
||||
assert!(!object_store_manager
|
||||
.default_object_store()
|
||||
.exists(region.access_layer.region_dir())
|
||||
.is_exist(region.access_layer.region_dir())
|
||||
.await
|
||||
.unwrap());
|
||||
assert!(object_store_manager
|
||||
.find("Gcs")
|
||||
.unwrap()
|
||||
.exists(region.access_layer.region_dir())
|
||||
.is_exist(region.access_layer.region_dir())
|
||||
.await
|
||||
.unwrap());
|
||||
}
|
||||
|
||||
@@ -723,20 +723,10 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("Failed to iter data part"))]
|
||||
ReadDataPart {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: parquet::errors::ParquetError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to read row group in memtable"))]
|
||||
DecodeArrowRowGroup {
|
||||
#[snafu(source)]
|
||||
error: ArrowError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid region options, {}", reason))]
|
||||
InvalidRegionOptions {
|
||||
reason: String,
|
||||
@@ -1039,7 +1029,6 @@ impl ErrorExt for Error {
|
||||
RegionBusy { .. } => StatusCode::RegionBusy,
|
||||
GetSchemaMetadata { source, .. } => source.status_code(),
|
||||
Timeout { .. } => StatusCode::Cancelled,
|
||||
DecodeArrowRowGroup { .. } => StatusCode::Internal,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -84,7 +84,6 @@ async fn manager_without_checkpoint() {
|
||||
|
||||
// check files
|
||||
let mut expected = vec![
|
||||
"/",
|
||||
"00000000000000000010.json",
|
||||
"00000000000000000009.json",
|
||||
"00000000000000000008.json",
|
||||
@@ -131,7 +130,6 @@ async fn manager_with_checkpoint_distance_1() {
|
||||
|
||||
// check files
|
||||
let mut expected = vec![
|
||||
"/",
|
||||
"00000000000000000009.checkpoint",
|
||||
"00000000000000000010.checkpoint",
|
||||
"00000000000000000010.json",
|
||||
|
||||
@@ -27,12 +27,8 @@ use crate::memtable::{
|
||||
BoxedBatchIterator, KeyValues, Memtable, MemtableId, MemtableRanges, MemtableRef, MemtableStats,
|
||||
};
|
||||
|
||||
#[allow(unused)]
|
||||
mod context;
|
||||
#[allow(unused)]
|
||||
pub(crate) mod part;
|
||||
mod part_reader;
|
||||
mod row_group_reader;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BulkMemtable {
|
||||
|
||||
@@ -1,117 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Context for iterating bulk memtable.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::sync::Arc;
|
||||
|
||||
use parquet::file::metadata::ParquetMetaData;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::storage::ColumnId;
|
||||
use table::predicate::Predicate;
|
||||
|
||||
use crate::row_converter::McmpRowCodec;
|
||||
use crate::sst::parquet::file_range::RangeBase;
|
||||
use crate::sst::parquet::format::ReadFormat;
|
||||
use crate::sst::parquet::reader::SimpleFilterContext;
|
||||
use crate::sst::parquet::stats::RowGroupPruningStats;
|
||||
|
||||
pub(crate) type BulkIterContextRef = Arc<BulkIterContext>;
|
||||
|
||||
pub(crate) struct BulkIterContext {
|
||||
pub(crate) base: RangeBase,
|
||||
pub(crate) predicate: Option<Predicate>,
|
||||
}
|
||||
|
||||
impl BulkIterContext {
|
||||
pub(crate) fn new(
|
||||
region_metadata: RegionMetadataRef,
|
||||
projection: &Option<&[ColumnId]>,
|
||||
predicate: Option<Predicate>,
|
||||
) -> Self {
|
||||
let codec = McmpRowCodec::new_with_primary_keys(®ion_metadata);
|
||||
|
||||
let simple_filters = predicate
|
||||
.as_ref()
|
||||
.iter()
|
||||
.flat_map(|predicate| {
|
||||
predicate
|
||||
.exprs()
|
||||
.iter()
|
||||
.filter_map(|expr| SimpleFilterContext::new_opt(®ion_metadata, None, expr))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let read_format = build_read_format(region_metadata, projection);
|
||||
|
||||
Self {
|
||||
base: RangeBase {
|
||||
filters: simple_filters,
|
||||
read_format,
|
||||
codec,
|
||||
// we don't need to compat batch since all batch in memtable have the same schema.
|
||||
compat_batch: None,
|
||||
},
|
||||
predicate,
|
||||
}
|
||||
}
|
||||
|
||||
/// Prunes row groups by stats.
|
||||
pub(crate) fn row_groups_to_read(&self, file_meta: &Arc<ParquetMetaData>) -> VecDeque<usize> {
|
||||
let region_meta = self.base.read_format.metadata();
|
||||
let row_groups = file_meta.row_groups();
|
||||
// expected_metadata is set to None since we always expect region metadata of memtable is up-to-date.
|
||||
let stats = RowGroupPruningStats::new(row_groups, &self.base.read_format, None);
|
||||
if let Some(predicate) = self.predicate.as_ref() {
|
||||
predicate
|
||||
.prune_with_stats(&stats, region_meta.schema.arrow_schema())
|
||||
.iter()
|
||||
.zip(0..file_meta.num_row_groups())
|
||||
.filter_map(|(selected, row_group)| {
|
||||
if !*selected {
|
||||
return None;
|
||||
}
|
||||
Some(row_group)
|
||||
})
|
||||
.collect::<VecDeque<_>>()
|
||||
} else {
|
||||
(0..file_meta.num_row_groups()).collect()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn read_format(&self) -> &ReadFormat {
|
||||
&self.base.read_format
|
||||
}
|
||||
}
|
||||
|
||||
fn build_read_format(
|
||||
region_metadata: RegionMetadataRef,
|
||||
projection: &Option<&[ColumnId]>,
|
||||
) -> ReadFormat {
|
||||
let read_format = if let Some(column_ids) = &projection {
|
||||
ReadFormat::new(region_metadata, column_ids.iter().copied())
|
||||
} else {
|
||||
// No projection, lists all column ids to read.
|
||||
ReadFormat::new(
|
||||
region_metadata.clone(),
|
||||
region_metadata
|
||||
.column_metadatas
|
||||
.iter()
|
||||
.map(|col| col.column_id),
|
||||
)
|
||||
};
|
||||
|
||||
read_format
|
||||
}
|
||||
@@ -13,12 +13,10 @@
|
||||
// limitations under the License.
|
||||
|
||||
//! Bulk part encoder/decoder.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::Mutation;
|
||||
use bytes::Bytes;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datafusion::arrow::array::{TimestampNanosecondArray, UInt64Builder};
|
||||
use datatypes::arrow;
|
||||
@@ -28,145 +26,93 @@ use datatypes::arrow::array::{
|
||||
UInt8Builder,
|
||||
};
|
||||
use datatypes::arrow::compute::TakeOptions;
|
||||
use datatypes::arrow::datatypes::SchemaRef;
|
||||
use datatypes::arrow::datatypes::{DataType as ArrowDataType, SchemaRef};
|
||||
use datatypes::arrow_array::BinaryArray;
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::{MutableVector, ScalarVectorBuilder, Vector};
|
||||
use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
|
||||
use datatypes::types::TimestampType;
|
||||
use parquet::arrow::ArrowWriter;
|
||||
use parquet::data_type::AsBytes;
|
||||
use parquet::file::metadata::ParquetMetaData;
|
||||
use parquet::file::properties::WriterProperties;
|
||||
use snafu::ResultExt;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::storage::ColumnId;
|
||||
use table::predicate::Predicate;
|
||||
|
||||
use crate::error;
|
||||
use crate::error::{ComputeArrowSnafu, EncodeMemtableSnafu, NewRecordBatchSnafu, Result};
|
||||
use crate::memtable::bulk::context::BulkIterContextRef;
|
||||
use crate::memtable::bulk::part_reader::BulkPartIter;
|
||||
use crate::memtable::key_values::KeyValuesRef;
|
||||
use crate::memtable::BoxedBatchIterator;
|
||||
use crate::read::Batch;
|
||||
use crate::row_converter::{McmpRowCodec, RowCodec};
|
||||
use crate::sst::parquet::format::{PrimaryKeyArray, ReadFormat};
|
||||
use crate::sst::parquet::helper::parse_parquet_metadata;
|
||||
use crate::sst::parquet::format::PrimaryKeyArray;
|
||||
use crate::sst::to_sst_arrow_schema;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BulkPart {
|
||||
data: Bytes,
|
||||
data: Vec<u8>,
|
||||
metadata: BulkPartMeta,
|
||||
}
|
||||
|
||||
impl BulkPart {
|
||||
pub fn new(data: Bytes, metadata: BulkPartMeta) -> Self {
|
||||
pub fn new(data: Vec<u8>, metadata: BulkPartMeta) -> Self {
|
||||
Self { data, metadata }
|
||||
}
|
||||
|
||||
pub(crate) fn metadata(&self) -> &BulkPartMeta {
|
||||
&self.metadata
|
||||
}
|
||||
|
||||
pub(crate) fn read(&self, context: BulkIterContextRef) -> Result<Option<BoxedBatchIterator>> {
|
||||
// use predicate to find row groups to read.
|
||||
let row_groups_to_read = context.row_groups_to_read(&self.metadata.parquet_metadata);
|
||||
|
||||
if row_groups_to_read.is_empty() {
|
||||
// All row groups are filtered.
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let iter = BulkPartIter::try_new(
|
||||
context,
|
||||
row_groups_to_read,
|
||||
self.metadata.parquet_metadata.clone(),
|
||||
self.data.clone(),
|
||||
)?;
|
||||
Ok(Some(Box::new(iter) as BoxedBatchIterator))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BulkPartMeta {
|
||||
/// Total rows in part.
|
||||
pub num_rows: usize,
|
||||
/// Max timestamp in part.
|
||||
pub max_timestamp: i64,
|
||||
/// Min timestamp in part.
|
||||
pub min_timestamp: i64,
|
||||
/// Part file metadata.
|
||||
pub parquet_metadata: Arc<ParquetMetaData>,
|
||||
/// Part region schema.
|
||||
pub region_metadata: RegionMetadataRef,
|
||||
}
|
||||
|
||||
pub struct BulkPartEncoder {
|
||||
metadata: RegionMetadataRef,
|
||||
pk_encoder: McmpRowCodec,
|
||||
row_group_size: usize,
|
||||
dedup: bool,
|
||||
writer_props: Option<WriterProperties>,
|
||||
}
|
||||
|
||||
impl BulkPartEncoder {
|
||||
pub(crate) fn new(
|
||||
metadata: RegionMetadataRef,
|
||||
dedup: bool,
|
||||
row_group_size: usize,
|
||||
) -> BulkPartEncoder {
|
||||
let codec = McmpRowCodec::new_with_primary_keys(&metadata);
|
||||
let writer_props = Some(
|
||||
WriterProperties::builder()
|
||||
.set_write_batch_size(row_group_size)
|
||||
.set_max_row_group_size(row_group_size)
|
||||
.build(),
|
||||
);
|
||||
impl Default for BulkPartMeta {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
metadata,
|
||||
pk_encoder: codec,
|
||||
row_group_size,
|
||||
dedup,
|
||||
writer_props,
|
||||
num_rows: 0,
|
||||
max_timestamp: i64::MIN,
|
||||
min_timestamp: i64::MAX,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BulkPartEncoder {
|
||||
metadata: RegionMetadataRef,
|
||||
arrow_schema: SchemaRef,
|
||||
pk_encoder: McmpRowCodec,
|
||||
dedup: bool,
|
||||
}
|
||||
|
||||
impl BulkPartEncoder {
|
||||
/// Encodes mutations to a [BulkPart], returns true if encoded data has been written to `dest`.
|
||||
fn encode_mutations(&self, mutations: &[Mutation]) -> Result<Option<BulkPart>> {
|
||||
fn encode_mutations(&self, mutations: &[Mutation], dest: &mut BulkPart) -> Result<bool> {
|
||||
let Some((arrow_record_batch, min_ts, max_ts)) =
|
||||
mutations_to_record_batch(mutations, &self.metadata, &self.pk_encoder, self.dedup)?
|
||||
mutations_to_record_batch(mutations, &self.metadata, &self.pk_encoder, false)?
|
||||
else {
|
||||
return Ok(None);
|
||||
return Ok(false);
|
||||
};
|
||||
|
||||
let mut buf = Vec::with_capacity(4096);
|
||||
let arrow_schema = arrow_record_batch.schema();
|
||||
|
||||
let file_metadata = {
|
||||
let mut writer =
|
||||
ArrowWriter::try_new(&mut buf, arrow_schema, self.writer_props.clone())
|
||||
.context(EncodeMemtableSnafu)?;
|
||||
{
|
||||
let mut writer = ArrowWriter::try_new(&mut dest.data, arrow_schema, None)
|
||||
.context(EncodeMemtableSnafu)?;
|
||||
writer
|
||||
.write(&arrow_record_batch)
|
||||
.context(EncodeMemtableSnafu)?;
|
||||
writer.finish().context(EncodeMemtableSnafu)?
|
||||
let _metadata = writer.finish().context(EncodeMemtableSnafu)?;
|
||||
}
|
||||
|
||||
dest.metadata = BulkPartMeta {
|
||||
num_rows: arrow_record_batch.num_rows(),
|
||||
max_timestamp: max_ts,
|
||||
min_timestamp: min_ts,
|
||||
};
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
let buf = Bytes::from(buf);
|
||||
let parquet_metadata = Arc::new(parse_parquet_metadata(file_metadata)?);
|
||||
|
||||
Ok(Some(BulkPart {
|
||||
data: buf,
|
||||
metadata: BulkPartMeta {
|
||||
num_rows: arrow_record_batch.num_rows(),
|
||||
max_timestamp: max_ts,
|
||||
min_timestamp: min_ts,
|
||||
parquet_metadata,
|
||||
region_metadata: self.metadata.clone(),
|
||||
},
|
||||
}))
|
||||
/// Decodes [BulkPart] to [Batch]es.
|
||||
fn decode_to_batches(&self, _part: &BulkPart, _dest: &mut VecDeque<Batch>) -> Result<()> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
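Both versions of `encode_mutations` above funnel a `RecordBatch` through `ArrowWriter` into an in-memory Parquet buffer; the newer one also keeps the returned file metadata so row groups can be pruned later without re-parsing the blob. A self-contained sketch of that encoding step, independent of the memtable types (editor's example; the batch and row-group size are assumed inputs):

// Editor's sketch of in-memory Parquet encoding; not part of the diff.
use arrow::record_batch::RecordBatch;
use parquet::arrow::ArrowWriter;
use parquet::file::properties::WriterProperties;

fn encode_to_parquet(batch: &RecordBatch, row_group_size: usize) -> parquet::errors::Result<Vec<u8>> {
    let mut buf = Vec::with_capacity(4096);
    let props = WriterProperties::builder()
        .set_write_batch_size(row_group_size)
        .set_max_row_group_size(row_group_size)
        .build();
    let mut writer = ArrowWriter::try_new(&mut buf, batch.schema(), Some(props))?;
    writer.write(batch)?;
    // `close` finishes the file and returns its Parquet footer metadata.
    let _file_metadata = writer.close()?;
    Ok(buf)
}
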
@@ -433,12 +379,10 @@ fn binary_array_to_dictionary(input: &BinaryArray) -> Result<PrimaryKeyArray> {
|
||||
mod tests {
|
||||
use std::collections::VecDeque;
|
||||
|
||||
use datafusion_common::ScalarValue;
|
||||
use datatypes::prelude::{ScalarVector, Value};
|
||||
use datatypes::vectors::{Float64Vector, TimestampMillisecondVector};
|
||||
|
||||
use super::*;
|
||||
use crate::memtable::bulk::context::BulkIterContext;
|
||||
use crate::sst::parquet::format::ReadFormat;
|
||||
use crate::test_util::memtable_util::{build_key_values_with_ts_seq_values, metadata_for_test};
|
||||
|
||||
@@ -500,7 +444,7 @@ mod tests {
|
||||
k0: &'a str,
|
||||
k1: u32,
|
||||
timestamps: &'a [i64],
|
||||
v1: &'a [Option<f64>],
|
||||
v0: &'a [Option<f64>],
|
||||
sequence: u64,
|
||||
}
|
||||
|
||||
@@ -508,7 +452,7 @@ mod tests {
|
||||
struct BatchOutput<'a> {
|
||||
pk_values: &'a [Value],
|
||||
timestamps: &'a [i64],
|
||||
v1: &'a [Option<f64>],
|
||||
v0: &'a [Option<f64>],
|
||||
}
|
||||
|
||||
fn check_mutations_to_record_batches(
|
||||
@@ -526,7 +470,7 @@ mod tests {
|
||||
m.k0.to_string(),
|
||||
m.k1,
|
||||
m.timestamps.iter().copied(),
|
||||
m.v1.iter().copied(),
|
||||
m.v0.iter().copied(),
|
||||
m.sequence,
|
||||
)
|
||||
.mutation
|
||||
@@ -582,7 +526,7 @@ mod tests {
|
||||
for idx in 0..expected.len() {
|
||||
assert_eq!(expected[idx].pk_values, &batch_values[idx].0);
|
||||
assert_eq!(expected[idx].timestamps, &batch_values[idx].1);
|
||||
assert_eq!(expected[idx].v1, &batch_values[idx].2);
|
||||
assert_eq!(expected[idx].v0, &batch_values[idx].2);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -593,13 +537,13 @@ mod tests {
|
||||
k0: "a",
|
||||
k1: 0,
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.1)],
|
||||
v0: &[Some(0.1)],
|
||||
sequence: 0,
|
||||
}],
|
||||
&[BatchOutput {
|
||||
pk_values: &[Value::String("a".into()), Value::UInt32(0)],
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.1)],
|
||||
v0: &[Some(0.1)],
|
||||
}],
|
||||
(0, 0),
|
||||
true,
|
||||
@@ -611,28 +555,28 @@ mod tests {
|
||||
k0: "a",
|
||||
k1: 0,
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.1)],
|
||||
v0: &[Some(0.1)],
|
||||
sequence: 0,
|
||||
},
|
||||
MutationInput {
|
||||
k0: "b",
|
||||
k1: 0,
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.0)],
|
||||
v0: &[Some(0.0)],
|
||||
sequence: 0,
|
||||
},
|
||||
MutationInput {
|
||||
k0: "a",
|
||||
k1: 0,
|
||||
timestamps: &[1],
|
||||
v1: &[Some(0.2)],
|
||||
v0: &[Some(0.2)],
|
||||
sequence: 1,
|
||||
},
|
||||
MutationInput {
|
||||
k0: "a",
|
||||
k1: 1,
|
||||
timestamps: &[1],
|
||||
v1: &[Some(0.3)],
|
||||
v0: &[Some(0.3)],
|
||||
sequence: 2,
|
||||
},
|
||||
],
|
||||
@@ -640,17 +584,17 @@ mod tests {
|
||||
BatchOutput {
|
||||
pk_values: &[Value::String("a".into()), Value::UInt32(0)],
|
||||
timestamps: &[0, 1],
|
||||
v1: &[Some(0.1), Some(0.2)],
|
||||
v0: &[Some(0.1), Some(0.2)],
|
||||
},
|
||||
BatchOutput {
|
||||
pk_values: &[Value::String("a".into()), Value::UInt32(1)],
|
||||
timestamps: &[1],
|
||||
v1: &[Some(0.3)],
|
||||
v0: &[Some(0.3)],
|
||||
},
|
||||
BatchOutput {
|
||||
pk_values: &[Value::String("b".into()), Value::UInt32(0)],
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.0)],
|
||||
v0: &[Some(0.0)],
|
||||
},
|
||||
],
|
||||
(0, 1),
|
||||
@@ -663,21 +607,21 @@ mod tests {
|
||||
k0: "a",
|
||||
k1: 0,
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.1)],
|
||||
v0: &[Some(0.1)],
|
||||
sequence: 0,
|
||||
},
|
||||
MutationInput {
|
||||
k0: "b",
|
||||
k1: 0,
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.0)],
|
||||
v0: &[Some(0.0)],
|
||||
sequence: 0,
|
||||
},
|
||||
MutationInput {
|
||||
k0: "a",
|
||||
k1: 0,
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.2)],
|
||||
v0: &[Some(0.2)],
|
||||
sequence: 1,
|
||||
},
|
||||
],
|
||||
@@ -685,12 +629,12 @@ mod tests {
|
||||
BatchOutput {
|
||||
pk_values: &[Value::String("a".into()), Value::UInt32(0)],
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.2)],
|
||||
v0: &[Some(0.2)],
|
||||
},
|
||||
BatchOutput {
|
||||
pk_values: &[Value::String("b".into()), Value::UInt32(0)],
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.0)],
|
||||
v0: &[Some(0.0)],
|
||||
},
|
||||
],
|
||||
(0, 0),
|
||||
@@ -702,21 +646,21 @@ mod tests {
|
||||
k0: "a",
|
||||
k1: 0,
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.1)],
|
||||
v0: &[Some(0.1)],
|
||||
sequence: 0,
|
||||
},
|
||||
MutationInput {
|
||||
k0: "b",
|
||||
k1: 0,
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.0)],
|
||||
v0: &[Some(0.0)],
|
||||
sequence: 0,
|
||||
},
|
||||
MutationInput {
|
||||
k0: "a",
|
||||
k1: 0,
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.2)],
|
||||
v0: &[Some(0.2)],
|
||||
sequence: 1,
|
||||
},
|
||||
],
|
||||
@@ -724,194 +668,16 @@ mod tests {
|
||||
BatchOutput {
|
||||
pk_values: &[Value::String("a".into()), Value::UInt32(0)],
|
||||
timestamps: &[0, 0],
|
||||
v1: &[Some(0.2), Some(0.1)],
|
||||
v0: &[Some(0.2), Some(0.1)],
|
||||
},
|
||||
BatchOutput {
|
||||
pk_values: &[Value::String("b".into()), Value::UInt32(0)],
|
||||
timestamps: &[0],
|
||||
v1: &[Some(0.0)],
|
||||
v0: &[Some(0.0)],
|
||||
},
|
||||
],
|
||||
(0, 0),
|
||||
false,
|
||||
);
|
||||
}
|
||||
|
||||
fn encode(input: &[MutationInput]) -> BulkPart {
|
||||
let metadata = metadata_for_test();
|
||||
let mutations = input
|
||||
.iter()
|
||||
.map(|m| {
|
||||
build_key_values_with_ts_seq_values(
|
||||
&metadata,
|
||||
m.k0.to_string(),
|
||||
m.k1,
|
||||
m.timestamps.iter().copied(),
|
||||
m.v1.iter().copied(),
|
||||
m.sequence,
|
||||
)
|
||||
.mutation
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let encoder = BulkPartEncoder::new(metadata, true, 1024);
|
||||
encoder.encode_mutations(&mutations).unwrap().unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_write_and_read_part_projection() {
|
||||
let part = encode(&[
|
||||
MutationInput {
|
||||
k0: "a",
|
||||
k1: 0,
|
||||
timestamps: &[1],
|
||||
v1: &[Some(0.1)],
|
||||
sequence: 0,
|
||||
},
|
||||
MutationInput {
|
||||
k0: "b",
|
||||
k1: 0,
|
||||
timestamps: &[1],
|
||||
v1: &[Some(0.0)],
|
||||
sequence: 0,
|
||||
},
|
||||
MutationInput {
|
||||
k0: "a",
|
||||
k1: 0,
|
||||
timestamps: &[2],
|
||||
v1: &[Some(0.2)],
|
||||
sequence: 1,
|
||||
},
|
||||
]);
|
||||
|
||||
let projection = &[4u32];
|
||||
|
||||
let mut reader = part
|
||||
.read(Arc::new(BulkIterContext::new(
|
||||
part.metadata.region_metadata.clone(),
|
||||
&Some(projection.as_slice()),
|
||||
None,
|
||||
)))
|
||||
.unwrap()
|
||||
.expect("expect at least one row group");
|
||||
|
||||
let mut total_rows_read = 0;
|
||||
let mut field = vec![];
|
||||
for res in reader {
|
||||
let batch = res.unwrap();
|
||||
assert_eq!(1, batch.fields().len());
|
||||
assert_eq!(4, batch.fields()[0].column_id);
|
||||
field.extend(
|
||||
batch.fields()[0]
|
||||
.data
|
||||
.as_any()
|
||||
.downcast_ref::<Float64Vector>()
|
||||
.unwrap()
|
||||
.iter_data()
|
||||
.map(|v| v.unwrap()),
|
||||
);
|
||||
total_rows_read += batch.num_rows();
|
||||
}
|
||||
assert_eq!(3, total_rows_read);
|
||||
assert_eq!(vec![0.1, 0.2, 0.0], field);
|
||||
}
|
||||
|
||||
fn prepare(key_values: Vec<(&str, u32, (i64, i64), u64)>) -> BulkPart {
|
||||
let metadata = metadata_for_test();
|
||||
let mutations = key_values
|
||||
.into_iter()
|
||||
.map(|(k0, k1, (start, end), sequence)| {
|
||||
let ts = (start..end);
|
||||
let v1 = (start..end).map(|_| None);
|
||||
build_key_values_with_ts_seq_values(&metadata, k0.to_string(), k1, ts, v1, sequence)
|
||||
.mutation
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let encoder = BulkPartEncoder::new(metadata, true, 100);
|
||||
encoder.encode_mutations(&mutations).unwrap().unwrap()
|
||||
}
|
||||
|
||||
fn check_prune_row_group(part: &BulkPart, predicate: Option<Predicate>, expected_rows: usize) {
|
||||
let context = Arc::new(BulkIterContext::new(
|
||||
part.metadata.region_metadata.clone(),
|
||||
&None,
|
||||
predicate,
|
||||
));
|
||||
let mut reader = part
|
||||
.read(context)
|
||||
.unwrap()
|
||||
.expect("expect at least one row group");
|
||||
let mut total_rows_read = 0;
|
||||
for res in reader {
|
||||
let batch = res.unwrap();
|
||||
total_rows_read += batch.num_rows();
|
||||
}
|
||||
// Should only read row group 1.
|
||||
assert_eq!(expected_rows, total_rows_read);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_prune_row_groups() {
|
||||
let part = prepare(vec![
|
||||
("a", 0, (0, 40), 1),
|
||||
("a", 1, (0, 60), 1),
|
||||
("b", 0, (0, 100), 2),
|
||||
("b", 1, (100, 180), 3),
|
||||
("b", 1, (180, 210), 4),
|
||||
]);
|
||||
|
||||
let context = Arc::new(BulkIterContext::new(
|
||||
part.metadata.region_metadata.clone(),
|
||||
&None,
|
||||
Some(Predicate::new(vec![datafusion_expr::col("ts").eq(
|
||||
datafusion_expr::lit(ScalarValue::TimestampMillisecond(Some(300), None)),
|
||||
)])),
|
||||
));
|
||||
assert!(part.read(context).unwrap().is_none());
|
||||
|
||||
check_prune_row_group(&part, None, 310);
|
||||
|
||||
check_prune_row_group(
|
||||
&part,
|
||||
Some(Predicate::new(vec![
|
||||
datafusion_expr::col("k0").eq(datafusion_expr::lit("a")),
|
||||
datafusion_expr::col("k1").eq(datafusion_expr::lit(0u32)),
|
||||
])),
|
||||
40,
|
||||
);
|
||||
|
||||
check_prune_row_group(
|
||||
&part,
|
||||
Some(Predicate::new(vec![
|
||||
datafusion_expr::col("k0").eq(datafusion_expr::lit("a")),
|
||||
datafusion_expr::col("k1").eq(datafusion_expr::lit(1u32)),
|
||||
])),
|
||||
60,
|
||||
);
|
||||
|
||||
check_prune_row_group(
|
||||
&part,
|
||||
Some(Predicate::new(vec![
|
||||
datafusion_expr::col("k0").eq(datafusion_expr::lit("a"))
|
||||
])),
|
||||
100,
|
||||
);
|
||||
|
||||
check_prune_row_group(
|
||||
&part,
|
||||
Some(Predicate::new(vec![
|
||||
datafusion_expr::col("k0").eq(datafusion_expr::lit("b")),
|
||||
datafusion_expr::col("k1").eq(datafusion_expr::lit(0u32)),
|
||||
])),
|
||||
100,
|
||||
);
|
||||
|
||||
/// Predicates over field column can do precise filtering.
|
||||
check_prune_row_group(
|
||||
&part,
|
||||
Some(Predicate::new(vec![
|
||||
datafusion_expr::col("v0").eq(datafusion_expr::lit(150i64))
|
||||
])),
|
||||
1,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,149 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::sync::Arc;
|
||||
|
||||
use bytes::Bytes;
|
||||
use parquet::arrow::ProjectionMask;
|
||||
use parquet::file::metadata::ParquetMetaData;
|
||||
|
||||
use crate::error;
|
||||
use crate::memtable::bulk::context::BulkIterContextRef;
|
||||
use crate::memtable::bulk::row_group_reader::{
|
||||
MemtableRowGroupReader, MemtableRowGroupReaderBuilder,
|
||||
};
|
||||
use crate::read::Batch;
|
||||
|
||||
/// Iterator for reading data inside a bulk part.
|
||||
pub struct BulkPartIter {
|
||||
row_groups_to_read: VecDeque<usize>,
|
||||
current_reader: Option<PruneReader>,
|
||||
builder: MemtableRowGroupReaderBuilder,
|
||||
}
|
||||
|
||||
impl BulkPartIter {
|
||||
/// Creates a new [BulkPartIter].
|
||||
pub(crate) fn try_new(
|
||||
context: BulkIterContextRef,
|
||||
mut row_groups_to_read: VecDeque<usize>,
|
||||
parquet_meta: Arc<ParquetMetaData>,
|
||||
data: Bytes,
|
||||
) -> error::Result<Self> {
|
||||
let projection_mask = ProjectionMask::roots(
|
||||
parquet_meta.file_metadata().schema_descr(),
|
||||
context.read_format().projection_indices().iter().copied(),
|
||||
);
|
||||
|
||||
let builder = MemtableRowGroupReaderBuilder::try_new(
|
||||
context.clone(),
|
||||
projection_mask,
|
||||
parquet_meta,
|
||||
data,
|
||||
)?;
|
||||
|
||||
let init_reader = row_groups_to_read
|
||||
.pop_front()
|
||||
.map(|first_row_group| builder.build_row_group_reader(first_row_group, None))
|
||||
.transpose()?
|
||||
.map(|r| PruneReader::new(context, r));
|
||||
Ok(Self {
|
||||
row_groups_to_read,
|
||||
current_reader: init_reader,
|
||||
builder,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn next_batch(&mut self) -> error::Result<Option<Batch>> {
|
||||
let Some(current) = &mut self.current_reader else {
|
||||
// All row group exhausted.
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
if let Some(batch) = current.next_batch()? {
|
||||
return Ok(Some(batch));
|
||||
}
|
||||
|
||||
// Previous row group exhausted, read next row group
|
||||
while let Some(next_row_group) = self.row_groups_to_read.pop_front() {
|
||||
current.reset(self.builder.build_row_group_reader(next_row_group, None)?);
|
||||
if let Some(next_batch) = current.next_batch()? {
|
||||
return Ok(Some(next_batch));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for BulkPartIter {
|
||||
type Item = error::Result<Batch>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.next_batch().transpose()
|
||||
}
|
||||
}
|
||||
|
||||
struct PruneReader {
|
||||
context: BulkIterContextRef,
|
||||
row_group_reader: MemtableRowGroupReader,
|
||||
}
|
||||
|
||||
//todo(hl): maybe we also need to support lastrow mode here.
|
||||
impl PruneReader {
|
||||
fn new(context: BulkIterContextRef, reader: MemtableRowGroupReader) -> Self {
|
||||
Self {
|
||||
context,
|
||||
row_group_reader: reader,
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterates current inner reader until exhausted.
|
||||
fn next_batch(&mut self) -> error::Result<Option<Batch>> {
|
||||
while let Some(b) = self.row_group_reader.next_inner()? {
|
||||
match self.prune(b)? {
|
||||
Some(b) => {
|
||||
return Ok(Some(b));
|
||||
}
|
||||
None => {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Prunes batch according to filters.
|
||||
fn prune(&mut self, batch: Batch) -> error::Result<Option<Batch>> {
|
||||
//todo(hl): add metrics.
|
||||
|
||||
// fast path
|
||||
if self.context.base.filters.is_empty() {
|
||||
return Ok(Some(batch));
|
||||
}
|
||||
|
||||
let Some(batch_filtered) = self.context.base.precise_filter(batch)? else {
|
||||
// the entire batch is filtered out
|
||||
return Ok(None);
|
||||
};
|
||||
if !batch_filtered.is_empty() {
|
||||
Ok(Some(batch_filtered))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
fn reset(&mut self, reader: MemtableRowGroupReader) {
|
||||
self.row_group_reader = reader;
|
||||
}
|
||||
}
|
||||
@@ -1,189 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use bytes::Bytes;
|
||||
use datatypes::arrow::array::RecordBatch;
|
||||
use datatypes::arrow::error::ArrowError;
|
||||
use parquet::arrow::arrow_reader::{ParquetRecordBatchReader, RowGroups, RowSelection};
|
||||
use parquet::arrow::{parquet_to_arrow_field_levels, FieldLevels, ProjectionMask};
|
||||
use parquet::column::page::{PageIterator, PageReader};
|
||||
use parquet::file::metadata::ParquetMetaData;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error;
|
||||
use crate::error::ReadDataPartSnafu;
|
||||
use crate::memtable::bulk::context::BulkIterContextRef;
|
||||
use crate::sst::parquet::format::ReadFormat;
|
||||
use crate::sst::parquet::reader::{RowGroupReaderBase, RowGroupReaderContext};
|
||||
use crate::sst::parquet::row_group::{ColumnChunkIterator, RowGroupBase};
|
||||
use crate::sst::parquet::DEFAULT_READ_BATCH_SIZE;
|
||||
|
||||
/// Helper for reading specific row group inside Memtable Parquet parts.
|
||||
// This is similar to [mito2::sst::parquet::row_group::InMemoryRowGroup] since
|
||||
// it's a workaround for lacking of keyword generics.
|
||||
pub struct MemtableRowGroupPageFetcher<'a> {
|
||||
/// Shared structs for reading row group.
|
||||
base: RowGroupBase<'a>,
|
||||
bytes: Bytes,
|
||||
}
|
||||
|
||||
impl<'a> MemtableRowGroupPageFetcher<'a> {
|
||||
pub(crate) fn create(
|
||||
row_group_idx: usize,
|
||||
parquet_meta: &'a ParquetMetaData,
|
||||
bytes: Bytes,
|
||||
) -> Self {
|
||||
let metadata = parquet_meta.row_group(row_group_idx);
|
||||
let row_count = metadata.num_rows() as usize;
|
||||
let page_locations = parquet_meta
|
||||
.offset_index()
|
||||
.map(|x| x[row_group_idx].as_slice());
|
||||
|
||||
Self {
|
||||
base: RowGroupBase {
|
||||
metadata,
|
||||
page_locations,
|
||||
row_count,
|
||||
column_chunks: vec![None; metadata.columns().len()],
|
||||
// the cached `column_uncompressed_pages` would never be used in Memtable readers.
|
||||
column_uncompressed_pages: vec![None; metadata.columns().len()],
|
||||
},
|
||||
bytes,
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetches column pages from memory file.
|
||||
pub(crate) fn fetch(&mut self, projection: &ProjectionMask, selection: Option<&RowSelection>) {
|
||||
if let Some((selection, page_locations)) = selection.zip(self.base.page_locations) {
|
||||
// Selection provided.
|
||||
let (fetch_ranges, page_start_offsets) =
|
||||
self.base
|
||||
.calc_sparse_read_ranges(projection, page_locations, selection);
|
||||
if fetch_ranges.is_empty() {
|
||||
return;
|
||||
}
|
||||
let chunk_data = self.fetch_bytes(&fetch_ranges);
|
||||
|
||||
self.base
|
||||
.assign_sparse_chunk(projection, chunk_data, page_start_offsets);
|
||||
} else {
|
||||
let fetch_ranges = self.base.calc_dense_read_ranges(projection);
|
||||
if fetch_ranges.is_empty() {
|
||||
// Nothing to fetch.
|
||||
return;
|
||||
}
|
||||
let chunk_data = self.fetch_bytes(&fetch_ranges);
|
||||
self.base.assign_dense_chunk(projection, chunk_data);
|
||||
}
|
||||
}
|
||||
|
||||
fn fetch_bytes(&self, ranges: &[Range<u64>]) -> Vec<Bytes> {
|
||||
ranges
|
||||
.iter()
|
||||
.map(|range| self.bytes.slice(range.start as usize..range.end as usize))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Creates a page reader to read column at `i`.
|
||||
fn column_page_reader(&self, i: usize) -> parquet::errors::Result<Box<dyn PageReader>> {
|
||||
let reader = self.base.column_reader(i)?;
|
||||
Ok(Box::new(reader))
|
||||
}
|
||||
}
|
||||
|
||||
impl RowGroups for MemtableRowGroupPageFetcher<'_> {
|
||||
fn num_rows(&self) -> usize {
|
||||
self.base.row_count
|
||||
}
|
||||
|
||||
fn column_chunks(&self, i: usize) -> parquet::errors::Result<Box<dyn PageIterator>> {
|
||||
Ok(Box::new(ColumnChunkIterator {
|
||||
reader: Some(self.column_page_reader(i)),
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
impl RowGroupReaderContext for BulkIterContextRef {
|
||||
fn map_result(
|
||||
&self,
|
||||
result: Result<Option<RecordBatch>, ArrowError>,
|
||||
) -> error::Result<Option<RecordBatch>> {
|
||||
result.context(error::DecodeArrowRowGroupSnafu)
|
||||
}
|
||||
|
||||
fn read_format(&self) -> &ReadFormat {
|
||||
self.as_ref().read_format()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) type MemtableRowGroupReader = RowGroupReaderBase<BulkIterContextRef>;
|
||||
|
||||
pub(crate) struct MemtableRowGroupReaderBuilder {
|
||||
context: BulkIterContextRef,
|
||||
projection: ProjectionMask,
|
||||
parquet_metadata: Arc<ParquetMetaData>,
|
||||
field_levels: FieldLevels,
|
||||
data: Bytes,
|
||||
}
|
||||
|
||||
impl MemtableRowGroupReaderBuilder {
|
||||
pub(crate) fn try_new(
|
||||
context: BulkIterContextRef,
|
||||
projection: ProjectionMask,
|
||||
parquet_metadata: Arc<ParquetMetaData>,
|
||||
data: Bytes,
|
||||
) -> error::Result<Self> {
|
||||
let parquet_schema_desc = parquet_metadata.file_metadata().schema_descr();
|
||||
let hint = Some(context.read_format().arrow_schema().fields());
|
||||
let field_levels =
|
||||
parquet_to_arrow_field_levels(parquet_schema_desc, projection.clone(), hint)
|
||||
.context(ReadDataPartSnafu)?;
|
||||
Ok(Self {
|
||||
context,
|
||||
projection,
|
||||
parquet_metadata,
|
||||
field_levels,
|
||||
data,
|
||||
})
|
||||
}
|
||||
|
||||
/// Builds a reader to read the row group at `row_group_idx` from memory.
|
||||
pub(crate) fn build_row_group_reader(
|
||||
&self,
|
||||
row_group_idx: usize,
|
||||
row_selection: Option<RowSelection>,
|
||||
) -> error::Result<MemtableRowGroupReader> {
|
||||
let mut row_group = MemtableRowGroupPageFetcher::create(
|
||||
row_group_idx,
|
||||
&self.parquet_metadata,
|
||||
self.data.clone(),
|
||||
);
|
||||
// Fetches data from memory part. Currently, row selection is not supported.
|
||||
row_group.fetch(&self.projection, row_selection.as_ref());
|
||||
|
||||
// Builds the parquet reader.
|
||||
// Now the row selection is None.
|
||||
let reader = ParquetRecordBatchReader::try_new_with_row_groups(
|
||||
&self.field_levels,
|
||||
&row_group,
|
||||
DEFAULT_READ_BATCH_SIZE,
|
||||
row_selection,
|
||||
)
|
||||
.context(ReadDataPartSnafu)?;
|
||||
Ok(MemtableRowGroupReader::create(self.context.clone(), reader))
|
||||
}
|
||||
}
|
||||
@@ -99,8 +99,11 @@ impl RowGroupLastRowCachedReader {
|
||||
return Self::new_miss(key, row_group_reader, None);
|
||||
};
|
||||
if let Some(value) = cache_manager.get_selector_result(&key) {
|
||||
let schema_matches =
|
||||
value.projection == row_group_reader.read_format().projection_indices();
|
||||
let schema_matches = value.projection
|
||||
== row_group_reader
|
||||
.context()
|
||||
.read_format()
|
||||
.projection_indices();
|
||||
if schema_matches {
|
||||
// Schema matches, use cache batches.
|
||||
Self::new_hit(value)
|
||||
@@ -215,23 +218,29 @@ impl RowGroupLastRowReader {
|
||||
};
|
||||
|
||||
// All last rows in row group are yielded, update cache.
|
||||
self.maybe_update_cache();
|
||||
self.update_cache();
|
||||
Ok(last_batch)
|
||||
}
|
||||
|
||||
/// Updates row group's last row cache if cache manager is present.
|
||||
fn maybe_update_cache(&mut self) {
|
||||
if let Some(cache) = &self.cache_manager {
|
||||
if self.yielded_batches.is_empty() {
|
||||
// we always expect that row groups yields batches.
|
||||
return;
|
||||
}
|
||||
let value = Arc::new(SelectorResultValue {
|
||||
result: std::mem::take(&mut self.yielded_batches),
|
||||
projection: self.reader.read_format().projection_indices().to_vec(),
|
||||
});
|
||||
cache.put_selector_result(self.key, value)
|
||||
fn update_cache(&mut self) {
|
||||
if self.yielded_batches.is_empty() {
|
||||
// we always expect that row groups yields batches.
|
||||
return;
|
||||
}
|
||||
let Some(cache) = &self.cache_manager else {
|
||||
return;
|
||||
};
|
||||
let value = Arc::new(SelectorResultValue {
|
||||
result: std::mem::take(&mut self.yielded_batches),
|
||||
projection: self
|
||||
.reader
|
||||
.context()
|
||||
.read_format()
|
||||
.projection_indices()
|
||||
.to_vec(),
|
||||
});
|
||||
cache.put_selector_result(self.key, value);
|
||||
}
|
||||
|
||||
fn metrics(&self) -> &ReaderMetrics {
|
||||
|
||||
@@ -399,7 +399,7 @@ impl ScanRegion {
});
}

/// Use the latest schema to build the inverted index applier.
/// Use the latest schema to build the inveretd index applier.
fn build_invereted_index_applier(&self) -> Option<InvertedIndexApplierRef> {
if self.ignore_inverted_index {
return None;

@@ -447,26 +447,7 @@ impl ManifestContext {
|
||||
pub(crate) fn set_role(&self, next_role: RegionRole, region_id: RegionId) {
|
||||
match next_role {
|
||||
RegionRole::Follower => {
|
||||
match self.state.fetch_update(|state| {
|
||||
if !matches!(state, RegionRoleState::Follower) {
|
||||
Some(RegionRoleState::Follower)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}) {
|
||||
Ok(state) => info!(
|
||||
"Convert region {} to follower, previous role state: {:?}",
|
||||
region_id, state
|
||||
),
|
||||
Err(state) => {
|
||||
if state != RegionRoleState::Follower {
|
||||
warn!(
|
||||
"Failed to convert region {} to follower, current role state: {:?}",
|
||||
region_id, state
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
self.state.store(RegionRoleState::Follower);
|
||||
}
|
||||
RegionRole::Leader => {
|
||||
match self.state.fetch_update(|state| {
|
||||
|
||||
@@ -185,7 +185,7 @@ mod tests {

scheduler.stop(true).await.unwrap();

assert!(!object_store.exists(&path).await.unwrap());
assert!(!object_store.is_exist(&path).await.unwrap());
}

#[tokio::test]
@@ -247,7 +247,7 @@ mod tests {

scheduler.stop(true).await.unwrap();

assert!(!object_store.exists(&path).await.unwrap());
assert!(!object_store.exists(&index_path).await.unwrap());
assert!(!object_store.is_exist(&path).await.unwrap());
assert!(!object_store.is_exist(&index_path).await.unwrap());
}
}

@@ -104,28 +104,16 @@ impl IntermediateLocation {
|
||||
&self.files_dir
|
||||
}
|
||||
|
||||
/// Returns the path of the directory for intermediate files associated with the `file_group`:
|
||||
/// `__intm/{region_id}/{sst_file_id}/{uuid}/{file_group}/`
|
||||
pub fn file_group_path(&self, file_group: &str) -> String {
|
||||
util::join_path(&self.files_dir, &format!("{file_group}/"))
|
||||
/// Returns the path of the directory for intermediate files associated with a column:
|
||||
/// `__intm/{region_id}/{sst_file_id}/{uuid}/{column_id}/`
|
||||
pub fn column_path(&self, column_id: &str) -> String {
|
||||
util::join_path(&self.files_dir, &format!("{column_id}/"))
|
||||
}
|
||||
|
||||
/// Returns the path of the intermediate file with the given `file_group` and `im_file_id`:
|
||||
/// `__intm/{region_id}/{sst_file_id}/{uuid}/{file_group}/{im_file_id}.im`
|
||||
pub fn file_path(&self, file_group: &str, im_file_id: &str) -> String {
|
||||
util::join_path(
|
||||
&self.file_group_path(file_group),
|
||||
&format!("{im_file_id}.im"),
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the intermediate file id from the path.
|
||||
pub fn im_file_id_from_path(&self, path: &str) -> String {
|
||||
path.rsplit('/')
|
||||
.next()
|
||||
.and_then(|s| s.strip_suffix(".im"))
|
||||
.unwrap_or_default()
|
||||
.to_string()
|
||||
/// Returns the path of the intermediate file with the given id for a column:
|
||||
/// `__intm/{region_id}/{sst_file_id}/{uuid}/{column_id}/{im_file_id}.im`
|
||||
pub fn file_path(&self, column_id: &str, im_file_id: &str) -> String {
|
||||
util::join_path(&self.column_path(column_id), &format!("{im_file_id}.im"))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -173,20 +161,17 @@ mod tests {
|
||||
|
||||
let uuid = location.files_dir.split('/').nth(3).unwrap();
|
||||
|
||||
let file_group = "1";
|
||||
let column_id = "1";
|
||||
assert_eq!(
|
||||
location.file_group_path(file_group),
|
||||
format!("{INTERMEDIATE_DIR}/0/{sst_file_id}/{uuid}/{file_group}/")
|
||||
location.column_path(column_id),
|
||||
format!("{INTERMEDIATE_DIR}/0/{sst_file_id}/{uuid}/{column_id}/")
|
||||
);
|
||||
|
||||
let im_file_id = "000000000010";
|
||||
let file_path = location.file_path(file_group, im_file_id);
|
||||
assert_eq!(
|
||||
file_path,
|
||||
format!("{INTERMEDIATE_DIR}/0/{sst_file_id}/{uuid}/{file_group}/{im_file_id}.im")
|
||||
location.file_path(column_id, im_file_id),
|
||||
format!("{INTERMEDIATE_DIR}/0/{sst_file_id}/{uuid}/{column_id}/{im_file_id}.im")
|
||||
);
|
||||
|
||||
assert_eq!(location.im_file_id_from_path(&file_path), im_file_id);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -29,7 +29,7 @@ use snafu::ResultExt;
use store_api::storage::RegionId;

use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey};
use crate::cache::index::inverted_index::{CachedInvertedIndexBlobReader, InvertedIndexCacheRef};
use crate::cache::index::{CachedInvertedIndexBlobReader, InvertedIndexCacheRef};
use crate::error::{
ApplyInvertedIndexSnafu, MetadataSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result,
};

@@ -34,7 +34,7 @@ use store_api::metadata::RegionMetadata;
use store_api::storage::ColumnId;

use crate::cache::file_cache::FileCacheRef;
use crate::cache::index::inverted_index::InvertedIndexCacheRef;
use crate::cache::index::InvertedIndexCacheRef;
use crate::error::{BuildIndexApplierSnafu, ColumnNotFoundSnafu, ConvertValueSnafu, Result};
use crate::row_converter::SortField;
use crate::sst::index::inverted_index::applier::InvertedIndexApplier;

@@ -316,7 +316,7 @@ mod tests {
use store_api::storage::RegionId;

use super::*;
use crate::cache::index::inverted_index::InvertedIndexCache;
use crate::cache::index::InvertedIndexCache;
use crate::metrics::CACHE_BYTES;
use crate::read::BatchColumn;
use crate::row_converter::{McmpRowCodec, RowCodec, SortField};

@@ -16,9 +16,9 @@ use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_telemetry::warn;
use futures::{AsyncRead, AsyncWrite};
use index::error as index_error;
use index::error::Result as IndexResult;
use index::external_provider::ExternalTempFileProvider;
use index::inverted_index::create::sort::external_provider::ExternalTempFileProvider;
use index::inverted_index::error as index_error;
use index::inverted_index::error::Result as IndexResult;
use snafu::ResultExt;

use crate::error::Result;
@@ -42,10 +42,10 @@ pub(crate) struct TempFileProvider {
|
||||
impl ExternalTempFileProvider for TempFileProvider {
|
||||
async fn create(
|
||||
&self,
|
||||
file_group: &str,
|
||||
column_id: &str,
|
||||
file_id: &str,
|
||||
) -> IndexResult<Box<dyn AsyncWrite + Unpin + Send>> {
|
||||
let path = self.location.file_path(file_group, file_id);
|
||||
let path = self.location.file_path(column_id, file_id);
|
||||
let writer = self
|
||||
.manager
|
||||
.store()
|
||||
@@ -63,13 +63,13 @@ impl ExternalTempFileProvider for TempFileProvider {
|
||||
|
||||
async fn read_all(
|
||||
&self,
|
||||
file_group: &str,
|
||||
) -> IndexResult<Vec<(String, Box<dyn AsyncRead + Unpin + Send>)>> {
|
||||
let file_group_path = self.location.file_group_path(file_group);
|
||||
column_id: &str,
|
||||
) -> IndexResult<Vec<Box<dyn AsyncRead + Unpin + Send>>> {
|
||||
let column_path = self.location.column_path(column_id);
|
||||
let entries = self
|
||||
.manager
|
||||
.store()
|
||||
.list(&file_group_path)
|
||||
.list(&column_path)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(index_error::ExternalSnafu)?;
|
||||
@@ -81,8 +81,6 @@ impl ExternalTempFileProvider for TempFileProvider {
|
||||
continue;
|
||||
}
|
||||
|
||||
let im_file_id = self.location.im_file_id_from_path(entry.path());
|
||||
|
||||
let reader = self
|
||||
.manager
|
||||
.store()
|
||||
@@ -95,7 +93,7 @@ impl ExternalTempFileProvider for TempFileProvider {
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(index_error::ExternalSnafu)?;
|
||||
readers.push((im_file_id, Box::new(reader) as _));
|
||||
readers.push(Box::new(reader) as _);
|
||||
}
|
||||
|
||||
Ok(readers)
|
||||
@@ -135,36 +133,36 @@ mod tests {
|
||||
let store = IntermediateManager::init_fs(path).await.unwrap();
|
||||
let provider = TempFileProvider::new(location.clone(), store);
|
||||
|
||||
let file_group = "tag0";
|
||||
let column_name = "tag0";
|
||||
let file_id = "0000000010";
|
||||
let mut writer = provider.create(file_group, file_id).await.unwrap();
|
||||
let mut writer = provider.create(column_name, file_id).await.unwrap();
|
||||
writer.write_all(b"hello").await.unwrap();
|
||||
writer.flush().await.unwrap();
|
||||
writer.close().await.unwrap();
|
||||
|
||||
let file_id = "0000000100";
|
||||
let mut writer = provider.create(file_group, file_id).await.unwrap();
|
||||
let mut writer = provider.create(column_name, file_id).await.unwrap();
|
||||
writer.write_all(b"world").await.unwrap();
|
||||
writer.flush().await.unwrap();
|
||||
writer.close().await.unwrap();
|
||||
|
||||
let file_group = "tag1";
|
||||
let column_name = "tag1";
|
||||
let file_id = "0000000010";
|
||||
let mut writer = provider.create(file_group, file_id).await.unwrap();
|
||||
let mut writer = provider.create(column_name, file_id).await.unwrap();
|
||||
writer.write_all(b"foo").await.unwrap();
|
||||
writer.flush().await.unwrap();
|
||||
writer.close().await.unwrap();
|
||||
|
||||
let readers = provider.read_all("tag0").await.unwrap();
|
||||
assert_eq!(readers.len(), 2);
|
||||
for (_, mut reader) in readers {
|
||||
for mut reader in readers {
|
||||
let mut buf = Vec::new();
|
||||
reader.read_to_end(&mut buf).await.unwrap();
|
||||
assert!(matches!(buf.as_slice(), b"hello" | b"world"));
|
||||
}
|
||||
let readers = provider.read_all("tag1").await.unwrap();
|
||||
assert_eq!(readers.len(), 1);
|
||||
let mut reader = readers.into_iter().map(|x| x.1).next().unwrap();
|
||||
let mut reader = readers.into_iter().next().unwrap();
|
||||
let mut buf = Vec::new();
|
||||
reader.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(buf, b"foo");
|
||||
|
||||
@@ -27,11 +27,11 @@ pub(crate) mod file_range;
pub mod format;
pub(crate) mod helper;
pub(crate) mod metadata;
pub(crate) mod page_reader;
mod page_reader;
pub mod reader;
pub mod row_group;
mod row_selection;
pub(crate) mod stats;
mod stats;
pub mod writer;

/// Key of metadata in parquet SST.

@@ -24,7 +24,6 @@ use async_trait::async_trait;
|
||||
use common_recordbatch::filter::SimpleFilterEvaluator;
|
||||
use common_telemetry::{debug, warn};
|
||||
use datafusion_expr::Expr;
|
||||
use datatypes::arrow::error::ArrowError;
|
||||
use datatypes::arrow::record_batch::RecordBatch;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use itertools::Itertools;
|
||||
@@ -40,8 +39,7 @@ use table::predicate::Predicate;
|
||||
|
||||
use crate::cache::CacheManagerRef;
|
||||
use crate::error::{
|
||||
ArrowReaderSnafu, InvalidMetadataSnafu, InvalidParquetSnafu, ReadDataPartSnafu,
|
||||
ReadParquetSnafu, Result,
|
||||
ArrowReaderSnafu, InvalidMetadataSnafu, InvalidParquetSnafu, ReadParquetSnafu, Result,
|
||||
};
|
||||
use crate::metrics::{
|
||||
PRECISE_FILTER_ROWS_TOTAL, READ_ROWS_IN_ROW_GROUP_TOTAL, READ_ROWS_TOTAL,
|
||||
@@ -209,7 +207,8 @@ impl ParquetReaderBuilder {
|
||||
let hint = Some(read_format.arrow_schema().fields());
|
||||
let field_levels =
|
||||
parquet_to_arrow_field_levels(parquet_schema_desc, projection_mask.clone(), hint)
|
||||
.context(ReadDataPartSnafu)?;
|
||||
.context(ReadParquetSnafu { path: &file_path })?;
|
||||
|
||||
let row_groups = self
|
||||
.row_groups_to_read(&read_format, &parquet_meta, &mut metrics.filter_metrics)
|
||||
.await;
|
||||
@@ -872,7 +871,7 @@ impl SimpleFilterContext {
|
||||
///
|
||||
/// Returns None if the column to filter doesn't exist in the SST metadata or the
|
||||
/// expected metadata.
|
||||
pub(crate) fn new_opt(
|
||||
fn new_opt(
|
||||
sst_meta: &RegionMetadataRef,
|
||||
expected_meta: Option<&RegionMetadata>,
|
||||
expr: &Expr,
|
||||
@@ -1036,51 +1035,10 @@ impl ParquetReader {
|
||||
}
|
||||
}
|
||||
|
||||
/// RowGroupReaderContext represents the fields that cannot be shared
|
||||
/// between different `RowGroupReader`s.
|
||||
pub(crate) trait RowGroupReaderContext: Send {
|
||||
fn map_result(
|
||||
&self,
|
||||
result: std::result::Result<Option<RecordBatch>, ArrowError>,
|
||||
) -> Result<Option<RecordBatch>>;
|
||||
|
||||
fn read_format(&self) -> &ReadFormat;
|
||||
}
|
||||
|
||||
impl RowGroupReaderContext for FileRangeContextRef {
|
||||
fn map_result(
|
||||
&self,
|
||||
result: std::result::Result<Option<RecordBatch>, ArrowError>,
|
||||
) -> Result<Option<RecordBatch>> {
|
||||
result.context(ArrowReaderSnafu {
|
||||
path: self.file_path(),
|
||||
})
|
||||
}
|
||||
|
||||
fn read_format(&self) -> &ReadFormat {
|
||||
self.as_ref().read_format()
|
||||
}
|
||||
}
|
||||
|
||||
/// [RowGroupReader] that reads from [FileRange].
|
||||
pub(crate) type RowGroupReader = RowGroupReaderBase<FileRangeContextRef>;
|
||||
|
||||
impl RowGroupReader {
|
||||
/// Creates a new reader from file range.
|
||||
pub(crate) fn new(context: FileRangeContextRef, reader: ParquetRecordBatchReader) -> Self {
|
||||
Self {
|
||||
context,
|
||||
reader,
|
||||
batches: VecDeque::new(),
|
||||
metrics: ReaderMetrics::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reader to read a row group of a parquet file.
|
||||
pub(crate) struct RowGroupReaderBase<T> {
|
||||
/// Context of [RowGroupReader] so adapts to different underlying implementation.
|
||||
context: T,
|
||||
pub struct RowGroupReader {
|
||||
/// Context for file ranges.
|
||||
context: FileRangeContextRef,
|
||||
/// Inner parquet reader.
|
||||
reader: ParquetRecordBatchReader,
|
||||
/// Buffered batches to return.
|
||||
@@ -1089,12 +1047,9 @@ pub(crate) struct RowGroupReaderBase<T> {
|
||||
metrics: ReaderMetrics,
|
||||
}
|
||||
|
||||
impl<T> RowGroupReaderBase<T>
|
||||
where
|
||||
T: RowGroupReaderContext,
|
||||
{
|
||||
impl RowGroupReader {
|
||||
/// Creates a new reader.
|
||||
pub(crate) fn create(context: T, reader: ParquetRecordBatchReader) -> Self {
|
||||
pub(crate) fn new(context: FileRangeContextRef, reader: ParquetRecordBatchReader) -> Self {
|
||||
Self {
|
||||
context,
|
||||
reader,
|
||||
@@ -1107,19 +1062,21 @@ where
|
||||
pub(crate) fn metrics(&self) -> &ReaderMetrics {
|
||||
&self.metrics
|
||||
}
|
||||
|
||||
/// Gets [ReadFormat] of underlying reader.
|
||||
pub(crate) fn read_format(&self) -> &ReadFormat {
|
||||
self.context.read_format()
|
||||
pub(crate) fn context(&self) -> &FileRangeContextRef {
|
||||
&self.context
|
||||
}
|
||||
|
||||
/// Tries to fetch next [RecordBatch] from the reader.
|
||||
fn fetch_next_record_batch(&mut self) -> Result<Option<RecordBatch>> {
|
||||
self.context.map_result(self.reader.next().transpose())
|
||||
self.reader.next().transpose().context(ArrowReaderSnafu {
|
||||
path: self.context.file_path(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the next [Batch].
|
||||
pub(crate) fn next_inner(&mut self) -> Result<Option<Batch>> {
|
||||
#[async_trait::async_trait]
|
||||
impl BatchReader for RowGroupReader {
|
||||
async fn next_batch(&mut self) -> Result<Option<Batch>> {
|
||||
let scan_start = Instant::now();
|
||||
if let Some(batch) = self.batches.pop_front() {
|
||||
self.metrics.num_rows += batch.num_rows();
|
||||
@@ -1147,16 +1104,6 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<T> BatchReader for RowGroupReaderBase<T>
|
||||
where
|
||||
T: RowGroupReaderContext,
|
||||
{
|
||||
async fn next_batch(&mut self) -> Result<Option<Batch>> {
|
||||
self.next_inner()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use parquet::arrow::arrow_reader::RowSelector;
|
||||
|
||||
@@ -38,196 +38,25 @@ use crate::sst::file::FileId;
|
||||
use crate::sst::parquet::helper::fetch_byte_ranges;
|
||||
use crate::sst::parquet::page_reader::RowGroupCachedReader;
|
||||
|
||||
pub(crate) struct RowGroupBase<'a> {
|
||||
pub(crate) metadata: &'a RowGroupMetaData,
|
||||
pub(crate) page_locations: Option<&'a [Vec<PageLocation>]>,
|
||||
/// Compressed page of each column.
|
||||
pub(crate) column_chunks: Vec<Option<Arc<ColumnChunkData>>>,
|
||||
pub(crate) row_count: usize,
|
||||
/// Row group level cached pages for each column.
|
||||
///
|
||||
/// These pages are uncompressed pages of a row group.
|
||||
/// `column_uncompressed_pages.len()` equals to `column_chunks.len()`.
|
||||
pub(crate) column_uncompressed_pages: Vec<Option<Arc<PageValue>>>,
|
||||
}
|
||||
|
||||
impl<'a> RowGroupBase<'a> {
|
||||
pub(crate) fn new(parquet_meta: &'a ParquetMetaData, row_group_idx: usize) -> Self {
|
||||
let metadata = parquet_meta.row_group(row_group_idx);
|
||||
// `page_locations` is always `None` if we don't set
|
||||
// [with_page_index()](https://docs.rs/parquet/latest/parquet/arrow/arrow_reader/struct.ArrowReaderOptions.html#method.with_page_index)
|
||||
// to `true`.
|
||||
let page_locations = parquet_meta
|
||||
.offset_index()
|
||||
.map(|x| x[row_group_idx].as_slice());
|
||||
|
||||
Self {
|
||||
metadata,
|
||||
page_locations,
|
||||
column_chunks: vec![None; metadata.columns().len()],
|
||||
row_count: metadata.num_rows() as usize,
|
||||
column_uncompressed_pages: vec![None; metadata.columns().len()],
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn calc_sparse_read_ranges(
|
||||
&self,
|
||||
projection: &ProjectionMask,
|
||||
page_locations: &[Vec<PageLocation>],
|
||||
selection: &RowSelection,
|
||||
) -> (Vec<Range<u64>>, Vec<Vec<usize>>) {
|
||||
// If we have a `RowSelection` and an `OffsetIndex` then only fetch pages required for the
|
||||
// `RowSelection`
|
||||
let mut page_start_offsets: Vec<Vec<usize>> = vec![];
|
||||
let ranges = self
|
||||
.column_chunks
|
||||
.iter()
|
||||
.zip(self.metadata.columns())
|
||||
.enumerate()
|
||||
.filter(|&(idx, (chunk, _chunk_meta))| chunk.is_none() && projection.leaf_included(idx))
|
||||
.flat_map(|(idx, (_chunk, chunk_meta))| {
|
||||
// If the first page does not start at the beginning of the column,
|
||||
// then we need to also fetch a dictionary page.
|
||||
let mut ranges = vec![];
|
||||
let (start, _len) = chunk_meta.byte_range();
|
||||
match page_locations[idx].first() {
|
||||
Some(first) if first.offset as u64 != start => {
|
||||
ranges.push(start..first.offset as u64);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
ranges.extend(
|
||||
selection
|
||||
.scan_ranges(&page_locations[idx])
|
||||
.iter()
|
||||
.map(|range| range.start as u64..range.end as u64),
|
||||
);
|
||||
page_start_offsets.push(ranges.iter().map(|range| range.start as usize).collect());
|
||||
|
||||
ranges
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
(ranges, page_start_offsets)
|
||||
}
|
||||
|
||||
pub(crate) fn assign_sparse_chunk(
|
||||
&mut self,
|
||||
projection: &ProjectionMask,
|
||||
data: Vec<Bytes>,
|
||||
page_start_offsets: Vec<Vec<usize>>,
|
||||
) {
|
||||
let mut page_start_offsets = page_start_offsets.into_iter();
|
||||
let mut chunk_data = data.into_iter();
|
||||
|
||||
for (idx, chunk) in self.column_chunks.iter_mut().enumerate() {
|
||||
if chunk.is_some() || !projection.leaf_included(idx) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(offsets) = page_start_offsets.next() {
|
||||
let mut chunks = Vec::with_capacity(offsets.len());
|
||||
for _ in 0..offsets.len() {
|
||||
chunks.push(chunk_data.next().unwrap());
|
||||
}
|
||||
|
||||
*chunk = Some(Arc::new(ColumnChunkData::Sparse {
|
||||
length: self.metadata.column(idx).byte_range().1 as usize,
|
||||
data: offsets.into_iter().zip(chunks).collect(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn calc_dense_read_ranges(&self, projection: &ProjectionMask) -> Vec<Range<u64>> {
|
||||
self.column_chunks
|
||||
.iter()
|
||||
.zip(&self.column_uncompressed_pages)
|
||||
.enumerate()
|
||||
.filter(|&(idx, (chunk, uncompressed_pages))| {
|
||||
// Don't need to fetch column data if we already cache the column's pages.
|
||||
chunk.is_none() && projection.leaf_included(idx) && uncompressed_pages.is_none()
|
||||
})
|
||||
.map(|(idx, (_chunk, _pages))| {
|
||||
let column = self.metadata.column(idx);
|
||||
let (start, length) = column.byte_range();
|
||||
start..(start + length)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
/// Assigns uncompressed chunk binary data to [RowGroupBase::column_chunks]
|
||||
/// and returns the chunk offset and binary data assigned.
|
||||
pub(crate) fn assign_dense_chunk(
|
||||
&mut self,
|
||||
projection: &ProjectionMask,
|
||||
chunk_data: Vec<Bytes>,
|
||||
) -> Vec<(usize, Bytes)> {
|
||||
let mut chunk_data = chunk_data.into_iter();
|
||||
let mut res = vec![];
|
||||
|
||||
for (idx, (chunk, row_group_pages)) in self
|
||||
.column_chunks
|
||||
.iter_mut()
|
||||
.zip(&self.column_uncompressed_pages)
|
||||
.enumerate()
|
||||
{
|
||||
if chunk.is_some() || !projection.leaf_included(idx) || row_group_pages.is_some() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get the fetched page.
|
||||
let Some(data) = chunk_data.next() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let column = self.metadata.column(idx);
|
||||
res.push((idx, data.clone()));
|
||||
*chunk = Some(Arc::new(ColumnChunkData::Dense {
|
||||
offset: column.byte_range().0 as usize,
|
||||
data,
|
||||
}));
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
/// Create [PageReader] from [RowGroupBase::column_chunks]
|
||||
pub(crate) fn column_reader(
|
||||
&self,
|
||||
col_idx: usize,
|
||||
) -> Result<SerializedPageReader<ColumnChunkData>> {
|
||||
let page_reader = match &self.column_chunks[col_idx] {
|
||||
None => {
|
||||
return Err(ParquetError::General(format!(
|
||||
"Invalid column index {col_idx}, column was not fetched"
|
||||
)))
|
||||
}
|
||||
Some(data) => {
|
||||
let page_locations = self.page_locations.map(|index| index[col_idx].clone());
|
||||
SerializedPageReader::new(
|
||||
data.clone(),
|
||||
self.metadata.column(col_idx),
|
||||
self.row_count,
|
||||
page_locations,
|
||||
)?
|
||||
}
|
||||
};
|
||||
|
||||
// This column don't cache uncompressed pages.
|
||||
Ok(page_reader)
|
||||
}
|
||||
}
|
||||
|
||||
/// An in-memory collection of column chunks
|
||||
pub struct InMemoryRowGroup<'a> {
|
||||
metadata: &'a RowGroupMetaData,
|
||||
page_locations: Option<&'a [Vec<PageLocation>]>,
|
||||
/// Compressed page of each column.
|
||||
column_chunks: Vec<Option<Arc<ColumnChunkData>>>,
|
||||
row_count: usize,
|
||||
region_id: RegionId,
|
||||
file_id: FileId,
|
||||
row_group_idx: usize,
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
/// Row group level cached pages for each column.
|
||||
///
|
||||
/// These pages are uncompressed pages of a row group.
|
||||
/// `column_uncompressed_pages.len()` equals to `column_chunks.len()`.
|
||||
column_uncompressed_pages: Vec<Option<Arc<PageValue>>>,
|
||||
file_path: &'a str,
|
||||
/// Object store.
|
||||
object_store: ObjectStore,
|
||||
base: RowGroupBase<'a>,
|
||||
}
|
||||
|
||||
impl<'a> InMemoryRowGroup<'a> {
|
||||
@@ -244,12 +73,24 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
file_path: &'a str,
|
||||
object_store: ObjectStore,
|
||||
) -> Self {
|
||||
let metadata = parquet_meta.row_group(row_group_idx);
|
||||
// `page_locations` is always `None` if we don't set
|
||||
// [with_page_index()](https://docs.rs/parquet/latest/parquet/arrow/arrow_reader/struct.ArrowReaderOptions.html#method.with_page_index)
|
||||
// to `true`.
|
||||
let page_locations = parquet_meta
|
||||
.offset_index()
|
||||
.map(|x| x[row_group_idx].as_slice());
|
||||
|
||||
Self {
|
||||
base: RowGroupBase::new(parquet_meta, row_group_idx),
|
||||
metadata,
|
||||
row_count: metadata.num_rows() as usize,
|
||||
column_chunks: vec![None; metadata.columns().len()],
|
||||
page_locations,
|
||||
region_id,
|
||||
file_id,
|
||||
row_group_idx,
|
||||
cache_manager,
|
||||
column_uncompressed_pages: vec![None; metadata.columns().len()],
|
||||
file_path,
|
||||
object_store,
|
||||
}
|
||||
@@ -261,15 +102,65 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
projection: &ProjectionMask,
|
||||
selection: Option<&RowSelection>,
|
||||
) -> Result<()> {
|
||||
if let Some((selection, page_locations)) = selection.zip(self.base.page_locations) {
|
||||
let (fetch_ranges, page_start_offsets) =
|
||||
self.base
|
||||
.calc_sparse_read_ranges(projection, page_locations, selection);
|
||||
if let Some((selection, page_locations)) = selection.zip(self.page_locations) {
|
||||
// If we have a `RowSelection` and an `OffsetIndex` then only fetch pages required for the
|
||||
// `RowSelection`
|
||||
let mut page_start_offsets: Vec<Vec<usize>> = vec![];
|
||||
|
||||
let chunk_data = self.fetch_bytes(&fetch_ranges).await?;
|
||||
// Assign sparse chunk data to base.
|
||||
self.base
|
||||
.assign_sparse_chunk(projection, chunk_data, page_start_offsets);
|
||||
let fetch_ranges = self
|
||||
.column_chunks
|
||||
.iter()
|
||||
.zip(self.metadata.columns())
|
||||
.enumerate()
|
||||
.filter(|&(idx, (chunk, _chunk_meta))| {
|
||||
chunk.is_none() && projection.leaf_included(idx)
|
||||
})
|
||||
.flat_map(|(idx, (_chunk, chunk_meta))| {
|
||||
// If the first page does not start at the beginning of the column,
|
||||
// then we need to also fetch a dictionary page.
|
||||
let mut ranges = vec![];
|
||||
let (start, _len) = chunk_meta.byte_range();
|
||||
match page_locations[idx].first() {
|
||||
Some(first) if first.offset as u64 != start => {
|
||||
ranges.push(start..first.offset as u64);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
ranges.extend(
|
||||
selection
|
||||
.scan_ranges(&page_locations[idx])
|
||||
.iter()
|
||||
.map(|range| range.start as u64..range.end as u64),
|
||||
);
|
||||
page_start_offsets
|
||||
.push(ranges.iter().map(|range| range.start as usize).collect());
|
||||
|
||||
ranges
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut chunk_data = self.fetch_bytes(&fetch_ranges).await?.into_iter();
|
||||
|
||||
let mut page_start_offsets = page_start_offsets.into_iter();
|
||||
|
||||
for (idx, chunk) in self.column_chunks.iter_mut().enumerate() {
|
||||
if chunk.is_some() || !projection.leaf_included(idx) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(offsets) = page_start_offsets.next() {
|
||||
let mut chunks = Vec::with_capacity(offsets.len());
|
||||
for _ in 0..offsets.len() {
|
||||
chunks.push(chunk_data.next().unwrap());
|
||||
}
|
||||
|
||||
*chunk = Some(Arc::new(ColumnChunkData::Sparse {
|
||||
length: self.metadata.column(idx).byte_range().1 as usize,
|
||||
data: offsets.into_iter().zip(chunks).collect(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Now we only use cache in dense chunk data.
|
||||
self.fetch_pages_from_cache(projection);
|
||||
@@ -278,24 +169,46 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
// is a synchronous, CPU-bound operation.
|
||||
yield_now().await;
|
||||
|
||||
// Calculate ranges to read.
|
||||
let fetch_ranges = self.base.calc_dense_read_ranges(projection);
|
||||
let fetch_ranges = self
|
||||
.column_chunks
|
||||
.iter()
|
||||
.zip(&self.column_uncompressed_pages)
|
||||
.enumerate()
|
||||
.filter(|&(idx, (chunk, uncompressed_pages))| {
|
||||
// Don't need to fetch column data if we already cache the column's pages.
|
||||
chunk.is_none() && projection.leaf_included(idx) && uncompressed_pages.is_none()
|
||||
})
|
||||
.map(|(idx, (_chunk, _pages))| {
|
||||
let column = self.metadata.column(idx);
|
||||
let (start, length) = column.byte_range();
|
||||
start..(start + length)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if fetch_ranges.is_empty() {
|
||||
// Nothing to fetch.
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Fetch data with ranges
|
||||
let chunk_data = self.fetch_bytes(&fetch_ranges).await?;
|
||||
let mut chunk_data = self.fetch_bytes(&fetch_ranges).await?.into_iter();
|
||||
|
||||
// Assigns fetched data to base.
|
||||
let assigned_columns = self.base.assign_dense_chunk(projection, chunk_data);
|
||||
for (idx, (chunk, row_group_pages)) in self
|
||||
.column_chunks
|
||||
.iter_mut()
|
||||
.zip(&self.column_uncompressed_pages)
|
||||
.enumerate()
|
||||
{
|
||||
if chunk.is_some() || !projection.leaf_included(idx) || row_group_pages.is_some() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Put fetched data to cache if necessary.
|
||||
if let Some(cache) = &self.cache_manager {
|
||||
for (col_idx, data) in assigned_columns {
|
||||
let column = self.base.metadata.column(col_idx);
|
||||
// Get the fetched page.
|
||||
let Some(data) = chunk_data.next() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let column = self.metadata.column(idx);
|
||||
if let Some(cache) = &self.cache_manager {
|
||||
if !cache_uncompressed_pages(column) {
|
||||
// For columns that have multiple uncompressed pages, we only cache the compressed page
|
||||
// to save memory.
|
||||
@@ -303,12 +216,17 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
self.region_id,
|
||||
self.file_id,
|
||||
self.row_group_idx,
|
||||
col_idx,
|
||||
idx,
|
||||
);
|
||||
cache
|
||||
.put_pages(page_key, Arc::new(PageValue::new_compressed(data.clone())));
|
||||
}
|
||||
}
|
||||
|
||||
*chunk = Some(Arc::new(ColumnChunkData::Dense {
|
||||
offset: column.byte_range().0 as usize,
|
||||
data,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -319,8 +237,7 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
/// If the page is in the cache, sets the column chunk or `column_uncompressed_pages` for the column.
|
||||
fn fetch_pages_from_cache(&mut self, projection: &ProjectionMask) {
|
||||
let _timer = READ_STAGE_FETCH_PAGES.start_timer();
|
||||
self.base
|
||||
.column_chunks
|
||||
self.column_chunks
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.filter(|(idx, chunk)| chunk.is_none() && projection.leaf_included(*idx))
|
||||
@@ -328,7 +245,7 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
let Some(cache) = &self.cache_manager else {
|
||||
return;
|
||||
};
|
||||
let column = self.base.metadata.column(idx);
|
||||
let column = self.metadata.column(idx);
|
||||
if cache_uncompressed_pages(column) {
|
||||
// Fetches uncompressed pages for the row group.
|
||||
let page_key = PageKey::new_uncompressed(
|
||||
@@ -337,7 +254,7 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
self.row_group_idx,
|
||||
idx,
|
||||
);
|
||||
self.base.column_uncompressed_pages[idx] = cache.get_pages(&page_key);
|
||||
self.column_uncompressed_pages[idx] = cache.get_pages(&page_key);
|
||||
} else {
|
||||
// Fetches the compressed page from the cache.
|
||||
let page_key = PageKey::new_compressed(
|
||||
@@ -391,19 +308,34 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
|
||||
/// Creates a page reader to read column at `i`.
|
||||
fn column_page_reader(&self, i: usize) -> Result<Box<dyn PageReader>> {
|
||||
if let Some(cached_pages) = &self.base.column_uncompressed_pages[i] {
|
||||
if let Some(cached_pages) = &self.column_uncompressed_pages[i] {
|
||||
debug_assert!(!cached_pages.row_group.is_empty());
|
||||
// Hits the row group level page cache.
|
||||
return Ok(Box::new(RowGroupCachedReader::new(&cached_pages.row_group)));
|
||||
}
|
||||
|
||||
let page_reader = self.base.column_reader(i)?;
|
||||
let page_reader = match &self.column_chunks[i] {
|
||||
None => {
|
||||
return Err(ParquetError::General(format!(
|
||||
"Invalid column index {i}, column was not fetched"
|
||||
)))
|
||||
}
|
||||
Some(data) => {
|
||||
let page_locations = self.page_locations.map(|index| index[i].clone());
|
||||
SerializedPageReader::new(
|
||||
data.clone(),
|
||||
self.metadata.column(i),
|
||||
self.row_count,
|
||||
page_locations,
|
||||
)?
|
||||
}
|
||||
};
|
||||
|
||||
let Some(cache) = &self.cache_manager else {
|
||||
return Ok(Box::new(page_reader));
|
||||
};
|
||||
|
||||
let column = self.base.metadata.column(i);
|
||||
let column = self.metadata.column(i);
|
||||
if cache_uncompressed_pages(column) {
|
||||
// This column use row group level page cache.
|
||||
// We collect all pages and put them into the cache.
|
||||
@@ -430,7 +362,7 @@ fn cache_uncompressed_pages(column: &ColumnChunkMetaData) -> bool {
|
||||
|
||||
impl RowGroups for InMemoryRowGroup<'_> {
|
||||
fn num_rows(&self) -> usize {
|
||||
self.base.row_count
|
||||
self.row_count
|
||||
}
|
||||
|
||||
fn column_chunks(&self, i: usize) -> Result<Box<dyn PageIterator>> {
|
||||
@@ -498,8 +430,8 @@ impl ChunkReader for ColumnChunkData {
|
||||
}
|
||||
|
||||
/// Implements [`PageIterator`] for a single column chunk, yielding a single [`PageReader`]
|
||||
pub(crate) struct ColumnChunkIterator {
|
||||
pub(crate) reader: Option<Result<Box<dyn PageReader>>>,
|
||||
struct ColumnChunkIterator {
|
||||
reader: Option<Result<Box<dyn PageReader>>>,
|
||||
}
|
||||
|
||||
impl Iterator for ColumnChunkIterator {
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
//! Memtable test utilities.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
@@ -33,8 +34,8 @@ use crate::error::Result;
|
||||
use crate::memtable::key_values::KeyValue;
|
||||
use crate::memtable::partition_tree::data::{timestamp_array_to_i64_slice, DataBatch, DataBuffer};
|
||||
use crate::memtable::{
|
||||
BoxedBatchIterator, BulkPart, KeyValues, Memtable, MemtableBuilder, MemtableId, MemtableRanges,
|
||||
MemtableRef, MemtableStats,
|
||||
BoxedBatchIterator, BulkPart, KeyValues, Memtable, MemtableBuilder, MemtableId, MemtableRange,
|
||||
MemtableRanges, MemtableRef, MemtableStats,
|
||||
};
|
||||
use crate::row_converter::{McmpRowCodec, RowCodec, SortField};
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
// Check if this region is pending drop. And clean the entire dir if so.
|
||||
if !self.dropping_regions.is_region_exists(region_id)
|
||||
&& object_store
|
||||
.exists(&join_path(&request.region_dir, DROPPING_MARKER_FILE))
|
||||
.is_exist(&join_path(&request.region_dir, DROPPING_MARKER_FILE))
|
||||
.await
|
||||
.context(OpenDalSnafu)?
|
||||
{
|
||||
|
||||
@@ -17,9 +17,8 @@ futures.workspace = true
lazy_static.workspace = true
md5 = "0.7"
moka = { workspace = true, features = ["future"] }
opendal = { git = "https://github.com/GreptimeTeam/opendal.git", rev = "c82605177f2feec83e49dcaa537c505639d94024", features = [
opendal = { version = "0.49", features = [
"layers-tracing",
"layers-prometheus",
"services-azblob",
"services-fs",
"services-gcs",

@@ -13,37 +13,8 @@
// limitations under the License.

mod lru_cache;
mod prometheus;

pub use lru_cache::*;
pub use opendal::layers::*;
pub use prometheus::build_prometheus_metrics_layer;

mod prometheus {
use std::sync::{Mutex, OnceLock};

use opendal::layers::PrometheusLayer;

static PROMETHEUS_LAYER: OnceLock<Mutex<PrometheusLayer>> = OnceLock::new();

pub fn build_prometheus_metrics_layer(with_path_label: bool) -> PrometheusLayer {
PROMETHEUS_LAYER
.get_or_init(|| {
// This logical tries to extract parent path from the object storage operation
// the function also relies on assumption that the region path is built from
// pattern `<data|index>/catalog/schema/table_id/....`
//
// We'll get the data/catalog/schema from path.
let path_level = if with_path_label { 3 } else { 0 };

let layer = PrometheusLayer::builder()
.path_label(path_level)
.register_default()
.unwrap();

Mutex::new(layer)
})
.lock()
.unwrap()
.clone()
}
}
pub use prometheus::PrometheusMetricsLayer;

@@ -156,12 +156,9 @@ impl<C: Access> ReadCache<C> {
let size = entry.metadata().content_length();
OBJECT_STORE_LRU_CACHE_ENTRIES.inc();
OBJECT_STORE_LRU_CACHE_BYTES.add(size as i64);
// ignore root path
if entry.path() != "/" {
self.mem_cache
.insert(read_key.to_string(), ReadResult::Success(size as u32))
.await;
}
self.mem_cache
.insert(read_key.to_string(), ReadResult::Success(size as u32))
.await;
}

Ok(self.cache_stat().await)
Some files were not shown because too many files have changed in this diff.