Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2026-01-03 20:02:54 +00:00)

Compare commits: 24 commits, v0.11.2 ... v0.12.0-ni
| Author | SHA1 | Date |
|---|---|---|
|  | 358d5e1d63 |  |
|  | 579059d99f |  |
|  | 53d55c0b6b |  |
|  | bef6896280 |  |
|  | 4b4c6dbb66 |  |
|  | e8e9526738 |  |
|  | fee75a1fad |  |
|  | b8a78b7838 |  |
|  | 2137c53274 |  |
|  | 03ad6e2a8d |  |
|  | d53fbcb936 |  |
|  | 8c1959c580 |  |
|  | e2a41ccaec |  |
|  | a8012147ab |  |
|  | 60f8dbf7f0 |  |
|  | 9da2e17d0e |  |
|  | 1a8e77a480 |  |
|  | e1e39993f7 |  |
|  | a30d918df2 |  |
|  | 2c4ac76754 |  |
|  | a6893aad42 |  |
|  | d91517688a |  |
|  | 3d1b8c4fac |  |
|  | 7c69ca0502 |  |
1  .github/cargo-blacklist.txt (vendored)

@@ -1,2 +1,3 @@
native-tls
openssl
aws-lc-sys

7  .github/workflows/develop.yml (vendored)

@@ -269,13 +269,6 @@ jobs:
      - name: Install cargo-gc-bin
        shell: bash
        run: cargo install cargo-gc-bin
      - name: Check aws-lc-sys will not build
        shell: bash
        run: |
          if cargo tree -i aws-lc-sys -e features | grep -q aws-lc-sys; then
            echo "Found aws-lc-sys, which has compilation problems on older gcc versions. Please replace it with ring until its building experience improves."
            exit 1
          fi
      - name: Build greptime bianry
        shell: bash
        # `cargo gc` will invoke `cargo build` with specified args

4  .github/workflows/nightly-build.yml (vendored)

@@ -12,7 +12,7 @@ on:
      linux_amd64_runner:
        type: choice
        description: The runner uses to build linux-amd64 artifacts
        default: ec2-c6i.2xlarge-amd64
        default: ec2-c6i.4xlarge-amd64
        options:
          - ubuntu-20.04
          - ubuntu-20.04-8-cores
@@ -27,7 +27,7 @@ on:
      linux_arm64_runner:
        type: choice
        description: The runner uses to build linux-arm64 artifacts
        default: ec2-c6g.2xlarge-arm64
        default: ec2-c6g.4xlarge-arm64
        options:
          - ec2-c6g.xlarge-arm64 # 4C8G
          - ec2-c6g.2xlarge-arm64 # 8C16G

2  .github/workflows/release.yml (vendored)

@@ -91,7 +91,7 @@ env:
  # The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
  NIGHTLY_RELEASE_PREFIX: nightly
  # Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
  NEXT_RELEASE_VERSION: v0.11.0
  NEXT_RELEASE_VERSION: v0.12.0

  # Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:

212  Cargo.lock (generated)

@@ -188,7 +188,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"

[[package]]
name = "api"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"common-base",
"common-decimal",
@@ -749,7 +749,7 @@ dependencies = [

[[package]]
name = "auth"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -798,21 +798,6 @@ dependencies = [
"cc",
]

[[package]]
name = "aws-lc-sys"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3ddc4a5b231dd6958b140ff3151b6412b3f4321fab354f399eec8f14b06df62"
dependencies = [
"bindgen 0.69.4",
"cc",
"cmake",
"dunce",
"fs_extra",
"libc",
"paste",
]

[[package]]
name = "axum"
version = "0.6.20"
@@ -993,29 +978,6 @@ dependencies = [
"serde",
]

[[package]]
name = "bindgen"
version = "0.69.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
dependencies = [
"bitflags 2.6.0",
"cexpr",
"clang-sys",
"itertools 0.12.1",
"lazy_static",
"lazycell",
"log",
"prettyplease",
"proc-macro2",
"quote",
"regex",
"rustc-hash 1.1.0",
"shlex",
"syn 2.0.90",
"which",
]

[[package]]
name = "bindgen"
version = "0.70.1"
@@ -1340,7 +1302,7 @@ dependencies = [

[[package]]
name = "cache"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"catalog",
"common-error",
@@ -1348,7 +1310,7 @@ dependencies = [
"common-meta",
"moka",
"snafu 0.8.5",
"substrait 0.11.0",
"substrait 0.12.0",
]

[[package]]
@@ -1375,7 +1337,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"

[[package]]
name = "catalog"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arrow",
@@ -1714,7 +1676,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"

[[package]]
name = "cli"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"auth",
@@ -1758,7 +1720,7 @@ dependencies = [
"session",
"snafu 0.8.5",
"store-api",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"temp-env",
"tempfile",
@@ -1768,7 +1730,7 @@ dependencies = [

[[package]]
name = "client"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arc-swap",
@@ -1797,7 +1759,7 @@ dependencies = [
"rand",
"serde_json",
"snafu 0.8.5",
"substrait 0.11.0",
"substrait 0.12.0",
"substrait 0.37.3",
"tokio",
"tokio-stream",
@@ -1838,7 +1800,7 @@ dependencies = [

[[package]]
name = "cmd"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"auth",
@@ -1898,7 +1860,7 @@ dependencies = [
"similar-asserts",
"snafu 0.8.5",
"store-api",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"temp-env",
"tempfile",
@@ -1944,7 +1906,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"

[[package]]
name = "common-base"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"anymap2",
"async-trait",
@@ -1965,7 +1927,7 @@ dependencies = [

[[package]]
name = "common-catalog"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"chrono",
"common-error",
@@ -1976,7 +1938,7 @@ dependencies = [

[[package]]
name = "common-config"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"common-base",
"common-error",
@@ -1999,7 +1961,7 @@ dependencies = [

[[package]]
name = "common-datasource"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"arrow",
"arrow-schema",
@@ -2036,7 +1998,7 @@ dependencies = [

[[package]]
name = "common-decimal"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"bigdecimal 0.4.5",
"common-error",
@@ -2049,7 +2011,7 @@ dependencies = [

[[package]]
name = "common-error"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"snafu 0.8.5",
"strum 0.25.0",
@@ -2058,7 +2020,7 @@ dependencies = [

[[package]]
name = "common-frontend"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -2073,7 +2035,7 @@ dependencies = [

[[package]]
name = "common-function"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"approx 0.5.1",
@@ -2118,7 +2080,7 @@ dependencies = [

[[package]]
name = "common-greptimedb-telemetry"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"common-runtime",
@@ -2135,7 +2097,7 @@ dependencies = [

[[package]]
name = "common-grpc"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arrow-flight",
@@ -2161,7 +2123,7 @@ dependencies = [

[[package]]
name = "common-grpc-expr"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"common-base",
@@ -2180,7 +2142,7 @@ dependencies = [

[[package]]
name = "common-macro"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"arc-swap",
"common-query",
@@ -2194,7 +2156,7 @@ dependencies = [

[[package]]
name = "common-mem-prof"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"common-error",
"common-macro",
@@ -2207,7 +2169,7 @@ dependencies = [

[[package]]
name = "common-meta"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"anymap2",
"api",
@@ -2264,7 +2226,7 @@ dependencies = [

[[package]]
name = "common-options"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"common-grpc",
"humantime-serde",
@@ -2273,11 +2235,11 @@ dependencies = [

[[package]]
name = "common-plugins"
version = "0.11.0"
version = "0.12.0"

[[package]]
name = "common-pprof"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"common-error",
"common-macro",
@@ -2289,7 +2251,7 @@ dependencies = [

[[package]]
name = "common-procedure"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-stream",
"async-trait",
@@ -2316,7 +2278,7 @@ dependencies = [

[[package]]
name = "common-procedure-test"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"common-procedure",
@@ -2324,7 +2286,7 @@ dependencies = [

[[package]]
name = "common-query"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -2350,7 +2312,7 @@ dependencies = [

[[package]]
name = "common-recordbatch"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"arc-swap",
"common-error",
@@ -2369,7 +2331,7 @@ dependencies = [

[[package]]
name = "common-runtime"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"clap 4.5.19",
@@ -2399,7 +2361,7 @@ dependencies = [

[[package]]
name = "common-telemetry"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"atty",
"backtrace",
@@ -2427,7 +2389,7 @@ dependencies = [

[[package]]
name = "common-test-util"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"client",
"common-query",
@@ -2439,7 +2401,7 @@ dependencies = [

[[package]]
name = "common-time"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"arrow",
"chrono",
@@ -2457,7 +2419,7 @@ dependencies = [

[[package]]
name = "common-version"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"build-data",
"const_format",
@@ -2467,7 +2429,7 @@ dependencies = [

[[package]]
name = "common-wal"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"common-base",
"common-error",
@@ -3276,7 +3238,7 @@ dependencies = [

[[package]]
name = "datanode"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arrow-flight",
@@ -3327,7 +3289,7 @@ dependencies = [
"session",
"snafu 0.8.5",
"store-api",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"tokio",
"toml 0.8.19",
@@ -3336,7 +3298,7 @@ dependencies = [

[[package]]
name = "datatypes"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"arrow",
"arrow-array",
@@ -3719,12 +3681,6 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"

[[package]]
name = "dunce"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"

[[package]]
name = "duration-str"
version = "0.11.2"
@@ -3954,7 +3910,7 @@ dependencies = [

[[package]]
name = "file-engine"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -4071,7 +4027,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"

[[package]]
name = "flow"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arrow",
@@ -4128,7 +4084,7 @@ dependencies = [
"snafu 0.8.5",
"store-api",
"strum 0.25.0",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"tokio",
"tonic 0.11.0",
@@ -4175,7 +4131,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"

[[package]]
name = "frontend"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arc-swap",
@@ -4314,12 +4270,6 @@ dependencies = [
"windows-sys 0.52.0",
]

[[package]]
name = "fs_extra"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"

[[package]]
name = "fsevent-sys"
version = "4.1.0"
@@ -5315,7 +5265,7 @@ dependencies = [

[[package]]
name = "index"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"asynchronous-codec",
@@ -5955,12 +5905,6 @@ dependencies = [
"spin 0.9.8",
]

[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"

[[package]]
name = "levenshtein_automata"
version = "0.2.1"
@@ -6150,7 +6094,7 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"

[[package]]
name = "log-query"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"chrono",
"common-error",
@@ -6161,7 +6105,7 @@ dependencies = [

[[package]]
name = "log-store"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-stream",
"async-trait",
@@ -6482,7 +6426,7 @@ dependencies = [

[[package]]
name = "meta-client"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -6509,7 +6453,7 @@ dependencies = [

[[package]]
name = "meta-srv"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -6588,7 +6532,7 @@ dependencies = [

[[package]]
name = "metric-engine"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"aquamarine",
@@ -6692,13 +6636,14 @@ dependencies = [

[[package]]
name = "mito2"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"aquamarine",
"async-channel 1.9.0",
"async-stream",
"async-trait",
"bytemuck",
"bytes",
"common-base",
"common-config",
@@ -6966,7 +6911,7 @@ checksum = "06f19e4cfa0ab5a76b627cec2d81331c49b034988eaf302c3bafeada684eadef"
dependencies = [
"base64 0.21.7",
"bigdecimal 0.4.5",
"bindgen 0.70.1",
"bindgen",
"bitflags 2.6.0",
"bitvec",
"btoi",
@@ -7005,7 +6950,7 @@ checksum = "478b0ff3f7d67b79da2b96f56f334431aef65e15ba4b29dd74a4236e29582bdc"
dependencies = [
"base64 0.21.7",
"bigdecimal 0.4.5",
"bindgen 0.70.1",
"bindgen",
"bitflags 2.6.0",
"bitvec",
"btoi",
@@ -7404,7 +7349,7 @@ dependencies = [

[[package]]
name = "object-store"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"anyhow",
"bytes",
@@ -7657,7 +7602,7 @@ dependencies = [

[[package]]
name = "operator"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -7705,7 +7650,7 @@ dependencies = [
"sql",
"sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)",
"store-api",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"tokio",
"tokio-util",
@@ -7955,7 +7900,7 @@ dependencies = [

[[package]]
name = "partition"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -8241,7 +8186,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"

[[package]]
name = "pipeline"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -8404,7 +8349,7 @@ dependencies = [

[[package]]
name = "plugins"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"auth",
"clap 4.5.19",
@@ -8681,7 +8626,7 @@ dependencies = [

[[package]]
name = "promql"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"async-trait",
@@ -8919,7 +8864,7 @@ dependencies = [

[[package]]
name = "puffin"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-compression 0.4.13",
"async-trait",
@@ -8939,6 +8884,7 @@ dependencies = [
"lz4_flex 0.11.3",
"moka",
"pin-project",
"prometheus",
"serde",
"serde_json",
"sha2",
@@ -9043,7 +8989,7 @@ dependencies = [

[[package]]
name = "query"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -9110,7 +9056,7 @@ dependencies = [
"stats-cli",
"store-api",
"streaming-stats",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"tokio",
"tokio-stream",
@@ -10572,7 +10518,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"

[[package]]
name = "script"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arc-swap",
@@ -10866,7 +10812,7 @@ dependencies = [

[[package]]
name = "servers"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -10876,7 +10822,6 @@ dependencies = [
"arrow-schema",
"async-trait",
"auth",
"aws-lc-sys",
"axum",
"axum-macros",
"base64 0.21.7",
@@ -10979,7 +10924,7 @@ dependencies = [

[[package]]
name = "session"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arc-swap",
@@ -11327,7 +11272,7 @@ dependencies = [

[[package]]
name = "sql"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"chrono",
@@ -11352,6 +11297,7 @@ dependencies = [
"jsonb",
"lazy_static",
"regex",
"serde",
"serde_json",
"snafu 0.8.5",
"sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)",
@@ -11390,7 +11336,7 @@ dependencies = [

[[package]]
name = "sqlness-runner"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"clap 4.5.19",
@@ -11428,6 +11374,7 @@ dependencies = [
"lazy_static",
"log",
"regex",
"serde",
"sqlparser 0.45.0 (registry+https://github.com/rust-lang/crates.io-index)",
"sqlparser_derive 0.2.2 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)",
]
@@ -11610,7 +11557,7 @@ dependencies = [

[[package]]
name = "store-api"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"aquamarine",
@@ -11781,7 +11728,7 @@ dependencies = [

[[package]]
name = "substrait"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"async-trait",
"bytes",
@@ -11980,7 +11927,7 @@ dependencies = [

[[package]]
name = "table"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -12246,11 +12193,12 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"

[[package]]
name = "tests-fuzz"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"arbitrary",
"async-trait",
"chrono",
"common-base",
"common-error",
"common-macro",
"common-query",
@@ -12288,7 +12236,7 @@ dependencies = [

[[package]]
name = "tests-integration"
version = "0.11.0"
version = "0.12.0"
dependencies = [
"api",
"arrow-flight",
@@ -12352,7 +12300,7 @@ dependencies = [
"sql",
"sqlx",
"store-api",
"substrait 0.11.0",
"substrait 0.12.0",
"table",
"tempfile",
"time",

@@ -68,7 +68,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.11.0"
version = "0.12.0"
edition = "2021"
license = "Apache-2.0"

@@ -180,6 +180,7 @@ sysinfo = "0.30"
# on branch v0.44.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "54a267ac89c09b11c0c88934690530807185d3e7", features = [
    "visitor",
    "serde",
] }
strum = { version = "0.25", features = ["derive"] }
tempfile = "3"

@@ -13,11 +13,11 @@
| Key | Type | Default | Descriptions |
| --- | -----| ------- | ----------- |
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
| `default_timezone` | String | Unset | The default timezone of the server. |
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
@@ -61,9 +61,9 @@
| `wal` | -- | -- | The WAL options. |
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
| `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `128MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
@@ -150,6 +150,7 @@
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. |
| `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. |
| `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. |
| `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
| `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
@@ -286,12 +287,12 @@
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
| `store_addrs` | Array | -- | Store server address default to etcd store. |
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
| `backend` | String | `EtcdStore` | The datastore for meta server. |
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. |
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
| `backend` | String | `EtcdStore` | The datastore for meta server. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
@@ -356,7 +357,6 @@
| `node_id` | Integer | Unset | The datanode identifier and should be unique in the cluster. |
| `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.<br/>It will block the datanode start if it can't receive leases in the heartbeat from metasrv. |
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
| `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. |
@@ -364,6 +364,7 @@
| `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. |
| `rpc_max_recv_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_recv_message_size` instead. |
| `rpc_max_send_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_send_message_size` instead. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
| `http` | -- | -- | The HTTP server options. |
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
@@ -398,9 +399,9 @@
| `wal` | -- | -- | The WAL options. |
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
| `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `128MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
@@ -475,6 +476,9 @@
| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query<br/>- `auto`: automatically (default)<br/>- `disable`: never |
| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `auto` | Memory threshold for performing an external sort during index creation.<br/>- `auto`: automatically determine the threshold based on the system memory size (default)<br/>- `unlimited`: no memory limit<br/>- `[size]` e.g. `64MB`: fixed memory threshold |
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. |
| `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. |
| `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. |
| `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
| `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically (default)<br/>- `disable`: never |

@@ -13,9 +13,6 @@ require_lease_before_startup = false
## By default, it provides services after all regions have been initialized.
init_regions_in_background = false

## Enable telemetry to collect anonymous usage data.
enable_telemetry = true

## Parallelism of initializing regions.
init_regions_parallelism = 16

@@ -42,6 +39,8 @@ rpc_max_recv_message_size = "512MB"
## @toml2docs:none-default
rpc_max_send_message_size = "512MB"

## Enable telemetry to collect anonymous usage data. Enabled by default.
#+ enable_telemetry = true

## The HTTP server options.
[http]
@@ -143,15 +142,15 @@ dir = "/tmp/greptimedb/wal"

## The size of the WAL segment file.
## **It's only used when the provider is `raft_engine`**.
file_size = "256MB"
file_size = "128MB"

## The threshold of the WAL size to trigger a flush.
## **It's only used when the provider is `raft_engine`**.
purge_threshold = "4GB"
purge_threshold = "1GB"

## The interval to trigger a flush.
## **It's only used when the provider is `raft_engine`**.
purge_interval = "10m"
purge_interval = "1m"

## The read batch size.
## **It's only used when the provider is `raft_engine`**.
@@ -544,6 +543,15 @@ mem_threshold_on_create = "auto"
## Deprecated, use `region_engine.mito.index.aux_path` instead.
intermediate_path = ""

## Cache size for inverted index metadata.
metadata_cache_size = "64MiB"

## Cache size for inverted index content.
content_cache_size = "128MiB"

## Page size for inverted index content cache.
content_cache_page_size = "8MiB"

## The options for full-text index in Mito engine.
[region_engine.mito.fulltext_index]

@@ -10,6 +10,12 @@ server_addr = "127.0.0.1:3002"
## Store server address default to etcd store.
store_addrs = ["127.0.0.1:2379"]

## If it's not empty, the metasrv will store all data with this key prefix.
store_key_prefix = ""

## The datastore for meta server.
backend = "EtcdStore"

## Datanode selector type.
## - `round_robin` (default value)
## - `lease_based`
@@ -20,20 +26,14 @@ selector = "round_robin"
## Store data in memory.
use_memory_store = false

## Whether to enable greptimedb telemetry.
enable_telemetry = true

## If it's not empty, the metasrv will store all data with this key prefix.
store_key_prefix = ""

## Whether to enable region failover.
## This feature is only available on GreptimeDB running on cluster mode and
## - Using Remote WAL
## - Using shared storage (e.g., s3).
enable_region_failover = false

## The datastore for meta server.
backend = "EtcdStore"
## Whether to enable greptimedb telemetry. Enabled by default.
#+ enable_telemetry = true

## The runtime options.
#+ [runtime]

@@ -1,9 +1,6 @@
## The running mode of the datanode. It can be `standalone` or `distributed`.
mode = "standalone"

## Enable telemetry to collect anonymous usage data.
enable_telemetry = true

## The default timezone of the server.
## @toml2docs:none-default
default_timezone = "UTC"
@@ -18,6 +15,9 @@ init_regions_parallelism = 16
## The maximum current queries allowed to be executed. Zero means unlimited.
max_concurrent_queries = 0

## Enable telemetry to collect anonymous usage data. Enabled by default.
#+ enable_telemetry = true

## The runtime options.
#+ [runtime]
## The number of threads to execute the runtime for global read operations.
@@ -147,15 +147,15 @@ dir = "/tmp/greptimedb/wal"

## The size of the WAL segment file.
## **It's only used when the provider is `raft_engine`**.
file_size = "256MB"
file_size = "128MB"

## The threshold of the WAL size to trigger a flush.
## **It's only used when the provider is `raft_engine`**.
purge_threshold = "4GB"
purge_threshold = "1GB"

## The interval to trigger a flush.
## **It's only used when the provider is `raft_engine`**.
purge_interval = "10m"
purge_interval = "1m"

## The read batch size.
## **It's only used when the provider is `raft_engine`**.
@@ -588,6 +588,9 @@ metadata_cache_size = "64MiB"
## Cache size for inverted index content.
content_cache_size = "128MiB"

## Page size for inverted index content cache.
content_cache_page_size = "8MiB"

## The options for full-text index in Mito engine.
[region_engine.mito.fulltext_index]

File diff suppressed because it is too large
@@ -36,6 +36,11 @@ pub struct Metadata {
/// `RangeReader` reads a range of bytes from a source.
#[async_trait]
pub trait RangeReader: Send + Unpin {
    /// Sets the file size hint for the reader.
    ///
    /// It's used to optimize the reading process by reducing the number of remote requests.
    fn with_file_size_hint(&mut self, file_size_hint: u64);

    /// Returns the metadata of the source.
    async fn metadata(&mut self) -> io::Result<Metadata>;

@@ -70,6 +75,10 @@ pub trait RangeReader: Send + Unpin {

#[async_trait]
impl<R: ?Sized + RangeReader> RangeReader for &mut R {
    fn with_file_size_hint(&mut self, file_size_hint: u64) {
        (*self).with_file_size_hint(file_size_hint)
    }

    async fn metadata(&mut self) -> io::Result<Metadata> {
        (*self).metadata().await
    }
@@ -186,15 +195,17 @@ impl<R: RangeReader + 'static> AsyncRead for AsyncReadAdapter<R> {

#[async_trait]
impl RangeReader for Vec<u8> {
    fn with_file_size_hint(&mut self, _file_size_hint: u64) {
        // do nothing
    }

    async fn metadata(&mut self) -> io::Result<Metadata> {
        Ok(Metadata {
            content_length: self.len() as u64,
        })
    }

    async fn read(&mut self, mut range: Range<u64>) -> io::Result<Bytes> {
        range.end = range.end.min(self.len() as u64);

    async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
        let bytes = Bytes::copy_from_slice(&self[range.start as usize..range.end as usize]);
        Ok(bytes)
    }
@@ -222,6 +233,10 @@ impl FileReader {

#[async_trait]
impl RangeReader for FileReader {
    fn with_file_size_hint(&mut self, _file_size_hint: u64) {
        // do nothing
    }

    async fn metadata(&mut self) -> io::Result<Metadata> {
        Ok(Metadata {
            content_length: self.content_length,
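
The hunks above add a batched `read_vec` method to the `RangeReader` trait. As a rough, self-contained sketch of the idea (returning several byte ranges from one call instead of looping over single reads), using local stand-ins rather than the `common-base` trait itself:

```rust
use std::ops::Range;

// Local stand-in for a range-capable source; not the crate's RangeReader.
fn read_vec(source: &[u8], ranges: &[Range<u64>]) -> Vec<Vec<u8>> {
    // One call returns every requested slice, instead of issuing a
    // separate read (potentially a remote request) per range.
    ranges
        .iter()
        .map(|r| source[r.start as usize..r.end as usize].to_vec())
        .collect()
}

fn main() {
    let blob = b"0123456789".to_vec();
    let parts = read_vec(&blob, &[0..3, 5..8]);
    assert_eq!(parts, vec![b"012".to_vec(), b"567".to_vec()]);
}
```
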
@@ -19,7 +19,7 @@ pub const GIB: u64 = MIB * BINARY_DATA_MAGNITUDE;
pub const TIB: u64 = GIB * BINARY_DATA_MAGNITUDE;
pub const PIB: u64 = TIB * BINARY_DATA_MAGNITUDE;

#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd)]
#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Default)]
pub struct ReadableSize(pub u64);

impl ReadableSize {
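
For context on the `Default` derive added above: on a tuple struct wrapping `u64`, the derived default is the zero value. A minimal standalone illustration (the struct here is a local stand-in, not the `common-base` type):

```rust
// Deriving Default on a u64 newtype yields the zero size.
#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Default, Debug)]
struct ReadableSize(u64);

fn main() {
    assert_eq!(ReadableSize::default(), ReadableSize(0));
}
```
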
@@ -15,6 +15,7 @@
mod convert;
mod distance;
pub(crate) mod impl_conv;
mod scalar_add;

use std::sync::Arc;

@@ -32,5 +33,8 @@ impl VectorFunction {
        registry.register(Arc::new(distance::CosDistanceFunction));
        registry.register(Arc::new(distance::DotProductFunction));
        registry.register(Arc::new(distance::L2SqDistanceFunction));

        // scalar calculation
        registry.register(Arc::new(scalar_add::ScalarAddFunction));
    }
}
@@ -109,7 +109,6 @@ pub fn parse_veclit_from_strlit(s: &str) -> Result<Vec<f32>> {
    })
}

#[allow(unused)]
/// Convert a vector literal to a binary literal.
pub fn veclit_to_binlit(vec: &[f32]) -> Vec<u8> {
    if cfg!(target_endian = "little") {
173  src/common/function/src/scalars/vector/scalar_add.rs (new file)

@@ -0,0 +1,173 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::borrow::Cow;
use std::fmt::Display;

use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::prelude::Signature;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
use nalgebra::DVectorView;
use snafu::ensure;

use crate::function::{Function, FunctionContext};
use crate::helper;
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit};

const NAME: &str = "vec_scalar_add";

/// Adds a scalar to each element of a vector.
///
/// # Example
///
/// ```sql
/// SELECT vec_to_string(vec_scalar_add(1, "[1, 2, 3]")) as result;
///
/// +---------+
/// | result |
/// +---------+
/// | [2,3,4] |
/// +---------+
///
/// -- Negative scalar to simulate subtraction
/// SELECT vec_to_string(vec_scalar_add(-1, "[1, 2, 3]")) as result;
///
/// +---------+
/// | result |
/// +---------+
/// | [0,1,2] |
/// +---------+
/// ```
#[derive(Debug, Clone, Default)]
pub struct ScalarAddFunction;

impl Function for ScalarAddFunction {
    fn name(&self) -> &str {
        NAME
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::binary_datatype())
    }

    fn signature(&self) -> Signature {
        helper::one_of_sigs2(
            vec![ConcreteDataType::float64_datatype()],
            vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::binary_datatype(),
            ],
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure!(
            columns.len() == 2,
            InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The length of the args is not correct, expect exactly two, have: {}",
                    columns.len()
                ),
            }
        );
        let arg0 = &columns[0];
        let arg1 = &columns[1];

        let len = arg0.len();
        let mut result = BinaryVectorBuilder::with_capacity(len);
        if len == 0 {
            return Ok(result.to_vector());
        }

        let arg1_const = as_veclit_if_const(arg1)?;

        for i in 0..len {
            let arg0 = arg0.get(i).as_f64_lossy();
            let Some(arg0) = arg0 else {
                result.push_null();
                continue;
            };

            let arg1 = match arg1_const.as_ref() {
                Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())),
                None => as_veclit(arg1.get_ref(i))?,
            };
            let Some(arg1) = arg1 else {
                result.push_null();
                continue;
            };

            let vec = DVectorView::from_slice(&arg1, arg1.len());
            let vec_res = vec.add_scalar(arg0 as _);

            let veclit = vec_res.as_slice();
            let binlit = veclit_to_binlit(veclit);
            result.push(Some(&binlit));
        }

        Ok(result.to_vector())
    }
}

impl Display for ScalarAddFunction {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", NAME.to_ascii_uppercase())
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datatypes::vectors::{Float32Vector, StringVector};

    use super::*;

    #[test]
    fn test_scalar_add() {
        let func = ScalarAddFunction;

        let input0 = Arc::new(Float32Vector::from(vec![
            Some(1.0),
            Some(-1.0),
            None,
            Some(3.0),
        ]));
        let input1 = Arc::new(StringVector::from(vec![
            Some("[1.0,2.0,3.0]".to_string()),
            Some("[4.0,5.0,6.0]".to_string()),
            Some("[7.0,8.0,9.0]".to_string()),
            None,
        ]));

        let result = func
            .eval(FunctionContext::default(), &[input0, input1])
            .unwrap();

        let result = result.as_ref();
        assert_eq!(result.len(), 4);
        assert_eq!(
            result.get_ref(0).as_binary().unwrap(),
            Some(veclit_to_binlit(&[2.0, 3.0, 4.0]).as_slice())
        );
        assert_eq!(
            result.get_ref(1).as_binary().unwrap(),
            Some(veclit_to_binlit(&[3.0, 4.0, 5.0]).as_slice())
        );
        assert!(result.get_ref(2).is_null());
        assert!(result.get_ref(3).is_null());
    }
}
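
The new `vec_scalar_add` function above works on columnar inputs and encodes each result back into a binary vector literal. The element-wise arithmetic it performs per row reduces to the following self-contained sketch (plain slices here, not the engine's `VectorRef` types), matching the SQL examples in the doc comment:

```rust
// Add a scalar to every element of a vector literal; a negative scalar
// simulates subtraction, as in the SQL examples above.
fn scalar_add(scalar: f32, vector: &[f32]) -> Vec<f32> {
    vector.iter().map(|v| v + scalar).collect()
}

fn main() {
    assert_eq!(scalar_add(1.0, &[1.0, 2.0, 3.0]), vec![2.0, 3.0, 4.0]);
    assert_eq!(scalar_add(-1.0, &[1.0, 2.0, 3.0]), vec![0.0, 1.0, 2.0]);
}
```
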
@@ -49,9 +49,9 @@ impl Default for RaftEngineConfig {
    fn default() -> Self {
        Self {
            dir: None,
            file_size: ReadableSize::mb(256),
            purge_threshold: ReadableSize::gb(4),
            purge_interval: Duration::from_secs(600),
            file_size: ReadableSize::mb(128),
            purge_threshold: ReadableSize::gb(1),
            purge_interval: Duration::from_secs(60),
            read_batch_size: 128,
            sync_write: false,
            enable_log_recycle: true,
@@ -19,14 +19,16 @@ use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
use client::Output;
use common_error::ext::BoxedError;
use pipeline::pipeline_operator::PipelineOperator;
use pipeline::{GreptimeTransformer, Pipeline, PipelineInfo, PipelineVersion};
use servers::error::{
    AuthSnafu, Error as ServerError, ExecuteGrpcRequestSnafu, PipelineSnafu, Result as ServerResult,
};
use servers::interceptor::{LogIngestInterceptor, LogIngestInterceptorRef};
use servers::query_handler::PipelineHandler;
use session::context::QueryContextRef;
use session::context::{QueryContext, QueryContextRef};
use snafu::ResultExt;
use table::Table;

use crate::instance::Instance;

@@ -84,6 +86,22 @@ impl PipelineHandler for Instance {
            .await
            .context(PipelineSnafu)
    }

    async fn get_table(
        &self,
        table: &str,
        query_ctx: &QueryContext,
    ) -> std::result::Result<Option<Arc<Table>>, catalog::error::Error> {
        let catalog = query_ctx.current_catalog();
        let schema = query_ctx.current_schema();
        self.catalog_manager
            .table(catalog, &schema, table, None)
            .await
    }

    fn build_pipeline(&self, pipeline: &str) -> ServerResult<Pipeline<GreptimeTransformer>> {
        PipelineOperator::build_pipeline(pipeline).context(PipelineSnafu)
    }
}

impl Instance {
@@ -26,14 +26,6 @@ use crate::inverted_index::search::predicate::Predicate;
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
    #[snafu(display("Failed to seek"))]
    Seek {
        #[snafu(source)]
        error: IoError,
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display("Failed to read"))]
    Read {
        #[snafu(source)]
@@ -76,6 +68,18 @@ pub enum Error {
        location: Location,
    },

    #[snafu(display("Blob size too small"))]
    BlobSizeTooSmall {
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display("Invalid footer payload size"))]
    InvalidFooterPayloadSize {
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display("Unexpected inverted index footer payload size, max: {max_payload_size}, actual: {actual_payload_size}"))]
    UnexpectedFooterPayloadSize {
        max_payload_size: u64,
@@ -215,8 +219,7 @@ impl ErrorExt for Error {
    fn status_code(&self) -> StatusCode {
        use Error::*;
        match self {
            Seek { .. }
            | Read { .. }
            Read { .. }
            | Write { .. }
            | Flush { .. }
            | Close { .. }
@@ -229,7 +232,9 @@ impl ErrorExt for Error {
            | KeysApplierUnexpectedPredicates { .. }
            | CommonIo { .. }
            | UnknownIntermediateCodecMagic { .. }
            | FstCompile { .. } => StatusCode::Unexpected,
            | FstCompile { .. }
            | InvalidFooterPayloadSize { .. }
            | BlobSizeTooSmall { .. } => StatusCode::Unexpected,

            ParseRegex { .. }
            | ParseDFA { .. }
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::ops::Range;
use std::sync::Arc;

use async_trait::async_trait;
@@ -30,23 +31,23 @@ mod footer;
#[mockall::automock]
#[async_trait]
pub trait InvertedIndexReader: Send {
    /// Reads all data to dest.
    async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize>;

    /// Seeks to given offset and reads data with exact size as provided.
    async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>>;
    async fn range_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>>;

    /// Reads the bytes in the given ranges.
    async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Vec<u8>>>;

    /// Retrieves metadata of all inverted indices stored within the blob.
    async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>>;

    /// Retrieves the finite state transducer (FST) map from the given offset and size.
    async fn fst(&mut self, offset: u64, size: u32) -> Result<FstMap> {
        let fst_data = self.seek_read(offset, size).await?;
        let fst_data = self.range_read(offset, size).await?;
        FstMap::new(fst_data).context(DecodeFstSnafu)
    }

    /// Retrieves the bitmap from the given offset and size.
    async fn bitmap(&mut self, offset: u64, size: u32) -> Result<BitVec> {
        self.seek_read(offset, size).await.map(BitVec::from_vec)
        self.range_read(offset, size).await.map(BitVec::from_vec)
    }
}
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::ops::Range;
use std::sync::Arc;

use async_trait::async_trait;
@@ -19,8 +20,9 @@ use common_base::range_read::RangeReader;
use greptime_proto::v1::index::InvertedIndexMetas;
use snafu::{ensure, ResultExt};

use super::footer::DEFAULT_PREFETCH_SIZE;
use crate::inverted_index::error::{CommonIoSnafu, Result, UnexpectedBlobSizeSnafu};
use crate::inverted_index::format::reader::footer::InvertedIndeFooterReader;
use crate::inverted_index::format::reader::footer::InvertedIndexFooterReader;
use crate::inverted_index::format::reader::InvertedIndexReader;
use crate::inverted_index::format::MIN_BLOB_SIZE;

@@ -49,16 +51,7 @@ impl<R> InvertedIndexBlobReader<R> {

#[async_trait]
impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
    async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize> {
        let metadata = self.source.metadata().await.context(CommonIoSnafu)?;
        self.source
            .read_into(0..metadata.content_length, dest)
            .await
            .context(CommonIoSnafu)?;
        Ok(metadata.content_length as usize)
    }

    async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
    async fn range_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
        let buf = self
            .source
            .read(offset..offset + size as u64)
@@ -67,12 +60,18 @@ impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
        Ok(buf.into())
    }

    async fn read_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<Vec<u8>>> {
        let bufs = self.source.read_vec(ranges).await.context(CommonIoSnafu)?;
        Ok(bufs.into_iter().map(|buf| buf.into()).collect())
    }

    async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>> {
        let metadata = self.source.metadata().await.context(CommonIoSnafu)?;
        let blob_size = metadata.content_length;
        Self::validate_blob_size(blob_size)?;

        let mut footer_reader = InvertedIndeFooterReader::new(&mut self.source, blob_size);
        let mut footer_reader = InvertedIndexFooterReader::new(&mut self.source, blob_size)
            .with_prefetch_size(DEFAULT_PREFETCH_SIZE);
        footer_reader.metadata().await.map(Arc::new)
    }
}
@@ -18,53 +18,88 @@ use prost::Message;
use snafu::{ensure, ResultExt};

use crate::inverted_index::error::{
CommonIoSnafu, DecodeProtoSnafu, Result, UnexpectedFooterPayloadSizeSnafu,
UnexpectedOffsetSizeSnafu, UnexpectedZeroSegmentRowCountSnafu,
BlobSizeTooSmallSnafu, CommonIoSnafu, DecodeProtoSnafu, InvalidFooterPayloadSizeSnafu, Result,
UnexpectedFooterPayloadSizeSnafu, UnexpectedOffsetSizeSnafu,
UnexpectedZeroSegmentRowCountSnafu,
};
use crate::inverted_index::format::FOOTER_PAYLOAD_SIZE_SIZE;

/// InvertedIndeFooterReader is for reading the footer section of the blob.
pub struct InvertedIndeFooterReader<R> {
pub const DEFAULT_PREFETCH_SIZE: u64 = 1024; // 1KiB

/// InvertedIndexFooterReader is for reading the footer section of the blob.
pub struct InvertedIndexFooterReader<R> {
source: R,
blob_size: u64,
prefetch_size: Option<u64>,
}

impl<R> InvertedIndeFooterReader<R> {
impl<R> InvertedIndexFooterReader<R> {
pub fn new(source: R, blob_size: u64) -> Self {
Self { source, blob_size }
Self {
source,
blob_size,
prefetch_size: None,
}
}

/// Set the prefetch size for the footer reader.
pub fn with_prefetch_size(mut self, prefetch_size: u64) -> Self {
self.prefetch_size = Some(prefetch_size.max(FOOTER_PAYLOAD_SIZE_SIZE));
self
}

pub fn prefetch_size(&self) -> u64 {
self.prefetch_size.unwrap_or(FOOTER_PAYLOAD_SIZE_SIZE)
}
}

impl<R: RangeReader> InvertedIndeFooterReader<R> {
impl<R: RangeReader> InvertedIndexFooterReader<R> {
pub async fn metadata(&mut self) -> Result<InvertedIndexMetas> {
let payload_size = self.read_payload_size().await?;
let metas = self.read_payload(payload_size).await?;
Ok(metas)
}
ensure!(
self.blob_size >= FOOTER_PAYLOAD_SIZE_SIZE,
BlobSizeTooSmallSnafu
);

async fn read_payload_size(&mut self) -> Result<u64> {
let mut size_buf = [0u8; FOOTER_PAYLOAD_SIZE_SIZE as usize];
let end = self.blob_size;
let start = end - FOOTER_PAYLOAD_SIZE_SIZE;
self.source
.read_into(start..end, &mut &mut size_buf[..])
let footer_start = self.blob_size.saturating_sub(self.prefetch_size());
let suffix = self
.source
.read(footer_start..self.blob_size)
.await
.context(CommonIoSnafu)?;
let suffix_len = suffix.len();
let length = u32::from_le_bytes(Self::read_tailing_four_bytes(&suffix)?) as u64;
self.validate_payload_size(length)?;

let payload_size = u32::from_le_bytes(size_buf) as u64;
self.validate_payload_size(payload_size)?;
let footer_size = FOOTER_PAYLOAD_SIZE_SIZE;

Ok(payload_size)
// Did not fetch the entire file metadata in the initial read, need to make a second request.
if length > suffix_len as u64 - footer_size {
let metadata_start = self.blob_size - length - footer_size;
let meta = self
.source
.read(metadata_start..self.blob_size - footer_size)
.await
.context(CommonIoSnafu)?;
self.parse_payload(&meta, length)
} else {
let metadata_start = self.blob_size - length - footer_size - footer_start;
let meta = &suffix[metadata_start as usize..suffix_len - footer_size as usize];
self.parse_payload(meta, length)
}
}

async fn read_payload(&mut self, payload_size: u64) -> Result<InvertedIndexMetas> {
let end = self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE;
let start = end - payload_size;
let bytes = self.source.read(start..end).await.context(CommonIoSnafu)?;
fn read_tailing_four_bytes(suffix: &[u8]) -> Result<[u8; 4]> {
let suffix_len = suffix.len();
ensure!(suffix_len >= 4, InvalidFooterPayloadSizeSnafu);
let mut bytes = [0; 4];
bytes.copy_from_slice(&suffix[suffix_len - 4..suffix_len]);

let metas = InvertedIndexMetas::decode(&*bytes).context(DecodeProtoSnafu)?;
Ok(bytes)
}

fn parse_payload(&mut self, bytes: &[u8], payload_size: u64) -> Result<InvertedIndexMetas> {
let metas = InvertedIndexMetas::decode(bytes).context(DecodeProtoSnafu)?;
self.validate_metas(&metas, payload_size)?;

Ok(metas)
}
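The new footer layout ends the blob with `[payload][4-byte little-endian payload length]`. With the default 1 KiB prefetch, a single read of the blob's tail usually returns both the length field and the payload; a second ranged read is only issued when `length > suffix_len - 4`. A hedged sketch of the decode step, assuming `suffix` is the prefetched tail and `FOOTER_PAYLOAD_SIZE_SIZE` is the 4-byte length field:

// Sketch only, mirroring read_tailing_four_bytes / read_payload_size above.
let suffix_len = suffix.len();
let mut len_bytes = [0u8; 4];
len_bytes.copy_from_slice(&suffix[suffix_len - 4..]);
let payload_len = u32::from_le_bytes(len_bytes) as u64;
if payload_len <= suffix_len as u64 - 4 {
    // Payload already sits inside the prefetched suffix; slice it out without a second read.
    // Worked example: blob_size = 10_000, payload_len = 700, prefetch = 1024:
    // the suffix covers bytes 8976..10_000, so the payload is suffix[320..1020].
    let payload = &suffix[suffix_len - 4 - payload_len as usize..suffix_len - 4];
}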
@@ -113,9 +148,12 @@ impl<R: RangeReader> InvertedIndeFooterReader<R> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use prost::Message;
|
||||
|
||||
use super::*;
|
||||
use crate::inverted_index::error::Error;
|
||||
|
||||
fn create_test_payload(meta: InvertedIndexMeta) -> Vec<u8> {
|
||||
let mut metas = InvertedIndexMetas {
|
||||
@@ -141,14 +179,18 @@ mod tests {
|
||||
|
||||
let mut payload_buf = create_test_payload(meta);
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
|
||||
|
||||
let payload_size = reader.read_payload_size().await.unwrap();
|
||||
let metas = reader.read_payload(payload_size).await.unwrap();
|
||||
for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] {
|
||||
let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size);
|
||||
if prefetch > 0 {
|
||||
reader = reader.with_prefetch_size(prefetch);
|
||||
}
|
||||
|
||||
assert_eq!(metas.metas.len(), 1);
|
||||
let index_meta = &metas.metas.get("test").unwrap();
|
||||
assert_eq!(index_meta.name, "test");
|
||||
let metas = reader.metadata().await.unwrap();
|
||||
assert_eq!(metas.metas.len(), 1);
|
||||
let index_meta = &metas.metas.get("test").unwrap();
|
||||
assert_eq!(index_meta.name, "test");
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -157,14 +199,20 @@ mod tests {
|
||||
name: "test".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut payload_buf = create_test_payload(meta);
|
||||
payload_buf.push(0xff); // Add an extra byte to corrupt the footer
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
|
||||
|
||||
let payload_size_result = reader.read_payload_size().await;
|
||||
assert!(payload_size_result.is_err());
|
||||
for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] {
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size);
|
||||
if prefetch > 0 {
|
||||
reader = reader.with_prefetch_size(prefetch);
|
||||
}
|
||||
|
||||
let result = reader.metadata().await;
|
||||
assert_matches!(result, Err(Error::UnexpectedFooterPayloadSize { .. }));
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -178,10 +226,15 @@ mod tests {
|
||||
|
||||
let mut payload_buf = create_test_payload(meta);
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
|
||||
|
||||
let payload_size = reader.read_payload_size().await.unwrap();
|
||||
let payload_result = reader.read_payload(payload_size).await;
|
||||
assert!(payload_result.is_err());
|
||||
for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] {
|
||||
let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size);
|
||||
if prefetch > 0 {
|
||||
reader = reader.with_prefetch_size(prefetch);
|
||||
}
|
||||
|
||||
let result = reader.metadata().await;
|
||||
assert_matches!(result, Err(Error::UnexpectedOffsetSize { .. }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@
// limitations under the License.

#![feature(iter_partition_in_place)]
#![feature(assert_matches)]

pub mod fulltext_index;
pub mod inverted_index;
@@ -206,43 +206,41 @@ pub async fn metasrv_builder(
|
||||
plugins: Plugins,
|
||||
kv_backend: Option<KvBackendRef>,
|
||||
) -> Result<MetasrvBuilder> {
|
||||
let (kv_backend, election) = match (kv_backend, &opts.backend) {
|
||||
let (mut kv_backend, election) = match (kv_backend, &opts.backend) {
|
||||
(Some(kv_backend), _) => (kv_backend, None),
|
||||
(None, BackendImpl::MemoryStore) => (Arc::new(MemoryKvBackend::new()) as _, None),
|
||||
(None, BackendImpl::EtcdStore) => {
|
||||
let etcd_client = create_etcd_client(opts).await?;
|
||||
let kv_backend = {
|
||||
let etcd_backend =
|
||||
EtcdStore::with_etcd_client(etcd_client.clone(), opts.max_txn_ops);
|
||||
if !opts.store_key_prefix.is_empty() {
|
||||
Arc::new(ChrootKvBackend::new(
|
||||
opts.store_key_prefix.clone().into_bytes(),
|
||||
etcd_backend,
|
||||
))
|
||||
} else {
|
||||
etcd_backend
|
||||
}
|
||||
};
|
||||
(
|
||||
kv_backend,
|
||||
Some(
|
||||
EtcdElection::with_etcd_client(
|
||||
&opts.server_addr,
|
||||
etcd_client.clone(),
|
||||
opts.store_key_prefix.clone(),
|
||||
)
|
||||
.await?,
|
||||
),
|
||||
let kv_backend = EtcdStore::with_etcd_client(etcd_client.clone(), opts.max_txn_ops);
|
||||
let election = EtcdElection::with_etcd_client(
|
||||
&opts.server_addr,
|
||||
etcd_client,
|
||||
opts.store_key_prefix.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
(kv_backend, Some(election))
|
||||
}
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
(None, BackendImpl::PostgresStore) => {
|
||||
let pg_client = create_postgres_client(opts).await?;
|
||||
let kv_backend = PgStore::with_pg_client(pg_client).await.unwrap();
|
||||
// TODO(jeremy, weny): implement election for postgres
|
||||
(kv_backend, None)
|
||||
}
|
||||
};
|
||||
|
||||
if !opts.store_key_prefix.is_empty() {
|
||||
info!(
|
||||
"using chroot kv backend with prefix: {prefix}",
|
||||
prefix = opts.store_key_prefix
|
||||
);
|
||||
kv_backend = Arc::new(ChrootKvBackend::new(
|
||||
opts.store_key_prefix.clone().into_bytes(),
|
||||
kv_backend,
|
||||
))
|
||||
}
|
||||
|
||||
let in_memory = Arc::new(MemoryKvBackend::new()) as ResettableKvBackendRef;
|
||||
|
||||
let selector = match opts.selector {
|
||||
|
||||
@@ -470,6 +470,10 @@ impl Metasrv {
|
||||
});
|
||||
}
|
||||
} else {
|
||||
warn!(
|
||||
"Ensure only one instance of Metasrv is running, as there is no election service."
|
||||
);
|
||||
|
||||
if let Err(e) = self.wal_options_allocator.start().await {
|
||||
error!(e; "Failed to start wal options allocator");
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ aquamarine.workspace = true
|
||||
async-channel = "1.9"
|
||||
async-stream.workspace = true
|
||||
async-trait = "0.1"
|
||||
bytemuck.workspace = true
|
||||
bytes.workspace = true
|
||||
common-base.workspace = true
|
||||
common-config.workspace = true
|
||||
|
||||
@@ -32,6 +32,7 @@ use moka::notification::RemovalCause;
|
||||
use moka::sync::Cache;
|
||||
use parquet::column::page::Page;
|
||||
use parquet::file::metadata::ParquetMetaData;
|
||||
use puffin::puffin_manager::cache::{PuffinMetadataCache, PuffinMetadataCacheRef};
|
||||
use store_api::storage::{ConcreteDataType, RegionId, TimeSeriesRowSelector};
|
||||
|
||||
use crate::cache::cache_size::parquet_meta_size;
|
||||
@@ -68,6 +69,8 @@ pub struct CacheManager {
|
||||
write_cache: Option<WriteCacheRef>,
|
||||
/// Cache for inverted index.
|
||||
index_cache: Option<InvertedIndexCacheRef>,
|
||||
/// Puffin metadata cache.
|
||||
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
|
||||
/// Cache for time series selectors.
|
||||
selector_result_cache: Option<SelectorResultCache>,
|
||||
}
|
||||
@@ -217,6 +220,10 @@ impl CacheManager {
|
||||
pub(crate) fn index_cache(&self) -> Option<&InvertedIndexCacheRef> {
|
||||
self.index_cache.as_ref()
|
||||
}
|
||||
|
||||
pub(crate) fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> {
|
||||
self.puffin_metadata_cache.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
/// Increases selector cache miss metrics.
|
||||
@@ -237,6 +244,8 @@ pub struct CacheManagerBuilder {
|
||||
page_cache_size: u64,
|
||||
index_metadata_size: u64,
|
||||
index_content_size: u64,
|
||||
index_content_page_size: u64,
|
||||
puffin_metadata_size: u64,
|
||||
write_cache: Option<WriteCacheRef>,
|
||||
selector_result_cache_size: u64,
|
||||
}
|
||||
@@ -278,6 +287,18 @@ impl CacheManagerBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets page size for index content.
|
||||
pub fn index_content_page_size(mut self, bytes: u64) -> Self {
|
||||
self.index_content_page_size = bytes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets cache size for puffin metadata.
|
||||
pub fn puffin_metadata_size(mut self, bytes: u64) -> Self {
|
||||
self.puffin_metadata_size = bytes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets selector result cache size.
|
||||
pub fn selector_result_cache_size(mut self, bytes: u64) -> Self {
|
||||
self.selector_result_cache_size = bytes;
|
||||
@@ -338,8 +359,13 @@ impl CacheManagerBuilder {
|
||||
})
|
||||
.build()
|
||||
});
|
||||
let inverted_index_cache =
|
||||
InvertedIndexCache::new(self.index_metadata_size, self.index_content_size);
|
||||
let inverted_index_cache = InvertedIndexCache::new(
|
||||
self.index_metadata_size,
|
||||
self.index_content_size,
|
||||
self.index_content_page_size,
|
||||
);
|
||||
let puffin_metadata_cache =
|
||||
PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES);
|
||||
let selector_result_cache = (self.selector_result_cache_size != 0).then(|| {
|
||||
Cache::builder()
|
||||
.max_capacity(self.selector_result_cache_size)
|
||||
@@ -361,6 +387,7 @@ impl CacheManagerBuilder {
|
||||
page_cache,
|
||||
write_cache: self.write_cache,
|
||||
index_cache: Some(Arc::new(inverted_index_cache)),
|
||||
puffin_metadata_cache: Some(Arc::new(puffin_metadata_cache)),
|
||||
selector_result_cache,
|
||||
}
|
||||
}
|
||||
|
||||
399
src/mito2/src/cache/index.rs
vendored
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::index::InvertedIndexMetas;
|
||||
@@ -34,14 +35,16 @@ const INDEX_CONTENT_TYPE: &str = "index_content";
|
||||
/// Inverted index blob reader with cache.
|
||||
pub struct CachedInvertedIndexBlobReader<R> {
|
||||
file_id: FileId,
|
||||
file_size: u64,
|
||||
inner: R,
|
||||
cache: InvertedIndexCacheRef,
|
||||
}
|
||||
|
||||
impl<R> CachedInvertedIndexBlobReader<R> {
|
||||
pub fn new(file_id: FileId, inner: R, cache: InvertedIndexCacheRef) -> Self {
|
||||
pub fn new(file_id: FileId, file_size: u64, inner: R, cache: InvertedIndexCacheRef) -> Self {
|
||||
Self {
|
||||
file_id,
|
||||
file_size,
|
||||
inner,
|
||||
cache,
|
||||
}
|
||||
@@ -59,43 +62,77 @@ where
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<Vec<u8>> {
|
||||
let range = offset as usize..(offset + size as u64) as usize;
|
||||
if let Some(cached) = self.cache.get_index(IndexKey {
|
||||
file_id: self.file_id,
|
||||
}) {
|
||||
CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
|
||||
Ok(cached[range].to_vec())
|
||||
} else {
|
||||
let mut all_data = Vec::with_capacity(1024 * 1024);
|
||||
self.inner.read_all(&mut all_data).await?;
|
||||
let result = all_data[range].to_vec();
|
||||
self.cache.put_index(
|
||||
IndexKey {
|
||||
file_id: self.file_id,
|
||||
},
|
||||
Arc::new(all_data),
|
||||
);
|
||||
CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
|
||||
Ok(result)
|
||||
let keys =
|
||||
IndexDataPageKey::generate_page_keys(self.file_id, offset, size, self.cache.page_size);
|
||||
// Size is 0, return empty data.
|
||||
if keys.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
// TODO: Can be replaced by an uncontinuous structure like opendal::Buffer.
|
||||
let mut data = Vec::with_capacity(keys.len());
|
||||
data.resize(keys.len(), Arc::new(Vec::new()));
|
||||
let mut cache_miss_range = vec![];
|
||||
let mut cache_miss_idx = vec![];
|
||||
let last_index = keys.len() - 1;
|
||||
// TODO: Avoid copy as much as possible.
|
||||
for (i, index) in keys.clone().into_iter().enumerate() {
|
||||
match self.cache.get_index(&index) {
|
||||
Some(page) => {
|
||||
CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
|
||||
data[i] = page;
|
||||
}
|
||||
None => {
|
||||
CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc();
|
||||
let base_offset = index.page_id * self.cache.page_size;
|
||||
let pruned_size = if i == last_index {
|
||||
prune_size(&keys, self.file_size, self.cache.page_size)
|
||||
} else {
|
||||
self.cache.page_size
|
||||
};
|
||||
cache_miss_range.push(base_offset..base_offset + pruned_size);
|
||||
cache_miss_idx.push(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
if !cache_miss_range.is_empty() {
|
||||
let pages = self.inner.read_vec(&cache_miss_range).await?;
|
||||
for (i, page) in cache_miss_idx.into_iter().zip(pages.into_iter()) {
|
||||
let page = Arc::new(page);
|
||||
let key = keys[i].clone();
|
||||
data[i] = page.clone();
|
||||
self.cache.put_index(key, page.clone());
|
||||
}
|
||||
}
|
||||
let mut result = Vec::with_capacity(size as usize);
|
||||
data.iter().enumerate().for_each(|(i, page)| {
|
||||
let range = if i == 0 {
|
||||
IndexDataPageKey::calculate_first_page_range(offset, size, self.cache.page_size)
|
||||
} else if i == last_index {
|
||||
IndexDataPageKey::calculate_last_page_range(offset, size, self.cache.page_size)
|
||||
} else {
|
||||
0..self.cache.page_size as usize
|
||||
};
|
||||
result.extend_from_slice(&page[range]);
|
||||
});
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobReader<R> {
|
||||
async fn read_all(
|
||||
&mut self,
|
||||
dest: &mut Vec<u8>,
|
||||
) -> index::inverted_index::error::Result<usize> {
|
||||
self.inner.read_all(dest).await
|
||||
}
|
||||
|
||||
async fn seek_read(
|
||||
async fn range_read(
|
||||
&mut self,
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<Vec<u8>> {
|
||||
self.inner.seek_read(offset, size).await
|
||||
self.inner.range_read(offset, size).await
|
||||
}
|
||||
|
||||
async fn read_vec(
|
||||
&mut self,
|
||||
ranges: &[Range<u64>],
|
||||
) -> index::inverted_index::error::Result<Vec<Vec<u8>>> {
|
||||
self.inner.read_vec(ranges).await
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> index::inverted_index::error::Result<Arc<InvertedIndexMetas>> {
|
||||
@@ -130,22 +167,81 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IndexKey {
pub struct IndexMetadataKey {
file_id: FileId,
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IndexDataPageKey {
file_id: FileId,
page_id: u64,
}

impl IndexDataPageKey {
/// Converts an offset to a page ID based on the page size.
fn calculate_page_id(offset: u64, page_size: u64) -> u64 {
offset / page_size
}

/// Calculates the total number of pages that a given size spans, starting from a specific offset.
fn calculate_page_count(offset: u64, size: u32, page_size: u64) -> u32 {
let start_page = Self::calculate_page_id(offset, page_size);
let end_page = Self::calculate_page_id(offset + (size as u64) - 1, page_size);
(end_page + 1 - start_page) as u32
}

/// Computes the byte range in the first page based on the offset and size.
/// For example, if offset is 1000 and size is 5000 with PAGE_SIZE of 4096, the first page range is 1000..4096.
fn calculate_first_page_range(offset: u64, size: u32, page_size: u64) -> Range<usize> {
let start = (offset % page_size) as usize;
let end = if size > page_size as u32 - start as u32 {
page_size as usize
} else {
start + size as usize
};
start..end
}

/// Computes the byte range in the last page based on the offset and size.
/// For example, if offset is 1000 and size is 5000 with PAGE_SIZE of 4096, the last page range is 0..1904.
fn calculate_last_page_range(offset: u64, size: u32, page_size: u64) -> Range<usize> {
let offset = offset as usize;
let size = size as usize;
let page_size = page_size as usize;
if (offset + size) % page_size == 0 {
0..page_size
} else {
0..((offset + size) % page_size)
}
}

/// Generates a vector of IndexKey instances for the pages that a given offset and size span.
fn generate_page_keys(file_id: FileId, offset: u64, size: u32, page_size: u64) -> Vec<Self> {
let start_page = Self::calculate_page_id(offset, page_size);
let total_pages = Self::calculate_page_count(offset, size, page_size);
(0..total_pages)
.map(|i| Self {
file_id,
page_id: start_page + i as u64,
})
.collect()
}
}
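The arithmetic in these helpers can be checked against the example quoted in the doc comments (offset 1000, size 5000, page size 4096): the read spans pages 0 and 1, the first page contributes bytes 1000..4096 (3096 bytes) and the last page bytes 0..1904 (1904 bytes), which add up to the requested 5000. A test-style sketch, with `file_id` being any FileId (e.g. FileId::random()):

// Illustrative check of the page math only.
let (offset, size, page_size) = (1000u64, 5000u32, 4096u64);
let keys = IndexDataPageKey::generate_page_keys(file_id, offset, size, page_size);
assert_eq!(keys.len(), 2); // pages 0 and 1
assert_eq!(
    IndexDataPageKey::calculate_first_page_range(offset, size, page_size),
    1000..4096
);
assert_eq!(
    IndexDataPageKey::calculate_last_page_range(offset, size, page_size),
    0..1904
);
// 3096 bytes from page 0 plus 1904 bytes from page 1 == the 5000 requested bytes.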
pub type InvertedIndexCacheRef = Arc<InvertedIndexCache>;
|
||||
|
||||
pub struct InvertedIndexCache {
|
||||
/// Cache for inverted index metadata
|
||||
index_metadata: moka::sync::Cache<IndexKey, Arc<InvertedIndexMetas>>,
|
||||
index_metadata: moka::sync::Cache<IndexMetadataKey, Arc<InvertedIndexMetas>>,
|
||||
/// Cache for inverted index content.
|
||||
index: moka::sync::Cache<IndexKey, Arc<Vec<u8>>>,
|
||||
index: moka::sync::Cache<IndexDataPageKey, Arc<Vec<u8>>>,
|
||||
// Page size for index content.
|
||||
page_size: u64,
|
||||
}
|
||||
|
||||
impl InvertedIndexCache {
|
||||
/// Creates `InvertedIndexCache` with provided `index_metadata_cap` and `index_content_cap`.
|
||||
pub fn new(index_metadata_cap: u64, index_content_cap: u64) -> Self {
|
||||
pub fn new(index_metadata_cap: u64, index_content_cap: u64, page_size: u64) -> Self {
|
||||
common_telemetry::debug!("Building InvertedIndexCache with metadata size: {index_metadata_cap}, content size: {index_content_cap}");
|
||||
let index_metadata = moka::sync::CacheBuilder::new(index_metadata_cap)
|
||||
.name("inverted_index_metadata")
|
||||
@@ -170,29 +266,29 @@ impl InvertedIndexCache {
|
||||
Self {
|
||||
index_metadata,
|
||||
index: index_cache,
|
||||
page_size,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl InvertedIndexCache {
|
||||
pub fn get_index_metadata(&self, file_id: FileId) -> Option<Arc<InvertedIndexMetas>> {
|
||||
self.index_metadata.get(&IndexKey { file_id })
|
||||
self.index_metadata.get(&IndexMetadataKey { file_id })
|
||||
}
|
||||
|
||||
pub fn put_index_metadata(&self, file_id: FileId, metadata: Arc<InvertedIndexMetas>) {
|
||||
let key = IndexKey { file_id };
|
||||
let key = IndexMetadataKey { file_id };
|
||||
CACHE_BYTES
|
||||
.with_label_values(&[INDEX_METADATA_TYPE])
|
||||
.add(index_metadata_weight(&key, &metadata).into());
|
||||
self.index_metadata.insert(key, metadata)
|
||||
}
|
||||
|
||||
// todo(hl): align index file content to pages with size like 4096 bytes.
|
||||
pub fn get_index(&self, key: IndexKey) -> Option<Arc<Vec<u8>>> {
|
||||
self.index.get(&key)
|
||||
pub fn get_index(&self, key: &IndexDataPageKey) -> Option<Arc<Vec<u8>>> {
|
||||
self.index.get(key)
|
||||
}
|
||||
|
||||
pub fn put_index(&self, key: IndexKey, value: Arc<Vec<u8>>) {
|
||||
pub fn put_index(&self, key: IndexDataPageKey, value: Arc<Vec<u8>>) {
|
||||
CACHE_BYTES
|
||||
.with_label_values(&[INDEX_CONTENT_TYPE])
|
||||
.add(index_content_weight(&key, &value).into());
|
||||
@@ -201,11 +297,234 @@ impl InvertedIndexCache {
|
||||
}
|
||||
|
||||
/// Calculates weight for index metadata.
|
||||
fn index_metadata_weight(k: &IndexKey, v: &Arc<InvertedIndexMetas>) -> u32 {
|
||||
fn index_metadata_weight(k: &IndexMetadataKey, v: &Arc<InvertedIndexMetas>) -> u32 {
|
||||
(k.file_id.as_bytes().len() + v.encoded_len()) as u32
|
||||
}
|
||||
|
||||
/// Calculates weight for index content.
|
||||
fn index_content_weight(k: &IndexKey, v: &Arc<Vec<u8>>) -> u32 {
|
||||
fn index_content_weight(k: &IndexDataPageKey, v: &Arc<Vec<u8>>) -> u32 {
|
||||
(k.file_id.as_bytes().len() + v.len()) as u32
|
||||
}
|
||||
|
||||
/// Prunes the size of the last page based on the indexes.
/// We have following cases:
/// 1. The rest file size is less than the page size, read to the end of the file.
/// 2. Otherwise, read the page size.
fn prune_size(indexes: &[IndexDataPageKey], file_size: u64, page_size: u64) -> u64 {
let last_page_start = indexes.last().map(|i| i.page_id * page_size).unwrap_or(0);
page_size.min(file_size - last_page_start)
}
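For instance, with a 10_000-byte file and 4096-byte pages, a read whose last key is page 2 starts that page at offset 8192, so only min(4096, 10_000 - 8192) = 1808 bytes are requested instead of a full page. A sketch, assuming `keys` ends with a key whose `page_id` is 2:

// Last page starts at 2 * 4096 = 8192; only 1808 bytes remain in a 10_000-byte file.
assert_eq!(prune_size(&keys, 10_000, 4096), 1808);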
#[cfg(test)]
|
||||
mod test {
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
use common_base::BitVec;
|
||||
use futures::stream;
|
||||
use index::inverted_index::format::reader::{InvertedIndexBlobReader, InvertedIndexReader};
|
||||
use index::inverted_index::format::writer::{InvertedIndexBlobWriter, InvertedIndexWriter};
|
||||
use index::inverted_index::Bytes;
|
||||
use prometheus::register_int_counter_vec;
|
||||
use rand::{Rng, RngCore};
|
||||
|
||||
use super::*;
|
||||
use crate::sst::index::store::InstrumentedStore;
|
||||
use crate::test_util::TestEnv;
|
||||
|
||||
// Fuzz test for index data page key
|
||||
#[test]
|
||||
fn fuzz_index_calculation() {
|
||||
// randomly generate a large u8 array
|
||||
let mut rng = rand::thread_rng();
|
||||
let mut data = vec![0u8; 1024 * 1024];
|
||||
rng.fill_bytes(&mut data);
|
||||
let file_id = FileId::random();
|
||||
|
||||
for _ in 0..100 {
|
||||
let offset = rng.gen_range(0..data.len() as u64);
|
||||
let size = rng.gen_range(0..data.len() as u32 - offset as u32);
|
||||
let page_size: usize = rng.gen_range(1..1024);
|
||||
|
||||
let indexes =
|
||||
IndexDataPageKey::generate_page_keys(file_id, offset, size, page_size as u64);
|
||||
let page_num = indexes.len();
|
||||
let mut read = Vec::with_capacity(size as usize);
|
||||
let last_index = indexes.len() - 1;
|
||||
for (i, key) in indexes.into_iter().enumerate() {
|
||||
let start = key.page_id as usize * page_size;
|
||||
let page = if start + page_size < data.len() {
|
||||
&data[start..start + page_size]
|
||||
} else {
|
||||
&data[start..]
|
||||
};
|
||||
let range = if i == 0 {
|
||||
// first page range
|
||||
IndexDataPageKey::calculate_first_page_range(offset, size, page_size as u64)
|
||||
} else if i == last_index {
|
||||
// last page range. when the first page is the last page, the range is not used.
|
||||
IndexDataPageKey::calculate_last_page_range(offset, size, page_size as u64)
|
||||
} else {
|
||||
0..page_size
|
||||
};
|
||||
read.extend_from_slice(&page[range]);
|
||||
}
|
||||
let expected_range = offset as usize..(offset + size as u64 as u64) as usize;
|
||||
if read != data.get(expected_range).unwrap() {
|
||||
panic!(
|
||||
"fuzz_read_index failed, offset: {}, size: {}, page_size: {}\nread len: {}, expected len: {}\nfirst page range: {:?}, last page range: {:?}, page num: {}",
|
||||
offset, size, page_size, read.len(), size as usize,
|
||||
IndexDataPageKey::calculate_first_page_range(offset, size, page_size as u64),
|
||||
IndexDataPageKey::calculate_last_page_range(offset, size, page_size as u64), page_num
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn unpack(fst_value: u64) -> [u32; 2] {
|
||||
bytemuck::cast::<u64, [u32; 2]>(fst_value)
|
||||
}
|
||||
|
||||
async fn create_inverted_index_blob() -> Vec<u8> {
|
||||
let mut blob = Vec::new();
|
||||
let mut writer = InvertedIndexBlobWriter::new(&mut blob);
|
||||
writer
|
||||
.add_index(
|
||||
"tag0".to_string(),
|
||||
BitVec::from_slice(&[0b0000_0001, 0b0000_0000]),
|
||||
Box::new(stream::iter(vec![
|
||||
Ok((Bytes::from("a"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
Ok((Bytes::from("b"), BitVec::from_slice(&[0b0010_0000]))),
|
||||
Ok((Bytes::from("c"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
])),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
writer
|
||||
.add_index(
|
||||
"tag1".to_string(),
|
||||
BitVec::from_slice(&[0b0000_0001, 0b0000_0000]),
|
||||
Box::new(stream::iter(vec![
|
||||
Ok((Bytes::from("x"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
Ok((Bytes::from("y"), BitVec::from_slice(&[0b0010_0000]))),
|
||||
Ok((Bytes::from("z"), BitVec::from_slice(&[0b0000_0001]))),
|
||||
])),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
writer
|
||||
.finish(8, NonZeroUsize::new(1).unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
blob
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_inverted_index_cache() {
|
||||
let blob = create_inverted_index_blob().await;
|
||||
|
||||
// Init a test range reader in local fs.
|
||||
let mut env = TestEnv::new();
|
||||
let file_size = blob.len() as u64;
|
||||
let store = env.init_object_store_manager();
|
||||
let temp_path = "data";
|
||||
store.write(temp_path, blob).await.unwrap();
|
||||
let store = InstrumentedStore::new(store);
|
||||
let metric =
|
||||
register_int_counter_vec!("test_bytes", "a counter for test", &["test"]).unwrap();
|
||||
let counter = metric.with_label_values(&["test"]);
|
||||
let range_reader = store
|
||||
.range_reader("data", &counter, &counter)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let reader = InvertedIndexBlobReader::new(range_reader);
|
||||
let mut cached_reader = CachedInvertedIndexBlobReader::new(
|
||||
FileId::random(),
|
||||
file_size,
|
||||
reader,
|
||||
Arc::new(InvertedIndexCache::new(8192, 8192, 50)),
|
||||
);
|
||||
let metadata = cached_reader.metadata().await.unwrap();
|
||||
assert_eq!(metadata.total_row_count, 8);
|
||||
assert_eq!(metadata.segment_row_count, 1);
|
||||
assert_eq!(metadata.metas.len(), 2);
|
||||
// tag0
|
||||
let tag0 = metadata.metas.get("tag0").unwrap();
|
||||
let stats0 = tag0.stats.as_ref().unwrap();
|
||||
assert_eq!(stats0.distinct_count, 3);
|
||||
assert_eq!(stats0.null_count, 1);
|
||||
assert_eq!(stats0.min_value, Bytes::from("a"));
|
||||
assert_eq!(stats0.max_value, Bytes::from("c"));
|
||||
let fst0 = cached_reader
|
||||
.fst(
|
||||
tag0.base_offset + tag0.relative_fst_offset as u64,
|
||||
tag0.fst_size,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(fst0.len(), 3);
|
||||
let [offset, size] = unpack(fst0.get(b"a").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
let [offset, size] = unpack(fst0.get(b"b").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
|
||||
let [offset, size] = unpack(fst0.get(b"c").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
|
||||
// tag1
|
||||
let tag1 = metadata.metas.get("tag1").unwrap();
|
||||
let stats1 = tag1.stats.as_ref().unwrap();
|
||||
assert_eq!(stats1.distinct_count, 3);
|
||||
assert_eq!(stats1.null_count, 1);
|
||||
assert_eq!(stats1.min_value, Bytes::from("x"));
|
||||
assert_eq!(stats1.max_value, Bytes::from("z"));
|
||||
let fst1 = cached_reader
|
||||
.fst(
|
||||
tag1.base_offset + tag1.relative_fst_offset as u64,
|
||||
tag1.fst_size,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(fst1.len(), 3);
|
||||
let [offset, size] = unpack(fst1.get(b"x").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
let [offset, size] = unpack(fst1.get(b"y").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
|
||||
let [offset, size] = unpack(fst1.get(b"z").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
|
||||
|
||||
// fuzz test
|
||||
let mut rng = rand::thread_rng();
|
||||
for _ in 0..100 {
|
||||
let offset = rng.gen_range(0..file_size);
|
||||
let size = rng.gen_range(0..file_size as u32 - offset as u32);
|
||||
let expected = cached_reader.range_read(offset, size).await.unwrap();
|
||||
let read = cached_reader.get_or_load(offset, size).await.unwrap();
|
||||
assert_eq!(read, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
2
src/mito2/src/cache/write_cache.rs
vendored
@@ -501,7 +501,7 @@ mod tests {
|
||||
|
||||
// Read metadata from write cache
|
||||
let builder = ParquetReaderBuilder::new(data_home, handle.clone(), mock_store.clone())
|
||||
.cache(cache_manager.clone());
|
||||
.cache(Some(cache_manager.clone()));
|
||||
let reader = builder.build().await.unwrap();
|
||||
|
||||
// Check parquet metadata
|
||||
|
||||
@@ -570,7 +570,6 @@ pub struct SerializedCompactionOutput {
|
||||
struct CompactionSstReaderBuilder<'a> {
|
||||
metadata: RegionMetadataRef,
|
||||
sst_layer: AccessLayerRef,
|
||||
cache: CacheManagerRef,
|
||||
inputs: &'a [FileHandle],
|
||||
append_mode: bool,
|
||||
filter_deleted: bool,
|
||||
@@ -584,7 +583,7 @@ impl<'a> CompactionSstReaderBuilder<'a> {
|
||||
let mut scan_input = ScanInput::new(self.sst_layer, ProjectionMapper::all(&self.metadata)?)
|
||||
.with_files(self.inputs.to_vec())
|
||||
.with_append_mode(self.append_mode)
|
||||
.with_cache(self.cache)
|
||||
.with_cache(None)
|
||||
.with_filter_deleted(self.filter_deleted)
|
||||
// We ignore file not found error during compaction.
|
||||
.with_ignore_file_not_found(true)
|
||||
|
||||
@@ -296,7 +296,6 @@ impl Compactor for DefaultCompactor {
|
||||
let reader = CompactionSstReaderBuilder {
|
||||
metadata: region_metadata.clone(),
|
||||
sst_layer: sst_layer.clone(),
|
||||
cache: cache_manager.clone(),
|
||||
inputs: &output.inputs,
|
||||
append_mode,
|
||||
filter_deleted: output.filter_deleted,
|
||||
|
||||
@@ -304,6 +304,9 @@ pub struct IndexConfig {
|
||||
|
||||
/// Write buffer size for creating the index.
|
||||
pub write_buffer_size: ReadableSize,
|
||||
|
||||
/// Cache size for metadata of puffin files. Setting it to 0 to disable the cache.
|
||||
pub metadata_cache_size: ReadableSize,
|
||||
}
|
||||
|
||||
impl Default for IndexConfig {
|
||||
@@ -312,6 +315,7 @@ impl Default for IndexConfig {
|
||||
aux_path: String::new(),
|
||||
staging_size: ReadableSize::gb(2),
|
||||
write_buffer_size: ReadableSize::mb(8),
|
||||
metadata_cache_size: ReadableSize::mb(64),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -412,6 +416,8 @@ pub struct InvertedIndexConfig {
|
||||
pub metadata_cache_size: ReadableSize,
|
||||
/// Cache size for inverted index content. Setting it to 0 to disable the cache.
|
||||
pub content_cache_size: ReadableSize,
|
||||
/// Page size for inverted index content.
|
||||
pub content_cache_page_size: ReadableSize,
|
||||
}
|
||||
|
||||
impl InvertedIndexConfig {
|
||||
@@ -437,6 +443,7 @@ impl Default for InvertedIndexConfig {
|
||||
intermediate_path: String::new(),
|
||||
metadata_cache_size: ReadableSize::mb(64),
|
||||
content_cache_size: ReadableSize::mb(128),
|
||||
content_cache_page_size: ReadableSize::mb(8),
|
||||
};
|
||||
|
||||
if let Some(sys_memory) = common_config::utils::get_sys_total_memory() {
|
||||
|
||||
@@ -424,12 +424,16 @@ impl EngineInner {
|
||||
// Get cache.
|
||||
let cache_manager = self.workers.cache_manager();
|
||||
|
||||
let scan_region =
|
||||
ScanRegion::new(version, region.access_layer.clone(), request, cache_manager)
|
||||
.with_parallel_scan_channel_size(self.config.parallel_scan_channel_size)
|
||||
.with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled())
|
||||
.with_ignore_fulltext_index(self.config.fulltext_index.apply_on_query.disabled())
|
||||
.with_start_time(query_start);
|
||||
let scan_region = ScanRegion::new(
|
||||
version,
|
||||
region.access_layer.clone(),
|
||||
request,
|
||||
Some(cache_manager),
|
||||
)
|
||||
.with_parallel_scan_channel_size(self.config.parallel_scan_channel_size)
|
||||
.with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled())
|
||||
.with_ignore_fulltext_index(self.config.fulltext_index.apply_on_query.disabled())
|
||||
.with_start_time(query_start);
|
||||
|
||||
Ok(scan_region)
|
||||
}
|
||||
|
||||
@@ -893,6 +893,14 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to read file metadata"))]
|
||||
Metadata {
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
@@ -965,7 +973,8 @@ impl ErrorExt for Error {
|
||||
| CreateDir { .. }
|
||||
| ReadDataPart { .. }
|
||||
| CorruptedEntry { .. }
|
||||
| BuildEntry { .. } => StatusCode::Internal,
|
||||
| BuildEntry { .. }
|
||||
| Metadata { .. } => StatusCode::Internal,
|
||||
|
||||
OpenRegion { source, .. } => source.status_code(),
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ use crate::cache::{
|
||||
use crate::error::Result;
|
||||
use crate::read::{Batch, BatchReader, BoxedBatchReader};
|
||||
use crate::sst::file::FileId;
|
||||
use crate::sst::parquet::reader::RowGroupReader;
|
||||
use crate::sst::parquet::reader::{ReaderMetrics, RowGroupReader};
|
||||
|
||||
/// Reader to keep the last row for each time series.
|
||||
/// It assumes that batches from the input reader are
|
||||
@@ -86,7 +86,7 @@ impl RowGroupLastRowCachedReader {
|
||||
pub(crate) fn new(
|
||||
file_id: FileId,
|
||||
row_group_idx: usize,
|
||||
cache_manager: CacheManagerRef,
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
row_group_reader: RowGroupReader,
|
||||
) -> Self {
|
||||
let key = SelectorResultKey {
|
||||
@@ -95,6 +95,9 @@ impl RowGroupLastRowCachedReader {
|
||||
selector: TimeSeriesRowSelector::LastRow,
|
||||
};
|
||||
|
||||
let Some(cache_manager) = cache_manager else {
|
||||
return Self::new_miss(key, row_group_reader, None);
|
||||
};
|
||||
if let Some(value) = cache_manager.get_selector_result(&key) {
|
||||
let schema_matches = value.projection
|
||||
== row_group_reader
|
||||
@@ -105,10 +108,18 @@ impl RowGroupLastRowCachedReader {
|
||||
// Schema matches, use cache batches.
|
||||
Self::new_hit(value)
|
||||
} else {
|
||||
Self::new_miss(key, row_group_reader, cache_manager)
|
||||
Self::new_miss(key, row_group_reader, Some(cache_manager))
|
||||
}
|
||||
} else {
|
||||
Self::new_miss(key, row_group_reader, cache_manager)
|
||||
Self::new_miss(key, row_group_reader, Some(cache_manager))
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the underlying reader metrics if uncached.
|
||||
pub(crate) fn metrics(&self) -> Option<&ReaderMetrics> {
|
||||
match self {
|
||||
RowGroupLastRowCachedReader::Hit(_) => None,
|
||||
RowGroupLastRowCachedReader::Miss(reader) => Some(reader.metrics()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -122,7 +133,7 @@ impl RowGroupLastRowCachedReader {
|
||||
fn new_miss(
|
||||
key: SelectorResultKey,
|
||||
row_group_reader: RowGroupReader,
|
||||
cache_manager: CacheManagerRef,
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
) -> Self {
|
||||
selector_result_cache_miss();
|
||||
Self::Miss(RowGroupLastRowReader::new(
|
||||
@@ -167,13 +178,17 @@ pub(crate) struct RowGroupLastRowReader {
|
||||
reader: RowGroupReader,
|
||||
selector: LastRowSelector,
|
||||
yielded_batches: Vec<Batch>,
|
||||
cache_manager: CacheManagerRef,
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
/// Index buffer to take a new batch from the last row.
|
||||
take_index: UInt32Vector,
|
||||
}
|
||||
|
||||
impl RowGroupLastRowReader {
|
||||
fn new(key: SelectorResultKey, reader: RowGroupReader, cache_manager: CacheManagerRef) -> Self {
|
||||
fn new(
|
||||
key: SelectorResultKey,
|
||||
reader: RowGroupReader,
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
) -> Self {
|
||||
Self {
|
||||
key,
|
||||
reader,
|
||||
@@ -213,6 +228,9 @@ impl RowGroupLastRowReader {
|
||||
// we always expect that row groups yields batches.
|
||||
return;
|
||||
}
|
||||
let Some(cache) = &self.cache_manager else {
|
||||
return;
|
||||
};
|
||||
let value = Arc::new(SelectorResultValue {
|
||||
result: std::mem::take(&mut self.yielded_batches),
|
||||
projection: self
|
||||
@@ -222,7 +240,11 @@ impl RowGroupLastRowReader {
|
||||
.projection_indices()
|
||||
.to_vec(),
|
||||
});
|
||||
self.cache_manager.put_selector_result(self.key, value);
|
||||
cache.put_selector_result(self.key, value);
|
||||
}
|
||||
|
||||
fn metrics(&self) -> &ReaderMetrics {
|
||||
self.reader.metrics()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -171,7 +171,7 @@ impl ProjectionMapper {
|
||||
pub(crate) fn convert(
|
||||
&self,
|
||||
batch: &Batch,
|
||||
cache_manager: &CacheManager,
|
||||
cache_manager: Option<&CacheManager>,
|
||||
) -> common_recordbatch::error::Result<RecordBatch> {
|
||||
debug_assert_eq!(self.batch_fields.len(), batch.fields().len());
|
||||
debug_assert!(self
|
||||
@@ -204,12 +204,15 @@ impl ProjectionMapper {
|
||||
match index {
|
||||
BatchIndex::Tag(idx) => {
|
||||
let value = &pk_values[*idx];
|
||||
let vector = repeated_vector_with_cache(
|
||||
&column_schema.data_type,
|
||||
value,
|
||||
num_rows,
|
||||
cache_manager,
|
||||
)?;
|
||||
let vector = match cache_manager {
|
||||
Some(cache) => repeated_vector_with_cache(
|
||||
&column_schema.data_type,
|
||||
value,
|
||||
num_rows,
|
||||
cache,
|
||||
)?,
|
||||
None => new_repeated_vector(&column_schema.data_type, value, num_rows)?,
|
||||
};
|
||||
columns.push(vector);
|
||||
}
|
||||
BatchIndex::Timestamp => {
|
||||
@@ -357,7 +360,7 @@ mod tests {
|
||||
// With vector cache.
|
||||
let cache = CacheManager::builder().vector_cache_size(1024).build();
|
||||
let batch = new_batch(0, &[1, 2], &[(3, 3), (4, 4)], 3);
|
||||
let record_batch = mapper.convert(&batch, &cache).unwrap();
|
||||
let record_batch = mapper.convert(&batch, Some(&cache)).unwrap();
|
||||
let expect = "\
|
||||
+---------------------+----+----+----+----+
|
||||
| ts | k0 | k1 | v0 | v1 |
|
||||
@@ -377,7 +380,7 @@ mod tests {
|
||||
assert!(cache
|
||||
.get_repeated_vector(&ConcreteDataType::int64_datatype(), &Value::Int64(3))
|
||||
.is_none());
|
||||
let record_batch = mapper.convert(&batch, &cache).unwrap();
|
||||
let record_batch = mapper.convert(&batch, Some(&cache)).unwrap();
|
||||
assert_eq!(expect, print_record_batch(record_batch));
|
||||
}
|
||||
|
||||
@@ -398,8 +401,7 @@ mod tests {
|
||||
);
|
||||
|
||||
let batch = new_batch(0, &[1, 2], &[(4, 4)], 3);
|
||||
let cache = CacheManager::builder().vector_cache_size(1024).build();
|
||||
let record_batch = mapper.convert(&batch, &cache).unwrap();
|
||||
let record_batch = mapper.convert(&batch, None).unwrap();
|
||||
let expect = "\
|
||||
+----+----+
|
||||
| v1 | k0 |
|
||||
|
||||
@@ -72,11 +72,21 @@ impl PruneReader {
|
||||
self.source = source;
|
||||
}
|
||||
|
||||
pub(crate) fn metrics(&mut self) -> &ReaderMetrics {
|
||||
/// Merge metrics with the inner reader and return the merged metrics.
|
||||
pub(crate) fn metrics(&self) -> ReaderMetrics {
|
||||
let mut metrics = self.metrics.clone();
|
||||
match &self.source {
|
||||
Source::RowGroup(r) => r.metrics(),
|
||||
Source::LastRow(_) => &self.metrics,
|
||||
Source::RowGroup(r) => {
|
||||
metrics.merge_from(r.metrics());
|
||||
}
|
||||
Source::LastRow(r) => {
|
||||
if let Some(inner_metrics) = r.metrics() {
|
||||
metrics.merge_from(inner_metrics);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
metrics
|
||||
}
|
||||
|
||||
pub(crate) async fn next_batch(&mut self) -> Result<Option<Batch>> {
|
||||
|
||||
@@ -112,7 +112,7 @@ impl RangeMeta {
|
||||
Self::push_unordered_file_ranges(
|
||||
input.memtables.len(),
|
||||
&input.files,
|
||||
&input.cache_manager,
|
||||
input.cache_manager.as_deref(),
|
||||
&mut ranges,
|
||||
);
|
||||
|
||||
@@ -203,15 +203,16 @@ impl RangeMeta {
|
||||
fn push_unordered_file_ranges(
|
||||
num_memtables: usize,
|
||||
files: &[FileHandle],
|
||||
cache: &CacheManager,
|
||||
cache: Option<&CacheManager>,
|
||||
ranges: &mut Vec<RangeMeta>,
|
||||
) {
|
||||
// For append mode, we can parallelize reading row groups.
|
||||
for (i, file) in files.iter().enumerate() {
|
||||
let file_index = num_memtables + i;
|
||||
// Get parquet meta from the cache.
|
||||
let parquet_meta =
|
||||
cache.get_parquet_meta_data_from_mem_cache(file.region_id(), file.file_id());
|
||||
let parquet_meta = cache.and_then(|c| {
|
||||
c.get_parquet_meta_data_from_mem_cache(file.region_id(), file.file_id())
|
||||
});
|
||||
if let Some(parquet_meta) = parquet_meta {
|
||||
// Scans each row group.
|
||||
for row_group_index in 0..file.meta_ref().num_row_groups {
|
||||
|
||||
@@ -167,7 +167,7 @@ pub(crate) struct ScanRegion {
|
||||
/// Scan request.
|
||||
request: ScanRequest,
|
||||
/// Cache.
|
||||
cache_manager: CacheManagerRef,
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
/// Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
parallel_scan_channel_size: usize,
|
||||
/// Whether to ignore inverted index.
|
||||
@@ -184,7 +184,7 @@ impl ScanRegion {
|
||||
version: VersionRef,
|
||||
access_layer: AccessLayerRef,
|
||||
request: ScanRequest,
|
||||
cache_manager: CacheManagerRef,
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
) -> ScanRegion {
|
||||
ScanRegion {
|
||||
version,
|
||||
@@ -401,18 +401,27 @@ impl ScanRegion {
|
||||
}
|
||||
|
||||
let file_cache = || -> Option<FileCacheRef> {
|
||||
let write_cache = self.cache_manager.write_cache()?;
|
||||
let cache_manager = self.cache_manager.as_ref()?;
|
||||
let write_cache = cache_manager.write_cache()?;
|
||||
let file_cache = write_cache.file_cache();
|
||||
Some(file_cache)
|
||||
}();
|
||||
|
||||
let index_cache = self.cache_manager.index_cache().cloned();
|
||||
let index_cache = self
|
||||
.cache_manager
|
||||
.as_ref()
|
||||
.and_then(|c| c.index_cache())
|
||||
.cloned();
|
||||
|
||||
let puffin_metadata_cache = self
|
||||
.cache_manager
|
||||
.as_ref()
|
||||
.and_then(|c| c.puffin_metadata_cache())
|
||||
.cloned();
|
||||
|
||||
InvertedIndexApplierBuilder::new(
|
||||
self.access_layer.region_dir().to_string(),
|
||||
self.access_layer.object_store().clone(),
|
||||
file_cache,
|
||||
index_cache,
|
||||
self.version.metadata.as_ref(),
|
||||
self.version.metadata.inverted_indexed_column_ids(
|
||||
self.version
|
||||
@@ -424,6 +433,9 @@ impl ScanRegion {
|
||||
),
|
||||
self.access_layer.puffin_manager_factory().clone(),
|
||||
)
|
||||
.with_file_cache(file_cache)
|
||||
.with_index_cache(index_cache)
|
||||
.with_puffin_metadata_cache(puffin_metadata_cache)
|
||||
.build(&self.request.filters)
|
||||
.inspect_err(|err| warn!(err; "Failed to build invereted index applier"))
|
||||
.ok()
|
||||
@@ -477,7 +489,7 @@ pub(crate) struct ScanInput {
|
||||
/// Handles to SST files to scan.
|
||||
pub(crate) files: Vec<FileHandle>,
|
||||
/// Cache.
|
||||
pub(crate) cache_manager: CacheManagerRef,
|
||||
pub(crate) cache_manager: Option<CacheManagerRef>,
|
||||
/// Ignores file not found error.
|
||||
ignore_file_not_found: bool,
|
||||
/// Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
@@ -508,7 +520,7 @@ impl ScanInput {
|
||||
predicate: None,
|
||||
memtables: Vec::new(),
|
||||
files: Vec::new(),
|
||||
cache_manager: CacheManagerRef::default(),
|
||||
cache_manager: None,
|
||||
ignore_file_not_found: false,
|
||||
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,
|
||||
inverted_index_applier: None,
|
||||
@@ -551,7 +563,7 @@ impl ScanInput {
|
||||
|
||||
/// Sets cache for this query.
|
||||
#[must_use]
|
||||
pub(crate) fn with_cache(mut self, cache: CacheManagerRef) -> Self {
|
||||
pub(crate) fn with_cache(mut self, cache: Option<CacheManagerRef>) -> Self {
|
||||
self.cache_manager = cache;
|
||||
self
|
||||
}
|
||||
|
||||
@@ -181,8 +181,9 @@ pub(crate) fn scan_file_ranges(
|
||||
}
|
||||
yield batch;
|
||||
}
|
||||
if let Source::PruneReader(mut reader) = source {
|
||||
reader_metrics.merge_from(reader.metrics());
|
||||
if let Source::PruneReader(reader) = source {
|
||||
let prune_metrics = reader.metrics();
|
||||
reader_metrics.merge_from(&prune_metrics);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -257,7 +257,7 @@ impl SeqScan {
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
let cache = &stream_ctx.input.cache_manager;
|
||||
let cache = stream_ctx.input.cache_manager.as_deref();
|
||||
let mut metrics = ScannerMetrics::default();
|
||||
let mut fetch_start = Instant::now();
|
||||
#[cfg(debug_assertions)]
|
||||
|
||||
@@ -149,7 +149,7 @@ impl UnorderedScan {
|
||||
let stream = try_stream! {
|
||||
part_metrics.on_first_poll();
|
||||
|
||||
let cache = &stream_ctx.input.cache_manager;
|
||||
let cache = stream_ctx.input.cache_manager.as_deref();
|
||||
let range_builder_list = Arc::new(RangeBuilderList::new(
|
||||
stream_ctx.input.num_memtables(),
|
||||
stream_ctx.input.num_files(),
|
||||
|
||||
@@ -146,12 +146,33 @@ pub enum IndexType {
|
||||
}
|
||||
|
||||
impl FileMeta {
|
||||
/// Returns true if the file has an inverted index
|
||||
pub fn inverted_index_available(&self) -> bool {
|
||||
self.available_indexes.contains(&IndexType::InvertedIndex)
|
||||
}
|
||||
|
||||
/// Returns true if the file has a fulltext index
|
||||
pub fn fulltext_index_available(&self) -> bool {
|
||||
self.available_indexes.contains(&IndexType::FulltextIndex)
|
||||
}
|
||||
|
||||
/// Returns the size of the inverted index file
|
||||
pub fn inverted_index_size(&self) -> Option<u64> {
|
||||
if self.available_indexes.len() == 1 && self.inverted_index_available() {
|
||||
Some(self.index_file_size)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the size of the fulltext index file
|
||||
pub fn fulltext_index_size(&self) -> Option<u64> {
|
||||
if self.available_indexes.len() == 1 && self.fulltext_index_available() {
|
||||
Some(self.index_file_size)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle to a SST file.
|
||||
|
||||
@@ -18,7 +18,7 @@ pub(crate) mod intermediate;
|
||||
pub(crate) mod inverted_index;
|
||||
pub(crate) mod puffin_manager;
|
||||
mod statistics;
|
||||
mod store;
|
||||
pub(crate) mod store;
|
||||
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
|
||||
@@ -16,19 +16,23 @@ pub mod builder;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_base::range_read::RangeReader;
|
||||
use common_telemetry::warn;
|
||||
use index::inverted_index::format::reader::InvertedIndexBlobReader;
|
||||
use index::inverted_index::search::index_apply::{
|
||||
ApplyOutput, IndexApplier, IndexNotFoundStrategy, SearchContext,
|
||||
};
|
||||
use object_store::ObjectStore;
|
||||
use puffin::puffin_manager::cache::PuffinMetadataCacheRef;
|
||||
use puffin::puffin_manager::{BlobGuard, PuffinManager, PuffinReader};
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey};
|
||||
use crate::cache::index::{CachedInvertedIndexBlobReader, InvertedIndexCacheRef};
|
||||
use crate::error::{ApplyInvertedIndexSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result};
|
||||
use crate::error::{
|
||||
ApplyInvertedIndexSnafu, MetadataSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result,
|
||||
};
|
||||
use crate::metrics::{INDEX_APPLY_ELAPSED, INDEX_APPLY_MEMORY_USAGE};
|
||||
use crate::sst::file::FileId;
|
||||
use crate::sst::index::inverted_index::INDEX_BLOB_TYPE;
|
||||
@@ -60,6 +64,9 @@ pub(crate) struct InvertedIndexApplier {
|
||||
|
||||
/// In-memory cache for inverted index.
|
||||
inverted_index_cache: Option<InvertedIndexCacheRef>,
|
||||
|
||||
/// Puffin metadata cache.
|
||||
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
|
||||
}
|
||||
|
||||
pub(crate) type InvertedIndexApplierRef = Arc<InvertedIndexApplier>;
|
||||
@@ -70,8 +77,6 @@ impl InvertedIndexApplier {
|
||||
region_dir: String,
|
||||
region_id: RegionId,
|
||||
store: ObjectStore,
|
||||
file_cache: Option<FileCacheRef>,
|
||||
index_cache: Option<InvertedIndexCacheRef>,
|
||||
index_applier: Box<dyn IndexApplier>,
|
||||
puffin_manager_factory: PuffinManagerFactory,
|
||||
) -> Self {
|
||||
@@ -81,15 +86,37 @@ impl InvertedIndexApplier {
|
||||
region_dir,
|
||||
region_id,
|
||||
store,
|
||||
file_cache,
|
||||
file_cache: None,
|
||||
index_applier,
|
||||
puffin_manager_factory,
|
||||
inverted_index_cache: index_cache,
|
||||
inverted_index_cache: None,
|
||||
puffin_metadata_cache: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the file cache.
pub fn with_file_cache(mut self, file_cache: Option<FileCacheRef>) -> Self {
self.file_cache = file_cache;
self
}

/// Sets the index cache.
pub fn with_index_cache(mut self, index_cache: Option<InvertedIndexCacheRef>) -> Self {
self.inverted_index_cache = index_cache;
self
}

/// Sets the puffin metadata cache.
pub fn with_puffin_metadata_cache(
mut self,
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
) -> Self {
self.puffin_metadata_cache = puffin_metadata_cache;
self
}
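With the caches moved out of the constructor, callers attach them fluently; the wiring in ScanRegion and in InvertedIndexApplierBuilder::build elsewhere in this diff follows the same shape. A hedged sketch, with `region_dir`, `region_id`, `store`, `index_applier`, `factory`, the optional caches, `file_id` and `file_size` all assumed to be constructed elsewhere:

// Sketch only: builder-style setup of an applier and a single apply call.
let applier = InvertedIndexApplier::new(region_dir, region_id, store, index_applier, factory)
    .with_file_cache(file_cache)                         // Option<FileCacheRef>
    .with_index_cache(index_cache)                       // Option<InvertedIndexCacheRef>
    .with_puffin_metadata_cache(puffin_metadata_cache);  // Option<PuffinMetadataCacheRef>
let output = applier.apply(file_id, Some(file_size)).await?;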
/// Applies predicates to the provided SST file id and returns the relevant row group ids
|
||||
pub async fn apply(&self, file_id: FileId) -> Result<ApplyOutput> {
|
||||
pub async fn apply(&self, file_id: FileId, file_size_hint: Option<u64>) -> Result<ApplyOutput> {
|
||||
let _timer = INDEX_APPLY_ELAPSED
|
||||
.with_label_values(&[TYPE_INVERTED_INDEX])
|
||||
.start_timer();
|
||||
@@ -99,19 +126,25 @@ impl InvertedIndexApplier {
|
||||
index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty,
|
||||
};
|
||||
|
||||
let blob = match self.cached_blob_reader(file_id).await {
|
||||
let mut blob = match self.cached_blob_reader(file_id).await {
|
||||
Ok(Some(puffin_reader)) => puffin_reader,
|
||||
other => {
|
||||
if let Err(err) = other {
|
||||
warn!(err; "An unexpected error occurred while reading the cached index file. Fallback to remote index file.")
|
||||
}
|
||||
self.remote_blob_reader(file_id).await?
|
||||
self.remote_blob_reader(file_id, file_size_hint).await?
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(index_cache) = &self.inverted_index_cache {
|
||||
let file_size = if let Some(file_size) = file_size_hint {
|
||||
file_size
|
||||
} else {
|
||||
blob.metadata().await.context(MetadataSnafu)?.content_length
|
||||
};
|
||||
let mut index_reader = CachedInvertedIndexBlobReader::new(
|
||||
file_id,
|
||||
file_size,
|
||||
InvertedIndexBlobReader::new(blob),
|
||||
index_cache.clone(),
|
||||
);
|
||||
@@ -156,13 +189,22 @@ impl InvertedIndexApplier {
|
||||
}
|
||||
|
||||
/// Creates a blob reader from the remote index file.
|
||||
async fn remote_blob_reader(&self, file_id: FileId) -> Result<BlobReader> {
|
||||
let puffin_manager = self.puffin_manager_factory.build(self.store.clone());
|
||||
async fn remote_blob_reader(
|
||||
&self,
|
||||
file_id: FileId,
|
||||
file_size_hint: Option<u64>,
|
||||
) -> Result<BlobReader> {
|
||||
let puffin_manager = self
|
||||
.puffin_manager_factory
|
||||
.build(self.store.clone())
|
||||
.with_puffin_metadata_cache(self.puffin_metadata_cache.clone());
|
||||
|
||||
let file_path = location::index_file_path(&self.region_dir, file_id);
|
||||
puffin_manager
|
||||
.reader(&file_path)
|
||||
.await
|
||||
.context(PuffinBuildReaderSnafu)?
|
||||
.with_file_size_hint(file_size_hint)
|
||||
.blob(INDEX_BLOB_TYPE)
|
||||
.await
|
||||
.context(PuffinReadBlobSnafu)?
|
||||
@@ -219,12 +261,10 @@ mod tests {
|
||||
region_dir.clone(),
|
||||
RegionId::new(0, 0),
|
||||
object_store,
|
||||
None,
|
||||
None,
|
||||
Box::new(mock_index_applier),
|
||||
puffin_manager_factory,
|
||||
);
|
||||
let output = sst_index_applier.apply(file_id).await.unwrap();
|
||||
let output = sst_index_applier.apply(file_id, None).await.unwrap();
|
||||
assert_eq!(
|
||||
output,
|
||||
ApplyOutput {
|
||||
@@ -261,12 +301,10 @@ mod tests {
|
||||
region_dir.clone(),
|
||||
RegionId::new(0, 0),
|
||||
object_store,
|
||||
None,
|
||||
None,
|
||||
Box::new(mock_index_applier),
|
||||
puffin_manager_factory,
|
||||
);
|
||||
let res = sst_index_applier.apply(file_id).await;
|
||||
let res = sst_index_applier.apply(file_id, None).await;
|
||||
assert!(format!("{:?}", res.unwrap_err()).contains("Blob not found"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,6 +28,7 @@ use datatypes::value::Value;
|
||||
use index::inverted_index::search::index_apply::PredicatesIndexApplier;
|
||||
use index::inverted_index::search::predicate::Predicate;
|
||||
use object_store::ObjectStore;
|
||||
use puffin::puffin_manager::cache::PuffinMetadataCacheRef;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::metadata::RegionMetadata;
|
||||
use store_api::storage::ColumnId;
|
||||
@@ -65,6 +66,9 @@ pub(crate) struct InvertedIndexApplierBuilder<'a> {
|
||||
|
||||
/// Cache for inverted index.
|
||||
index_cache: Option<InvertedIndexCacheRef>,
|
||||
|
||||
/// Cache for puffin metadata.
|
||||
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
|
||||
}
|
||||
|
||||
impl<'a> InvertedIndexApplierBuilder<'a> {
|
||||
@@ -72,8 +76,6 @@ impl<'a> InvertedIndexApplierBuilder<'a> {
|
||||
pub fn new(
|
||||
region_dir: String,
|
||||
object_store: ObjectStore,
|
||||
file_cache: Option<FileCacheRef>,
|
||||
index_cache: Option<InvertedIndexCacheRef>,
|
||||
metadata: &'a RegionMetadata,
|
||||
indexed_column_ids: HashSet<ColumnId>,
|
||||
puffin_manager_factory: PuffinManagerFactory,
|
||||
@@ -81,15 +83,37 @@ impl<'a> InvertedIndexApplierBuilder<'a> {
|
||||
Self {
|
||||
region_dir,
|
||||
object_store,
|
||||
file_cache,
|
||||
metadata,
|
||||
indexed_column_ids,
|
||||
output: HashMap::default(),
|
||||
index_cache,
|
||||
puffin_manager_factory,
|
||||
file_cache: None,
|
||||
index_cache: None,
|
||||
puffin_metadata_cache: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the file cache.
|
||||
pub fn with_file_cache(mut self, file_cache: Option<FileCacheRef>) -> Self {
|
||||
self.file_cache = file_cache;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the puffin metadata cache.
|
||||
pub fn with_puffin_metadata_cache(
|
||||
mut self,
|
||||
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
|
||||
) -> Self {
|
||||
self.puffin_metadata_cache = puffin_metadata_cache;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the index cache.
|
||||
pub fn with_index_cache(mut self, index_cache: Option<InvertedIndexCacheRef>) -> Self {
|
||||
self.index_cache = index_cache;
|
||||
self
|
||||
}
|
||||
|
||||
/// Consumes the builder to construct an [`InvertedIndexApplier`], optionally returned based on
|
||||
/// the expressions provided. If no predicates match, returns `None`.
|
||||
pub fn build(mut self, exprs: &[Expr]) -> Result<Option<InvertedIndexApplier>> {
|
||||
@@ -108,15 +132,18 @@ impl<'a> InvertedIndexApplierBuilder<'a> {
            .collect();
        let applier = PredicatesIndexApplier::try_from(predicates);

        Ok(Some(InvertedIndexApplier::new(
            self.region_dir,
            self.metadata.region_id,
            self.object_store,
            self.file_cache,
            self.index_cache,
            Box::new(applier.context(BuildIndexApplierSnafu)?),
            self.puffin_manager_factory,
        )))
        Ok(Some(
            InvertedIndexApplier::new(
                self.region_dir,
                self.metadata.region_id,
                self.object_store,
                Box::new(applier.context(BuildIndexApplierSnafu)?),
                self.puffin_manager_factory,
            )
            .with_file_cache(self.file_cache)
            .with_puffin_metadata_cache(self.puffin_metadata_cache)
            .with_index_cache(self.index_cache),
        ))
    }
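The `build` refactor above stops threading the optional caches through `InvertedIndexApplier::new` and attaches them with chained `with_*` setters instead. A minimal, self-contained sketch of that builder shape (a hypothetical `Applier` type with `String` stand-ins for the cache handles; not the crate's real API):

```rust
#[derive(Default)]
struct Applier {
    file_cache: Option<String>, // stand-ins for FileCacheRef and friends
    index_cache: Option<String>,
    puffin_metadata_cache: Option<String>,
}

impl Applier {
    fn new() -> Self {
        Self::default()
    }

    // Each setter consumes and returns `self`, so optional fields can be
    // attached in any combination without widening the constructor.
    fn with_file_cache(mut self, cache: Option<String>) -> Self {
        self.file_cache = cache;
        self
    }

    fn with_index_cache(mut self, cache: Option<String>) -> Self {
        self.index_cache = cache;
        self
    }

    fn with_puffin_metadata_cache(mut self, cache: Option<String>) -> Self {
        self.puffin_metadata_cache = cache;
        self
    }
}

fn main() {
    // Mirrors the call shape used in `build` above.
    let applier = Applier::new()
        .with_file_cache(None)
        .with_index_cache(Some("index-cache".to_string()))
        .with_puffin_metadata_cache(None);
    assert!(
        applier.file_cache.is_none()
            && applier.index_cache.is_some()
            && applier.puffin_metadata_cache.is_none()
    );
}
```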
|
||||
|
||||
/// Recursively traverses expressions to collect predicates.
|
||||
@@ -322,8 +349,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
|
||||
@@ -75,8 +75,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -118,8 +116,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -144,8 +140,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -187,8 +181,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -214,8 +206,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
|
||||
@@ -231,8 +231,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -260,8 +258,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -280,8 +276,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -315,8 +309,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
|
||||
@@ -137,8 +137,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -175,8 +173,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -204,8 +200,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -224,8 +218,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -244,8 +236,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -303,8 +293,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -341,8 +329,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
|
||||
@@ -68,8 +68,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -101,8 +99,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -126,8 +122,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -159,8 +153,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -186,8 +178,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
|
||||
@@ -62,8 +62,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -91,8 +89,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -120,8 +116,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
@@ -142,8 +136,6 @@ mod tests {
|
||||
let mut builder = InvertedIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::from_iter([1, 2, 3]),
|
||||
facotry,
|
||||
|
||||
@@ -310,12 +310,14 @@ mod tests {
|
||||
use futures::future::BoxFuture;
|
||||
use object_store::services::Memory;
|
||||
use object_store::ObjectStore;
|
||||
use puffin::puffin_manager::cache::PuffinMetadataCache;
|
||||
use puffin::puffin_manager::PuffinManager;
|
||||
use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use super::*;
|
||||
use crate::cache::index::InvertedIndexCache;
|
||||
use crate::metrics::CACHE_BYTES;
|
||||
use crate::read::BatchColumn;
|
||||
use crate::row_converter::{McmpRowCodec, RowCodec, SortField};
|
||||
use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder;
|
||||
@@ -446,22 +448,23 @@ mod tests {
|
||||
|
||||
move |expr| {
|
||||
let _d = &d;
|
||||
let cache = Arc::new(InvertedIndexCache::new(10, 10));
|
||||
let cache = Arc::new(InvertedIndexCache::new(10, 10, 100));
|
||||
let puffin_metadata_cache = Arc::new(PuffinMetadataCache::new(10, &CACHE_BYTES));
|
||||
let applier = InvertedIndexApplierBuilder::new(
|
||||
region_dir.clone(),
|
||||
object_store.clone(),
|
||||
None,
|
||||
Some(cache),
|
||||
®ion_metadata,
|
||||
indexed_column_ids.clone(),
|
||||
factory.clone(),
|
||||
)
|
||||
.with_index_cache(Some(cache))
|
||||
.with_puffin_metadata_cache(Some(puffin_metadata_cache))
|
||||
.build(&[expr])
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
Box::pin(async move {
|
||||
applier
|
||||
.apply(sst_file_id)
|
||||
.apply(sst_file_id, None)
|
||||
.await
|
||||
.unwrap()
|
||||
.matched_segment_ids
|
||||
|
||||
@@ -68,6 +68,7 @@ impl InstrumentedStore {
            path: path.to_string(),
            read_byte_count,
            read_count,
            file_size_hint: None,
        })
    }

@@ -262,15 +263,27 @@ pub(crate) struct InstrumentedRangeReader<'a> {
    path: String,
    read_byte_count: &'a IntCounter,
    read_count: &'a IntCounter,
    file_size_hint: Option<u64>,
}

#[async_trait]
impl RangeReader for InstrumentedRangeReader<'_> {
    fn with_file_size_hint(&mut self, file_size_hint: u64) {
        self.file_size_hint = Some(file_size_hint);
    }

    async fn metadata(&mut self) -> io::Result<Metadata> {
        let stat = self.store.stat(&self.path).await?;
        Ok(Metadata {
            content_length: stat.content_length(),
        })
        match self.file_size_hint {
            Some(file_size_hint) => Ok(Metadata {
                content_length: file_size_hint,
            }),
            None => {
                let stat = self.store.stat(&self.path).await?;
                Ok(Metadata {
                    content_length: stat.content_length(),
                })
            }
        }
    }

    async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
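The point of `with_file_size_hint` is that a caller which already knows the object's length (for example, from SST metadata) can answer `metadata()` locally instead of issuing another `stat` to the object store. A minimal, self-contained sketch of that fallback, with simplified stand-ins for the real `RangeReader` and `Metadata` types:

```rust
struct Metadata {
    content_length: u64,
}

struct Reader {
    file_size_hint: Option<u64>,
}

impl Reader {
    fn with_file_size_hint(&mut self, hint: u64) {
        self.file_size_hint = Some(hint);
    }

    // Pretend remote `stat` call; in the real code this is an object-store request.
    fn stat(&self) -> Metadata {
        Metadata { content_length: 4096 }
    }

    fn metadata(&self) -> Metadata {
        match self.file_size_hint {
            // Hint present: answer locally, no remote round trip.
            Some(len) => Metadata { content_length: len },
            // No hint: fall back to stat'ing the object.
            None => self.stat(),
        }
    }
}

fn main() {
    let mut reader = Reader { file_size_hint: None };
    assert_eq!(reader.metadata().content_length, 4096);
    reader.with_file_size_hint(1024);
    assert_eq!(reader.metadata().content_length, 1024);
}
```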
|
||||
|
||||
@@ -195,11 +195,11 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
// Enable page cache.
|
||||
let cache = Arc::new(
|
||||
let cache = Some(Arc::new(
|
||||
CacheManager::builder()
|
||||
.page_cache_size(64 * 1024 * 1024)
|
||||
.build(),
|
||||
);
|
||||
));
|
||||
let builder = ParquetReaderBuilder::new(FILE_DIR.to_string(), handle.clone(), object_store)
|
||||
.cache(cache.clone());
|
||||
for _ in 0..3 {
|
||||
@@ -219,15 +219,15 @@ mod tests {
|
||||
|
||||
// Doesn't have compressed page cached.
|
||||
let page_key = PageKey::new_compressed(metadata.region_id, handle.file_id(), 0, 0);
|
||||
assert!(cache.get_pages(&page_key).is_none());
|
||||
assert!(cache.as_ref().unwrap().get_pages(&page_key).is_none());
|
||||
|
||||
// Cache 4 row groups.
|
||||
for i in 0..4 {
|
||||
let page_key = PageKey::new_uncompressed(metadata.region_id, handle.file_id(), i, 0);
|
||||
assert!(cache.get_pages(&page_key).is_some());
|
||||
assert!(cache.as_ref().unwrap().get_pages(&page_key).is_some());
|
||||
}
|
||||
let page_key = PageKey::new_uncompressed(metadata.region_id, handle.file_id(), 5, 0);
|
||||
assert!(cache.get_pages(&page_key).is_none());
|
||||
assert!(cache.as_ref().unwrap().get_pages(&page_key).is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -82,7 +82,7 @@ pub struct ParquetReaderBuilder {
|
||||
/// can contain columns not in the parquet file.
|
||||
projection: Option<Vec<ColumnId>>,
|
||||
/// Manager that caches SST data.
|
||||
cache_manager: CacheManagerRef,
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
/// Index appliers.
|
||||
inverted_index_applier: Option<InvertedIndexApplierRef>,
|
||||
fulltext_index_applier: Option<FulltextIndexApplierRef>,
|
||||
@@ -106,7 +106,7 @@ impl ParquetReaderBuilder {
|
||||
predicate: None,
|
||||
time_range: None,
|
||||
projection: None,
|
||||
cache_manager: CacheManagerRef::default(),
|
||||
cache_manager: None,
|
||||
inverted_index_applier: None,
|
||||
fulltext_index_applier: None,
|
||||
expected_metadata: None,
|
||||
@@ -138,7 +138,7 @@ impl ParquetReaderBuilder {
|
||||
|
||||
/// Attaches the cache to the builder.
|
||||
#[must_use]
|
||||
pub fn cache(mut self, cache: CacheManagerRef) -> ParquetReaderBuilder {
|
||||
pub fn cache(mut self, cache: Option<CacheManagerRef>) -> ParquetReaderBuilder {
|
||||
self.cache_manager = cache;
|
||||
self
|
||||
}
|
||||
@@ -313,12 +313,10 @@ impl ParquetReaderBuilder {
|
||||
let region_id = self.file_handle.region_id();
|
||||
let file_id = self.file_handle.file_id();
|
||||
// Tries to get from global cache.
|
||||
if let Some(metadata) = self
|
||||
.cache_manager
|
||||
.get_parquet_meta_data(region_id, file_id)
|
||||
.await
|
||||
{
|
||||
return Ok(metadata);
|
||||
if let Some(manager) = &self.cache_manager {
|
||||
if let Some(metadata) = manager.get_parquet_meta_data(region_id, file_id).await {
|
||||
return Ok(metadata);
|
||||
}
|
||||
}
|
||||
|
||||
// Cache miss, load metadata directly.
|
||||
@@ -326,11 +324,13 @@ impl ParquetReaderBuilder {
|
||||
let metadata = metadata_loader.load().await?;
|
||||
let metadata = Arc::new(metadata);
|
||||
// Cache the metadata.
|
||||
self.cache_manager.put_parquet_meta_data(
|
||||
self.file_handle.region_id(),
|
||||
self.file_handle.file_id(),
|
||||
metadata.clone(),
|
||||
);
|
||||
if let Some(cache) = &self.cache_manager {
|
||||
cache.put_parquet_meta_data(
|
||||
self.file_handle.region_id(),
|
||||
self.file_handle.file_id(),
|
||||
metadata.clone(),
|
||||
);
|
||||
}
|
||||
|
||||
Ok(metadata)
|
||||
}
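With `cache_manager` now an `Option<CacheManagerRef>`, every lookup and every put is wrapped in an `if let Some(..)` guard, as in the hunk above. A generic, self-contained sketch of the same get-or-load-then-put shape (hypothetical types; not the crate's API):

```rust
use std::collections::HashMap;
use std::sync::{Arc, Mutex};

type Cache = Arc<Mutex<HashMap<u64, Arc<String>>>>;

// Get-or-load with an optional cache: on a miss (or when no cache is
// configured) the value is loaded, then stored only if a cache exists.
fn load_metadata(cache: &Option<Cache>, file_id: u64, load: impl Fn() -> String) -> Arc<String> {
    if let Some(cache) = cache {
        if let Some(hit) = cache.lock().unwrap().get(&file_id) {
            return hit.clone();
        }
    }
    let value = Arc::new(load());
    if let Some(cache) = cache {
        cache.lock().unwrap().insert(file_id, value.clone());
    }
    value
}

fn main() {
    let cache: Option<Cache> = Some(Arc::new(Mutex::new(HashMap::new())));
    let v1 = load_metadata(&cache, 7, || "parquet-meta".to_string());
    let v2 = load_metadata(&cache, 7, || unreachable!("should hit the cache"));
    assert!(Arc::ptr_eq(&v1, &v2));

    // With no cache configured, the loader simply runs every time.
    let no_cache: Option<Cache> = None;
    let _ = load_metadata(&no_cache, 7, || "parquet-meta".to_string());
}
```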
|
||||
@@ -475,8 +475,11 @@ impl ParquetReaderBuilder {
|
||||
if !self.file_handle.meta_ref().inverted_index_available() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let apply_output = match index_applier.apply(self.file_handle.file_id()).await {
|
||||
let file_size_hint = self.file_handle.meta_ref().inverted_index_size();
|
||||
let apply_output = match index_applier
|
||||
.apply(self.file_handle.file_id(), file_size_hint)
|
||||
.await
|
||||
{
|
||||
Ok(output) => output,
|
||||
Err(err) => {
|
||||
if cfg!(any(test, feature = "test")) {
|
||||
@@ -846,7 +849,7 @@ pub(crate) struct RowGroupReaderBuilder {
|
||||
/// Field levels to read.
|
||||
field_levels: FieldLevels,
|
||||
/// Cache.
|
||||
cache_manager: CacheManagerRef,
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
}
|
||||
|
||||
impl RowGroupReaderBuilder {
|
||||
@@ -864,7 +867,7 @@ impl RowGroupReaderBuilder {
|
||||
&self.parquet_meta
|
||||
}
|
||||
|
||||
pub(crate) fn cache_manager(&self) -> &CacheManagerRef {
|
||||
pub(crate) fn cache_manager(&self) -> &Option<CacheManagerRef> {
|
||||
&self.cache_manager
|
||||
}
|
||||
|
||||
@@ -915,10 +918,10 @@ enum ReaderState {
|
||||
|
||||
impl ReaderState {
|
||||
/// Returns the metrics of the reader.
|
||||
fn metrics(&mut self) -> &ReaderMetrics {
|
||||
fn metrics(&self) -> ReaderMetrics {
|
||||
match self {
|
||||
ReaderState::Readable(reader) => reader.metrics(),
|
||||
ReaderState::Exhausted(m) => m,
|
||||
ReaderState::Exhausted(m) => m.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ pub struct InMemoryRowGroup<'a> {
|
||||
region_id: RegionId,
|
||||
file_id: FileId,
|
||||
row_group_idx: usize,
|
||||
cache_manager: CacheManagerRef,
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
/// Row group level cached pages for each column.
|
||||
///
|
||||
/// These pages are uncompressed pages of a row group.
|
||||
@@ -69,7 +69,7 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
file_id: FileId,
|
||||
parquet_meta: &'a ParquetMetaData,
|
||||
row_group_idx: usize,
|
||||
cache_manager: CacheManagerRef,
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
file_path: &'a str,
|
||||
object_store: ObjectStore,
|
||||
) -> Self {
|
||||
@@ -208,18 +208,19 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
};
|
||||
|
||||
let column = self.metadata.column(idx);
|
||||
|
||||
if !cache_uncompressed_pages(column) {
|
||||
// For columns that have multiple uncompressed pages, we only cache the compressed page
|
||||
// to save memory.
|
||||
let page_key = PageKey::new_compressed(
|
||||
self.region_id,
|
||||
self.file_id,
|
||||
self.row_group_idx,
|
||||
idx,
|
||||
);
|
||||
self.cache_manager
|
||||
.put_pages(page_key, Arc::new(PageValue::new_compressed(data.clone())));
|
||||
if let Some(cache) = &self.cache_manager {
|
||||
if !cache_uncompressed_pages(column) {
|
||||
// For columns that have multiple uncompressed pages, we only cache the compressed page
|
||||
// to save memory.
|
||||
let page_key = PageKey::new_compressed(
|
||||
self.region_id,
|
||||
self.file_id,
|
||||
self.row_group_idx,
|
||||
idx,
|
||||
);
|
||||
cache
|
||||
.put_pages(page_key, Arc::new(PageValue::new_compressed(data.clone())));
|
||||
}
|
||||
}
|
||||
|
||||
*chunk = Some(Arc::new(ColumnChunkData::Dense {
|
||||
@@ -241,6 +242,9 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
.enumerate()
|
||||
.filter(|(idx, chunk)| chunk.is_none() && projection.leaf_included(*idx))
|
||||
.for_each(|(idx, chunk)| {
|
||||
let Some(cache) = &self.cache_manager else {
|
||||
return;
|
||||
};
|
||||
let column = self.metadata.column(idx);
|
||||
if cache_uncompressed_pages(column) {
|
||||
// Fetches uncompressed pages for the row group.
|
||||
@@ -250,7 +254,7 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
self.row_group_idx,
|
||||
idx,
|
||||
);
|
||||
self.column_uncompressed_pages[idx] = self.cache_manager.get_pages(&page_key);
|
||||
self.column_uncompressed_pages[idx] = cache.get_pages(&page_key);
|
||||
} else {
|
||||
// Fetches the compressed page from the cache.
|
||||
let page_key = PageKey::new_compressed(
|
||||
@@ -260,7 +264,7 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
idx,
|
||||
);
|
||||
|
||||
*chunk = self.cache_manager.get_pages(&page_key).map(|page_value| {
|
||||
*chunk = cache.get_pages(&page_key).map(|page_value| {
|
||||
Arc::new(ColumnChunkData::Dense {
|
||||
offset: column.byte_range().0 as usize,
|
||||
data: page_value.compressed.clone(),
|
||||
@@ -296,7 +300,7 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
key: IndexKey,
|
||||
ranges: &[Range<u64>],
|
||||
) -> Option<Vec<Bytes>> {
|
||||
if let Some(cache) = self.cache_manager.write_cache() {
|
||||
if let Some(cache) = self.cache_manager.as_ref()?.write_cache() {
|
||||
return cache.file_cache().read_ranges(key, ranges).await;
|
||||
}
|
||||
None
|
||||
@@ -327,6 +331,10 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
}
|
||||
};
|
||||
|
||||
let Some(cache) = &self.cache_manager else {
|
||||
return Ok(Box::new(page_reader));
|
||||
};
|
||||
|
||||
let column = self.metadata.column(i);
|
||||
if cache_uncompressed_pages(column) {
|
||||
// This column use row group level page cache.
|
||||
@@ -335,7 +343,7 @@ impl<'a> InMemoryRowGroup<'a> {
|
||||
let page_value = Arc::new(PageValue::new_row_group(pages));
|
||||
let page_key =
|
||||
PageKey::new_uncompressed(self.region_id, self.file_id, self.row_group_idx, i);
|
||||
self.cache_manager.put_pages(page_key, page_value.clone());
|
||||
cache.put_pages(page_key, page_value.clone());
|
||||
|
||||
return Ok(Box::new(RowGroupCachedReader::new(&page_value.row_group)));
|
||||
}
|
||||
|
||||
@@ -35,8 +35,7 @@ use api::v1::{OpType, Row, Rows, SemanticType};
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_base::Plugins;
|
||||
use common_datasource::compression::CompressionType;
|
||||
use common_meta::cache::{new_schema_cache, new_table_info_cache, new_table_schema_cache};
|
||||
use common_meta::key::schema_name::{SchemaName, SchemaNameValue};
|
||||
use common_meta::cache::{new_schema_cache, new_table_schema_cache};
|
||||
use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef};
|
||||
use common_meta::kv_backend::memory::MemoryKvBackend;
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
@@ -49,7 +48,7 @@ use datatypes::schema::ColumnSchema;
|
||||
use log_store::kafka::log_store::KafkaLogStore;
|
||||
use log_store::raft_engine::log_store::RaftEngineLogStore;
|
||||
use log_store::test_util::log_store_util;
|
||||
use moka::future::{Cache, CacheBuilder};
|
||||
use moka::future::CacheBuilder;
|
||||
use object_store::manager::{ObjectStoreManager, ObjectStoreManagerRef};
|
||||
use object_store::services::Fs;
|
||||
use object_store::ObjectStore;
|
||||
|
||||
@@ -170,6 +170,8 @@ impl WorkerGroup {
|
||||
.selector_result_cache_size(config.selector_result_cache_size.as_bytes())
|
||||
.index_metadata_size(config.inverted_index.metadata_cache_size.as_bytes())
|
||||
.index_content_size(config.inverted_index.content_cache_size.as_bytes())
|
||||
.index_content_page_size(config.inverted_index.content_cache_page_size.as_bytes())
|
||||
.puffin_metadata_size(config.index.metadata_cache_size.as_bytes())
|
||||
.write_cache(write_cache)
|
||||
.build(),
|
||||
);
|
||||
|
||||
@@ -223,7 +223,7 @@ transform:
|
||||
type: uint32
|
||||
"#;
|
||||
|
||||
parse(&Content::Yaml(pipeline_yaml.into())).unwrap()
|
||||
parse(&Content::Yaml(pipeline_yaml)).unwrap()
|
||||
}
|
||||
|
||||
fn criterion_benchmark(c: &mut Criterion) {
|
||||
|
||||
@@ -37,9 +37,9 @@ const PROCESSORS: &str = "processors";
|
||||
const TRANSFORM: &str = "transform";
|
||||
const TRANSFORMS: &str = "transforms";
|
||||
|
||||
pub enum Content {
|
||||
Json(String),
|
||||
Yaml(String),
|
||||
pub enum Content<'a> {
|
||||
Json(&'a str),
|
||||
Yaml(&'a str),
|
||||
}
|
||||
|
||||
pub fn parse<T>(input: &Content) -> Result<Pipeline<T>>
|
||||
@@ -379,8 +379,7 @@ transform:
|
||||
- field: field2
|
||||
type: uint32
|
||||
"#;
|
||||
let pipeline: Pipeline<GreptimeTransformer> =
|
||||
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
|
||||
let pipeline: Pipeline<GreptimeTransformer> = parse(&Content::Yaml(pipeline_yaml)).unwrap();
|
||||
let mut payload = pipeline.init_intermediate_state();
|
||||
pipeline.prepare(input_value, &mut payload).unwrap();
|
||||
assert_eq!(&["my_field"].to_vec(), pipeline.required_keys());
|
||||
@@ -432,8 +431,7 @@ transform:
|
||||
- field: ts
|
||||
type: timestamp, ns
|
||||
index: time"#;
|
||||
let pipeline: Pipeline<GreptimeTransformer> =
|
||||
parse(&Content::Yaml(pipeline_str.into())).unwrap();
|
||||
let pipeline: Pipeline<GreptimeTransformer> = parse(&Content::Yaml(pipeline_str)).unwrap();
|
||||
let mut payload = pipeline.init_intermediate_state();
|
||||
pipeline
|
||||
.prepare(serde_json::Value::String(message), &mut payload)
|
||||
@@ -509,8 +507,7 @@ transform:
|
||||
type: uint32
|
||||
"#;
|
||||
|
||||
let pipeline: Pipeline<GreptimeTransformer> =
|
||||
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
|
||||
let pipeline: Pipeline<GreptimeTransformer> = parse(&Content::Yaml(pipeline_yaml)).unwrap();
|
||||
let mut payload = pipeline.init_intermediate_state();
|
||||
pipeline.prepare(input_value, &mut payload).unwrap();
|
||||
assert_eq!(&["my_field"].to_vec(), pipeline.required_keys());
|
||||
@@ -554,8 +551,7 @@ transform:
|
||||
index: time
|
||||
"#;
|
||||
|
||||
let pipeline: Pipeline<GreptimeTransformer> =
|
||||
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
|
||||
let pipeline: Pipeline<GreptimeTransformer> = parse(&Content::Yaml(pipeline_yaml)).unwrap();
|
||||
let schema = pipeline.schemas().clone();
|
||||
let mut result = pipeline.init_intermediate_state();
|
||||
pipeline.prepare(input_value, &mut result).unwrap();
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
pub mod coerce;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use ahash::HashMap;
|
||||
use api::helper::proto_value_type;
|
||||
@@ -367,20 +368,15 @@ fn json_value_to_row(
|
||||
Ok(Row { values: row })
|
||||
}
|
||||
|
||||
/// Identity pipeline for Greptime
|
||||
/// This pipeline will convert the input JSON array to Greptime Rows
|
||||
/// 1. The pipeline will add a default timestamp column to the schema
|
||||
/// 2. The pipeline not resolve NULL value
|
||||
/// 3. The pipeline assumes that the json format is fixed
|
||||
/// 4. The pipeline will return an error if the same column datatype is mismatched
|
||||
/// 5. The pipeline will analyze the schema of each json record and merge them to get the final schema.
|
||||
pub fn identity_pipeline(array: Vec<serde_json::Value>) -> Result<Rows> {
|
||||
fn identity_pipeline_inner<'a>(
|
||||
array: Vec<serde_json::Value>,
|
||||
tag_column_names: Option<impl Iterator<Item = &'a String>>,
|
||||
) -> Result<Rows> {
|
||||
let mut rows = Vec::with_capacity(array.len());
|
||||
|
||||
let mut schema = SchemaInfo::default();
|
||||
let mut schema_info = SchemaInfo::default();
|
||||
for value in array {
|
||||
if let serde_json::Value::Object(map) = value {
|
||||
let row = json_value_to_row(&mut schema, map)?;
|
||||
let row = json_value_to_row(&mut schema_info, map)?;
|
||||
rows.push(row);
|
||||
}
|
||||
}
|
||||
@@ -395,7 +391,7 @@ pub fn identity_pipeline(array: Vec<serde_json::Value>) -> Result<Rows> {
|
||||
let ts = GreptimeValue {
|
||||
value_data: Some(ValueData::TimestampNanosecondValue(ns)),
|
||||
};
|
||||
let column_count = schema.schema.len();
|
||||
let column_count = schema_info.schema.len();
|
||||
for row in rows.iter_mut() {
|
||||
let diff = column_count - row.values.len();
|
||||
for _ in 0..diff {
|
||||
@@ -403,15 +399,49 @@ pub fn identity_pipeline(array: Vec<serde_json::Value>) -> Result<Rows> {
|
||||
}
|
||||
row.values.push(ts.clone());
|
||||
}
|
||||
schema.schema.push(greptime_timestamp_schema);
|
||||
schema_info.schema.push(greptime_timestamp_schema);
|
||||
|
||||
// set the semantic type of the row key column to Tag
|
||||
if let Some(tag_column_names) = tag_column_names {
|
||||
tag_column_names.for_each(|tag_column_name| {
|
||||
if let Some(index) = schema_info.index.get(tag_column_name) {
|
||||
schema_info.schema[*index].semantic_type = SemanticType::Tag as i32;
|
||||
}
|
||||
});
|
||||
}
|
||||
Ok(Rows {
|
||||
schema: schema.schema,
|
||||
schema: schema_info.schema,
|
||||
rows,
|
||||
})
|
||||
}
|
||||
|
||||
/// Identity pipeline for Greptime
/// This pipeline will convert the input JSON array to Greptime Rows
/// The `table` parameter is used to set the semantic type of the row key columns to Tag
/// 1. The pipeline will add a default timestamp column to the schema
/// 2. The pipeline does not resolve NULL values
/// 3. The pipeline assumes that the JSON format is fixed
/// 4. The pipeline will return an error if the same column's datatype is mismatched
/// 5. The pipeline will analyze the schema of each JSON record and merge them to get the final schema.
pub fn identity_pipeline(
    array: Vec<serde_json::Value>,
    table: Option<Arc<table::Table>>,
) -> Result<Rows> {
    match table {
        Some(table) => {
            let table_info = table.table_info();
            let tag_column_names = table_info.meta.row_key_column_names();
            identity_pipeline_inner(array, Some(tag_column_names))
        }
        None => identity_pipeline_inner(array, None::<std::iter::Empty<&String>>),
    }
}
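The tag-marking step inside `identity_pipeline_inner` is the behavioural change here: schema columns named by the table's row keys are flipped from fields to tags. A self-contained sketch of that step with simplified stand-ins for the proto types (the real code resolves names through the builder's name-to-index map rather than a linear search):

```rust
/// Simplified stand-ins for the Greptime proto types used above.
#[derive(Debug, PartialEq)]
enum SemanticType {
    Field,
    Tag,
}

struct ColumnSchema {
    column_name: String,
    semantic_type: SemanticType,
}

// Marks every schema column whose name is a row-key column as a Tag,
// mirroring the loop over `tag_column_names` in `identity_pipeline_inner`.
fn mark_tags<'a>(schema: &mut [ColumnSchema], tag_column_names: impl Iterator<Item = &'a String>) {
    for name in tag_column_names {
        if let Some(col) = schema.iter_mut().find(|c| &c.column_name == name) {
            col.semantic_type = SemanticType::Tag;
        }
    }
}

fn main() {
    let mut schema = vec![
        ColumnSchema { column_name: "host".into(), semantic_type: SemanticType::Field },
        ColumnSchema { column_name: "value".into(), semantic_type: SemanticType::Field },
    ];
    let tags = vec!["host".to_string()];
    mark_tags(&mut schema, tags.iter());
    assert_eq!(schema[0].semantic_type, SemanticType::Tag);
    assert_eq!(schema[1].semantic_type, SemanticType::Field);
}
```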
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use api::v1::SemanticType;
|
||||
|
||||
use crate::etl::transform::transformer::greptime::identity_pipeline_inner;
|
||||
use crate::identity_pipeline;
|
||||
|
||||
#[test]
|
||||
@@ -437,7 +467,7 @@ mod tests {
|
||||
"gaga": "gaga"
|
||||
}),
|
||||
];
|
||||
let rows = identity_pipeline(array);
|
||||
let rows = identity_pipeline(array, None);
|
||||
assert!(rows.is_err());
|
||||
assert_eq!(
|
||||
rows.err().unwrap().to_string(),
|
||||
@@ -465,7 +495,7 @@ mod tests {
|
||||
"gaga": "gaga"
|
||||
}),
|
||||
];
|
||||
let rows = identity_pipeline(array);
|
||||
let rows = identity_pipeline(array, None);
|
||||
assert!(rows.is_err());
|
||||
assert_eq!(
|
||||
rows.err().unwrap().to_string(),
|
||||
@@ -493,7 +523,7 @@ mod tests {
|
||||
"gaga": "gaga"
|
||||
}),
|
||||
];
|
||||
let rows = identity_pipeline(array);
|
||||
let rows = identity_pipeline(array, None);
|
||||
assert!(rows.is_ok());
|
||||
let rows = rows.unwrap();
|
||||
assert_eq!(rows.schema.len(), 8);
|
||||
@@ -501,5 +531,58 @@ mod tests {
|
||||
assert_eq!(8, rows.rows[0].values.len());
|
||||
assert_eq!(8, rows.rows[1].values.len());
|
||||
}
|
||||
{
|
||||
let array = vec![
|
||||
serde_json::json!({
|
||||
"woshinull": null,
|
||||
"name": "Alice",
|
||||
"age": 20,
|
||||
"is_student": true,
|
||||
"score": 99.5,
|
||||
"hobbies": "reading",
|
||||
"address": "Beijing",
|
||||
}),
|
||||
serde_json::json!({
|
||||
"name": "Bob",
|
||||
"age": 21,
|
||||
"is_student": false,
|
||||
"score": 88.5,
|
||||
"hobbies": "swimming",
|
||||
"address": "Shanghai",
|
||||
"gaga": "gaga"
|
||||
}),
|
||||
];
|
||||
let tag_column_names = ["name".to_string(), "address".to_string()];
|
||||
let rows = identity_pipeline_inner(array, Some(tag_column_names.iter()));
|
||||
assert!(rows.is_ok());
|
||||
let rows = rows.unwrap();
|
||||
assert_eq!(rows.schema.len(), 8);
|
||||
assert_eq!(rows.rows.len(), 2);
|
||||
assert_eq!(8, rows.rows[0].values.len());
|
||||
assert_eq!(8, rows.rows[1].values.len());
|
||||
assert_eq!(
|
||||
rows.schema
|
||||
.iter()
|
||||
.find(|x| x.column_name == "name")
|
||||
.unwrap()
|
||||
.semantic_type,
|
||||
SemanticType::Tag as i32
|
||||
);
|
||||
assert_eq!(
|
||||
rows.schema
|
||||
.iter()
|
||||
.find(|x| x.column_name == "address")
|
||||
.unwrap()
|
||||
.semantic_type,
|
||||
SemanticType::Tag as i32
|
||||
);
|
||||
assert_eq!(
|
||||
rows.schema
|
||||
.iter()
|
||||
.filter(|x| x.semantic_type == SemanticType::Tag as i32)
|
||||
.count(),
|
||||
2
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -243,4 +243,9 @@ impl PipelineOperator {
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Compile a pipeline.
|
||||
pub fn build_pipeline(pipeline: &str) -> Result<Pipeline<GreptimeTransformer>> {
|
||||
PipelineTable::compile_pipeline(pipeline)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -203,7 +203,7 @@ impl PipelineTable {

    /// Compile a pipeline from a string.
    pub fn compile_pipeline(pipeline: &str) -> Result<Pipeline<GreptimeTransformer>> {
        let yaml_content = Content::Yaml(pipeline.into());
        let yaml_content = Content::Yaml(pipeline);
        parse::<GreptimeTransformer>(&yaml_content).context(CompilePipelineSnafu)
    }
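Since `Content` now borrows the YAML text, compiling a pipeline no longer allocates an intermediate `String`. A hedged usage sketch relying on the `pipeline` crate items that the tests in this diff import (`parse`, `Content`, `GreptimeTransformer`, `Pipeline`); it is not a standalone program:

```rust
// Relies on the `pipeline` crate shown in this diff.
use pipeline::{parse, Content, GreptimeTransformer, Pipeline};

fn compile(yaml: &str) -> Pipeline<GreptimeTransformer> {
    // `Content::Yaml` now holds a `&str`, so no `.into()` / allocation is needed.
    let content = Content::Yaml(yaml);
    parse::<GreptimeTransformer>(&content).expect("invalid pipeline definition")
}
```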
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ use pipeline::{parse, Content, GreptimeTransformer, Pipeline};
|
||||
pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows {
|
||||
let input_value = serde_json::from_str::<serde_json::Value>(input_str).unwrap();
|
||||
|
||||
let yaml_content = Content::Yaml(pipeline_yaml.into());
|
||||
let yaml_content = Content::Yaml(pipeline_yaml);
|
||||
let pipeline: Pipeline<GreptimeTransformer> =
|
||||
parse(&yaml_content).expect("failed to parse pipeline");
|
||||
let mut result = pipeline.init_intermediate_state();
|
||||
|
||||
@@ -270,7 +270,7 @@ transform:
|
||||
|
||||
let input_value = serde_json::from_str::<serde_json::Value>(input_str).unwrap();
|
||||
|
||||
let yaml_content = pipeline::Content::Yaml(pipeline_yaml.into());
|
||||
let yaml_content = pipeline::Content::Yaml(pipeline_yaml);
|
||||
let pipeline: pipeline::Pipeline<pipeline::GreptimeTransformer> =
|
||||
pipeline::parse(&yaml_content).expect("failed to parse pipeline");
|
||||
let mut result = pipeline.init_intermediate_state();
|
||||
|
||||
@@ -417,7 +417,7 @@ transform:
|
||||
.map(|(_, d)| GreptimeValue { value_data: d })
|
||||
.collect::<Vec<GreptimeValue>>();
|
||||
|
||||
let yaml_content = Content::Yaml(pipeline_yaml.into());
|
||||
let yaml_content = Content::Yaml(pipeline_yaml);
|
||||
let pipeline: Pipeline<GreptimeTransformer> =
|
||||
parse(&yaml_content).expect("failed to parse pipeline");
|
||||
let mut stats = pipeline.init_intermediate_state();
|
||||
@@ -487,7 +487,7 @@ transform:
|
||||
type: json
|
||||
"#;
|
||||
|
||||
let yaml_content = Content::Yaml(pipeline_yaml.into());
|
||||
let yaml_content = Content::Yaml(pipeline_yaml);
|
||||
let pipeline: Pipeline<GreptimeTransformer> = parse(&yaml_content).unwrap();
|
||||
|
||||
let mut status = pipeline.init_intermediate_state();
|
||||
@@ -592,7 +592,7 @@ transform:
|
||||
type: json
|
||||
"#;
|
||||
|
||||
let yaml_content = Content::Yaml(pipeline_yaml.into());
|
||||
let yaml_content = Content::Yaml(pipeline_yaml);
|
||||
let pipeline: Pipeline<GreptimeTransformer> = parse(&yaml_content).unwrap();
|
||||
|
||||
let mut status = pipeline.init_intermediate_state();
|
||||
@@ -655,7 +655,7 @@ transform:
|
||||
index: timestamp
|
||||
"#;
|
||||
|
||||
let yaml_content = Content::Yaml(pipeline_yaml.into());
|
||||
let yaml_content = Content::Yaml(pipeline_yaml);
|
||||
let pipeline: Pipeline<GreptimeTransformer> = parse(&yaml_content).unwrap();
|
||||
|
||||
let mut status = pipeline.init_intermediate_state();
|
||||
@@ -691,7 +691,7 @@ transform:
|
||||
- message
|
||||
type: string
|
||||
"#;
|
||||
let yaml_content = Content::Yaml(pipeline_yaml.into());
|
||||
let yaml_content = Content::Yaml(pipeline_yaml);
|
||||
let pipeline: Pipeline<GreptimeTransformer> = parse(&yaml_content).unwrap();
|
||||
|
||||
let mut status = pipeline.init_intermediate_state();
|
||||
|
||||
@@ -25,6 +25,7 @@ futures.workspace = true
|
||||
lz4_flex = "0.11"
|
||||
moka = { workspace = true, features = ["future", "sync"] }
|
||||
pin-project.workspace = true
|
||||
prometheus.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
sha2 = "0.10.8"
|
||||
|
||||
@@ -68,6 +68,20 @@ pub struct BlobMetadata {
    pub properties: HashMap<String, String>,
}

impl BlobMetadata {
    /// Calculates the memory usage of the blob metadata in bytes.
    pub fn memory_usage(&self) -> usize {
        self.blob_type.len()
            + self.input_fields.len() * std::mem::size_of::<i32>()
            + self
                .properties
                .iter()
                .map(|(k, v)| k.len() + v.len())
                .sum::<usize>()
            + std::mem::size_of::<Self>()
    }
}
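`memory_usage` exists so the new puffin metadata cache can be bounded by estimated bytes rather than entry count. A hedged sketch of wiring such an estimate into a cache weigher, using the `moka` crate that appears in the dependency list above (sync feature) and a simplified stand-in for `BlobMetadata`:

```rust
use moka::sync::Cache;

/// Simplified stand-in for the crate's `BlobMetadata`.
#[derive(Clone)]
struct BlobMetadata {
    blob_type: String,
    properties: Vec<(String, String)>,
}

impl BlobMetadata {
    // Rough byte accounting, mirroring the shape of the method above.
    fn memory_usage(&self) -> usize {
        self.blob_type.len()
            + self
                .properties
                .iter()
                .map(|(k, v)| k.len() + v.len())
                .sum::<usize>()
            + std::mem::size_of::<Self>()
    }
}

fn main() {
    // Bound the cache by estimated bytes rather than by entry count.
    let cache: Cache<String, BlobMetadata> = Cache::builder()
        .weigher(|_key, value: &BlobMetadata| value.memory_usage() as u32)
        .max_capacity(64 * 1024 * 1024) // 64 MiB of metadata
        .build();

    cache.insert(
        "file-1".to_string(),
        BlobMetadata {
            blob_type: "greptime-inverted-index-v1".to_string(),
            properties: vec![("k".to_string(), "v".to_string())],
        },
    );
    assert!(cache.get(&"file-1".to_string()).is_some());
}
```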
|
||||
|
||||
/// Compression codec used to compress the blob
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
|
||||
@@ -25,14 +25,6 @@ use snafu::{Location, Snafu};
|
||||
#[snafu(visibility(pub))]
|
||||
#[stack_trace_debug]
|
||||
pub enum Error {
|
||||
#[snafu(display("Failed to seek"))]
|
||||
Seek {
|
||||
#[snafu(source)]
|
||||
error: IoError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to read"))]
|
||||
Read {
|
||||
#[snafu(source)]
|
||||
@@ -119,14 +111,6 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to convert bytes to integer"))]
|
||||
BytesToInteger {
|
||||
#[snafu(source)]
|
||||
error: std::array::TryFromSliceError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Unsupported decompression: {}", decompression))]
|
||||
UnsupportedDecompression {
|
||||
decompression: String,
|
||||
@@ -150,17 +134,15 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Parse stage not match, expected: {}, actual: {}", expected, actual))]
|
||||
ParseStageNotMatch {
|
||||
expected: String,
|
||||
actual: String,
|
||||
#[snafu(display("Unexpected footer payload size: {}", size))]
|
||||
UnexpectedFooterPayloadSize {
|
||||
size: i32,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Unexpected footer payload size: {}", size))]
|
||||
UnexpectedFooterPayloadSize {
|
||||
size: i32,
|
||||
#[snafu(display("Invalid puffin footer"))]
|
||||
InvalidPuffinFooter {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
@@ -177,20 +159,6 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid blob offset: {}, location: {:?}", offset, location))]
|
||||
InvalidBlobOffset {
|
||||
offset: i64,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid blob area end: {}, location: {:?}", offset, location))]
|
||||
InvalidBlobAreaEnd {
|
||||
offset: u64,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to compress lz4"))]
|
||||
Lz4Compression {
|
||||
#[snafu(source)]
|
||||
@@ -262,8 +230,7 @@ impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
use Error::*;
|
||||
match self {
|
||||
Seek { .. }
|
||||
| Read { .. }
|
||||
Read { .. }
|
||||
| MagicNotMatched { .. }
|
||||
| DeserializeJson { .. }
|
||||
| Write { .. }
|
||||
@@ -275,18 +242,15 @@ impl ErrorExt for Error {
|
||||
| Remove { .. }
|
||||
| Rename { .. }
|
||||
| SerializeJson { .. }
|
||||
| BytesToInteger { .. }
|
||||
| ParseStageNotMatch { .. }
|
||||
| UnexpectedFooterPayloadSize { .. }
|
||||
| UnexpectedPuffinFileSize { .. }
|
||||
| InvalidBlobOffset { .. }
|
||||
| InvalidBlobAreaEnd { .. }
|
||||
| Lz4Compression { .. }
|
||||
| Lz4Decompression { .. }
|
||||
| BlobNotFound { .. }
|
||||
| BlobIndexOutOfBound { .. }
|
||||
| FileKeyNotMatch { .. }
|
||||
| WalkDir { .. } => StatusCode::Unexpected,
|
||||
| WalkDir { .. }
|
||||
| InvalidPuffinFooter { .. } => StatusCode::Unexpected,
|
||||
|
||||
UnsupportedCompression { .. } | UnsupportedDecompression { .. } => {
|
||||
StatusCode::Unsupported
|
||||
|
||||
@@ -21,21 +21,9 @@ use common_base::range_read::RangeReader;
|
||||
use crate::blob_metadata::BlobMetadata;
|
||||
use crate::error::Result;
|
||||
pub use crate::file_format::reader::file::PuffinFileReader;
|
||||
pub use crate::file_format::reader::footer::PuffinFileFooterReader;
|
||||
use crate::file_metadata::FileMetadata;
|
||||
|
||||
/// `SyncReader` defines a synchronous reader for puffin data.
|
||||
pub trait SyncReader<'a> {
|
||||
type Reader: std::io::Read + std::io::Seek;
|
||||
|
||||
/// Fetches the FileMetadata.
|
||||
fn metadata(&'a mut self) -> Result<FileMetadata>;
|
||||
|
||||
/// Reads particular blob data based on given metadata.
|
||||
///
|
||||
/// Data read from the reader is compressed leaving the caller to decompress the data.
|
||||
fn blob_reader(&'a mut self, blob_metadata: &BlobMetadata) -> Result<Self::Reader>;
|
||||
}
|
||||
|
||||
/// `AsyncReader` defines an asynchronous reader for puffin data.
|
||||
#[async_trait]
|
||||
pub trait AsyncReader<'a> {
|
||||
|
||||
@@ -12,20 +12,15 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::io::{self, SeekFrom};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_base::range_read::RangeReader;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::blob_metadata::BlobMetadata;
|
||||
use crate::error::{
|
||||
MagicNotMatchedSnafu, ReadSnafu, Result, SeekSnafu, UnexpectedPuffinFileSizeSnafu,
|
||||
UnsupportedDecompressionSnafu,
|
||||
};
|
||||
use crate::file_format::reader::footer::FooterParser;
|
||||
use crate::file_format::reader::{AsyncReader, SyncReader};
|
||||
use crate::file_format::{MAGIC, MAGIC_SIZE, MIN_FILE_SIZE};
|
||||
use crate::error::{ReadSnafu, Result, UnexpectedPuffinFileSizeSnafu};
|
||||
use crate::file_format::reader::footer::DEFAULT_PREFETCH_SIZE;
|
||||
use crate::file_format::reader::{AsyncReader, PuffinFileFooterReader};
|
||||
use crate::file_format::MIN_FILE_SIZE;
|
||||
use crate::file_metadata::FileMetadata;
|
||||
use crate::partial_reader::PartialReader;
|
||||
|
||||
@@ -51,6 +46,11 @@ impl<R> PuffinFileReader<R> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_metadata(mut self, metadata: Option<FileMetadata>) -> Self {
|
||||
self.metadata = metadata;
|
||||
self
|
||||
}
|
||||
|
||||
fn validate_file_size(file_size: u64) -> Result<()> {
|
||||
ensure!(
|
||||
file_size >= MIN_FILE_SIZE,
|
||||
@@ -72,45 +72,6 @@ impl<R> PuffinFileReader<R> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, R: io::Read + io::Seek + 'a> SyncReader<'a> for PuffinFileReader<R> {
|
||||
type Reader = PartialReader<&'a mut R>;
|
||||
|
||||
fn metadata(&mut self) -> Result<FileMetadata> {
|
||||
if let Some(metadata) = &self.metadata {
|
||||
return Ok(metadata.clone());
|
||||
}
|
||||
|
||||
// check the magic
|
||||
let mut magic = [0; MAGIC_SIZE as usize];
|
||||
self.source.read_exact(&mut magic).context(ReadSnafu)?;
|
||||
ensure!(magic == MAGIC, MagicNotMatchedSnafu);
|
||||
|
||||
let file_size = self.get_file_size_sync()?;
|
||||
|
||||
// parse the footer
|
||||
let metadata = FooterParser::new(&mut self.source, file_size).parse_sync()?;
|
||||
self.metadata = Some(metadata.clone());
|
||||
Ok(metadata)
|
||||
}
|
||||
|
||||
fn blob_reader(&'a mut self, blob_metadata: &BlobMetadata) -> Result<Self::Reader> {
|
||||
// TODO(zhongzc): support decompression
|
||||
let compression = blob_metadata.compression_codec.as_ref();
|
||||
ensure!(
|
||||
compression.is_none(),
|
||||
UnsupportedDecompressionSnafu {
|
||||
decompression: compression.unwrap().to_string()
|
||||
}
|
||||
);
|
||||
|
||||
Ok(PartialReader::new(
|
||||
&mut self.source,
|
||||
blob_metadata.offset as _,
|
||||
blob_metadata.length as _,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader<R> {
|
||||
type Reader = PartialReader<&'a mut R>;
|
||||
@@ -119,17 +80,10 @@ impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader<R> {
|
||||
if let Some(metadata) = &self.metadata {
|
||||
return Ok(metadata.clone());
|
||||
}
|
||||
|
||||
// check the magic
|
||||
let magic = self.source.read(0..MAGIC_SIZE).await.context(ReadSnafu)?;
|
||||
ensure!(*magic == MAGIC, MagicNotMatchedSnafu);
|
||||
|
||||
let file_size = self.get_file_size_async().await?;
|
||||
|
||||
// parse the footer
|
||||
let metadata = FooterParser::new(&mut self.source, file_size)
|
||||
.parse_async()
|
||||
.await?;
|
||||
let mut reader = PuffinFileFooterReader::new(&mut self.source, file_size)
|
||||
.with_prefetch_size(DEFAULT_PREFETCH_SIZE);
|
||||
let metadata = reader.metadata().await?;
|
||||
self.metadata = Some(metadata.clone());
|
||||
Ok(metadata)
|
||||
}
|
||||
@@ -143,14 +97,6 @@ impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader<R> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: io::Read + io::Seek> PuffinFileReader<R> {
|
||||
fn get_file_size_sync(&mut self) -> Result<u64> {
|
||||
let file_size = self.source.seek(SeekFrom::End(0)).context(SeekSnafu)?;
|
||||
Self::validate_file_size(file_size)?;
|
||||
Ok(file_size)
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: RangeReader> PuffinFileReader<R> {
|
||||
async fn get_file_size_async(&mut self) -> Result<u64> {
|
||||
let file_size = self
|
||||
|
||||
@@ -12,240 +12,98 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::io::{self, Cursor, SeekFrom};
|
||||
use std::io::Cursor;
|
||||
|
||||
use common_base::range_read::RangeReader;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::error::{
|
||||
BytesToIntegerSnafu, DeserializeJsonSnafu, InvalidBlobAreaEndSnafu, InvalidBlobOffsetSnafu,
|
||||
Lz4DecompressionSnafu, MagicNotMatchedSnafu, ParseStageNotMatchSnafu, ReadSnafu, Result,
|
||||
SeekSnafu, UnexpectedFooterPayloadSizeSnafu,
|
||||
DeserializeJsonSnafu, InvalidPuffinFooterSnafu, Lz4DecompressionSnafu, MagicNotMatchedSnafu,
|
||||
ReadSnafu, Result, UnexpectedFooterPayloadSizeSnafu,
|
||||
};
|
||||
use crate::file_format::{Flags, FLAGS_SIZE, MAGIC, MAGIC_SIZE, MIN_FILE_SIZE, PAYLOAD_SIZE_SIZE};
|
||||
use crate::file_metadata::FileMetadata;
|
||||
|
||||
/// Parser for the footer of a Puffin data file
/// The default prefetch size for the footer reader.
pub const DEFAULT_PREFETCH_SIZE: u64 = 1024; // 1KiB

/// Reader for the footer of a Puffin data file
///
/// The footer has a specific layout that needs to be read and parsed to
/// extract metadata about the file, which is encapsulated in the [`FileMetadata`] type.
///
/// This reader supports prefetching, allowing for more efficient reading
/// of the footer by fetching additional data ahead of time.
///
/// ```text
/// Footer layout: HeadMagic Payload PayloadSize Flags FootMagic
///                [4]       [?]     [4]         [4]   [4]
/// ```
pub struct FooterParser<R> {
    // The underlying IO source
pub struct PuffinFileFooterReader<R> {
    /// The source of the puffin file
    source: R,

    // The size of the file, used for calculating offsets to read from
    /// The content length of the puffin file
    file_size: u64,
    /// The prefetch footer size
    prefetch_size: Option<u64>,
}
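Given the layout in the doc comment, every footer field's offset follows from the file size and the payload size alone; this is the arithmetic the old `StageParser` offset helpers encoded and that the prefetching reader now performs against a single suffix read. A small worked sketch (constants inlined; not the crate's internal helpers):

```rust
const MAGIC_SIZE: u64 = 4;
const FLAGS_SIZE: u64 = 4;
const PAYLOAD_SIZE_SIZE: u64 = 4;

/// Byte offsets of the footer fields, counted from the start of the file:
/// (head magic, payload, payload-size field, flags field).
fn footer_offsets(file_size: u64, payload_size: u64) -> (u64, u64, u64, u64) {
    let foot_magic = file_size - MAGIC_SIZE;
    let flags = foot_magic - FLAGS_SIZE;
    let payload_len = flags - PAYLOAD_SIZE_SIZE;
    let payload = payload_len - payload_size;
    let head_magic = payload - MAGIC_SIZE;
    (head_magic, payload, payload_len, flags)
}

fn main() {
    // A 1 KiB file whose footer payload is 100 bytes.
    let (head_magic, payload, payload_len, flags) = footer_offsets(1024, 100);
    assert_eq!(flags, 1016);
    assert_eq!(payload_len, 1012);
    assert_eq!(payload, 912);
    assert_eq!(head_magic, 908);
    // A prefetch of `DEFAULT_PREFETCH_SIZE` (1 KiB) would cover this entire
    // footer in a single range read, so no second request is needed.
}
```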
|
||||
|
||||
impl<R> FooterParser<R> {
|
||||
pub fn new(source: R, file_size: u64) -> Self {
|
||||
Self { source, file_size }
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: io::Read + io::Seek> FooterParser<R> {
|
||||
/// Parses the footer from the IO source in a synchronous manner.
|
||||
pub fn parse_sync(&mut self) -> Result<FileMetadata> {
|
||||
let mut parser = StageParser::new(self.file_size);
|
||||
|
||||
let mut buf = vec![];
|
||||
while let Some(byte_to_read) = parser.next_to_read() {
|
||||
self.source
|
||||
.seek(SeekFrom::Start(byte_to_read.offset))
|
||||
.context(SeekSnafu)?;
|
||||
let size = byte_to_read.size as usize;
|
||||
|
||||
buf.resize(size, 0);
|
||||
let buf = &mut buf[..size];
|
||||
|
||||
self.source.read_exact(buf).context(ReadSnafu)?;
|
||||
|
||||
parser.consume_bytes(buf)?;
|
||||
impl<'a, R: RangeReader + 'a> PuffinFileFooterReader<R> {
|
||||
pub fn new(source: R, content_len: u64) -> Self {
|
||||
Self {
|
||||
source,
|
||||
file_size: content_len,
|
||||
prefetch_size: None,
|
||||
}
|
||||
|
||||
parser.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: RangeReader> FooterParser<R> {
|
||||
/// Parses the footer from the IO source in a asynchronous manner.
|
||||
pub async fn parse_async(&mut self) -> Result<FileMetadata> {
|
||||
let mut parser = StageParser::new(self.file_size);
|
||||
fn prefetch_size(&self) -> u64 {
|
||||
self.prefetch_size.unwrap_or(MIN_FILE_SIZE)
|
||||
}
|
||||
|
||||
let mut buf = vec![];
|
||||
while let Some(byte_to_read) = parser.next_to_read() {
|
||||
buf.clear();
|
||||
let range = byte_to_read.offset..byte_to_read.offset + byte_to_read.size;
|
||||
self.source
|
||||
.read_into(range, &mut buf)
|
||||
pub fn with_prefetch_size(mut self, prefetch_size: u64) -> Self {
|
||||
self.prefetch_size = Some(prefetch_size.max(MIN_FILE_SIZE));
|
||||
self
|
||||
}
|
||||
|
||||
pub async fn metadata(&'a mut self) -> Result<FileMetadata> {
|
||||
// Note: prefetch > content_len is allowed, since we're using saturating_sub.
|
||||
let footer_start = self.file_size.saturating_sub(self.prefetch_size());
|
||||
let suffix = self
|
||||
.source
|
||||
.read(footer_start..self.file_size)
|
||||
.await
|
||||
.context(ReadSnafu)?;
|
||||
let suffix_len = suffix.len();
|
||||
|
||||
// check the magic
|
||||
let magic = Self::read_tailing_four_bytes(&suffix)?;
|
||||
ensure!(magic == MAGIC, MagicNotMatchedSnafu);
|
||||
|
||||
let flags = self.decode_flags(&suffix[..suffix_len - MAGIC_SIZE as usize])?;
|
||||
let length = self.decode_payload_size(
|
||||
&suffix[..suffix_len - MAGIC_SIZE as usize - FLAGS_SIZE as usize],
|
||||
)?;
|
||||
let footer_size = PAYLOAD_SIZE_SIZE + FLAGS_SIZE + MAGIC_SIZE;
|
||||
|
||||
// Did not fetch the entire file metadata in the initial read, need to make a second request.
|
||||
if length > suffix_len as u64 - footer_size {
|
||||
let metadata_start = self.file_size - length - footer_size;
|
||||
let meta = self
|
||||
.source
|
||||
.read(metadata_start..self.file_size - footer_size)
|
||||
.await
|
||||
.context(ReadSnafu)?;
|
||||
parser.consume_bytes(&buf)?;
|
||||
}
|
||||
|
||||
parser.finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// The internal stages of parsing the footer.
|
||||
/// This enum allows the StageParser to keep track of which part
|
||||
/// of the footer needs to be parsed next.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
enum ParseStage {
|
||||
FootMagic,
|
||||
Flags,
|
||||
PayloadSize,
|
||||
Payload,
|
||||
HeadMagic,
|
||||
Done,
|
||||
}
|
||||
|
||||
/// Manages the parsing process of the file's footer.
|
||||
struct StageParser {
|
||||
/// Current stage in the parsing sequence of the footer.
|
||||
stage: ParseStage,
|
||||
|
||||
/// Total file size; used for calculating offsets to read from.
|
||||
file_size: u64,
|
||||
|
||||
/// Flags from the footer, set when the `Flags` field is parsed.
|
||||
flags: Flags,
|
||||
|
||||
/// Size of the footer's payload, set when the `PayloadSize` is parsed.
|
||||
payload_size: u64,
|
||||
|
||||
/// Metadata from the footer's payload, set when the `Payload` is parsed.
|
||||
metadata: Option<FileMetadata>,
|
||||
}
|
||||
|
||||
/// Represents a read operation that needs to be performed, including the
|
||||
/// offset from the start of the file and the number of bytes to read.
|
||||
struct BytesToRead {
|
||||
offset: u64,
|
||||
size: u64,
|
||||
}
|
||||
|
||||
impl StageParser {
|
||||
fn new(file_size: u64) -> Self {
|
||||
Self {
|
||||
stage: ParseStage::FootMagic,
|
||||
file_size,
|
||||
payload_size: 0,
|
||||
flags: Flags::empty(),
|
||||
metadata: None,
|
||||
self.parse_payload(&flags, &meta)
|
||||
} else {
|
||||
let metadata_start = self.file_size - length - footer_size - footer_start;
|
||||
let meta = &suffix[metadata_start as usize..suffix_len - footer_size as usize];
|
||||
self.parse_payload(&flags, meta)
|
||||
}
|
||||
}
|
||||
|
||||
/// Determines the next segment of bytes to read based on the current parsing stage.
|
||||
/// This method returns information like the offset and size of the next read,
|
||||
/// or None if parsing is complete.
|
||||
fn next_to_read(&self) -> Option<BytesToRead> {
|
||||
if self.stage == ParseStage::Done {
|
||||
return None;
|
||||
}
|
||||
|
||||
let btr = match self.stage {
|
||||
ParseStage::FootMagic => BytesToRead {
|
||||
offset: self.foot_magic_offset(),
|
||||
size: MAGIC_SIZE,
|
||||
},
|
||||
ParseStage::Flags => BytesToRead {
|
||||
offset: self.flags_offset(),
|
||||
size: FLAGS_SIZE,
|
||||
},
|
||||
ParseStage::PayloadSize => BytesToRead {
|
||||
offset: self.payload_size_offset(),
|
||||
size: PAYLOAD_SIZE_SIZE,
|
||||
},
|
||||
ParseStage::Payload => BytesToRead {
|
||||
offset: self.payload_offset(),
|
||||
size: self.payload_size,
|
||||
},
|
||||
ParseStage::HeadMagic => BytesToRead {
|
||||
offset: self.head_magic_offset(),
|
||||
size: MAGIC_SIZE,
|
||||
},
|
||||
ParseStage::Done => unreachable!(),
|
||||
};
|
||||
|
||||
Some(btr)
|
||||
}
|
||||
|
||||
/// Processes the bytes that have been read according to the current parsing stage
|
||||
/// and advances the parsing stage. It ensures the correct sequence of bytes is
|
||||
/// encountered and stores the necessary information in the `StageParser`.
|
||||
fn consume_bytes(&mut self, bytes: &[u8]) -> Result<()> {
|
||||
match self.stage {
|
||||
ParseStage::FootMagic => {
|
||||
ensure!(bytes == MAGIC, MagicNotMatchedSnafu);
|
||||
self.stage = ParseStage::Flags;
|
||||
}
|
||||
ParseStage::Flags => {
|
||||
self.flags = Self::parse_flags(bytes)?;
|
||||
self.stage = ParseStage::PayloadSize;
|
||||
}
|
||||
ParseStage::PayloadSize => {
|
||||
self.payload_size = Self::parse_payload_size(bytes)?;
|
||||
self.validate_payload_size()?;
|
||||
self.stage = ParseStage::Payload;
|
||||
}
|
||||
ParseStage::Payload => {
|
||||
self.metadata = Some(self.parse_payload(bytes)?);
|
||||
self.validate_metadata()?;
|
||||
self.stage = ParseStage::HeadMagic;
|
||||
}
|
||||
ParseStage::HeadMagic => {
|
||||
ensure!(bytes == MAGIC, MagicNotMatchedSnafu);
|
||||
self.stage = ParseStage::Done;
|
||||
}
|
||||
ParseStage::Done => unreachable!(),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Finalizes the parsing process, ensuring all stages are complete, and returns
|
||||
/// the parsed `FileMetadata`. It converts the raw footer payload into structured data.
|
||||
fn finish(self) -> Result<FileMetadata> {
|
||||
ensure!(
|
||||
self.stage == ParseStage::Done,
|
||||
ParseStageNotMatchSnafu {
|
||||
expected: format!("{:?}", ParseStage::Done),
|
||||
actual: format!("{:?}", self.stage),
|
||||
}
|
||||
);
|
||||
|
||||
Ok(self.metadata.unwrap())
|
||||
}
|
||||
|
||||
fn parse_flags(bytes: &[u8]) -> Result<Flags> {
|
||||
let n = u32::from_le_bytes(bytes.try_into().context(BytesToIntegerSnafu)?);
|
||||
Ok(Flags::from_bits_truncate(n))
|
||||
}
|
||||
|
||||
fn parse_payload_size(bytes: &[u8]) -> Result<u64> {
|
||||
let n = i32::from_le_bytes(bytes.try_into().context(BytesToIntegerSnafu)?);
|
||||
ensure!(n >= 0, UnexpectedFooterPayloadSizeSnafu { size: n });
|
||||
Ok(n as u64)
|
||||
}
|
||||
|
||||
fn validate_payload_size(&self) -> Result<()> {
|
||||
ensure!(
|
||||
self.payload_size <= self.file_size - MIN_FILE_SIZE,
|
||||
UnexpectedFooterPayloadSizeSnafu {
|
||||
size: self.payload_size as i32
|
||||
}
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_payload(&self, bytes: &[u8]) -> Result<FileMetadata> {
|
||||
if self.flags.contains(Flags::FOOTER_PAYLOAD_COMPRESSED_LZ4) {
|
||||
fn parse_payload(&self, flags: &Flags, bytes: &[u8]) -> Result<FileMetadata> {
|
||||
if flags.contains(Flags::FOOTER_PAYLOAD_COMPRESSED_LZ4) {
|
||||
let decoder = lz4_flex::frame::FrameDecoder::new(Cursor::new(bytes));
|
||||
let res = serde_json::from_reader(decoder).context(Lz4DecompressionSnafu)?;
|
||||
Ok(res)
|
||||
@@ -254,54 +112,35 @@ impl StageParser {
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_metadata(&self) -> Result<()> {
|
||||
let metadata = self.metadata.as_ref().expect("metadata is not set");
|
||||
fn read_tailing_four_bytes(suffix: &[u8]) -> Result<[u8; 4]> {
|
||||
let suffix_len = suffix.len();
|
||||
ensure!(suffix_len >= 4, InvalidPuffinFooterSnafu);
|
||||
let mut bytes = [0; 4];
|
||||
bytes.copy_from_slice(&suffix[suffix_len - 4..suffix_len]);
|
||||
|
||||
let mut next_blob_offset = MAGIC_SIZE;
|
||||
// check blob offsets
|
||||
for blob in &metadata.blobs {
|
||||
ensure!(
|
||||
blob.offset as u64 == next_blob_offset,
|
||||
InvalidBlobOffsetSnafu {
|
||||
offset: blob.offset
|
||||
}
|
||||
);
|
||||
next_blob_offset += blob.length as u64;
|
||||
}
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
fn decode_flags(&self, suffix: &[u8]) -> Result<Flags> {
|
||||
let flags = u32::from_le_bytes(Self::read_tailing_four_bytes(suffix)?);
|
||||
Ok(Flags::from_bits_truncate(flags))
|
||||
}
|
||||
|
||||
fn decode_payload_size(&self, suffix: &[u8]) -> Result<u64> {
|
||||
let payload_size = i32::from_le_bytes(Self::read_tailing_four_bytes(suffix)?);
|
||||
|
||||
let blob_area_end = metadata
|
||||
.blobs
|
||||
.last()
|
||||
.map_or(MAGIC_SIZE, |b| (b.offset + b.length) as u64);
|
||||
ensure!(
|
||||
blob_area_end == self.head_magic_offset(),
|
||||
InvalidBlobAreaEndSnafu {
|
||||
offset: blob_area_end
|
||||
payload_size >= 0,
|
||||
UnexpectedFooterPayloadSizeSnafu { size: payload_size }
|
||||
);
|
||||
let payload_size = payload_size as u64;
|
||||
ensure!(
|
||||
payload_size <= self.file_size - MIN_FILE_SIZE,
|
||||
UnexpectedFooterPayloadSizeSnafu {
|
||||
size: self.file_size as i32
|
||||
}
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn foot_magic_offset(&self) -> u64 {
|
||||
self.file_size - MAGIC_SIZE
|
||||
}
|
||||
|
||||
fn flags_offset(&self) -> u64 {
|
||||
self.file_size - MAGIC_SIZE - FLAGS_SIZE
|
||||
}
|
||||
|
||||
fn payload_size_offset(&self) -> u64 {
|
||||
self.file_size - MAGIC_SIZE - FLAGS_SIZE - PAYLOAD_SIZE_SIZE
|
||||
}
|
||||
|
||||
fn payload_offset(&self) -> u64 {
|
||||
// `validate_payload_size` ensures that this subtraction will not overflow
|
||||
self.file_size - MAGIC_SIZE - FLAGS_SIZE - PAYLOAD_SIZE_SIZE - self.payload_size
|
||||
}
|
||||
|
||||
fn head_magic_offset(&self) -> u64 {
|
||||
// `validate_payload_size` ensures that this subtraction will not overflow
|
||||
self.file_size - MAGIC_SIZE * 2 - FLAGS_SIZE - PAYLOAD_SIZE_SIZE - self.payload_size
|
||||
Ok(payload_size)
|
||||
}
|
||||
}
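The offset helpers above encode the trailing footer layout, read back to front: the file ends with HeadMagic, FooterPayload, PayloadSize, Flags, FootMagic, and the blob area before it starts with the file's leading magic. A self-contained consistency check of that arithmetic (the 4-byte widths for magic, flags, and the payload-size field are assumptions mirroring the u32/i32 reads above; the other numbers are made up):

#[test]
fn footer_offsets_are_consistent() {
    let (magic, flags, size_field) = (4u64, 4u64, 4u64);
    let payload = 123u64; // hypothetical footer payload size
    let blob_area_end = magic + 996; // leading magic + total blob bytes, both hypothetical
    let file_size = blob_area_end + magic + payload + size_field + flags + magic;

    // Same formulas as `head_magic_offset` and `payload_offset` above.
    let head_magic_offset = file_size - 2 * magic - flags - size_field - payload;
    let payload_offset = file_size - magic - flags - size_field - payload;

    assert_eq!(head_magic_offset, blob_area_end);
    assert_eq!(payload_offset, head_magic_offset + magic);
}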
|
||||
|
||||
@@ -33,6 +33,22 @@ pub struct FileMetadata {
|
||||
pub properties: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl FileMetadata {
|
||||
/// Calculates the memory usage of the file metadata in bytes.
|
||||
pub fn memory_usage(&self) -> usize {
|
||||
self.blobs
|
||||
.iter()
|
||||
.map(|blob| blob.memory_usage())
|
||||
.sum::<usize>()
|
||||
+ self
|
||||
.properties
|
||||
.iter()
|
||||
.map(|(k, v)| k.len() + v.len())
|
||||
.sum::<usize>()
|
||||
+ std::mem::size_of::<Self>()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
|
||||
@@ -23,6 +23,10 @@ use crate::partial_reader::PartialReader;
|
||||
|
||||
#[async_trait]
|
||||
impl<R: RangeReader> RangeReader for PartialReader<R> {
|
||||
fn with_file_size_hint(&mut self, _file_size_hint: u64) {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
Ok(Metadata {
|
||||
content_length: self.size,
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod cache;
|
||||
pub mod file_accessor;
|
||||
pub mod fs_puffin_manager;
|
||||
pub mod stager;
|
||||
@@ -72,11 +73,12 @@ pub struct PutOptions {
|
||||
|
||||
/// The `PuffinReader` trait provides methods for reading blobs and directories from a Puffin file.
|
||||
#[async_trait]
|
||||
#[auto_impl::auto_impl(Arc)]
|
||||
pub trait PuffinReader {
|
||||
type Blob: BlobGuard;
|
||||
type Dir: DirGuard;
|
||||
|
||||
fn with_file_size_hint(self, file_size_hint: Option<u64>) -> Self;
|
||||
|
||||
/// Reads a blob from the Puffin file.
|
||||
///
|
||||
/// The returned `BlobGuard` is used to access the blob data.
|
||||
|
||||
src/puffin/src/puffin_manager/cache.rs (new file, 60 lines)
@@ -0,0 +1,60 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use prometheus::IntGaugeVec;

use crate::file_metadata::FileMetadata;
/// Metrics for index metadata.
const PUFFIN_METADATA_TYPE: &str = "puffin_metadata";

pub type PuffinMetadataCacheRef = Arc<PuffinMetadataCache>;

/// A cache for storing the metadata of the index files.
pub struct PuffinMetadataCache {
    cache: moka::sync::Cache<String, Arc<FileMetadata>>,
}

fn puffin_metadata_weight(k: &String, v: &Arc<FileMetadata>) -> u32 {
    (k.as_bytes().len() + v.memory_usage()) as u32
}

impl PuffinMetadataCache {
    pub fn new(capacity: u64, cache_bytes: &'static IntGaugeVec) -> Self {
        common_telemetry::debug!("Building PuffinMetadataCache with capacity: {capacity}");
        Self {
            cache: moka::sync::CacheBuilder::new(capacity)
                .name("puffin_metadata")
                .weigher(puffin_metadata_weight)
                .eviction_listener(|k, v, _cause| {
                    let size = puffin_metadata_weight(&k, &v);
                    cache_bytes
                        .with_label_values(&[PUFFIN_METADATA_TYPE])
                        .sub(size.into());
                })
                .build(),
        }
    }

    /// Gets the metadata from the cache.
    pub fn get_metadata(&self, file_id: &str) -> Option<Arc<FileMetadata>> {
        self.cache.get(file_id)
    }

    /// Puts the metadata into the cache.
    pub fn put_metadata(&self, file_id: String, metadata: Arc<FileMetadata>) {
        self.cache.insert(file_id, metadata);
    }
}
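A hedged usage sketch for the cache above; the gauge name, label value, and capacity are made up, and metadata stands in for any FileMetadata obtained from a reader:

use std::sync::Arc;

use lazy_static::lazy_static;
use prometheus::{register_int_gauge_vec, IntGaugeVec};

lazy_static! {
    // Hypothetical gauge; real callers pass in their own &'static IntGaugeVec.
    static ref CACHE_BYTES: IntGaugeVec =
        register_int_gauge_vec!("example_cache_bytes", "cached bytes by type", &["type"]).unwrap();
}

fn cache_example(metadata: Arc<FileMetadata>) {
    let cache = PuffinMetadataCache::new(64 * 1024 * 1024, &CACHE_BYTES);
    cache.put_metadata("some-puffin-file".to_string(), metadata);
    assert!(cache.get_metadata("some-puffin-file").is_some());
}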
|
||||
@@ -21,6 +21,7 @@ pub use reader::FsPuffinReader;
|
||||
pub use writer::FsPuffinWriter;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::puffin_manager::cache::PuffinMetadataCacheRef;
|
||||
use crate::puffin_manager::file_accessor::PuffinFileAccessor;
|
||||
use crate::puffin_manager::stager::Stager;
|
||||
use crate::puffin_manager::PuffinManager;
|
||||
@@ -31,16 +32,29 @@ pub struct FsPuffinManager<S, F> {
|
||||
stager: S,
|
||||
/// The puffin file accessor.
|
||||
puffin_file_accessor: F,
|
||||
/// The puffin metadata cache.
|
||||
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
|
||||
}
|
||||
|
||||
impl<S, F> FsPuffinManager<S, F> {
|
||||
/// Creates a new `FsPuffinManager` with the specified `stager` and `puffin_file_accessor`.
|
||||
/// Creates a new `FsPuffinManager` with the specified `stager` and `puffin_file_accessor`,
|
||||
/// and optionally with a `puffin_metadata_cache`.
|
||||
pub fn new(stager: S, puffin_file_accessor: F) -> Self {
|
||||
Self {
|
||||
stager,
|
||||
puffin_file_accessor,
|
||||
puffin_metadata_cache: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the puffin metadata cache.
|
||||
pub fn with_puffin_metadata_cache(
|
||||
mut self,
|
||||
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
|
||||
) -> Self {
|
||||
self.puffin_metadata_cache = puffin_metadata_cache;
|
||||
self
|
||||
}
|
||||
}
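With the cache being optional, wiring it into the manager is a builder-style call. A minimal sketch under the assumption that a stager, a file accessor, and a cache handle already exist in the caller:

fn build_manager<S, F>(
    stager: S,
    accessor: F,
    metadata_cache: Option<PuffinMetadataCacheRef>,
) -> FsPuffinManager<S, F> {
    FsPuffinManager::new(stager, accessor).with_puffin_metadata_cache(metadata_cache)
}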
|
||||
|
||||
#[async_trait]
|
||||
@@ -57,6 +71,7 @@ where
|
||||
puffin_file_name.to_string(),
|
||||
self.stager.clone(),
|
||||
self.puffin_file_accessor.clone(),
|
||||
self.puffin_metadata_cache.clone(),
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
use std::io;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_compression::futures::bufread::ZstdDecoder;
|
||||
use async_trait::async_trait;
|
||||
@@ -23,12 +24,14 @@ use futures::io::BufReader;
|
||||
use futures::{AsyncRead, AsyncWrite};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
use super::PuffinMetadataCacheRef;
|
||||
use crate::blob_metadata::{BlobMetadata, CompressionCodec};
|
||||
use crate::error::{
|
||||
BlobIndexOutOfBoundSnafu, BlobNotFoundSnafu, DeserializeJsonSnafu, FileKeyNotMatchSnafu,
|
||||
MetadataSnafu, ReadSnafu, Result, UnsupportedDecompressionSnafu, WriteSnafu,
|
||||
};
|
||||
use crate::file_format::reader::{AsyncReader, PuffinFileReader};
|
||||
use crate::file_metadata::FileMetadata;
|
||||
use crate::partial_reader::PartialReader;
|
||||
use crate::puffin_manager::file_accessor::PuffinFileAccessor;
|
||||
use crate::puffin_manager::fs_puffin_manager::dir_meta::DirMetadata;
|
||||
@@ -40,19 +43,32 @@ pub struct FsPuffinReader<S, F> {
|
||||
/// The name of the puffin file.
|
||||
puffin_file_name: String,
|
||||
|
||||
/// The file size hint.
|
||||
file_size_hint: Option<u64>,
|
||||
|
||||
/// The stager.
|
||||
stager: S,
|
||||
|
||||
/// The puffin file accessor.
|
||||
puffin_file_accessor: F,
|
||||
|
||||
/// The puffin file metadata cache.
|
||||
puffin_file_metadata_cache: Option<PuffinMetadataCacheRef>,
|
||||
}
|
||||
|
||||
impl<S, F> FsPuffinReader<S, F> {
|
||||
pub(crate) fn new(puffin_file_name: String, stager: S, puffin_file_accessor: F) -> Self {
|
||||
pub(crate) fn new(
|
||||
puffin_file_name: String,
|
||||
stager: S,
|
||||
puffin_file_accessor: F,
|
||||
puffin_file_metadata_cache: Option<PuffinMetadataCacheRef>,
|
||||
) -> Self {
|
||||
Self {
|
||||
puffin_file_name,
|
||||
file_size_hint: None,
|
||||
stager,
|
||||
puffin_file_accessor,
|
||||
puffin_file_metadata_cache,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -66,20 +82,28 @@ where
|
||||
type Blob = Either<RandomReadBlob<F>, S::Blob>;
|
||||
type Dir = S::Dir;
|
||||
|
||||
fn with_file_size_hint(mut self, file_size_hint: Option<u64>) -> Self {
|
||||
self.file_size_hint = file_size_hint;
|
||||
self
|
||||
}
|
||||
|
||||
async fn blob(&self, key: &str) -> Result<Self::Blob> {
|
||||
let reader = self
|
||||
let mut reader = self
|
||||
.puffin_file_accessor
|
||||
.reader(&self.puffin_file_name)
|
||||
.await?;
|
||||
if let Some(file_size_hint) = self.file_size_hint {
|
||||
reader.with_file_size_hint(file_size_hint);
|
||||
}
|
||||
let mut file = PuffinFileReader::new(reader);
|
||||
|
||||
// TODO(zhongzc): cache the metadata.
|
||||
let metadata = file.metadata().await?;
|
||||
let metadata = self.get_puffin_file_metadata(&mut file).await?;
|
||||
let blob_metadata = metadata
|
||||
.blobs
|
||||
.into_iter()
|
||||
.iter()
|
||||
.find(|m| m.blob_type == key)
|
||||
.context(BlobNotFoundSnafu { blob: key })?;
|
||||
.context(BlobNotFoundSnafu { blob: key })?
|
||||
.clone();
|
||||
|
||||
let blob = if blob_metadata.compression_codec.is_none() {
|
||||
// If the blob is not compressed, we can directly read it from the puffin file.
|
||||
@@ -133,6 +157,23 @@ where
|
||||
S: Stager,
|
||||
F: PuffinFileAccessor + Clone,
|
||||
{
|
||||
async fn get_puffin_file_metadata(
|
||||
&self,
|
||||
reader: &mut PuffinFileReader<F::Reader>,
|
||||
) -> Result<Arc<FileMetadata>> {
|
||||
if let Some(cache) = self.puffin_file_metadata_cache.as_ref() {
|
||||
if let Some(metadata) = cache.get_metadata(&self.puffin_file_name) {
|
||||
return Ok(metadata);
|
||||
}
|
||||
}
|
||||
|
||||
let metadata = Arc::new(reader.metadata().await?);
|
||||
if let Some(cache) = self.puffin_file_metadata_cache.as_ref() {
|
||||
cache.put_metadata(self.puffin_file_name.to_string(), metadata.clone());
|
||||
}
|
||||
Ok(metadata)
|
||||
}
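The lookup above is a plain cache-aside pattern: check the cache, on a miss read the footer and insert the result. The same shape in isolation, using moka's sync cache directly (generic names are illustrative only):

fn get_or_load<K, V>(cache: &moka::sync::Cache<K, V>, key: K, load: impl FnOnce() -> V) -> V
where
    K: std::hash::Hash + Eq + Send + Sync + 'static,
    V: Clone + Send + Sync + 'static,
{
    if let Some(v) = cache.get(&key) {
        return v;
    }
    let v = load();
    cache.insert(key, v.clone());
    v
}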
|
||||
|
||||
async fn init_blob_to_stager(
|
||||
reader: PuffinFileReader<F::Reader>,
|
||||
blob_metadata: BlobMetadata,
|
||||
@@ -274,6 +315,13 @@ where
|
||||
A: RangeReader,
|
||||
B: RangeReader,
|
||||
{
|
||||
fn with_file_size_hint(&mut self, file_size_hint: u64) {
|
||||
match self {
|
||||
Either::L(a) => a.with_file_size_hint(file_size_hint),
|
||||
Either::R(b) => b.with_file_size_hint(file_size_hint),
|
||||
}
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
match self {
|
||||
Either::L(a) => a.metadata().await,
|
||||
|
||||
@@ -13,26 +13,14 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::{Cursor, Read};
|
||||
use std::vec;
|
||||
|
||||
use common_base::range_read::{FileReader, RangeReader};
|
||||
use futures::io::Cursor as AsyncCursor;
|
||||
|
||||
use crate::file_format::reader::{AsyncReader, PuffinFileReader, SyncReader};
|
||||
use crate::file_format::writer::{AsyncWriter, Blob, PuffinFileWriter, SyncWriter};
|
||||
|
||||
#[test]
|
||||
fn test_read_empty_puffin_sync() {
|
||||
let path = "src/tests/resources/empty-puffin-uncompressed.puffin";
|
||||
|
||||
let file = File::open(path).unwrap();
|
||||
let mut reader = PuffinFileReader::new(file);
|
||||
let metadata = reader.metadata().unwrap();
|
||||
assert_eq!(metadata.properties.len(), 0);
|
||||
assert_eq!(metadata.blobs.len(), 0);
|
||||
}
|
||||
use crate::file_format::reader::{AsyncReader, PuffinFileFooterReader, PuffinFileReader};
|
||||
use crate::file_format::writer::{AsyncWriter, Blob, PuffinFileWriter};
|
||||
use crate::file_metadata::FileMetadata;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_read_empty_puffin_async() {
|
||||
@@ -45,39 +33,37 @@ async fn test_read_empty_puffin_async() {
|
||||
assert_eq!(metadata.blobs.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sample_metric_data_puffin_sync() {
|
||||
let path = "src/tests/resources/sample-metric-data-uncompressed.puffin";
|
||||
async fn test_read_puffin_file_metadata(
|
||||
path: &str,
|
||||
file_size: u64,
|
||||
expected_metadata: FileMetadata,
|
||||
) {
|
||||
for prefetch_size in [0, file_size / 2, file_size, file_size + 10] {
|
||||
let reader = FileReader::new(path).await.unwrap();
|
||||
let mut footer_reader = PuffinFileFooterReader::new(reader, file_size);
|
||||
if prefetch_size > 0 {
|
||||
footer_reader = footer_reader.with_prefetch_size(prefetch_size);
|
||||
}
|
||||
let metadata = footer_reader.metadata().await.unwrap();
|
||||
assert_eq!(metadata.properties, expected_metadata.properties);
assert_eq!(metadata.blobs, expected_metadata.blobs);
|
||||
}
|
||||
}
|
||||
|
||||
let file = File::open(path).unwrap();
|
||||
let mut reader = PuffinFileReader::new(file);
|
||||
let metadata = reader.metadata().unwrap();
|
||||
#[tokio::test]
|
||||
async fn test_read_puffin_file_metadata_async() {
|
||||
let paths = vec![
|
||||
"src/tests/resources/empty-puffin-uncompressed.puffin",
|
||||
"src/tests/resources/sample-metric-data-uncompressed.puffin",
|
||||
];
|
||||
for path in paths {
|
||||
let mut reader = FileReader::new(path).await.unwrap();
|
||||
let file_size = reader.metadata().await.unwrap().content_length;
|
||||
let mut reader = PuffinFileReader::new(reader);
|
||||
let metadata = reader.metadata().await.unwrap();
|
||||
|
||||
assert_eq!(metadata.properties.len(), 1);
|
||||
assert_eq!(
|
||||
metadata.properties.get("created-by"),
|
||||
Some(&"Test 1234".to_string())
|
||||
);
|
||||
|
||||
assert_eq!(metadata.blobs.len(), 2);
|
||||
assert_eq!(metadata.blobs[0].blob_type, "some-blob");
|
||||
assert_eq!(metadata.blobs[0].offset, 4);
|
||||
assert_eq!(metadata.blobs[0].length, 9);
|
||||
|
||||
assert_eq!(metadata.blobs[1].blob_type, "some-other-blob");
|
||||
assert_eq!(metadata.blobs[1].offset, 13);
|
||||
assert_eq!(metadata.blobs[1].length, 83);
|
||||
|
||||
let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap();
|
||||
let mut buf = String::new();
|
||||
some_blob.read_to_string(&mut buf).unwrap();
|
||||
assert_eq!(buf, "abcdefghi");
|
||||
|
||||
let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
some_other_blob.read_to_end(&mut buf).unwrap();
|
||||
let expected = include_bytes!("tests/resources/sample-metric-data.blob");
|
||||
assert_eq!(buf, expected);
|
||||
test_read_puffin_file_metadata(path, file_size, metadata).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -113,38 +99,6 @@ async fn test_sample_metric_data_puffin_async() {
|
||||
assert_eq!(buf, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_writer_reader_with_empty_sync() {
|
||||
fn test_writer_reader_with_empty_sync(footer_compressed: bool) {
|
||||
let mut buf = Cursor::new(vec![]);
|
||||
|
||||
let mut writer = PuffinFileWriter::new(&mut buf);
|
||||
writer.set_properties(HashMap::from([(
|
||||
"created-by".to_string(),
|
||||
"Test 1234".to_string(),
|
||||
)]));
|
||||
|
||||
writer.set_footer_lz4_compressed(footer_compressed);
|
||||
let written_bytes = writer.finish().unwrap();
|
||||
assert!(written_bytes > 0);
|
||||
|
||||
let mut buf = Cursor::new(buf.into_inner());
|
||||
let mut reader = PuffinFileReader::new(&mut buf);
|
||||
let metadata = reader.metadata().unwrap();
|
||||
|
||||
assert_eq!(metadata.properties.len(), 1);
|
||||
assert_eq!(
|
||||
metadata.properties.get("created-by"),
|
||||
Some(&"Test 1234".to_string())
|
||||
);
|
||||
|
||||
assert_eq!(metadata.blobs.len(), 0);
|
||||
}
|
||||
|
||||
test_writer_reader_with_empty_sync(false);
|
||||
test_writer_reader_with_empty_sync(true);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_writer_reader_empty_async() {
|
||||
async fn test_writer_reader_empty_async(footer_compressed: bool) {
|
||||
@@ -176,76 +130,6 @@ async fn test_writer_reader_empty_async() {
|
||||
test_writer_reader_empty_async(true).await;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_writer_reader_sync() {
|
||||
fn test_writer_reader_sync(footer_compressed: bool) {
|
||||
let mut buf = Cursor::new(vec![]);
|
||||
|
||||
let mut writer = PuffinFileWriter::new(&mut buf);
|
||||
|
||||
let blob1 = "abcdefghi";
|
||||
writer
|
||||
.add_blob(Blob {
|
||||
compressed_data: Cursor::new(&blob1),
|
||||
blob_type: "some-blob".to_string(),
|
||||
properties: Default::default(),
|
||||
compression_codec: None,
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let blob2 = include_bytes!("tests/resources/sample-metric-data.blob");
|
||||
writer
|
||||
.add_blob(Blob {
|
||||
compressed_data: Cursor::new(&blob2),
|
||||
blob_type: "some-other-blob".to_string(),
|
||||
properties: Default::default(),
|
||||
compression_codec: None,
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
writer.set_properties(HashMap::from([(
|
||||
"created-by".to_string(),
|
||||
"Test 1234".to_string(),
|
||||
)]));
|
||||
|
||||
writer.set_footer_lz4_compressed(footer_compressed);
|
||||
let written_bytes = writer.finish().unwrap();
|
||||
assert!(written_bytes > 0);
|
||||
|
||||
let mut buf = Cursor::new(buf.into_inner());
|
||||
let mut reader = PuffinFileReader::new(&mut buf);
|
||||
let metadata = reader.metadata().unwrap();
|
||||
|
||||
assert_eq!(metadata.properties.len(), 1);
|
||||
assert_eq!(
|
||||
metadata.properties.get("created-by"),
|
||||
Some(&"Test 1234".to_string())
|
||||
);
|
||||
|
||||
assert_eq!(metadata.blobs.len(), 2);
|
||||
assert_eq!(metadata.blobs[0].blob_type, "some-blob");
|
||||
assert_eq!(metadata.blobs[0].offset, 4);
|
||||
assert_eq!(metadata.blobs[0].length, 9);
|
||||
|
||||
assert_eq!(metadata.blobs[1].blob_type, "some-other-blob");
|
||||
assert_eq!(metadata.blobs[1].offset, 13);
|
||||
assert_eq!(metadata.blobs[1].length, 83);
|
||||
|
||||
let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap();
|
||||
let mut buf = String::new();
|
||||
some_blob.read_to_string(&mut buf).unwrap();
|
||||
assert_eq!(buf, blob1);
|
||||
|
||||
let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
some_other_blob.read_to_end(&mut buf).unwrap();
|
||||
assert_eq!(buf, blob2);
|
||||
}
|
||||
|
||||
test_writer_reader_sync(false);
|
||||
test_writer_reader_sync(true);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_writer_reader_async() {
|
||||
async fn test_writer_reader_async(footer_compressed: bool) {
|
||||
|
||||
@@ -146,6 +146,7 @@ impl Categorizer {
|
||||
| Expr::Between(_)
|
||||
| Expr::Sort(_)
|
||||
| Expr::Exists(_)
|
||||
| Expr::InList(_)
|
||||
| Expr::ScalarFunction(_) => Commutativity::Commutative,
|
||||
|
||||
Expr::Like(_)
|
||||
@@ -157,7 +158,6 @@ impl Categorizer {
|
||||
| Expr::TryCast(_)
|
||||
| Expr::AggregateFunction(_)
|
||||
| Expr::WindowFunction(_)
|
||||
| Expr::InList(_)
|
||||
| Expr::InSubquery(_)
|
||||
| Expr::ScalarSubquery(_)
|
||||
| Expr::Wildcard { .. } => Commutativity::Unimplemented,
|
||||
|
||||
@@ -139,11 +139,6 @@ tokio-test = "0.4"
|
||||
[target.'cfg(unix)'.dev-dependencies]
|
||||
pprof = { version = "0.13", features = ["criterion", "flamegraph"] }
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
aws-lc-sys = { version = "0.21.0", features = [
|
||||
"prebuilt-nasm",
|
||||
] } # use prebuilt nasm on windows per https://github.com/aws/aws-lc-rs/blob/main/aws-lc-sys/README.md#use-of-prebuilt-nasm-objects
|
||||
|
||||
[build-dependencies]
|
||||
common-version.workspace = true
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
v0.7.1
|
||||
v0.7.2
|
||||
|
||||
@@ -189,6 +189,13 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse query"))]
|
||||
FailedToParseQuery {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: sql::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse InfluxDB line protocol"))]
|
||||
InfluxdbLineProtocol {
|
||||
#[snafu(implicit)]
|
||||
@@ -651,7 +658,8 @@ impl ErrorExt for Error {
|
||||
| OpenTelemetryLog { .. }
|
||||
| UnsupportedJsonDataTypeForTag { .. }
|
||||
| InvalidTableName { .. }
|
||||
| PrepareStatementNotFound { .. } => StatusCode::InvalidArguments,
|
||||
| PrepareStatementNotFound { .. }
|
||||
| FailedToParseQuery { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
Catalog { source, .. } => source.status_code(),
|
||||
RowWriter { source, .. } => source.status_code(),
|
||||
|
||||
@@ -638,10 +638,15 @@ impl HttpServer {
|
||||
router.clone()
|
||||
};
|
||||
|
||||
router = router.route(
|
||||
"/health",
|
||||
routing::get(handler::health).post(handler::health),
|
||||
);
|
||||
router = router
|
||||
.route(
|
||||
"/health",
|
||||
routing::get(handler::health).post(handler::health),
|
||||
)
|
||||
.route(
|
||||
"/ready",
|
||||
routing::get(handler::health).post(handler::health),
|
||||
);
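The new /ready route reuses the same handler as /health and accepts both GET and POST, so readiness probes can be pointed at either path. A hedged probe example (the listen address is illustrative, and reqwest is only used here for demonstration):

async fn probe_ready() -> Result<(), reqwest::Error> {
    let status = reqwest::get("http://127.0.0.1:4000/ready").await?.status();
    assert!(status.is_success());
    Ok(())
}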
|
||||
|
||||
router = router.route("/status", routing::get(handler::status));
|
||||
|
||||
@@ -750,6 +755,10 @@ impl HttpServer {
|
||||
fn route_sql<S>(api_state: ApiState) -> Router<S> {
|
||||
Router::new()
|
||||
.route("/sql", routing::get(handler::sql).post(handler::sql))
|
||||
.route(
|
||||
"/sql/parse",
|
||||
routing::get(handler::sql_parse).post(handler::sql_parse),
|
||||
)
|
||||
.route(
|
||||
"/promql",
|
||||
routing::get(handler::promql).post(handler::promql),
|
||||
|
||||
@@ -38,7 +38,7 @@ use lazy_static::lazy_static;
|
||||
use loki_api::prost_types::Timestamp;
|
||||
use pipeline::error::PipelineTransformSnafu;
|
||||
use pipeline::util::to_pipeline_version;
|
||||
use pipeline::PipelineVersion;
|
||||
use pipeline::{GreptimeTransformer, PipelineVersion};
|
||||
use prost::Message;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Deserializer, Map, Value};
|
||||
@@ -46,8 +46,8 @@ use session::context::{Channel, QueryContext, QueryContextRef};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
use crate::error::{
|
||||
DecodeOtlpRequestSnafu, Error, InvalidParameterSnafu, ParseJson5Snafu, ParseJsonSnafu,
|
||||
PipelineSnafu, Result, UnsupportedContentTypeSnafu,
|
||||
CatalogSnafu, DecodeOtlpRequestSnafu, Error, InvalidParameterSnafu, ParseJson5Snafu,
|
||||
ParseJsonSnafu, PipelineSnafu, Result, UnsupportedContentTypeSnafu,
|
||||
};
|
||||
use crate::http::extractor::LogTableName;
|
||||
use crate::http::header::CONTENT_TYPE_PROTOBUF_STR;
|
||||
@@ -276,39 +276,11 @@ fn transform_ndjson_array_factory(
|
||||
})
|
||||
}
|
||||
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn pipeline_dryrun(
|
||||
State(log_state): State<LogState>,
|
||||
Query(query_params): Query<LogIngesterQueryParams>,
|
||||
Extension(mut query_ctx): Extension<QueryContext>,
|
||||
TypedHeader(content_type): TypedHeader<ContentType>,
|
||||
payload: String,
|
||||
/// Dryrun pipeline with given data
|
||||
fn dryrun_pipeline_inner(
|
||||
value: Vec<Value>,
|
||||
pipeline: &pipeline::Pipeline<GreptimeTransformer>,
|
||||
) -> Result<Response> {
|
||||
let handler = log_state.log_handler;
|
||||
let pipeline_name = query_params.pipeline_name.context(InvalidParameterSnafu {
|
||||
reason: "pipeline_name is required",
|
||||
})?;
|
||||
|
||||
let version = to_pipeline_version(query_params.version).context(PipelineSnafu)?;
|
||||
|
||||
let ignore_errors = query_params.ignore_errors.unwrap_or(false);
|
||||
|
||||
let value = extract_pipeline_value_by_content_type(content_type, payload, ignore_errors)?;
|
||||
|
||||
ensure!(
|
||||
value.len() <= 10,
|
||||
InvalidParameterSnafu {
|
||||
reason: "too many rows for dryrun",
|
||||
}
|
||||
);
|
||||
|
||||
query_ctx.set_channel(Channel::Http);
|
||||
let query_ctx = Arc::new(query_ctx);
|
||||
|
||||
let pipeline = handler
|
||||
.get_pipeline(&pipeline_name, version, query_ctx.clone())
|
||||
.await?;
|
||||
|
||||
let mut intermediate_state = pipeline.init_intermediate_state();
|
||||
|
||||
let mut results = Vec::with_capacity(value.len());
|
||||
@@ -387,6 +359,110 @@ pub async fn pipeline_dryrun(
|
||||
Ok(Json(result).into_response())
|
||||
}
|
||||
|
||||
/// Dryrun pipeline with given data
|
||||
/// pipeline_name and pipeline_version to specify pipeline stored in db
|
||||
/// pipeline to specify pipeline raw content
|
||||
/// data to specify data
|
||||
/// data might be a list of strings or a list of objects
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
pub struct PipelineDryrunParams {
|
||||
pub pipeline_name: Option<String>,
|
||||
pub pipeline_version: Option<String>,
|
||||
pub pipeline: Option<String>,
|
||||
pub data: Vec<Value>,
|
||||
}
|
||||
|
||||
/// Check if the payload is valid json
|
||||
/// Check if the payload contains pipeline or pipeline_name and data
|
||||
/// Return Some if valid, None if invalid
|
||||
fn check_pipeline_dryrun_params_valid(payload: &str) -> Option<PipelineDryrunParams> {
|
||||
match serde_json::from_str::<PipelineDryrunParams>(payload) {
|
||||
// payload with pipeline or pipeline_name and data is array
|
||||
Ok(params) if params.pipeline.is_some() || params.pipeline_name.is_some() => Some(params),
|
||||
// either pipeline_name or pipeline is required
|
||||
Ok(_) => None,
|
||||
// invalid json
|
||||
Err(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the pipeline_name exists
|
||||
fn check_pipeline_name_exists(pipeline_name: Option<String>) -> Result<String> {
|
||||
pipeline_name.context(InvalidParameterSnafu {
|
||||
reason: "pipeline_name is required",
|
||||
})
|
||||
}
|
||||
|
||||
/// Check that the data length is at most 10
|
||||
fn check_data_valid(data_len: usize) -> Result<()> {
|
||||
ensure!(
|
||||
data_len <= 10,
|
||||
InvalidParameterSnafu {
|
||||
reason: "data is required",
|
||||
}
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn pipeline_dryrun(
|
||||
State(log_state): State<LogState>,
|
||||
Query(query_params): Query<LogIngesterQueryParams>,
|
||||
Extension(mut query_ctx): Extension<QueryContext>,
|
||||
TypedHeader(content_type): TypedHeader<ContentType>,
|
||||
payload: String,
|
||||
) -> Result<Response> {
|
||||
let handler = log_state.log_handler;
|
||||
|
||||
match check_pipeline_dryrun_params_valid(&payload) {
|
||||
Some(params) => {
|
||||
let data = params.data;
|
||||
|
||||
check_data_valid(data.len())?;
|
||||
|
||||
match params.pipeline {
|
||||
None => {
|
||||
let version =
|
||||
to_pipeline_version(params.pipeline_version).context(PipelineSnafu)?;
|
||||
let pipeline_name = check_pipeline_name_exists(params.pipeline_name)?;
|
||||
let pipeline = handler
|
||||
.get_pipeline(&pipeline_name, version, Arc::new(query_ctx))
|
||||
.await?;
|
||||
dryrun_pipeline_inner(data, &pipeline)
|
||||
}
|
||||
Some(pipeline) => {
|
||||
let pipeline = handler.build_pipeline(&pipeline)?;
|
||||
dryrun_pipeline_inner(data, &pipeline)
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// This path is for backward compatibility with the previous dry run code
|
||||
// where the payload is just data (JSON or plain text) and the pipeline name
|
||||
// is specified using query param.
|
||||
let pipeline_name = check_pipeline_name_exists(query_params.pipeline_name)?;
|
||||
|
||||
let version = to_pipeline_version(query_params.version).context(PipelineSnafu)?;
|
||||
|
||||
let ignore_errors = query_params.ignore_errors.unwrap_or(false);
|
||||
|
||||
let value =
|
||||
extract_pipeline_value_by_content_type(content_type, payload, ignore_errors)?;
|
||||
|
||||
check_data_valid(value.len())?;
|
||||
|
||||
query_ctx.set_channel(Channel::Http);
|
||||
let query_ctx = Arc::new(query_ctx);
|
||||
|
||||
let pipeline = handler
|
||||
.get_pipeline(&pipeline_name, version, query_ctx.clone())
|
||||
.await?;
|
||||
|
||||
dryrun_pipeline_inner(value, &pipeline)
|
||||
}
|
||||
}
|
||||
}
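For reference, a request body for the new structured dry-run path can be built directly from PipelineDryrunParams; the field values below are made up:

fn example_dryrun_body() -> String {
    let params = PipelineDryrunParams {
        pipeline_name: Some("my_pipeline".to_string()),
        pipeline_version: None,
        pipeline: None,
        data: vec![serde_json::json!({ "message": "hello world" })],
    };
    serde_json::to_string(&params).unwrap()
}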
|
||||
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn loki_ingest(
|
||||
State(log_state): State<LogState>,
|
||||
@@ -438,8 +514,8 @@ pub async fn loki_ingest(
|
||||
let line = entry.line;
|
||||
|
||||
// create and init row
|
||||
let mut row = Vec::with_capacity(schemas.capacity());
|
||||
for _ in 0..row.capacity() {
|
||||
let mut row = Vec::with_capacity(schemas.len());
|
||||
for _ in 0..schemas.len() {
|
||||
row.push(GreptimeValue { value_data: None });
|
||||
}
|
||||
// insert ts and line
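The switch from schemas.capacity() to schemas.len() above matters because Vec::with_capacity only reserves space: the capacity can exceed the number of schema columns actually pushed, which would have seeded rows with extra placeholder cells. A quick illustration of the distinction:

fn capacity_vs_len() {
    let schemas: Vec<&str> = Vec::with_capacity(10);
    assert_eq!(schemas.len(), 0); // nothing pushed yet
    assert!(schemas.capacity() >= 10); // capacity is a reservation, not a count
}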
|
||||
@@ -612,10 +688,15 @@ async fn ingest_logs_inner(
|
||||
let mut results = Vec::with_capacity(pipeline_data.len());
|
||||
let transformed_data: Rows;
|
||||
if pipeline_name == GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME {
|
||||
let rows = pipeline::identity_pipeline(pipeline_data)
|
||||
let table = state
|
||||
.get_table(&table_name, &query_ctx)
|
||||
.await
|
||||
.context(CatalogSnafu)?;
|
||||
let rows = pipeline::identity_pipeline(pipeline_data, table)
|
||||
.context(PipelineTransformSnafu)
|
||||
.context(PipelineSnafu)?;
|
||||
transformed_data = rows;
|
||||
|
||||
transformed_data = rows
|
||||
} else {
|
||||
let pipeline = state
|
||||
.get_pipeline(&pipeline_name, version, query_ctx.clone())
|
||||
|
||||
@@ -30,8 +30,13 @@ use query::parser::{PromQuery, DEFAULT_LOOKBACK_STRING};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use session::context::{Channel, QueryContext, QueryContextRef};
|
||||
use snafu::ResultExt;
|
||||
use sql::dialect::GreptimeDbDialect;
|
||||
use sql::parser::{ParseOptions, ParserContext};
|
||||
use sql::statements::statement::Statement;
|
||||
|
||||
use super::header::collect_plan_metrics;
|
||||
use crate::error::{FailedToParseQuerySnafu, InvalidQuerySnafu, Result};
|
||||
use crate::http::result::arrow_result::ArrowResponse;
|
||||
use crate::http::result::csv_result::CsvResponse;
|
||||
use crate::http::result::error_result::ErrorResponse;
|
||||
@@ -146,10 +151,31 @@ pub async fn sql(
|
||||
resp.with_execution_time(start.elapsed().as_millis() as u64)
|
||||
}
|
||||
|
||||
/// Handler to parse sql
#[axum_macros::debug_handler]
#[tracing::instrument(skip_all, fields(protocol = "http", request_type = "sql"))]
pub async fn sql_parse(
    Query(query_params): Query<SqlQuery>,
    Form(form_params): Form<SqlQuery>,
) -> Result<Json<Vec<Statement>>> {
    let Some(sql) = query_params.sql.or(form_params.sql) else {
        return InvalidQuerySnafu {
            reason: "sql parameter is required.",
        }
        .fail();
    };

    let stmts =
        ParserContext::create_with_dialect(&sql, &GreptimeDbDialect {}, ParseOptions::default())
            .context(FailedToParseQuerySnafu)?;

    Ok(stmts.into())
}
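The handler above is a thin wrapper around the SQL parser, and returning Json<Vec<Statement>> is what motivates the Serialize derives added to the statement AST types later in this diff. A reduced sketch of the same flow (error handling collapsed to unwrap for brevity):

use sql::dialect::GreptimeDbDialect;
use sql::parser::{ParseOptions, ParserContext};

fn parse_to_json(sql: &str) -> String {
    let stmts =
        ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
            .unwrap();
    serde_json::to_string(&stmts).unwrap()
}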
|
||||
|
||||
/// Create a response from query result
|
||||
pub async fn from_output(
|
||||
outputs: Vec<crate::error::Result<Output>>,
|
||||
) -> Result<(Vec<GreptimeQueryOutput>, HashMap<String, Value>), ErrorResponse> {
|
||||
) -> std::result::Result<(Vec<GreptimeQueryOutput>, HashMap<String, Value>), ErrorResponse> {
|
||||
// TODO(sunng87): this api response structure cannot represent error well.
|
||||
// It hides successful execution results from error response
|
||||
let mut results = Vec::with_capacity(outputs.len());
|
||||
|
||||
@@ -39,7 +39,7 @@ use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequ
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
|
||||
use pipeline::{GreptimeTransformer, Pipeline, PipelineInfo, PipelineVersion, PipelineWay};
|
||||
use serde_json::Value;
|
||||
use session::context::QueryContextRef;
|
||||
use session::context::{QueryContext, QueryContextRef};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::influxdb::InfluxdbRequest;
|
||||
@@ -164,4 +164,13 @@ pub trait PipelineHandler {
|
||||
version: PipelineVersion,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<Option<()>>;
|
||||
|
||||
async fn get_table(
|
||||
&self,
|
||||
table: &str,
|
||||
query_ctx: &QueryContext,
|
||||
) -> std::result::Result<Option<Arc<table::Table>>, catalog::error::Error>;
|
||||
|
||||
/// Build a pipeline from a string.
|
||||
fn build_pipeline(&self, pipeline: &str) -> Result<Pipeline<GreptimeTransformer>>;
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ itertools.workspace = true
|
||||
jsonb.workspace = true
|
||||
lazy_static.workspace = true
|
||||
regex.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
snafu.workspace = true
|
||||
sqlparser.workspace = true
|
||||
|
||||
@@ -14,12 +14,13 @@
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use serde::Serialize;
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
use crate::ast::Function;
|
||||
|
||||
/// `ADMIN` statement to execute some administration commands.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub enum Admin {
|
||||
/// Run an admin function.
|
||||
Func(Function),
|
||||
|
||||
@@ -18,10 +18,11 @@ use api::v1;
|
||||
use common_query::AddColumnLocation;
|
||||
use datatypes::schema::FulltextOptions;
|
||||
use itertools::Itertools;
|
||||
use serde::Serialize;
|
||||
use sqlparser::ast::{ColumnDef, DataType, Ident, ObjectName, TableConstraint};
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct AlterTable {
|
||||
pub table_name: ObjectName,
|
||||
pub alter_operation: AlterTableOperation,
|
||||
@@ -56,7 +57,7 @@ impl Display for AlterTable {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub enum AlterTableOperation {
|
||||
/// `ADD <table_constraint>`
|
||||
AddConstraint(TableConstraint),
|
||||
@@ -71,29 +72,20 @@ pub enum AlterTableOperation {
|
||||
target_type: DataType,
|
||||
},
|
||||
/// `SET <table attrs key> = <table attr value>`
|
||||
SetTableOptions {
|
||||
options: Vec<KeyValueOption>,
|
||||
},
|
||||
UnsetTableOptions {
|
||||
keys: Vec<String>,
|
||||
},
|
||||
SetTableOptions { options: Vec<KeyValueOption> },
|
||||
/// `UNSET <table attrs key>`
|
||||
UnsetTableOptions { keys: Vec<String> },
|
||||
/// `DROP COLUMN <name>`
|
||||
DropColumn {
|
||||
name: Ident,
|
||||
},
|
||||
DropColumn { name: Ident },
|
||||
/// `RENAME <new_table_name>`
|
||||
RenameTable {
|
||||
new_table_name: String,
|
||||
},
|
||||
RenameTable { new_table_name: String },
|
||||
/// `MODIFY COLUMN <column_name> SET FULLTEXT [WITH <options>]`
|
||||
SetColumnFulltext {
|
||||
column_name: Ident,
|
||||
options: FulltextOptions,
|
||||
},
|
||||
/// `MODIFY COLUMN <column_name> UNSET FULLTEXT`
|
||||
UnsetColumnFulltext {
|
||||
column_name: Ident,
|
||||
},
|
||||
UnsetColumnFulltext { column_name: Ident },
|
||||
}
|
||||
|
||||
impl Display for AlterTableOperation {
|
||||
@@ -151,7 +143,7 @@ impl Display for AlterTableOperation {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct KeyValueOption {
|
||||
pub key: String,
|
||||
pub value: String,
|
||||
@@ -166,7 +158,7 @@ impl From<KeyValueOption> for v1::Option {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct AlterDatabase {
|
||||
pub database_name: ObjectName,
|
||||
pub alter_operation: AlterDatabaseOperation,
|
||||
@@ -197,7 +189,7 @@ impl Display for AlterDatabase {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub enum AlterDatabaseOperation {
|
||||
SetDatabaseOption { options: Vec<KeyValueOption> },
|
||||
UnsetDatabaseOption { keys: Vec<String> },
|
||||
|
||||
@@ -14,12 +14,13 @@
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use serde::Serialize;
|
||||
use sqlparser::ast::ObjectName;
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
use crate::statements::OptionMap;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub enum Copy {
|
||||
CopyTable(CopyTable),
|
||||
CopyDatabase(CopyDatabase),
|
||||
@@ -34,7 +35,7 @@ impl Display for Copy {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub enum CopyTable {
|
||||
To(CopyTableArgument),
|
||||
From(CopyTableArgument),
|
||||
@@ -65,7 +66,7 @@ impl Display for CopyTable {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub enum CopyDatabase {
|
||||
To(CopyDatabaseArgument),
|
||||
From(CopyDatabaseArgument),
|
||||
@@ -96,7 +97,7 @@ impl Display for CopyDatabase {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct CopyDatabaseArgument {
|
||||
pub database_name: ObjectName,
|
||||
pub with: OptionMap,
|
||||
@@ -104,7 +105,7 @@ pub struct CopyDatabaseArgument {
|
||||
pub location: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct CopyTableArgument {
|
||||
pub table_name: ObjectName,
|
||||
pub with: OptionMap,
|
||||
|
||||
@@ -18,6 +18,7 @@ use std::fmt::{Display, Formatter};
|
||||
use common_catalog::consts::FILE_ENGINE;
|
||||
use datatypes::schema::FulltextOptions;
|
||||
use itertools::Itertools;
|
||||
use serde::Serialize;
|
||||
use snafu::ResultExt;
|
||||
use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query};
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
@@ -58,7 +59,7 @@ fn format_table_constraint(constraints: &[TableConstraint]) -> String {
|
||||
}
|
||||
|
||||
/// Table constraint for create table statement.
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
|
||||
pub enum TableConstraint {
|
||||
/// Primary key constraint.
|
||||
PrimaryKey { columns: Vec<Ident> },
|
||||
@@ -84,7 +85,7 @@ impl Display for TableConstraint {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
|
||||
pub struct CreateTable {
|
||||
/// Create if not exists
|
||||
pub if_not_exists: bool,
|
||||
@@ -100,7 +101,7 @@ pub struct CreateTable {
|
||||
}
|
||||
|
||||
/// Column definition in `CREATE TABLE` statement.
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
|
||||
pub struct Column {
|
||||
/// `ColumnDef` from `sqlparser::ast`
|
||||
pub column_def: ColumnDef,
|
||||
@@ -109,7 +110,7 @@ pub struct Column {
|
||||
}
|
||||
|
||||
/// Column extensions for greptimedb dialect.
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Default)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Default, Serialize)]
|
||||
pub struct ColumnExtensions {
|
||||
/// Fulltext options.
|
||||
pub fulltext_options: Option<OptionMap>,
|
||||
@@ -172,7 +173,7 @@ impl ColumnExtensions {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
|
||||
pub struct Partitions {
|
||||
pub column_list: Vec<Ident>,
|
||||
pub exprs: Vec<Expr>,
|
||||
@@ -244,7 +245,7 @@ impl Display for CreateTable {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
|
||||
pub struct CreateDatabase {
|
||||
pub name: ObjectName,
|
||||
/// Create if not exists
|
||||
@@ -278,7 +279,7 @@ impl Display for CreateDatabase {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
|
||||
pub struct CreateExternalTable {
|
||||
/// Table name
|
||||
pub name: ObjectName,
|
||||
@@ -309,7 +310,7 @@ impl Display for CreateExternalTable {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
|
||||
pub struct CreateTableLike {
|
||||
/// Table name
|
||||
pub table_name: ObjectName,
|
||||
@@ -325,7 +326,7 @@ impl Display for CreateTableLike {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
|
||||
pub struct CreateFlow {
|
||||
/// Flow name
|
||||
pub flow_name: ObjectName,
|
||||
@@ -367,7 +368,7 @@ impl Display for CreateFlow {
|
||||
}
|
||||
|
||||
/// Create SQL view statement.
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
|
||||
pub struct CreateView {
|
||||
/// View name
|
||||
pub name: ObjectName,
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use serde::Serialize;
|
||||
use sqlparser::ast::ObjectName;
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
@@ -22,7 +23,7 @@ use super::query::Query;
|
||||
/// Represents a DECLARE CURSOR statement
|
||||
///
|
||||
/// This statement will carry a SQL query
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct DeclareCursor {
|
||||
pub cursor_name: ObjectName,
|
||||
pub query: Box<Query>,
|
||||
@@ -35,7 +36,7 @@ impl Display for DeclareCursor {
|
||||
}
|
||||
|
||||
/// Represents a FETCH FROM cursor statement
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct FetchCursor {
|
||||
pub cursor_name: ObjectName,
|
||||
pub fetch_size: u64,
|
||||
@@ -48,7 +49,7 @@ impl Display for FetchCursor {
|
||||
}
|
||||
|
||||
/// Represents a CLOSE cursor statement
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct CloseCursor {
|
||||
pub cursor_name: ObjectName,
|
||||
}
|
||||
|
||||
@@ -12,10 +12,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use serde::Serialize;
|
||||
use sqlparser::ast::Statement;
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct Delete {
|
||||
pub inner: Statement,
|
||||
}
|
||||
|
||||
@@ -14,11 +14,12 @@
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use serde::Serialize;
|
||||
use sqlparser::ast::ObjectName;
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
/// SQL structure for `DESCRIBE TABLE`.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct DescribeTable {
|
||||
name: ObjectName,
|
||||
}
|
||||
|
||||
@@ -14,11 +14,12 @@
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use serde::Serialize;
|
||||
use sqlparser::ast::ObjectName;
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
/// DROP TABLE statement.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct DropTable {
|
||||
table_names: Vec<ObjectName>,
|
||||
|
||||
@@ -62,7 +63,7 @@ impl Display for DropTable {
|
||||
}
|
||||
|
||||
/// DROP DATABASE statement.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct DropDatabase {
|
||||
name: ObjectName,
|
||||
/// drop table if exists
|
||||
@@ -99,7 +100,7 @@ impl Display for DropDatabase {
|
||||
}
|
||||
|
||||
/// DROP FLOW statement.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct DropFlow {
|
||||
flow_name: ObjectName,
|
||||
/// drop flow if exists
|
||||
@@ -138,7 +139,7 @@ impl Display for DropFlow {
|
||||
}
|
||||
|
||||
/// `DROP VIEW` statement.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct DropView {
|
||||
// The view name
|
||||
pub view_name: ObjectName,
|
||||
|
||||
@@ -14,13 +14,14 @@
|
||||
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
use serde::Serialize;
|
||||
use sqlparser::ast::Statement as SpStatement;
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
use crate::error::Error;
|
||||
|
||||
/// Explain statement.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct Explain {
|
||||
pub inner: SpStatement,
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use serde::Serialize;
|
||||
use sqlparser::ast::{ObjectName, Query, SetExpr, Statement, UnaryOperator, Values};
|
||||
use sqlparser::parser::ParserError;
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
@@ -20,7 +21,7 @@ use crate::ast::{Expr, Value};
|
||||
use crate::error::Result;
|
||||
use crate::statements::query::Query as GtQuery;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct Insert {
|
||||
// Can only be sqlparser::ast::Statement::Insert variant
|
||||
pub inner: Statement,
|
||||
|
||||
@@ -16,14 +16,16 @@ use std::collections::{BTreeMap, HashMap};
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use common_base::secrets::{ExposeSecret, ExposeSecretMut, SecretString};
|
||||
use serde::Serialize;
|
||||
use sqlparser::ast::{Visit, VisitMut, Visitor, VisitorMut};
|
||||
|
||||
const REDACTED_OPTIONS: [&str; 2] = ["access_key_id", "secret_access_key"];
|
||||
|
||||
/// Options hashmap.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
#[derive(Clone, Debug, Default, Serialize)]
|
||||
pub struct OptionMap {
|
||||
options: BTreeMap<String, String>,
|
||||
#[serde(skip_serializing)]
|
||||
secrets: BTreeMap<String, SecretString>,
|
||||
}
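Note the #[serde(skip_serializing)] on secrets: when an OptionMap is serialized (for example through the new /sql/parse JSON output), redacted credentials are omitted entirely rather than masked. A self-contained illustration of the attribute's behavior with plain maps:

use std::collections::BTreeMap;

use serde::Serialize;

#[derive(Serialize)]
struct Demo {
    options: BTreeMap<String, String>,
    #[serde(skip_serializing)]
    secrets: BTreeMap<String, String>,
}

fn main() {
    let demo = Demo {
        options: BTreeMap::new(),
        secrets: BTreeMap::from([("secret_access_key".to_string(), "hunter2".to_string())]),
    };
    assert_eq!(serde_json::to_string(&demo).unwrap(), r#"{"options":{}}"#);
}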
|
||||
|
||||
|
||||
@@ -14,13 +14,14 @@
|
||||
|
||||
use std::fmt;
|
||||
|
||||
use serde::Serialize;
|
||||
use sqlparser::ast::Query as SpQuery;
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
use crate::error::Error;
|
||||
|
||||
/// Query statement instance.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
pub struct Query {
|
||||
pub inner: SpQuery,
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.