Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2025-12-24 07:00:00 +00:00)

Compare commits: v1.0.0-bet...create-tab (22 commits)
| SHA1 |
|---|
| 1bb541112f |
| fdedbb8261 |
| 8d9afc83e3 |
| 625fdd09ea |
| b3bc3c76f1 |
| 342eb47e19 |
| 6a6b34c709 |
| a8b512dded |
| bd8ffd3db9 |
| c0652f6dd5 |
| fed6cb0806 |
| 69659211f6 |
| 6332d91884 |
| 4d66bd96b8 |
| 2f4a15ec40 |
| 658332fe68 |
| c088d361a4 |
| a85864067e |
| 0df69c95aa |
| 72eede8b38 |
| 95eccd6cde |
| 0bc5a305be |
.github/scripts/create-version.sh (vendored): 11 changed lines
@@ -49,6 +49,17 @@ function create_version() {
       echo "GITHUB_REF_NAME is empty in push event" >&2
       exit 1
     fi
+
+    # For tag releases, ensure GITHUB_REF_NAME matches the version in Cargo.toml
+    CARGO_VERSION=$(grep '^version = ' Cargo.toml | cut -d '"' -f 2 | head -n 1)
+    EXPECTED_REF_NAME="v${CARGO_VERSION}"
+
+    if [ "$GITHUB_REF_NAME" != "$EXPECTED_REF_NAME" ]; then
+      echo "Error: GITHUB_REF_NAME '$GITHUB_REF_NAME' does not match Cargo.toml version 'v${CARGO_VERSION}'" >&2
+      echo "Expected tag name: '$EXPECTED_REF_NAME'" >&2
+      exit 1
+    fi
+
     echo "$GITHUB_REF_NAME"
   elif [ "$GITHUB_EVENT_NAME" = workflow_dispatch ]; then
     echo "$NEXT_RELEASE_VERSION-$(git rev-parse --short HEAD)-$(date "+%Y%m%d-%s")"
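The added check ties release tags to the workspace version declared in Cargo.toml. The same logic, written as a small standalone Rust sketch for illustration (hypothetical, not part of the repository; it assumes it runs from the repository root and that GITHUB_REF_NAME is set by the CI runner):

```rust
use std::fs;

fn main() {
    // Provided by GitHub Actions for push events (assumption: run inside CI).
    let ref_name = std::env::var("GITHUB_REF_NAME").unwrap_or_default();

    // Mirrors `grep '^version = ' Cargo.toml | cut -d '"' -f 2 | head -n 1`.
    let cargo_toml = fs::read_to_string("Cargo.toml").expect("read Cargo.toml");
    let cargo_version = cargo_toml
        .lines()
        .find(|line| line.starts_with("version = "))
        .and_then(|line| line.split('"').nth(1))
        .expect("no `version = \"...\"` line in Cargo.toml");

    // A tag release must be named exactly "v<Cargo.toml version>".
    let expected = format!("v{cargo_version}");
    if ref_name != expected {
        eprintln!("GITHUB_REF_NAME '{ref_name}' does not match Cargo.toml version '{expected}'");
        std::process::exit(1);
    }
    println!("{ref_name}");
}
```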
.github/workflows/check-git-deps.yml (new file, vendored): 154 lines
@@ -0,0 +1,154 @@
name: Check Git Dependencies on Main Branch

on:
  pull_request:
    branches: [main]
    paths:
      - 'Cargo.toml'
  push:
    branches: [main]
    paths:
      - 'Cargo.toml'

jobs:
  check-git-deps:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Check git dependencies
        env:
          WHITELIST_DEPS: "greptime-proto,meter-core,meter-macros"
        run: |
          #!/bin/bash
          set -e

          echo "Checking whitelisted git dependencies..."

          # Function to check if a commit is on main branch
          check_commit_on_main() {
            local repo_url="$1"
            local commit="$2"
            local repo_name=$(basename "$repo_url" .git)

            echo "Checking $repo_name"
            echo "Repo: $repo_url"
            echo "Commit: $commit"

            # Create a temporary directory for cloning
            local temp_dir=$(mktemp -d)

            # Clone the repository
            if git clone "$repo_url" "$temp_dir" 2>/dev/null; then
              cd "$temp_dir"

              # Try to determine the main branch name
              local main_branch="main"
              if ! git rev-parse --verify origin/main >/dev/null 2>&1; then
                if git rev-parse --verify origin/master >/dev/null 2>&1; then
                  main_branch="master"
                else
                  # Try to get the default branch
                  main_branch=$(git symbolic-ref refs/remotes/origin/HEAD | sed 's@^refs/remotes/origin/@@')
                fi
              fi

              echo "Main branch: $main_branch"

              # Check if commit exists
              if git cat-file -e "$commit" 2>/dev/null; then
                # Check if commit is on main branch
                if git merge-base --is-ancestor "$commit" "origin/$main_branch" 2>/dev/null; then
                  echo "PASS: Commit $commit is on $main_branch branch"
                  cd - >/dev/null
                  rm -rf "$temp_dir"
                  return 0
                else
                  echo "FAIL: Commit $commit is NOT on $main_branch branch"

                  # Try to find which branch contains this commit
                  local branch_name=$(git branch -r --contains "$commit" 2>/dev/null | head -1 | sed 's/^[[:space:]]*origin\///' | sed 's/[[:space:]]*$//')
                  if [[ -n "$branch_name" ]]; then
                    echo "Found on branch: $branch_name"
                  fi
                  cd - >/dev/null
                  rm -rf "$temp_dir"
                  return 1
                fi
              else
                echo "FAIL: Commit $commit not found in repository"
                cd - >/dev/null
                rm -rf "$temp_dir"
                return 1
              fi
            else
              echo "FAIL: Failed to clone $repo_url"
              rm -rf "$temp_dir"
              return 1
            fi
          }

          # Extract whitelisted git dependencies from Cargo.toml
          echo "Extracting git dependencies from Cargo.toml..."

          # Create temporary array to store dependencies
          declare -a deps=()

          # Build awk pattern from whitelist
          IFS=',' read -ra WHITELIST <<< "$WHITELIST_DEPS"
          awk_pattern=""
          for dep in "${WHITELIST[@]}"; do
            if [[ -n "$awk_pattern" ]]; then
              awk_pattern="$awk_pattern|"
            fi
            awk_pattern="$awk_pattern$dep"
          done

          # Extract whitelisted dependencies
          while IFS= read -r line; do
            if [[ -n "$line" ]]; then
              deps+=("$line")
            fi
          done < <(awk -v pattern="$awk_pattern" '
            $0 ~ pattern ".*git = \"https:/" {
              match($0, /git = "([^"]+)"/, arr)
              git_url = arr[1]
              if (match($0, /rev = "([^"]+)"/, rev_arr)) {
                rev = rev_arr[1]
                print git_url " " rev
              } else {
                # Check next line for rev
                getline
                if (match($0, /rev = "([^"]+)"/, rev_arr)) {
                  rev = rev_arr[1]
                  print git_url " " rev
                }
              }
            }
          ' Cargo.toml)

          echo "Found ${#deps[@]} dependencies to check:"
          for dep in "${deps[@]}"; do
            echo " $dep"
          done

          failed=0

          for dep in "${deps[@]}"; do
            read -r repo_url commit <<< "$dep"
            if ! check_commit_on_main "$repo_url" "$commit"; then
              failed=1
            fi
          done

          echo "Check completed."

          if [[ $failed -eq 1 ]]; then
            echo "ERROR: Some git dependencies are not on their main branches!"
            echo "Please update the commits to point to main branch commits."
            exit 1
          else
            echo "SUCCESS: All git dependencies are on their main branches!"
          fi
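The heart of the workflow is the `git merge-base --is-ancestor` test. For debugging a failing check locally, the same test can be reproduced outside CI; here is a minimal Rust sketch (a hypothetical helper, not part of the repository) that shells out to git against an already-cloned repository:

```rust
use std::process::Command;

/// Returns true when `commit` is reachable from `origin/<main_branch>` in `repo_dir`.
/// This is the same test the workflow runs: `git merge-base --is-ancestor` exits
/// with status 0 exactly when the commit is an ancestor of that branch tip.
fn commit_on_main(repo_dir: &str, commit: &str, main_branch: &str) -> bool {
    let target = format!("origin/{main_branch}");
    Command::new("git")
        .args([
            "-C",
            repo_dir,
            "merge-base",
            "--is-ancestor",
            commit,
            target.as_str(),
        ])
        .status()
        .map(|status| status.success())
        .unwrap_or(false)
}

fn main() {
    // Example values from this comparison's Cargo.toml change; the clone path is
    // an assumption for illustration (clone the repository there first).
    let ok = commit_on_main(
        "/tmp/greptime-proto",
        "173efe5ec62722089db7c531c0b0d470a072b915",
        "main",
    );
    println!("commit on main: {ok}");
}
```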
Cargo.lock (generated): 53 changes
@@ -1786,6 +1786,7 @@ dependencies = [
|
||||
"common-recordbatch",
|
||||
"common-runtime",
|
||||
"common-telemetry",
|
||||
"common-test-util",
|
||||
"common-time",
|
||||
"common-version",
|
||||
"common-wal",
|
||||
@@ -2579,10 +2580,12 @@ dependencies = [
|
||||
name = "common-sql"
|
||||
version = "1.0.0-beta.3"
|
||||
dependencies = [
|
||||
"arrow-schema",
|
||||
"common-base",
|
||||
"common-decimal",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"common-telemetry",
|
||||
"common-time",
|
||||
"datafusion-sql",
|
||||
"datatypes",
|
||||
@@ -4633,8 +4636,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "etcd-client"
|
||||
version = "0.15.0"
|
||||
source = "git+https://github.com/GreptimeTeam/etcd-client?rev=f62df834f0cffda355eba96691fe1a9a332b75a7#f62df834f0cffda355eba96691fe1a9a332b75a7"
|
||||
version = "0.16.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "88365f1a5671eb2f7fc240adb216786bc6494b38ce15f1d26ad6eaa303d5e822"
|
||||
dependencies = [
|
||||
"http 1.3.1",
|
||||
"prost 0.13.5",
|
||||
@@ -5034,6 +5038,7 @@ dependencies = [
|
||||
"common-function",
|
||||
"common-grpc",
|
||||
"common-macro",
|
||||
"common-memory-manager",
|
||||
"common-meta",
|
||||
"common-options",
|
||||
"common-procedure",
|
||||
@@ -5459,7 +5464,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "greptime-proto"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=0423fa30203187c75e2937a668df1da699c8b96c#0423fa30203187c75e2937a668df1da699c8b96c"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=173efe5ec62722089db7c531c0b0d470a072b915#173efe5ec62722089db7c531c0b0d470a072b915"
|
||||
dependencies = [
|
||||
"prost 0.13.5",
|
||||
"prost-types 0.13.5",
|
||||
@@ -9318,9 +9323,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pgwire"
|
||||
version = "0.36.3"
|
||||
version = "0.37.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "70a2bcdcc4b20a88e0648778ecf00415bbd5b447742275439c22176835056f99"
|
||||
checksum = "02d86d57e732d40382ceb9bfea80901d839bae8571aa11c06af9177aed9dfb6c"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"base64 0.22.1",
|
||||
@@ -9339,6 +9344,7 @@ dependencies = [
|
||||
"ryu",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"smol_str",
|
||||
"stringprep",
|
||||
"thiserror 2.0.17",
|
||||
"tokio",
|
||||
@@ -11503,10 +11509,11 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.219"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
|
||||
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
||||
dependencies = [
|
||||
"serde_core",
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
@@ -11521,10 +11528,19 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.219"
|
||||
name = "serde_core"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
|
||||
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -11677,6 +11693,7 @@ dependencies = [
|
||||
"common-grpc",
|
||||
"common-macro",
|
||||
"common-mem-prof",
|
||||
"common-memory-manager",
|
||||
"common-meta",
|
||||
"common-plugins",
|
||||
"common-pprof",
|
||||
@@ -11999,6 +12016,16 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smol_str"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3498b0a27f93ef1402f20eefacfaa1691272ac4eca1cdc8c596cb0a245d6cbf5"
|
||||
dependencies = [
|
||||
"borsh",
|
||||
"serde_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "snafu"
|
||||
version = "0.7.5"
|
||||
@@ -12204,7 +12231,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "sqlparser"
|
||||
version = "0.58.0"
|
||||
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=4b519a5caa95472cc3988f5556813a583dd35af1#4b519a5caa95472cc3988f5556813a583dd35af1"
|
||||
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=a0ce2bc6eb3e804532932f39833c32432f5c9a39#a0ce2bc6eb3e804532932f39833c32432f5c9a39"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"log",
|
||||
@@ -12228,7 +12255,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "sqlparser_derive"
|
||||
version = "0.3.0"
|
||||
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=4b519a5caa95472cc3988f5556813a583dd35af1#4b519a5caa95472cc3988f5556813a583dd35af1"
|
||||
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=a0ce2bc6eb3e804532932f39833c32432f5c9a39#a0ce2bc6eb3e804532932f39833c32432f5c9a39"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -12459,6 +12486,7 @@ dependencies = [
|
||||
"common-config",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"common-memory-manager",
|
||||
"common-meta",
|
||||
"common-options",
|
||||
"common-procedure",
|
||||
@@ -13161,6 +13189,7 @@ dependencies = [
|
||||
"common-event-recorder",
|
||||
"common-frontend",
|
||||
"common-grpc",
|
||||
"common-memory-manager",
|
||||
"common-meta",
|
||||
"common-procedure",
|
||||
"common-query",
|
||||
|
||||
@@ -143,14 +143,14 @@ derive_builder = "0.20"
|
||||
derive_more = { version = "2.1", features = ["full"] }
|
||||
dotenv = "0.15"
|
||||
either = "1.15"
|
||||
etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62df834f0cffda355eba96691fe1a9a332b75a7", features = [
|
||||
etcd-client = { version = "0.16.1", features = [
|
||||
"tls",
|
||||
"tls-roots",
|
||||
] }
|
||||
fst = "0.4.7"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "0423fa30203187c75e2937a668df1da699c8b96c" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "173efe5ec62722089db7c531c0b0d470a072b915" }
|
||||
hex = "0.4"
|
||||
http = "1"
|
||||
humantime = "2.1"
|
||||
@@ -332,7 +332,7 @@ datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.g
|
||||
datafusion-datasource = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
|
||||
datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
|
||||
datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
|
||||
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "4b519a5caa95472cc3988f5556813a583dd35af1" } # branch = "v0.58.x"
|
||||
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "a0ce2bc6eb3e804532932f39833c32432f5c9a39" } # branch = "v0.58.x"
|
||||
|
||||
[profile.release]
|
||||
debug = 1
|
||||
|
||||
@@ -14,11 +14,12 @@
|
||||
| --- | -----| ------- | ----------- |
|
||||
| `default_timezone` | String | Unset | The default timezone of the server. |
|
||||
| `default_column_prefix` | String | Unset | The default column prefix for auto-created time index and value columns. |
|
||||
| `max_in_flight_write_bytes` | String | Unset | Maximum total memory for all concurrent write request bodies and messages (HTTP, gRPC, Flight).<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
|
||||
| `write_bytes_exhausted_policy` | String | Unset | Policy when write bytes quota is exhausted.<br/>Options: "wait" (default, 10s timeout), "wait(<duration>)" (e.g., "wait(30s)"), "fail" |
|
||||
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
|
||||
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
|
||||
| `max_concurrent_queries` | Integer | `0` | The maximum number of concurrent queries allowed to execute. Zero means unlimited.<br/>NOTE: This setting affects scan_memory_limit's privileged tier allocation.<br/>When set, 70% of queries get privileged memory access (full scan_memory_limit).<br/>The remaining 30% get standard tier access (70% of scan_memory_limit). |
|
||||
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
|
||||
| `max_in_flight_write_bytes` | String | Unset | The maximum in-flight write bytes. |
|
||||
| `runtime` | -- | -- | The runtime options. |
|
||||
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
|
||||
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
|
||||
@@ -26,14 +27,12 @@
|
||||
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
|
||||
| `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
|
||||
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
|
||||
| `http.max_total_body_memory` | String | Unset | Maximum total memory for all concurrent HTTP request bodies.<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
|
||||
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
|
||||
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
|
||||
| `http.prom_validation_mode` | String | `strict` | Whether to enable validation for Prometheus remote write requests.<br/>Available options:<br/>- strict: deny invalid UTF-8 strings (default).<br/>- lossy: allow invalid UTF-8 strings, replace invalid characters with REPLACEMENT_CHARACTER (U+FFFD).<br/>- unchecked: do not validate strings. |
|
||||
| `grpc` | -- | -- | The gRPC server options. |
|
||||
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
|
||||
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
|
||||
| `grpc.max_total_message_memory` | String | Unset | Maximum total memory for all concurrent gRPC request messages.<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
|
||||
| `grpc.max_connection_age` | String | Unset | The maximum connection age for gRPC connection.<br/>The value can be a human-readable time string. For example: `10m` for ten minutes or `1h` for one hour.<br/>Refer to https://grpc.io/docs/guides/keepalive/ for more details. |
|
||||
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
|
||||
| `grpc.tls.mode` | String | `disable` | TLS mode. |
|
||||
@@ -83,6 +82,8 @@
|
||||
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. |
|
||||
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.connect_timeout` | String | `3s` | The connect timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.timeout` | String | `3s` | The timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
|
||||
| `wal.num_topics` | Integer | `64` | Number of topics.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default)<br/>**It's only used when the provider is `kafka`**. |
|
||||
@@ -225,7 +226,8 @@
|
||||
| --- | -----| ------- | ----------- |
|
||||
| `default_timezone` | String | Unset | The default timezone of the server. |
|
||||
| `default_column_prefix` | String | Unset | The default column prefix for auto-created time index and value columns. |
|
||||
| `max_in_flight_write_bytes` | String | Unset | The maximum in-flight write bytes. |
|
||||
| `max_in_flight_write_bytes` | String | Unset | Maximum total memory for all concurrent write request bodies and messages (HTTP, gRPC, Flight).<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
|
||||
| `write_bytes_exhausted_policy` | String | Unset | Policy when write bytes quota is exhausted.<br/>Options: "wait" (default, 10s timeout), "wait(<duration>)" (e.g., "wait(30s)"), "fail" |
|
||||
| `runtime` | -- | -- | The runtime options. |
|
||||
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
|
||||
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
|
||||
@@ -236,7 +238,6 @@
|
||||
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
|
||||
| `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
|
||||
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
|
||||
| `http.max_total_body_memory` | String | Unset | Maximum total memory for all concurrent HTTP request bodies.<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
|
||||
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
|
||||
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
|
||||
| `http.prom_validation_mode` | String | `strict` | Whether to enable validation for Prometheus remote write requests.<br/>Available options:<br/>- strict: deny invalid UTF-8 strings (default).<br/>- lossy: allow invalid UTF-8 strings, replace invalid characters with REPLACEMENT_CHARACTER (U+FFFD).<br/>- unchecked: do not validate strings. |
|
||||
@@ -244,7 +245,6 @@
|
||||
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
|
||||
| `grpc.server_addr` | String | `127.0.0.1:4001` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
|
||||
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
|
||||
| `grpc.max_total_message_memory` | String | Unset | Maximum total memory for all concurrent gRPC request messages.<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
|
||||
| `grpc.flight_compression` | String | `arrow_ipc` | Compression mode for frontend side Arrow IPC service. Available options:<br/>- `none`: disable all compression<br/>- `transport`: only enable gRPC transport compression (zstd)<br/>- `arrow_ipc`: only enable Arrow IPC compression (lz4)<br/>- `all`: enable all compression.<br/>Default to `none` |
|
||||
| `grpc.max_connection_age` | String | Unset | The maximum connection age for gRPC connection.<br/>The value can be a human-readable time string. For example: `10m` for ten minutes or `1h` for one hour.<br/>Refer to https://grpc.io/docs/guides/keepalive/ for more details. |
|
||||
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
|
||||
@@ -344,14 +344,15 @@
|
||||
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
|
||||
| `backend` | String | `etcd_store` | The datastore for meta server.<br/>Available values:<br/>- `etcd_store` (default value)<br/>- `memory_store`<br/>- `postgres_store`<br/>- `mysql_store` |
|
||||
| `meta_table_name` | String | `greptime_metakv` | Table name in RDS to store metadata. Effect when using a RDS kvbackend.<br/>**Only used when backend is `postgres_store`.** |
|
||||
| `meta_schema_name` | String | `greptime_schema` | Optional PostgreSQL schema for metadata table and election table name qualification.<br/>When PostgreSQL public schema is not writable (e.g., PostgreSQL 15+ with restricted public),<br/>set this to a writable schema. GreptimeDB will use `meta_schema_name`.`meta_table_name`.<br/>GreptimeDB will NOT create the schema automatically; please ensure it exists or the user has permission.<br/>**Only used when backend is `postgres_store`.** |
|
||||
| `meta_schema_name` | String | `greptime_schema` | Optional PostgreSQL schema for metadata table and election table name qualification.<br/>When PostgreSQL public schema is not writable (e.g., PostgreSQL 15+ with restricted public),<br/>set this to a writable schema. GreptimeDB will use `meta_schema_name`.`meta_table_name`.<br/>**Only used when backend is `postgres_store`.** |
|
||||
| `auto_create_schema` | Bool | `true` | Automatically create PostgreSQL schema if it doesn't exist.<br/>When enabled, the system will execute `CREATE SCHEMA IF NOT EXISTS <schema_name>`<br/>before creating metadata tables. This is useful in production environments where<br/>manual schema creation may be restricted.<br/>Default is true.<br/>Note: The PostgreSQL user must have CREATE SCHEMA permission for this to work.<br/>**Only used when backend is `postgres_store`.** |
|
||||
| `meta_election_lock_id` | Integer | `1` | Advisory lock id in PostgreSQL for election. Effect when using PostgreSQL as kvbackend<br/>Only used when backend is `postgres_store`. |
|
||||
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
|
||||
| `use_memory_store` | Bool | `false` | Store data in memory. |
|
||||
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
|
||||
| `region_failure_detector_initialization_delay` | String | `10m` | The delay before starting region failure detection.<br/>This delay helps prevent Metasrv from triggering unnecessary region failovers before all Datanodes are fully started.<br/>Especially useful when the cluster is not deployed with GreptimeDB Operator and maintenance mode is not enabled. |
|
||||
| `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.<br/>**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
|
||||
| `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
|
||||
| `heartbeat_interval` | String | `3s` | Base heartbeat interval for calculating distributed time constants.<br/>The frontend heartbeat interval is 6 times of the base heartbeat interval.<br/>The flownode/datanode heartbeat interval is 1 times of the base heartbeat interval.<br/>e.g., If the base heartbeat interval is 3s, the frontend heartbeat interval is 18s, the flownode/datanode heartbeat interval is 3s.<br/>If you change this value, you need to change the heartbeat interval of the flownode/frontend/datanode accordingly. |
|
||||
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
|
||||
| `runtime` | -- | -- | The runtime options. |
|
||||
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
|
||||
@@ -361,12 +362,18 @@
|
||||
| `backend_tls.cert_path` | String | `""` | Path to client certificate file (for client authentication)<br/>Like "/path/to/client.crt" |
|
||||
| `backend_tls.key_path` | String | `""` | Path to client private key file (for client authentication)<br/>Like "/path/to/client.key" |
|
||||
| `backend_tls.ca_cert_path` | String | `""` | Path to CA certificate file (for server certificate verification)<br/>Required when using custom CAs or self-signed certificates<br/>Leave empty to use system root certificates only<br/>Like "/path/to/ca.crt" |
|
||||
| `backend_client` | -- | -- | The backend client options.<br/>Currently, only applicable when using etcd as the metadata store. |
|
||||
| `backend_client.keep_alive_timeout` | String | `3s` | The keep alive timeout for backend client. |
|
||||
| `backend_client.keep_alive_interval` | String | `10s` | The keep alive interval for backend client. |
|
||||
| `backend_client.connect_timeout` | String | `3s` | The connect timeout for backend client. |
|
||||
| `grpc` | -- | -- | The gRPC server options. |
|
||||
| `grpc.bind_addr` | String | `127.0.0.1:3002` | The address to bind the gRPC server. |
|
||||
| `grpc.server_addr` | String | `127.0.0.1:3002` | The communication server address for the frontend and datanode to connect to metasrv.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `bind_addr`. |
|
||||
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
|
||||
| `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
|
||||
| `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
|
||||
| `grpc.http2_keep_alive_interval` | String | `10s` | The server side HTTP/2 keep-alive interval |
|
||||
| `grpc.http2_keep_alive_timeout` | String | `3s` | The server side HTTP/2 keep-alive timeout. |
|
||||
| `http` | -- | -- | The HTTP server options. |
|
||||
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
|
||||
| `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
|
||||
@@ -476,6 +483,8 @@
|
||||
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. |
|
||||
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.connect_timeout` | String | `3s` | The connect timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.timeout` | String | `3s` | The timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.max_batch_bytes` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
|
||||
|
||||
@@ -169,6 +169,14 @@ recovery_parallelism = 2
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
## The connect timeout for kafka client.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
#+ connect_timeout = "3s"
|
||||
|
||||
## The timeout for kafka client.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
#+ timeout = "3s"
|
||||
|
||||
## The max size of a single producer batch.
|
||||
## Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
@@ -225,6 +233,7 @@ overwrite_entry_start_id = false
|
||||
# endpoint = "https://s3.amazonaws.com"
|
||||
# region = "us-west-2"
|
||||
# enable_virtual_host_style = false
|
||||
# disable_ec2_metadata = false
|
||||
|
||||
# Example of using Oss as the storage.
|
||||
# [storage]
|
||||
|
||||
@@ -6,9 +6,15 @@ default_timezone = "UTC"
|
||||
## @toml2docs:none-default
|
||||
default_column_prefix = "greptime"
|
||||
|
||||
## The maximum in-flight write bytes.
|
||||
## Maximum total memory for all concurrent write request bodies and messages (HTTP, gRPC, Flight).
|
||||
## Set to 0 to disable the limit. Default: "0" (unlimited)
|
||||
## @toml2docs:none-default
|
||||
#+ max_in_flight_write_bytes = "500MB"
|
||||
#+ max_in_flight_write_bytes = "1GB"
|
||||
|
||||
## Policy when write bytes quota is exhausted.
|
||||
## Options: "wait" (default, 10s timeout), "wait(<duration>)" (e.g., "wait(30s)"), "fail"
|
||||
## @toml2docs:none-default
|
||||
#+ write_bytes_exhausted_policy = "wait"
|
||||
|
||||
## The runtime options.
|
||||
#+ [runtime]
|
||||
@@ -35,10 +41,6 @@ timeout = "0s"
|
||||
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
|
||||
## Set to 0 to disable limit.
|
||||
body_limit = "64MB"
|
||||
## Maximum total memory for all concurrent HTTP request bodies.
|
||||
## Set to 0 to disable the limit. Default: "0" (unlimited)
|
||||
## @toml2docs:none-default
|
||||
#+ max_total_body_memory = "1GB"
|
||||
## HTTP CORS support, it's turned on by default
|
||||
## This allows browser to access http APIs without CORS restrictions
|
||||
enable_cors = true
|
||||
@@ -62,10 +64,6 @@ bind_addr = "127.0.0.1:4001"
|
||||
server_addr = "127.0.0.1:4001"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 8
|
||||
## Maximum total memory for all concurrent gRPC request messages.
|
||||
## Set to 0 to disable the limit. Default: "0" (unlimited)
|
||||
## @toml2docs:none-default
|
||||
#+ max_total_message_memory = "1GB"
|
||||
## Compression mode for frontend side Arrow IPC service. Available options:
|
||||
## - `none`: disable all compression
|
||||
## - `transport`: only enable gRPC transport compression (zstd)
|
||||
@@ -131,7 +129,6 @@ key_path = ""
|
||||
## For now, gRPC tls config does not support auto reload.
|
||||
watch = false
|
||||
|
||||
|
||||
## MySQL server options.
|
||||
[mysql]
|
||||
## Whether to enable.
|
||||
|
||||
@@ -34,11 +34,18 @@ meta_table_name = "greptime_metakv"
|
||||
## Optional PostgreSQL schema for metadata table and election table name qualification.
|
||||
## When PostgreSQL public schema is not writable (e.g., PostgreSQL 15+ with restricted public),
|
||||
## set this to a writable schema. GreptimeDB will use `meta_schema_name`.`meta_table_name`.
|
||||
## GreptimeDB will NOT create the schema automatically; please ensure it exists or the user has permission.
|
||||
## **Only used when backend is `postgres_store`.**
|
||||
|
||||
meta_schema_name = "greptime_schema"
|
||||
|
||||
## Automatically create PostgreSQL schema if it doesn't exist.
|
||||
## When enabled, the system will execute `CREATE SCHEMA IF NOT EXISTS <schema_name>`
|
||||
## before creating metadata tables. This is useful in production environments where
|
||||
## manual schema creation may be restricted.
|
||||
## Default is true.
|
||||
## Note: The PostgreSQL user must have CREATE SCHEMA permission for this to work.
|
||||
## **Only used when backend is `postgres_store`.**
|
||||
auto_create_schema = true
|
||||
|
||||
## Advisory lock id in PostgreSQL for election. Effect when using PostgreSQL as kvbackend
|
||||
## Only used when backend is `postgres_store`.
|
||||
meta_election_lock_id = 1
|
||||
@@ -50,9 +57,6 @@ meta_election_lock_id = 1
|
||||
## For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
|
||||
selector = "round_robin"
|
||||
|
||||
## Store data in memory.
|
||||
use_memory_store = false
|
||||
|
||||
## Whether to enable region failover.
|
||||
## This feature is only available on GreptimeDB running on cluster mode and
|
||||
## - Using Remote WAL
|
||||
@@ -71,6 +75,13 @@ allow_region_failover_on_local_wal = false
|
||||
## Max allowed idle time before removing node info from metasrv memory.
|
||||
node_max_idle_time = "24hours"
|
||||
|
||||
## Base heartbeat interval for calculating distributed time constants.
|
||||
## The frontend heartbeat interval is 6 times of the base heartbeat interval.
|
||||
## The flownode/datanode heartbeat interval is 1 times of the base heartbeat interval.
|
||||
## e.g., If the base heartbeat interval is 3s, the frontend heartbeat interval is 18s, the flownode/datanode heartbeat interval is 3s.
|
||||
## If you change this value, you need to change the heartbeat interval of the flownode/frontend/datanode accordingly.
|
||||
#+ heartbeat_interval = "3s"
|
||||
|
||||
## Whether to enable greptimedb telemetry. Enabled by default.
|
||||
#+ enable_telemetry = true
|
||||
|
||||
@@ -109,6 +120,16 @@ key_path = ""
|
||||
## Like "/path/to/ca.crt"
|
||||
ca_cert_path = ""
|
||||
|
||||
## The backend client options.
|
||||
## Currently, only applicable when using etcd as the metadata store.
|
||||
#+ [backend_client]
|
||||
## The keep alive timeout for backend client.
|
||||
#+ keep_alive_timeout = "3s"
|
||||
## The keep alive interval for backend client.
|
||||
#+ keep_alive_interval = "10s"
|
||||
## The connect timeout for backend client.
|
||||
#+ connect_timeout = "3s"
|
||||
|
||||
## The gRPC server options.
|
||||
[grpc]
|
||||
## The address to bind the gRPC server.
|
||||
@@ -123,6 +144,10 @@ runtime_size = 8
|
||||
max_recv_message_size = "512MB"
|
||||
## The maximum send message size for gRPC server.
|
||||
max_send_message_size = "512MB"
|
||||
## The server side HTTP/2 keep-alive interval
|
||||
#+ http2_keep_alive_interval = "10s"
|
||||
## The server side HTTP/2 keep-alive timeout.
|
||||
#+ http2_keep_alive_timeout = "3s"
|
||||
|
||||
## The HTTP server options.
|
||||
[http]
|
||||
|
||||
@@ -6,6 +6,16 @@ default_timezone = "UTC"
|
||||
## @toml2docs:none-default
|
||||
default_column_prefix = "greptime"
|
||||
|
||||
## Maximum total memory for all concurrent write request bodies and messages (HTTP, gRPC, Flight).
|
||||
## Set to 0 to disable the limit. Default: "0" (unlimited)
|
||||
## @toml2docs:none-default
|
||||
#+ max_in_flight_write_bytes = "1GB"
|
||||
|
||||
## Policy when write bytes quota is exhausted.
|
||||
## Options: "wait" (default, 10s timeout), "wait(<duration>)" (e.g., "wait(30s)"), "fail"
|
||||
## @toml2docs:none-default
|
||||
#+ write_bytes_exhausted_policy = "wait"
|
||||
|
||||
## Initialize all regions in the background during the startup.
|
||||
## By default, it provides services after all regions have been initialized.
|
||||
init_regions_in_background = false
|
||||
@@ -22,10 +32,6 @@ max_concurrent_queries = 0
|
||||
## Enable telemetry to collect anonymous usage data. Enabled by default.
|
||||
#+ enable_telemetry = true
|
||||
|
||||
## The maximum in-flight write bytes.
|
||||
## @toml2docs:none-default
|
||||
#+ max_in_flight_write_bytes = "500MB"
|
||||
|
||||
## The runtime options.
|
||||
#+ [runtime]
|
||||
## The number of threads to execute the runtime for global read operations.
|
||||
@@ -43,10 +49,6 @@ timeout = "0s"
|
||||
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
|
||||
## Set to 0 to disable limit.
|
||||
body_limit = "64MB"
|
||||
## Maximum total memory for all concurrent HTTP request bodies.
|
||||
## Set to 0 to disable the limit. Default: "0" (unlimited)
|
||||
## @toml2docs:none-default
|
||||
#+ max_total_body_memory = "1GB"
|
||||
## HTTP CORS support, it's turned on by default
|
||||
## This allows browser to access http APIs without CORS restrictions
|
||||
enable_cors = true
|
||||
@@ -67,10 +69,6 @@ prom_validation_mode = "strict"
|
||||
bind_addr = "127.0.0.1:4001"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 8
|
||||
## Maximum total memory for all concurrent gRPC request messages.
|
||||
## Set to 0 to disable the limit. Default: "0" (unlimited)
|
||||
## @toml2docs:none-default
|
||||
#+ max_total_message_memory = "1GB"
|
||||
## The maximum connection age for gRPC connection.
|
||||
## The value can be a human-readable time string. For example: `10m` for ten minutes or `1h` for one hour.
|
||||
## Refer to https://grpc.io/docs/guides/keepalive/ for more details.
|
||||
@@ -230,6 +228,14 @@ recovery_parallelism = 2
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
## The connect timeout for kafka client.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
#+ connect_timeout = "3s"
|
||||
|
||||
## The timeout for kafka client.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
#+ timeout = "3s"
|
||||
|
||||
## Automatically create topics for WAL.
|
||||
## Set to `true` to automatically create topics for WAL.
|
||||
## Otherwise, use topics named `topic_name_prefix_[0..num_topics)`
|
||||
@@ -332,6 +338,7 @@ max_running_procedures = 128
|
||||
# endpoint = "https://s3.amazonaws.com"
|
||||
# region = "us-west-2"
|
||||
# enable_virtual_host_style = false
|
||||
# disable_ec2_metadata = false
|
||||
|
||||
# Example of using Oss as the storage.
|
||||
# [storage]
|
||||
|
||||
@@ -67,6 +67,7 @@ tracing-appender.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
common-meta = { workspace = true, features = ["testing"] }
|
||||
common-test-util.workspace = true
|
||||
common-version.workspace = true
|
||||
serde.workspace = true
|
||||
tempfile.workspace = true
|
||||
|
||||
@@ -15,5 +15,8 @@
|
||||
mod object_store;
|
||||
mod store;
|
||||
|
||||
pub use object_store::{ObjectStoreConfig, new_fs_object_store};
|
||||
pub use object_store::{
|
||||
ObjectStoreConfig, PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection,
|
||||
PrefixedS3Connection, new_fs_object_store,
|
||||
};
|
||||
pub use store::StoreConfig;
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_base::secrets::SecretString;
|
||||
use common_base::secrets::{ExposeSecret, SecretString};
|
||||
use common_error::ext::BoxedError;
|
||||
use object_store::services::{Azblob, Fs, Gcs, Oss, S3};
|
||||
use object_store::util::{with_instrument_layers, with_retry_layers};
|
||||
@@ -22,9 +22,69 @@ use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self};
|
||||
|
||||
/// Trait to convert CLI field types to target struct field types.
|
||||
/// This enables `Option<SecretString>` (CLI) -> `SecretString` (target) conversions,
|
||||
/// allowing us to distinguish "not provided" from "provided but empty".
|
||||
trait IntoField<T> {
|
||||
fn into_field(self) -> T;
|
||||
}
|
||||
|
||||
/// Identity conversion for types that are the same.
|
||||
impl<T> IntoField<T> for T {
|
||||
fn into_field(self) -> T {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert `Option<SecretString>` to `SecretString`, using default for None.
|
||||
impl IntoField<SecretString> for Option<SecretString> {
|
||||
fn into_field(self) -> SecretString {
|
||||
self.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Trait for checking if a field is effectively empty.
|
||||
///
|
||||
/// **`is_empty()`**: Checks if the field has no meaningful value
|
||||
/// - Used when backend is enabled to validate required fields
|
||||
/// - `None`, `Some("")`, `false`, or `""` are considered empty
|
||||
trait FieldValidator {
|
||||
/// Check if the field is empty (has no meaningful value).
|
||||
fn is_empty(&self) -> bool;
|
||||
}
|
||||
|
||||
/// String fields: empty if the string is empty
|
||||
impl FieldValidator for String {
|
||||
fn is_empty(&self) -> bool {
|
||||
self.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// Bool fields: false is considered "empty", true is "provided"
|
||||
impl FieldValidator for bool {
|
||||
fn is_empty(&self) -> bool {
|
||||
!self
|
||||
}
|
||||
}
|
||||
|
||||
/// Option<String> fields: None or empty content is empty
|
||||
impl FieldValidator for Option<String> {
|
||||
fn is_empty(&self) -> bool {
|
||||
self.as_ref().is_none_or(|s| s.is_empty())
|
||||
}
|
||||
}
|
||||
|
||||
/// Option<SecretString> fields: None or empty secret is empty
|
||||
/// For secrets, Some("") is treated as "not provided" for both checks
|
||||
impl FieldValidator for Option<SecretString> {
|
||||
fn is_empty(&self) -> bool {
|
||||
self.as_ref().is_none_or(|s| s.expose_secret().is_empty())
|
||||
}
|
||||
}
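To make the emptiness rules above concrete, here is a short self-contained Rust sketch (illustration only, not the crate's actual types) showing how `is_none_or` treats "not provided", "provided but empty", and a real value:

```rust
fn option_is_empty(value: &Option<String>) -> bool {
    // Same shape as the FieldValidator impls above: None or "" both count as empty.
    value.as_ref().is_none_or(|s| s.is_empty())
}

fn main() {
    let not_provided: Option<String> = None;
    let provided_empty: Option<String> = Some(String::new());
    let provided: Option<String> = Some("my-bucket".to_string());

    assert!(option_is_empty(&not_provided));   // flag was never passed
    assert!(option_is_empty(&provided_empty)); // flag passed with an empty value
    assert!(!option_is_empty(&provided));      // flag passed with a real value
    println!("emptiness rules hold");
}
```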
|
||||
|
||||
macro_rules! wrap_with_clap_prefix {
|
||||
(
|
||||
$new_name:ident, $prefix:literal, $base:ty, {
|
||||
$new_name:ident, $prefix:literal, $enable_flag:literal, $base:ty, {
|
||||
$( $( #[doc = $doc:expr] )? $( #[alias = $alias:literal] )? $field:ident : $type:ty $( = $default:expr )? ),* $(,)?
|
||||
}
|
||||
) => {
|
||||
@@ -34,15 +94,16 @@ macro_rules! wrap_with_clap_prefix {
|
||||
$(
|
||||
$( #[doc = $doc] )?
|
||||
$( #[clap(alias = $alias)] )?
|
||||
#[clap(long $(, default_value_t = $default )? )]
|
||||
[<$prefix $field>]: $type,
|
||||
#[clap(long, requires = $enable_flag $(, default_value_t = $default )? )]
|
||||
pub [<$prefix $field>]: $type,
|
||||
)*
|
||||
}
|
||||
|
||||
impl From<$new_name> for $base {
|
||||
fn from(w: $new_name) -> Self {
|
||||
Self {
|
||||
$( $field: w.[<$prefix $field>] ),*
|
||||
// Use into_field() to handle Option<SecretString> -> SecretString conversion
|
||||
$( $field: w.[<$prefix $field>].into_field() ),*
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -50,9 +111,90 @@ macro_rules! wrap_with_clap_prefix {
|
||||
};
|
||||
}
|
||||
|
||||
/// Macro for declarative backend validation.
|
||||
///
|
||||
/// # Validation Rules
|
||||
///
|
||||
/// For each storage backend (S3, OSS, GCS, Azblob), this function validates:
|
||||
/// **When backend is enabled** (e.g., `--s3`): All required fields must be non-empty
|
||||
///
|
||||
/// Note: When backend is disabled, clap's `requires` attribute ensures no configuration
|
||||
/// fields can be provided at parse time.
|
||||
///
|
||||
/// # Syntax
|
||||
///
|
||||
/// ```ignore
|
||||
/// validate_backend!(
|
||||
/// enable: self.enable_s3,
|
||||
/// name: "S3",
|
||||
/// required: [(field1, "name1"), (field2, "name2"), ...],
|
||||
/// custom_validator: |missing| { ... } // optional
|
||||
/// )
|
||||
/// ```
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// - `enable`: Boolean expression indicating if backend is enabled
|
||||
/// - `name`: Human-readable backend name for error messages
|
||||
/// - `required`: Array of (field_ref, field_name) tuples for required fields
|
||||
/// - `custom_validator`: Optional closure for complex validation logic
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```ignore
|
||||
/// validate_backend!(
|
||||
/// enable: self.enable_s3,
|
||||
/// name: "S3",
|
||||
/// required: [
|
||||
/// (&self.s3.s3_bucket, "bucket"),
|
||||
/// (&self.s3.s3_access_key_id, "access key ID"),
|
||||
/// ]
|
||||
/// )
|
||||
/// ```
|
||||
macro_rules! validate_backend {
|
||||
(
|
||||
enable: $enable:expr,
|
||||
name: $backend_name:expr,
|
||||
required: [ $( ($field:expr, $field_name:expr) ),* $(,)? ]
|
||||
$(, custom_validator: $custom_validator:expr)?
|
||||
) => {{
|
||||
if $enable {
|
||||
// Check required fields when backend is enabled
|
||||
let mut missing = Vec::new();
|
||||
$(
|
||||
if FieldValidator::is_empty($field) {
|
||||
missing.push($field_name);
|
||||
}
|
||||
)*
|
||||
|
||||
// Run custom validation if provided
|
||||
$(
|
||||
$custom_validator(&mut missing);
|
||||
)?
|
||||
|
||||
if !missing.is_empty() {
|
||||
return Err(BoxedError::new(
|
||||
error::MissingConfigSnafu {
|
||||
msg: format!(
|
||||
"{} {} must be set when --{} is enabled.",
|
||||
$backend_name,
|
||||
missing.join(", "),
|
||||
$backend_name.to_lowercase()
|
||||
),
|
||||
}
|
||||
.build(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}};
|
||||
}
|
||||
|
||||
wrap_with_clap_prefix! {
|
||||
PrefixedAzblobConnection,
|
||||
"azblob-",
|
||||
"enable_azblob",
|
||||
AzblobConnection,
|
||||
{
|
||||
#[doc = "The container of the object store."]
|
||||
@@ -60,9 +202,9 @@ wrap_with_clap_prefix! {
|
||||
#[doc = "The root of the object store."]
|
||||
root: String = Default::default(),
|
||||
#[doc = "The account name of the object store."]
|
||||
account_name: SecretString = Default::default(),
|
||||
account_name: Option<SecretString>,
|
||||
#[doc = "The account key of the object store."]
|
||||
account_key: SecretString = Default::default(),
|
||||
account_key: Option<SecretString>,
|
||||
#[doc = "The endpoint of the object store."]
|
||||
endpoint: String = Default::default(),
|
||||
#[doc = "The SAS token of the object store."]
|
||||
@@ -70,9 +212,33 @@ wrap_with_clap_prefix! {
|
||||
}
|
||||
}
|
||||
|
||||
impl PrefixedAzblobConnection {
|
||||
pub fn validate(&self) -> Result<(), BoxedError> {
|
||||
validate_backend!(
|
||||
enable: true,
|
||||
name: "AzBlob",
|
||||
required: [
|
||||
(&self.azblob_container, "container"),
|
||||
(&self.azblob_root, "root"),
|
||||
(&self.azblob_account_name, "account name"),
|
||||
(&self.azblob_endpoint, "endpoint"),
|
||||
],
|
||||
custom_validator: |missing: &mut Vec<&str>| {
|
||||
// account_key is only required if sas_token is not provided
|
||||
if self.azblob_sas_token.is_none()
|
||||
&& self.azblob_account_key.is_empty()
|
||||
{
|
||||
missing.push("account key (when sas_token is not provided)");
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
wrap_with_clap_prefix! {
|
||||
PrefixedS3Connection,
|
||||
"s3-",
|
||||
"enable_s3",
|
||||
S3Connection,
|
||||
{
|
||||
#[doc = "The bucket of the object store."]
|
||||
@@ -80,21 +246,39 @@ wrap_with_clap_prefix! {
|
||||
#[doc = "The root of the object store."]
|
||||
root: String = Default::default(),
|
||||
#[doc = "The access key ID of the object store."]
|
||||
access_key_id: SecretString = Default::default(),
|
||||
access_key_id: Option<SecretString>,
|
||||
#[doc = "The secret access key of the object store."]
|
||||
secret_access_key: SecretString = Default::default(),
|
||||
secret_access_key: Option<SecretString>,
|
||||
#[doc = "The endpoint of the object store."]
|
||||
endpoint: Option<String>,
|
||||
#[doc = "The region of the object store."]
|
||||
region: Option<String>,
|
||||
#[doc = "Enable virtual host style for the object store."]
|
||||
enable_virtual_host_style: bool = Default::default(),
|
||||
#[doc = "Disable EC2 metadata service for the object store."]
|
||||
disable_ec2_metadata: bool = Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
impl PrefixedS3Connection {
|
||||
pub fn validate(&self) -> Result<(), BoxedError> {
|
||||
validate_backend!(
|
||||
enable: true,
|
||||
name: "S3",
|
||||
required: [
|
||||
(&self.s3_bucket, "bucket"),
|
||||
(&self.s3_access_key_id, "access key ID"),
|
||||
(&self.s3_secret_access_key, "secret access key"),
|
||||
(&self.s3_region, "region"),
|
||||
]
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
wrap_with_clap_prefix! {
|
||||
PrefixedOssConnection,
|
||||
"oss-",
|
||||
"enable_oss",
|
||||
OssConnection,
|
||||
{
|
||||
#[doc = "The bucket of the object store."]
|
||||
@@ -102,17 +286,33 @@ wrap_with_clap_prefix! {
|
||||
#[doc = "The root of the object store."]
|
||||
root: String = Default::default(),
|
||||
#[doc = "The access key ID of the object store."]
|
||||
access_key_id: SecretString = Default::default(),
|
||||
access_key_id: Option<SecretString>,
|
||||
#[doc = "The access key secret of the object store."]
|
||||
access_key_secret: SecretString = Default::default(),
|
||||
access_key_secret: Option<SecretString>,
|
||||
#[doc = "The endpoint of the object store."]
|
||||
endpoint: String = Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
impl PrefixedOssConnection {
|
||||
pub fn validate(&self) -> Result<(), BoxedError> {
|
||||
validate_backend!(
|
||||
enable: true,
|
||||
name: "OSS",
|
||||
required: [
|
||||
(&self.oss_bucket, "bucket"),
|
||||
(&self.oss_access_key_id, "access key ID"),
|
||||
(&self.oss_access_key_secret, "access key secret"),
|
||||
(&self.oss_endpoint, "endpoint"),
|
||||
]
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
wrap_with_clap_prefix! {
|
||||
PrefixedGcsConnection,
|
||||
"gcs-",
|
||||
"enable_gcs",
|
||||
GcsConnection,
|
||||
{
|
||||
#[doc = "The root of the object store."]
|
||||
@@ -122,40 +322,72 @@ wrap_with_clap_prefix! {
|
||||
#[doc = "The scope of the object store."]
|
||||
scope: String = Default::default(),
|
||||
#[doc = "The credential path of the object store."]
|
||||
credential_path: SecretString = Default::default(),
|
||||
credential_path: Option<SecretString>,
|
||||
#[doc = "The credential of the object store."]
|
||||
credential: SecretString = Default::default(),
|
||||
credential: Option<SecretString>,
|
||||
#[doc = "The endpoint of the object store."]
|
||||
endpoint: String = Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// common config for object store.
|
||||
impl PrefixedGcsConnection {
|
||||
pub fn validate(&self) -> Result<(), BoxedError> {
|
||||
validate_backend!(
|
||||
enable: true,
|
||||
name: "GCS",
|
||||
required: [
|
||||
(&self.gcs_bucket, "bucket"),
|
||||
(&self.gcs_root, "root"),
|
||||
(&self.gcs_scope, "scope"),
|
||||
]
|
||||
// No custom_validator needed: GCS supports Application Default Credentials (ADC)
|
||||
// where neither credential_path nor credential is required.
|
||||
// Endpoint is also optional (defaults to https://storage.googleapis.com).
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Common config for object store.
|
||||
///
|
||||
/// # Dependency Enforcement
|
||||
///
|
||||
/// Each backend's configuration fields (e.g., `--s3-bucket`) requires its corresponding
|
||||
/// enable flag (e.g., `--s3`) to be present. This is enforced by `clap` at parse time
|
||||
/// using the `requires` attribute.
|
||||
///
|
||||
/// For example, attempting to use `--s3-bucket my-bucket` without `--s3` will result in:
|
||||
/// ```text
|
||||
/// error: The argument '--s3-bucket <BUCKET>' requires '--s3'
|
||||
/// ```
|
||||
///
|
||||
/// This ensures that users cannot accidentally provide backend-specific configuration
|
||||
/// without explicitly enabling that backend.
|
||||
#[derive(clap::Parser, Debug, Clone, PartialEq, Default)]
|
||||
#[clap(group(clap::ArgGroup::new("storage_backend").required(false).multiple(false)))]
|
||||
pub struct ObjectStoreConfig {
|
||||
/// Whether to use S3 object store.
|
||||
#[clap(long, alias = "s3")]
|
||||
#[clap(long = "s3", group = "storage_backend")]
|
||||
pub enable_s3: bool,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub s3: PrefixedS3Connection,
|
||||
|
||||
/// Whether to use OSS.
|
||||
#[clap(long, alias = "oss")]
|
||||
#[clap(long = "oss", group = "storage_backend")]
|
||||
pub enable_oss: bool,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub oss: PrefixedOssConnection,
|
||||
|
||||
/// Whether to use GCS.
|
||||
#[clap(long, alias = "gcs")]
|
||||
#[clap(long = "gcs", group = "storage_backend")]
|
||||
pub enable_gcs: bool,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub gcs: PrefixedGcsConnection,
|
||||
|
||||
/// Whether to use Azure Blob.
|
||||
#[clap(long, alias = "azblob")]
|
||||
#[clap(long = "azblob", group = "storage_backend")]
|
||||
pub enable_azblob: bool,
|
||||
|
||||
#[clap(flatten)]
|
||||
@@ -173,52 +405,66 @@ pub fn new_fs_object_store(root: &str) -> std::result::Result<ObjectStore, Boxed
|
||||
Ok(with_instrument_layers(object_store, false))
|
||||
}
|
||||
|
||||
macro_rules! gen_object_store_builder {
|
||||
($method:ident, $field:ident, $conn_type:ty, $service_type:ty) => {
|
||||
pub fn $method(&self) -> Result<ObjectStore, BoxedError> {
|
||||
let config = <$conn_type>::from(self.$field.clone());
|
||||
common_telemetry::info!(
|
||||
"Building object store with {}: {:?}",
|
||||
stringify!($field),
|
||||
config
|
||||
);
|
||||
let object_store = ObjectStore::new(<$service_type>::from(&config))
|
||||
.context(error::InitBackendSnafu)
|
||||
.map_err(BoxedError::new)?
|
||||
.finish();
|
||||
Ok(with_instrument_layers(
|
||||
with_retry_layers(object_store),
|
||||
false,
|
||||
))
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl ObjectStoreConfig {
|
||||
gen_object_store_builder!(build_s3, s3, S3Connection, S3);
|
||||
|
||||
gen_object_store_builder!(build_oss, oss, OssConnection, Oss);
|
||||
|
||||
gen_object_store_builder!(build_gcs, gcs, GcsConnection, Gcs);
|
||||
|
||||
gen_object_store_builder!(build_azblob, azblob, AzblobConnection, Azblob);
|
||||
|
||||
pub fn validate(&self) -> Result<(), BoxedError> {
|
||||
if self.enable_s3 {
|
||||
self.s3.validate()?;
|
||||
}
|
||||
if self.enable_oss {
|
||||
self.oss.validate()?;
|
||||
}
|
||||
if self.enable_gcs {
|
||||
self.gcs.validate()?;
|
||||
}
|
||||
if self.enable_azblob {
|
||||
self.azblob.validate()?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Builds the object store from the config.
|
||||
pub fn build(&self) -> Result<Option<ObjectStore>, BoxedError> {
|
||||
let object_store = if self.enable_s3 {
|
||||
let s3 = S3Connection::from(self.s3.clone());
|
||||
common_telemetry::info!("Building object store with s3: {:?}", s3);
|
||||
Some(
|
||||
ObjectStore::new(S3::from(&s3))
|
||||
.context(error::InitBackendSnafu)
|
||||
.map_err(BoxedError::new)?
|
||||
.finish(),
|
||||
)
|
||||
self.validate()?;
|
||||
|
||||
if self.enable_s3 {
|
||||
self.build_s3().map(Some)
|
||||
} else if self.enable_oss {
|
||||
let oss = OssConnection::from(self.oss.clone());
|
||||
common_telemetry::info!("Building object store with oss: {:?}", oss);
|
||||
Some(
|
||||
ObjectStore::new(Oss::from(&oss))
|
||||
.context(error::InitBackendSnafu)
|
||||
.map_err(BoxedError::new)?
|
||||
.finish(),
|
||||
)
|
||||
self.build_oss().map(Some)
|
||||
} else if self.enable_gcs {
|
||||
let gcs = GcsConnection::from(self.gcs.clone());
|
||||
common_telemetry::info!("Building object store with gcs: {:?}", gcs);
|
||||
Some(
|
||||
ObjectStore::new(Gcs::from(&gcs))
|
||||
.context(error::InitBackendSnafu)
|
||||
.map_err(BoxedError::new)?
|
||||
.finish(),
|
||||
)
|
||||
self.build_gcs().map(Some)
|
||||
} else if self.enable_azblob {
|
||||
let azblob = AzblobConnection::from(self.azblob.clone());
|
||||
common_telemetry::info!("Building object store with azblob: {:?}", azblob);
|
||||
Some(
|
||||
ObjectStore::new(Azblob::from(&azblob))
|
||||
.context(error::InitBackendSnafu)
|
||||
.map_err(BoxedError::new)?
|
||||
.finish(),
|
||||
)
|
||||
self.build_azblob().map(Some)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let object_store = object_store
|
||||
.map(|object_store| with_instrument_layers(with_retry_layers(object_store), false));
|
||||
|
||||
Ok(object_store)
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ use common_error::ext::BoxedError;
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::kv_backend::chroot::ChrootKvBackend;
|
||||
use common_meta::kv_backend::etcd::EtcdStore;
|
||||
use meta_srv::metasrv::BackendImpl;
|
||||
use meta_srv::metasrv::{BackendClientOptions, BackendImpl};
|
||||
use meta_srv::utils::etcd::create_etcd_client_with_tls;
|
||||
use servers::tls::{TlsMode, TlsOption};
|
||||
|
||||
@@ -61,6 +61,12 @@ pub struct StoreConfig {
    #[cfg(feature = "pg_kvbackend")]
    #[clap(long)]
    pub meta_schema_name: Option<String>,

    /// Automatically create PostgreSQL schema if it doesn't exist (default: true).
    #[cfg(feature = "pg_kvbackend")]
    #[clap(long, default_value_t = true)]
    pub auto_create_schema: bool,

    /// TLS mode for backend store connections (etcd, PostgreSQL, MySQL)
    #[clap(long = "backend-tls-mode", value_enum, default_value = "disable")]
    pub backend_tls_mode: TlsMode,
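Because these are ordinary clap fields, the new options surface directly as metasrv command-line flags. A hedged sketch of an equivalent standalone parser; the demo `TlsMode` enum and every flag name except `--backend-tls-mode` (which the diff sets explicitly) are assumptions based on clap's default kebab-case naming, not taken from the real binary.

// Illustrative stand-in for the new StoreConfig options; attribute shapes are
// copied from the diff, names and the enum are assumptions.
use clap::{Parser, ValueEnum};

#[derive(Debug, Clone, Copy, ValueEnum)]
enum TlsMode {
    Disable,
    Prefer,
    Require,
}

#[derive(Parser, Debug)]
struct DemoStoreConfig {
    /// PostgreSQL schema used for metadata tables.
    #[clap(long)]
    meta_schema_name: Option<String>,

    /// Automatically create the PostgreSQL schema if it doesn't exist.
    #[clap(long, default_value_t = true)]
    auto_create_schema: bool,

    /// TLS mode for backend store connections.
    #[clap(long = "backend-tls-mode", value_enum, default_value = "disable")]
    backend_tls_mode: TlsMode,
}

fn main() {
    let cfg = DemoStoreConfig::parse_from([
        "demo",
        "--meta-schema-name",
        "greptime_meta",
        "--backend-tls-mode",
        "require",
    ]);
    // auto_create_schema defaults to true when the flag is not given.
    println!("{cfg:?}");
}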
@@ -112,9 +118,13 @@ impl StoreConfig {
|
||||
let kvbackend = match self.backend {
|
||||
BackendImpl::EtcdStore => {
|
||||
let tls_config = self.tls_config();
|
||||
let etcd_client = create_etcd_client_with_tls(store_addrs, tls_config.as_ref())
|
||||
.await
|
||||
.map_err(BoxedError::new)?;
|
||||
let etcd_client = create_etcd_client_with_tls(
|
||||
store_addrs,
|
||||
&BackendClientOptions::default(),
|
||||
tls_config.as_ref(),
|
||||
)
|
||||
.await
|
||||
.map_err(BoxedError::new)?;
|
||||
Ok(EtcdStore::with_etcd_client(etcd_client, max_txn_ops))
|
||||
}
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
@@ -134,6 +144,7 @@ impl StoreConfig {
|
||||
schema_name,
|
||||
table_name,
|
||||
max_txn_ops,
|
||||
self.auto_create_schema,
|
||||
)
|
||||
.await
|
||||
.map_err(BoxedError::new)?)
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
mod export;
|
||||
mod import;
|
||||
mod storage_export;
|
||||
|
||||
use clap::Subcommand;
|
||||
use client::DEFAULT_CATALOG_NAME;
|
||||
|
||||
File diff suppressed because it is too large
373 src/cli/src/data/storage_export.rs (Normal file)
@@ -0,0 +1,373 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use common_base::secrets::{ExposeSecret, SecretString};
|
||||
use common_error::ext::BoxedError;
|
||||
|
||||
use crate::common::{
|
||||
PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection, PrefixedS3Connection,
|
||||
};
|
||||
|
||||
/// Helper function to extract secret string from Option<SecretString>.
|
||||
/// Returns empty string if None.
|
||||
fn expose_optional_secret(secret: &Option<SecretString>) -> &str {
|
||||
secret
|
||||
.as_ref()
|
||||
.map(|s| s.expose_secret().as_str())
|
||||
.unwrap_or("")
|
||||
}
|
||||
|
||||
/// Helper function to format root path with leading slash if non-empty.
|
||||
fn format_root_path(root: &str) -> String {
|
||||
if root.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!("/{}", root)
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to mask multiple secrets in a string.
|
||||
fn mask_secrets(mut sql: String, secrets: &[&str]) -> String {
|
||||
for secret in secrets {
|
||||
if !secret.is_empty() {
|
||||
sql = sql.replace(secret, "[REDACTED]");
|
||||
}
|
||||
}
|
||||
sql
|
||||
}
|
||||
|
||||
/// Helper function to format storage URI.
|
||||
fn format_uri(scheme: &str, bucket: &str, root: &str, path: &str) -> String {
|
||||
let root = format_root_path(root);
|
||||
format!("{}://{}{}/{}", scheme, bucket, root, path)
|
||||
}
|
||||
|
||||
/// Trait for storage backends that can be used for data export.
pub trait StorageExport: Send + Sync {
    /// Generate the storage path for COPY DATABASE command.
    /// Returns (path, connection_string) where connection_string includes CONNECTION clause.
    fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String);

    /// Format the output path for logging purposes.
    fn format_output_path(&self, file_path: &str) -> String;

    /// Mask sensitive information in SQL commands for safe logging.
    fn mask_sensitive_info(&self, sql: &str) -> String;
}
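The trait splits the export concern in two: `get_storage_path` decides where the data goes (and carries the `CONNECTION (...)` clause with any credentials), while `mask_sensitive_info` makes the resulting SQL safe to log. A self-contained sketch of how a caller might combine the pieces; the toy backend and the exact `COPY DATABASE` wording are illustrative assumptions, not the real export command.

// Hedged sketch: a toy backend with the same three-method shape as the new
// StorageExport trait, re-declared locally so the snippet compiles on its own.
pub trait StorageExport: Send + Sync {
    fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String);
    fn format_output_path(&self, file_path: &str) -> String;
    fn mask_sensitive_info(&self, sql: &str) -> String;
}

struct ToyS3 {
    bucket: String,
    secret: String,
}

impl StorageExport for ToyS3 {
    fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
        let path = format!("s3://{}/{}/{}/", self.bucket, catalog, schema);
        let connection = format!(" CONNECTION (SECRET_ACCESS_KEY='{}')", self.secret);
        (path, connection)
    }

    fn format_output_path(&self, file_path: &str) -> String {
        format!("s3://{}/{}", self.bucket, file_path)
    }

    fn mask_sensitive_info(&self, sql: &str) -> String {
        sql.replace(&self.secret, "[REDACTED]")
    }
}

fn main() {
    let backend = ToyS3 {
        bucket: "exports".to_string(),
        secret: "super-secret".to_string(),
    };
    let (path, connection) = backend.get_storage_path("greptime", "public");
    // Assumed COPY DATABASE shape; the real export command may differ.
    let sql = format!("COPY DATABASE greptime.public TO '{path}'{connection}");
    // Log only the masked form so credentials never reach the logs.
    println!("{}", backend.mask_sensitive_info(&sql));
}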
|
||||
macro_rules! define_backend {
|
||||
($name:ident, $config:ty) => {
|
||||
#[derive(Clone)]
|
||||
pub struct $name {
|
||||
config: $config,
|
||||
}
|
||||
|
||||
impl $name {
|
||||
pub fn new(config: $config) -> Result<Self, BoxedError> {
|
||||
config.validate()?;
|
||||
Ok(Self { config })
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Local file system storage backend.
|
||||
#[derive(Clone)]
|
||||
pub struct FsBackend {
|
||||
output_dir: String,
|
||||
}
|
||||
|
||||
impl FsBackend {
|
||||
pub fn new(output_dir: String) -> Self {
|
||||
Self { output_dir }
|
||||
}
|
||||
}
|
||||
|
||||
impl StorageExport for FsBackend {
|
||||
fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
|
||||
if self.output_dir.is_empty() {
|
||||
unreachable!("output_dir must be set when not using remote storage")
|
||||
}
|
||||
let path = PathBuf::from(&self.output_dir)
|
||||
.join(catalog)
|
||||
.join(format!("{schema}/"))
|
||||
.to_string_lossy()
|
||||
.to_string();
|
||||
(path, String::new())
|
||||
}
|
||||
|
||||
fn format_output_path(&self, file_path: &str) -> String {
|
||||
format!("{}/{}", self.output_dir, file_path)
|
||||
}
|
||||
|
||||
fn mask_sensitive_info(&self, sql: &str) -> String {
|
||||
sql.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
define_backend!(S3Backend, PrefixedS3Connection);
|
||||
|
||||
impl StorageExport for S3Backend {
|
||||
fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
|
||||
let s3_path = format_uri(
|
||||
"s3",
|
||||
&self.config.s3_bucket,
|
||||
&self.config.s3_root,
|
||||
&format!("{}/{}/", catalog, schema),
|
||||
);
|
||||
|
||||
let mut connection_options = vec![
|
||||
format!(
|
||||
"ACCESS_KEY_ID='{}'",
|
||||
expose_optional_secret(&self.config.s3_access_key_id)
|
||||
),
|
||||
format!(
|
||||
"SECRET_ACCESS_KEY='{}'",
|
||||
expose_optional_secret(&self.config.s3_secret_access_key)
|
||||
),
|
||||
];
|
||||
|
||||
if let Some(region) = &self.config.s3_region {
|
||||
connection_options.push(format!("REGION='{}'", region));
|
||||
}
|
||||
|
||||
if let Some(endpoint) = &self.config.s3_endpoint {
|
||||
connection_options.push(format!("ENDPOINT='{}'", endpoint));
|
||||
}
|
||||
|
||||
let connection_str = format!(" CONNECTION ({})", connection_options.join(", "));
|
||||
(s3_path, connection_str)
|
||||
}
|
||||
|
||||
fn format_output_path(&self, file_path: &str) -> String {
|
||||
format_uri(
|
||||
"s3",
|
||||
&self.config.s3_bucket,
|
||||
&self.config.s3_root,
|
||||
file_path,
|
||||
)
|
||||
}
|
||||
|
||||
fn mask_sensitive_info(&self, sql: &str) -> String {
|
||||
mask_secrets(
|
||||
sql.to_string(),
|
||||
&[
|
||||
expose_optional_secret(&self.config.s3_access_key_id),
|
||||
expose_optional_secret(&self.config.s3_secret_access_key),
|
||||
],
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
define_backend!(OssBackend, PrefixedOssConnection);
|
||||
|
||||
impl StorageExport for OssBackend {
|
||||
fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
|
||||
let oss_path = format_uri(
|
||||
"oss",
|
||||
&self.config.oss_bucket,
|
||||
&self.config.oss_root,
|
||||
&format!("{}/{}/", catalog, schema),
|
||||
);
|
||||
|
||||
let connection_options = [
|
||||
format!(
|
||||
"ACCESS_KEY_ID='{}'",
|
||||
expose_optional_secret(&self.config.oss_access_key_id)
|
||||
),
|
||||
format!(
|
||||
"ACCESS_KEY_SECRET='{}'",
|
||||
expose_optional_secret(&self.config.oss_access_key_secret)
|
||||
),
|
||||
];
|
||||
|
||||
let connection_str = format!(" CONNECTION ({})", connection_options.join(", "));
|
||||
(oss_path, connection_str)
|
||||
}
|
||||
|
||||
fn format_output_path(&self, file_path: &str) -> String {
|
||||
format_uri(
|
||||
"oss",
|
||||
&self.config.oss_bucket,
|
||||
&self.config.oss_root,
|
||||
file_path,
|
||||
)
|
||||
}
|
||||
|
||||
fn mask_sensitive_info(&self, sql: &str) -> String {
|
||||
mask_secrets(
|
||||
sql.to_string(),
|
||||
&[
|
||||
expose_optional_secret(&self.config.oss_access_key_id),
|
||||
expose_optional_secret(&self.config.oss_access_key_secret),
|
||||
],
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
define_backend!(GcsBackend, PrefixedGcsConnection);
|
||||
|
||||
impl StorageExport for GcsBackend {
|
||||
fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
|
||||
let gcs_path = format_uri(
|
||||
"gcs",
|
||||
&self.config.gcs_bucket,
|
||||
&self.config.gcs_root,
|
||||
&format!("{}/{}/", catalog, schema),
|
||||
);
|
||||
|
||||
let mut connection_options = Vec::new();
|
||||
|
||||
let credential_path = expose_optional_secret(&self.config.gcs_credential_path);
|
||||
if !credential_path.is_empty() {
|
||||
connection_options.push(format!("CREDENTIAL_PATH='{}'", credential_path));
|
||||
}
|
||||
|
||||
let credential = expose_optional_secret(&self.config.gcs_credential);
|
||||
if !credential.is_empty() {
|
||||
connection_options.push(format!("CREDENTIAL='{}'", credential));
|
||||
}
|
||||
|
||||
if !self.config.gcs_endpoint.is_empty() {
|
||||
connection_options.push(format!("ENDPOINT='{}'", self.config.gcs_endpoint));
|
||||
}
|
||||
|
||||
let connection_str = if connection_options.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!(" CONNECTION ({})", connection_options.join(", "))
|
||||
};
|
||||
|
||||
(gcs_path, connection_str)
|
||||
}
|
||||
|
||||
fn format_output_path(&self, file_path: &str) -> String {
|
||||
format_uri(
|
||||
"gcs",
|
||||
&self.config.gcs_bucket,
|
||||
&self.config.gcs_root,
|
||||
file_path,
|
||||
)
|
||||
}
|
||||
|
||||
fn mask_sensitive_info(&self, sql: &str) -> String {
|
||||
mask_secrets(
|
||||
sql.to_string(),
|
||||
&[
|
||||
expose_optional_secret(&self.config.gcs_credential_path),
|
||||
expose_optional_secret(&self.config.gcs_credential),
|
||||
],
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
define_backend!(AzblobBackend, PrefixedAzblobConnection);
|
||||
|
||||
impl StorageExport for AzblobBackend {
|
||||
fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
|
||||
let azblob_path = format_uri(
|
||||
"azblob",
|
||||
&self.config.azblob_container,
|
||||
&self.config.azblob_root,
|
||||
&format!("{}/{}/", catalog, schema),
|
||||
);
|
||||
|
||||
let mut connection_options = vec![
|
||||
format!(
|
||||
"ACCOUNT_NAME='{}'",
|
||||
expose_optional_secret(&self.config.azblob_account_name)
|
||||
),
|
||||
format!(
|
||||
"ACCOUNT_KEY='{}'",
|
||||
expose_optional_secret(&self.config.azblob_account_key)
|
||||
),
|
||||
];
|
||||
|
||||
if let Some(sas_token) = &self.config.azblob_sas_token {
|
||||
connection_options.push(format!("SAS_TOKEN='{}'", sas_token));
|
||||
}
|
||||
|
||||
let connection_str = format!(" CONNECTION ({})", connection_options.join(", "));
|
||||
(azblob_path, connection_str)
|
||||
}
|
||||
|
||||
fn format_output_path(&self, file_path: &str) -> String {
|
||||
format_uri(
|
||||
"azblob",
|
||||
&self.config.azblob_container,
|
||||
&self.config.azblob_root,
|
||||
file_path,
|
||||
)
|
||||
}
|
||||
|
||||
fn mask_sensitive_info(&self, sql: &str) -> String {
|
||||
mask_secrets(
|
||||
sql.to_string(),
|
||||
&[
|
||||
expose_optional_secret(&self.config.azblob_account_name),
|
||||
expose_optional_secret(&self.config.azblob_account_key),
|
||||
],
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum StorageType {
|
||||
Fs(FsBackend),
|
||||
S3(S3Backend),
|
||||
Oss(OssBackend),
|
||||
Gcs(GcsBackend),
|
||||
Azblob(AzblobBackend),
|
||||
}
|
||||
|
||||
impl StorageExport for StorageType {
|
||||
fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
|
||||
match self {
|
||||
StorageType::Fs(backend) => backend.get_storage_path(catalog, schema),
|
||||
StorageType::S3(backend) => backend.get_storage_path(catalog, schema),
|
||||
StorageType::Oss(backend) => backend.get_storage_path(catalog, schema),
|
||||
StorageType::Gcs(backend) => backend.get_storage_path(catalog, schema),
|
||||
StorageType::Azblob(backend) => backend.get_storage_path(catalog, schema),
|
||||
}
|
||||
}
|
||||
|
||||
fn format_output_path(&self, file_path: &str) -> String {
|
||||
match self {
|
||||
StorageType::Fs(backend) => backend.format_output_path(file_path),
|
||||
StorageType::S3(backend) => backend.format_output_path(file_path),
|
||||
StorageType::Oss(backend) => backend.format_output_path(file_path),
|
||||
StorageType::Gcs(backend) => backend.format_output_path(file_path),
|
||||
StorageType::Azblob(backend) => backend.format_output_path(file_path),
|
||||
}
|
||||
}
|
||||
|
||||
fn mask_sensitive_info(&self, sql: &str) -> String {
|
||||
match self {
|
||||
StorageType::Fs(backend) => backend.mask_sensitive_info(sql),
|
||||
StorageType::S3(backend) => backend.mask_sensitive_info(sql),
|
||||
StorageType::Oss(backend) => backend.mask_sensitive_info(sql),
|
||||
StorageType::Gcs(backend) => backend.mask_sensitive_info(sql),
|
||||
StorageType::Azblob(backend) => backend.mask_sensitive_info(sql),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl StorageType {
|
||||
/// Returns true if the storage backend is remote (not local filesystem).
|
||||
pub fn is_remote_storage(&self) -> bool {
|
||||
!matches!(self, StorageType::Fs(_))
|
||||
}
|
||||
}
|
||||
@@ -253,12 +253,6 @@ pub enum Error {
|
||||
error: ObjectStoreError,
|
||||
},
|
||||
|
||||
#[snafu(display("S3 config need be set"))]
|
||||
S3ConfigNotSet {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Output directory not set"))]
|
||||
OutputDirNotSet {
|
||||
#[snafu(implicit)]
|
||||
@@ -364,9 +358,9 @@ impl ErrorExt for Error {
|
||||
|
||||
Error::Other { source, .. } => source.status_code(),
|
||||
Error::OpenDal { .. } | Error::InitBackend { .. } => StatusCode::Internal,
|
||||
Error::S3ConfigNotSet { .. }
|
||||
| Error::OutputDirNotSet { .. }
|
||||
| Error::EmptyStoreAddrs { .. } => StatusCode::InvalidArguments,
|
||||
Error::OutputDirNotSet { .. } | Error::EmptyStoreAddrs { .. } => {
|
||||
StatusCode::InvalidArguments
|
||||
}
|
||||
|
||||
Error::BuildRuntime { source, .. } => source.status_code(),
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ use async_trait::async_trait;
|
||||
use clap::Parser;
|
||||
use common_base::Plugins;
|
||||
use common_config::Configurable;
|
||||
use common_meta::distributed_time_constants::init_distributed_time_constants;
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
|
||||
use common_version::{short_version, verbose_version};
|
||||
@@ -154,8 +155,6 @@ pub struct StartCommand {
|
||||
#[clap(short, long)]
|
||||
selector: Option<String>,
|
||||
#[clap(long)]
|
||||
use_memory_store: Option<bool>,
|
||||
#[clap(long)]
|
||||
enable_region_failover: Option<bool>,
|
||||
#[clap(long)]
|
||||
http_addr: Option<String>,
|
||||
@@ -185,7 +184,6 @@ impl Debug for StartCommand {
|
||||
.field("store_addrs", &self.sanitize_store_addrs())
|
||||
.field("config_file", &self.config_file)
|
||||
.field("selector", &self.selector)
|
||||
.field("use_memory_store", &self.use_memory_store)
|
||||
.field("enable_region_failover", &self.enable_region_failover)
|
||||
.field("http_addr", &self.http_addr)
|
||||
.field("http_timeout", &self.http_timeout)
|
||||
@@ -267,10 +265,6 @@ impl StartCommand {
|
||||
.context(error::UnsupportedSelectorTypeSnafu { selector_type })?;
|
||||
}
|
||||
|
||||
if let Some(use_memory_store) = self.use_memory_store {
|
||||
opts.use_memory_store = use_memory_store;
|
||||
}
|
||||
|
||||
if let Some(enable_region_failover) = self.enable_region_failover {
|
||||
opts.enable_region_failover = enable_region_failover;
|
||||
}
|
||||
@@ -327,6 +321,7 @@ impl StartCommand {
|
||||
log_versions(verbose_version(), short_version(), APP_NAME);
|
||||
maybe_activate_heap_profile(&opts.component.memory);
|
||||
create_resource_limit_metrics(APP_NAME);
|
||||
init_distributed_time_constants(opts.component.heartbeat_interval);
|
||||
|
||||
info!("Metasrv start command: {:#?}", self);
|
||||
|
||||
@@ -389,7 +384,6 @@ mod tests {
|
||||
server_addr = "127.0.0.1:3002"
|
||||
store_addr = "127.0.0.1:2379"
|
||||
selector = "LeaseBased"
|
||||
use_memory_store = false
|
||||
|
||||
[logging]
|
||||
level = "debug"
|
||||
@@ -468,7 +462,6 @@ mod tests {
|
||||
server_addr = "127.0.0.1:3002"
|
||||
datanode_lease_secs = 15
|
||||
selector = "LeaseBased"
|
||||
use_memory_store = false
|
||||
|
||||
[http]
|
||||
addr = "127.0.0.1:4000"
|
||||
|
||||
@@ -552,9 +552,8 @@ impl StartCommand {
|
||||
let grpc_handler = fe_instance.clone() as Arc<dyn GrpcQueryHandlerWithBoxedError>;
|
||||
let weak_grpc_handler = Arc::downgrade(&grpc_handler);
|
||||
frontend_instance_handler
|
||||
.lock()
|
||||
.unwrap()
|
||||
.replace(weak_grpc_handler);
|
||||
.set_handler(weak_grpc_handler)
|
||||
.await;
|
||||
|
||||
// set the frontend invoker for flownode
|
||||
let flow_streaming_engine = flownode.flow_engine().streaming_engine();
|
||||
|
||||
@@ -59,15 +59,6 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to canonicalize path: {}", path))]
|
||||
CanonicalizePath {
|
||||
path: String,
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid path '{}': expected a file, not a directory", path))]
|
||||
InvalidPath {
|
||||
path: String,
|
||||
@@ -82,8 +73,7 @@ impl ErrorExt for Error {
|
||||
Error::TomlFormat { .. }
|
||||
| Error::LoadLayeredConfig { .. }
|
||||
| Error::FileWatch { .. }
|
||||
| Error::InvalidPath { .. }
|
||||
| Error::CanonicalizePath { .. } => StatusCode::InvalidArguments,
|
||||
| Error::InvalidPath { .. } => StatusCode::InvalidArguments,
|
||||
Error::SerdeJson { .. } => StatusCode::Unexpected,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ use common_telemetry::{error, info, warn};
|
||||
use notify::{EventKind, RecursiveMode, Watcher};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{CanonicalizePathSnafu, FileWatchSnafu, InvalidPathSnafu, Result};
|
||||
use crate::error::{FileWatchSnafu, InvalidPathSnafu, Result};
|
||||
|
||||
/// Configuration for the file watcher behavior.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
@@ -41,15 +41,10 @@ pub struct FileWatcherConfig {
|
||||
|
||||
impl FileWatcherConfig {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
Default::default()
|
||||
}
|
||||
|
||||
pub fn with_modify_and_create(mut self) -> Self {
|
||||
self.include_remove_events = false;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_remove_events(mut self) -> Self {
|
||||
pub fn include_remove_events(mut self) -> Self {
|
||||
self.include_remove_events = true;
|
||||
self
|
||||
}
|
||||
@@ -93,11 +88,8 @@ impl FileWatcherBuilder {
|
||||
path: path.display().to_string(),
|
||||
}
|
||||
);
|
||||
// Canonicalize the path for reliable comparison with event paths
|
||||
let canonical = path.canonicalize().context(CanonicalizePathSnafu {
|
||||
path: path.display().to_string(),
|
||||
})?;
|
||||
self.file_paths.push(canonical);
|
||||
|
||||
self.file_paths.push(path.to_path_buf());
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
@@ -144,7 +136,6 @@ impl FileWatcherBuilder {
|
||||
}
|
||||
|
||||
let config = self.config;
|
||||
let watched_files: HashSet<PathBuf> = self.file_paths.iter().cloned().collect();
|
||||
|
||||
info!(
|
||||
"Spawning file watcher for paths: {:?} (watching parent directories)",
|
||||
@@ -165,25 +156,7 @@ impl FileWatcherBuilder {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if any of the event paths match our watched files
|
||||
let is_watched_file = event.paths.iter().any(|event_path| {
|
||||
// Try to canonicalize the event path for comparison
|
||||
// If the file was deleted, canonicalize will fail, so we also
|
||||
// compare the raw path
|
||||
if let Ok(canonical) = event_path.canonicalize()
|
||||
&& watched_files.contains(&canonical)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
// For deleted files, compare using the raw path
|
||||
watched_files.contains(event_path)
|
||||
});
|
||||
|
||||
if !is_watched_file {
|
||||
continue;
|
||||
}
|
||||
|
||||
info!(?event.kind, ?event.paths, "Detected file change");
|
||||
info!(?event.kind, ?event.paths, "Detected folder change");
|
||||
callback();
|
||||
}
|
||||
Err(err) => {
|
||||
@@ -301,55 +274,4 @@ mod tests {
|
||||
"Watcher should have detected file recreation"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_file_watcher_ignores_other_files() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let dir = create_temp_dir("test_file_watcher_other");
|
||||
let watched_file = dir.path().join("watched.txt");
|
||||
let other_file = dir.path().join("other.txt");
|
||||
|
||||
// Create both files
|
||||
std::fs::write(&watched_file, "watched content").unwrap();
|
||||
std::fs::write(&other_file, "other content").unwrap();
|
||||
|
||||
let counter = Arc::new(AtomicUsize::new(0));
|
||||
let counter_clone = counter.clone();
|
||||
|
||||
FileWatcherBuilder::new()
|
||||
.watch_path(&watched_file)
|
||||
.unwrap()
|
||||
.config(FileWatcherConfig::new())
|
||||
.spawn(move || {
|
||||
counter_clone.fetch_add(1, Ordering::SeqCst);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
// Give watcher time to start
|
||||
std::thread::sleep(Duration::from_millis(100));
|
||||
|
||||
// Modify the other file - should NOT trigger callback
|
||||
std::fs::write(&other_file, "modified other content").unwrap();
|
||||
|
||||
// Wait for potential event
|
||||
std::thread::sleep(Duration::from_millis(500));
|
||||
|
||||
assert_eq!(
|
||||
counter.load(Ordering::SeqCst),
|
||||
0,
|
||||
"Watcher should not have detected changes to other files"
|
||||
);
|
||||
|
||||
// Now modify the watched file - SHOULD trigger callback
|
||||
std::fs::write(&watched_file, "modified watched content").unwrap();
|
||||
|
||||
// Wait for the event to be processed
|
||||
std::thread::sleep(Duration::from_millis(500));
|
||||
|
||||
assert!(
|
||||
counter.load(Ordering::SeqCst) >= 1,
|
||||
"Watcher should have detected change to watched file"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ const SECRET_ACCESS_KEY: &str = "secret_access_key";
|
||||
const SESSION_TOKEN: &str = "session_token";
|
||||
const REGION: &str = "region";
|
||||
const ENABLE_VIRTUAL_HOST_STYLE: &str = "enable_virtual_host_style";
|
||||
const DISABLE_EC2_METADATA: &str = "disable_ec2_metadata";
|
||||
|
||||
pub fn is_supported_in_s3(key: &str) -> bool {
|
||||
[
|
||||
@@ -36,6 +37,7 @@ pub fn is_supported_in_s3(key: &str) -> bool {
|
||||
SESSION_TOKEN,
|
||||
REGION,
|
||||
ENABLE_VIRTUAL_HOST_STYLE,
|
||||
DISABLE_EC2_METADATA,
|
||||
]
|
||||
.contains(&key)
|
||||
}
|
||||
@@ -82,6 +84,21 @@ pub fn build_s3_backend(
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(disable_str) = connection.get(DISABLE_EC2_METADATA) {
|
||||
let disable = disable_str.as_str().parse::<bool>().map_err(|e| {
|
||||
error::InvalidConnectionSnafu {
|
||||
msg: format!(
|
||||
"failed to parse the option {}={}, {}",
|
||||
DISABLE_EC2_METADATA, disable_str, e
|
||||
),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
if disable {
|
||||
builder = builder.disable_ec2_metadata();
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(weny): Consider finding a better way to eliminate duplicate code.
|
||||
Ok(ObjectStore::new(builder)
|
||||
.context(error::BuildBackendSnafu)?
|
||||
@@ -109,6 +126,7 @@ mod tests {
|
||||
assert!(is_supported_in_s3(SESSION_TOKEN));
|
||||
assert!(is_supported_in_s3(REGION));
|
||||
assert!(is_supported_in_s3(ENABLE_VIRTUAL_HOST_STYLE));
|
||||
assert!(is_supported_in_s3(DISABLE_EC2_METADATA));
|
||||
assert!(!is_supported_in_s3("foo"))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ arc-swap = "1.0"
|
||||
arrow.workspace = true
|
||||
arrow-schema.workspace = true
|
||||
async-trait.workspace = true
|
||||
bincode = "1.3"
|
||||
bincode = "=1.3.3"
|
||||
catalog.workspace = true
|
||||
chrono.workspace = true
|
||||
common-base.workspace = true
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
@@ -35,6 +36,14 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("Memory semaphore unexpectedly closed"))]
|
||||
MemorySemaphoreClosed,
|
||||
|
||||
#[snafu(display(
|
||||
"Timeout waiting for memory quota: requested {requested_bytes} bytes, waited {waited:?}"
|
||||
))]
|
||||
MemoryAcquireTimeout {
|
||||
requested_bytes: u64,
|
||||
waited: Duration,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
@@ -44,6 +53,7 @@ impl ErrorExt for Error {
|
||||
match self {
|
||||
MemoryLimitExceeded { .. } => StatusCode::RuntimeResourcesExhausted,
|
||||
MemorySemaphoreClosed => StatusCode::Unexpected,
|
||||
MemoryAcquireTimeout { .. } => StatusCode::RuntimeResourcesExhausted,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
168 src/common/memory-manager/src/granularity.rs (Normal file)
@@ -0,0 +1,168 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt;
|
||||
|
||||
/// Memory permit granularity for different use cases.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PermitGranularity {
    /// 1 KB per permit
    ///
    /// Use for:
    /// - HTTP/gRPC request limiting (small, high-concurrency operations)
    /// - Small batch operations
    /// - Scenarios requiring fine-grained fairness
    Kilobyte,

    /// 1 MB per permit (default)
    ///
    /// Use for:
    /// - Query execution memory management
    /// - Compaction memory control
    /// - Large, long-running operations
    #[default]
    Megabyte,
}
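The variant docs above frame the choice as a precision/overhead trade-off; the sketch below, written as if it were another test in this file, quantifies it using only the `bytes_to_permits`/`permits_to_bytes` methods defined further down.

#[test]
fn granularity_trades_permit_count_for_precision() {
    // A 64 MB budget costs 65_536 permits at 1 KB granularity but only 64 at 1 MB,
    // which is why the coarse granularity suits large, long-running operations.
    let budget = 64 * 1024 * 1024;
    assert_eq!(PermitGranularity::Kilobyte.bytes_to_permits(budget), 65_536);
    assert_eq!(PermitGranularity::Megabyte.bytes_to_permits(budget), 64);

    // Tiny requests always round up to one whole permit, so the coarse
    // granularity over-reserves far more for small, high-concurrency requests.
    let tiny = 100;
    let kb = PermitGranularity::Kilobyte;
    let mb = PermitGranularity::Megabyte;
    assert_eq!(kb.permits_to_bytes(kb.bytes_to_permits(tiny)), 1024);
    assert_eq!(mb.permits_to_bytes(mb.bytes_to_permits(tiny)), 1024 * 1024);
}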
|
||||
impl PermitGranularity {
|
||||
/// Returns the number of bytes per permit.
|
||||
#[inline]
|
||||
pub const fn bytes(self) -> u64 {
|
||||
match self {
|
||||
Self::Kilobyte => 1024,
|
||||
Self::Megabyte => 1024 * 1024,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a human-readable string representation.
|
||||
pub const fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Self::Kilobyte => "1KB",
|
||||
Self::Megabyte => "1MB",
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts bytes to permits based on this granularity.
|
||||
///
|
||||
/// Rounds up to ensure the requested bytes are fully covered.
|
||||
/// Clamped to Semaphore::MAX_PERMITS.
|
||||
#[inline]
|
||||
pub fn bytes_to_permits(self, bytes: u64) -> u32 {
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
let granularity_bytes = self.bytes();
|
||||
bytes
|
||||
.saturating_add(granularity_bytes - 1)
|
||||
.saturating_div(granularity_bytes)
|
||||
.min(Semaphore::MAX_PERMITS as u64)
|
||||
.min(u32::MAX as u64) as u32
|
||||
}
|
||||
|
||||
/// Converts permits to bytes based on this granularity.
|
||||
#[inline]
|
||||
pub fn permits_to_bytes(self, permits: u32) -> u64 {
|
||||
(permits as u64).saturating_mul(self.bytes())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for PermitGranularity {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", self.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_bytes_to_permits_kilobyte() {
|
||||
let granularity = PermitGranularity::Kilobyte;
|
||||
|
||||
// Exact multiples
|
||||
assert_eq!(granularity.bytes_to_permits(1024), 1);
|
||||
assert_eq!(granularity.bytes_to_permits(2048), 2);
|
||||
assert_eq!(granularity.bytes_to_permits(10 * 1024), 10);
|
||||
|
||||
// Rounds up
|
||||
assert_eq!(granularity.bytes_to_permits(1), 1);
|
||||
assert_eq!(granularity.bytes_to_permits(1025), 2);
|
||||
assert_eq!(granularity.bytes_to_permits(2047), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytes_to_permits_megabyte() {
|
||||
let granularity = PermitGranularity::Megabyte;
|
||||
|
||||
// Exact multiples
|
||||
assert_eq!(granularity.bytes_to_permits(1024 * 1024), 1);
|
||||
assert_eq!(granularity.bytes_to_permits(2 * 1024 * 1024), 2);
|
||||
|
||||
// Rounds up
|
||||
assert_eq!(granularity.bytes_to_permits(1), 1);
|
||||
assert_eq!(granularity.bytes_to_permits(1024), 1);
|
||||
assert_eq!(granularity.bytes_to_permits(1024 * 1024 + 1), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytes_to_permits_zero_bytes() {
|
||||
assert_eq!(PermitGranularity::Kilobyte.bytes_to_permits(0), 0);
|
||||
assert_eq!(PermitGranularity::Megabyte.bytes_to_permits(0), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bytes_to_permits_clamps_to_maximum() {
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
let max_permits = (Semaphore::MAX_PERMITS as u64).min(u32::MAX as u64) as u32;
|
||||
|
||||
assert_eq!(
|
||||
PermitGranularity::Kilobyte.bytes_to_permits(u64::MAX),
|
||||
max_permits
|
||||
);
|
||||
assert_eq!(
|
||||
PermitGranularity::Megabyte.bytes_to_permits(u64::MAX),
|
||||
max_permits
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_permits_to_bytes() {
|
||||
assert_eq!(PermitGranularity::Kilobyte.permits_to_bytes(1), 1024);
|
||||
assert_eq!(PermitGranularity::Kilobyte.permits_to_bytes(10), 10 * 1024);
|
||||
|
||||
assert_eq!(PermitGranularity::Megabyte.permits_to_bytes(1), 1024 * 1024);
|
||||
assert_eq!(
|
||||
PermitGranularity::Megabyte.permits_to_bytes(10),
|
||||
10 * 1024 * 1024
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_round_trip_conversion() {
|
||||
// Kilobyte: bytes -> permits -> bytes (should round up)
|
||||
let kb = PermitGranularity::Kilobyte;
|
||||
let permits = kb.bytes_to_permits(1500);
|
||||
let bytes = kb.permits_to_bytes(permits);
|
||||
assert!(bytes >= 1500); // Must cover original request
|
||||
assert_eq!(bytes, 2048); // 2KB
|
||||
|
||||
// Megabyte: bytes -> permits -> bytes (should round up)
|
||||
let mb = PermitGranularity::Megabyte;
|
||||
let permits = mb.bytes_to_permits(1500);
|
||||
let bytes = mb.permits_to_bytes(permits);
|
||||
assert!(bytes >= 1500);
|
||||
assert_eq!(bytes, 1024 * 1024); // 1MB
|
||||
}
|
||||
}
|
||||
@@ -17,7 +17,7 @@ use std::{fmt, mem};
|
||||
use common_telemetry::debug;
|
||||
use tokio::sync::{OwnedSemaphorePermit, TryAcquireError};
|
||||
|
||||
use crate::manager::{MemoryMetrics, MemoryQuota, bytes_to_permits, permits_to_bytes};
|
||||
use crate::manager::{MemoryMetrics, MemoryQuota};
|
||||
|
||||
/// Guard representing a slice of reserved memory.
|
||||
pub struct MemoryGuard<M: MemoryMetrics> {
|
||||
@@ -49,7 +49,9 @@ impl<M: MemoryMetrics> MemoryGuard<M> {
|
||||
pub fn granted_bytes(&self) -> u64 {
|
||||
match &self.state {
|
||||
GuardState::Unlimited => 0,
|
||||
GuardState::Limited { permit, .. } => permits_to_bytes(permit.num_permits() as u32),
|
||||
GuardState::Limited { permit, quota } => {
|
||||
quota.permits_to_bytes(permit.num_permits() as u32)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,7 +67,7 @@ impl<M: MemoryMetrics> MemoryGuard<M> {
|
||||
return true;
|
||||
}
|
||||
|
||||
let additional_permits = bytes_to_permits(bytes);
|
||||
let additional_permits = quota.bytes_to_permits(bytes);
|
||||
|
||||
match quota
|
||||
.semaphore
|
||||
@@ -99,11 +101,12 @@ impl<M: MemoryMetrics> MemoryGuard<M> {
|
||||
return true;
|
||||
}
|
||||
|
||||
let release_permits = bytes_to_permits(bytes);
|
||||
let release_permits = quota.bytes_to_permits(bytes);
|
||||
|
||||
match permit.split(release_permits as usize) {
|
||||
Some(released_permit) => {
|
||||
let released_bytes = permits_to_bytes(released_permit.num_permits() as u32);
|
||||
let released_bytes =
|
||||
quota.permits_to_bytes(released_permit.num_permits() as u32);
|
||||
drop(released_permit);
|
||||
quota.update_in_use_metric();
|
||||
debug!("Early released {} bytes from memory guard", released_bytes);
|
||||
@@ -121,7 +124,7 @@ impl<M: MemoryMetrics> Drop for MemoryGuard<M> {
|
||||
if let GuardState::Limited { permit, quota } =
|
||||
mem::replace(&mut self.state, GuardState::Unlimited)
|
||||
{
|
||||
let bytes = permits_to_bytes(permit.num_permits() as u32);
|
||||
let bytes = quota.permits_to_bytes(permit.num_permits() as u32);
|
||||
drop(permit);
|
||||
quota.update_in_use_metric();
|
||||
debug!("Released memory: {} bytes", bytes);
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
//! share the same allocation logic while using their own metrics.
|
||||
|
||||
mod error;
|
||||
mod granularity;
|
||||
mod guard;
|
||||
mod manager;
|
||||
mod policy;
|
||||
@@ -27,8 +28,9 @@ mod policy;
|
||||
mod tests;
|
||||
|
||||
pub use error::{Error, Result};
|
||||
pub use granularity::PermitGranularity;
|
||||
pub use guard::MemoryGuard;
|
||||
pub use manager::{MemoryManager, MemoryMetrics, PERMIT_GRANULARITY_BYTES};
|
||||
pub use manager::{MemoryManager, MemoryMetrics};
|
||||
pub use policy::{DEFAULT_MEMORY_WAIT_TIMEOUT, OnExhaustedPolicy};
|
||||
|
||||
/// No-op metrics implementation for testing.
|
||||
|
||||
@@ -17,11 +17,12 @@ use std::sync::Arc;
|
||||
use snafu::ensure;
|
||||
use tokio::sync::{Semaphore, TryAcquireError};
|
||||
|
||||
use crate::error::{MemoryLimitExceededSnafu, MemorySemaphoreClosedSnafu, Result};
|
||||
use crate::error::{
|
||||
MemoryAcquireTimeoutSnafu, MemoryLimitExceededSnafu, MemorySemaphoreClosedSnafu, Result,
|
||||
};
|
||||
use crate::granularity::PermitGranularity;
|
||||
use crate::guard::MemoryGuard;
|
||||
|
||||
/// Minimum bytes controlled by one semaphore permit.
|
||||
pub const PERMIT_GRANULARITY_BYTES: u64 = 1 << 20; // 1 MB
|
||||
use crate::policy::OnExhaustedPolicy;
|
||||
|
||||
/// Trait for recording memory usage metrics.
|
||||
pub trait MemoryMetrics: Clone + Send + Sync + 'static {
|
||||
@@ -36,10 +37,17 @@ pub struct MemoryManager<M: MemoryMetrics> {
|
||||
quota: Option<MemoryQuota<M>>,
|
||||
}
|
||||
|
||||
impl<M: MemoryMetrics + Default> Default for MemoryManager<M> {
|
||||
fn default() -> Self {
|
||||
Self::new(0, M::default())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct MemoryQuota<M: MemoryMetrics> {
|
||||
pub(crate) semaphore: Arc<Semaphore>,
|
||||
pub(crate) limit_permits: u32,
|
||||
pub(crate) granularity: PermitGranularity,
|
||||
pub(crate) metrics: M,
|
||||
}
|
||||
|
||||
@@ -47,19 +55,25 @@ impl<M: MemoryMetrics> MemoryManager<M> {
|
||||
/// Creates a new memory manager with the given limit in bytes.
|
||||
/// `limit_bytes = 0` disables the limit.
|
||||
pub fn new(limit_bytes: u64, metrics: M) -> Self {
|
||||
Self::with_granularity(limit_bytes, PermitGranularity::default(), metrics)
|
||||
}
|
||||
|
||||
/// Creates a new memory manager with specified granularity.
|
||||
pub fn with_granularity(limit_bytes: u64, granularity: PermitGranularity, metrics: M) -> Self {
|
||||
if limit_bytes == 0 {
|
||||
metrics.set_limit(0);
|
||||
return Self { quota: None };
|
||||
}
|
||||
|
||||
let limit_permits = bytes_to_permits(limit_bytes);
|
||||
let limit_aligned_bytes = permits_to_bytes(limit_permits);
|
||||
let limit_permits = granularity.bytes_to_permits(limit_bytes);
|
||||
let limit_aligned_bytes = granularity.permits_to_bytes(limit_permits);
|
||||
metrics.set_limit(limit_aligned_bytes as i64);
|
||||
|
||||
Self {
|
||||
quota: Some(MemoryQuota {
|
||||
semaphore: Arc::new(Semaphore::new(limit_permits as usize)),
|
||||
limit_permits,
|
||||
granularity,
|
||||
metrics,
|
||||
}),
|
||||
}
|
||||
@@ -69,7 +83,7 @@ impl<M: MemoryMetrics> MemoryManager<M> {
|
||||
pub fn limit_bytes(&self) -> u64 {
|
||||
self.quota
|
||||
.as_ref()
|
||||
.map(|quota| permits_to_bytes(quota.limit_permits))
|
||||
.map(|quota| quota.permits_to_bytes(quota.limit_permits))
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
@@ -77,7 +91,7 @@ impl<M: MemoryMetrics> MemoryManager<M> {
|
||||
pub fn used_bytes(&self) -> u64 {
|
||||
self.quota
|
||||
.as_ref()
|
||||
.map(|quota| permits_to_bytes(quota.used_permits()))
|
||||
.map(|quota| quota.permits_to_bytes(quota.used_permits()))
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
@@ -85,7 +99,7 @@ impl<M: MemoryMetrics> MemoryManager<M> {
|
||||
pub fn available_bytes(&self) -> u64 {
|
||||
self.quota
|
||||
.as_ref()
|
||||
.map(|quota| permits_to_bytes(quota.available_permits_clamped()))
|
||||
.map(|quota| quota.permits_to_bytes(quota.available_permits_clamped()))
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
@@ -98,13 +112,13 @@ impl<M: MemoryMetrics> MemoryManager<M> {
|
||||
match &self.quota {
|
||||
None => Ok(MemoryGuard::unlimited()),
|
||||
Some(quota) => {
|
||||
let permits = bytes_to_permits(bytes);
|
||||
let permits = quota.bytes_to_permits(bytes);
|
||||
|
||||
ensure!(
|
||||
permits <= quota.limit_permits,
|
||||
MemoryLimitExceededSnafu {
|
||||
requested_bytes: bytes,
|
||||
limit_bytes: permits_to_bytes(quota.limit_permits),
|
||||
limit_bytes: self.limit_bytes()
|
||||
}
|
||||
);
|
||||
|
||||
@@ -125,7 +139,7 @@ impl<M: MemoryMetrics> MemoryManager<M> {
|
||||
match &self.quota {
|
||||
None => Some(MemoryGuard::unlimited()),
|
||||
Some(quota) => {
|
||||
let permits = bytes_to_permits(bytes);
|
||||
let permits = quota.bytes_to_permits(bytes);
|
||||
|
||||
match quota.semaphore.clone().try_acquire_many_owned(permits) {
|
||||
Ok(permit) => {
|
||||
@@ -140,9 +154,56 @@ impl<M: MemoryMetrics> MemoryManager<M> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Acquires memory based on the given policy.
|
||||
///
|
||||
/// - For `OnExhaustedPolicy::Wait`: Waits up to the timeout duration for memory to become available
|
||||
/// - For `OnExhaustedPolicy::Fail`: Returns immediately if memory is not available
|
||||
///
|
||||
/// # Errors
|
||||
/// - `MemoryLimitExceeded`: Requested bytes exceed the total limit (both policies), or memory is currently exhausted (Fail policy only)
|
||||
/// - `MemoryAcquireTimeout`: Timeout elapsed while waiting for memory (Wait policy only)
|
||||
/// - `MemorySemaphoreClosed`: The internal semaphore is unexpectedly closed (rare, indicates system issue)
|
||||
pub async fn acquire_with_policy(
|
||||
&self,
|
||||
bytes: u64,
|
||||
policy: OnExhaustedPolicy,
|
||||
) -> Result<MemoryGuard<M>> {
|
||||
match policy {
|
||||
OnExhaustedPolicy::Wait { timeout } => {
|
||||
match tokio::time::timeout(timeout, self.acquire(bytes)).await {
|
||||
Ok(Ok(guard)) => Ok(guard),
|
||||
Ok(Err(e)) => Err(e),
|
||||
Err(_elapsed) => {
|
||||
// Timeout elapsed while waiting
|
||||
MemoryAcquireTimeoutSnafu {
|
||||
requested_bytes: bytes,
|
||||
waited: timeout,
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
OnExhaustedPolicy::Fail => self.try_acquire(bytes).ok_or_else(|| {
|
||||
MemoryLimitExceededSnafu {
|
||||
requested_bytes: bytes,
|
||||
limit_bytes: self.limit_bytes(),
|
||||
}
|
||||
.build()
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
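From the caller's side, the two policies differ only in how exhaustion is reported. A hedged sketch of a test exercising both paths, written as if it sat in this crate's tests; it assumes `tokio::test` is available, that `NoOpMetrics` can be built via `Default`, and that `acquire` waits on the underlying semaphore (which is what wrapping it in `tokio::time::timeout` above implies).

use std::time::Duration;

use crate::{MemoryManager, NoOpMetrics, OnExhaustedPolicy, PermitGranularity};

#[tokio::test]
async fn acquire_with_policy_wait_vs_fail() {
    // Assumption: NoOpMetrics implements Default.
    let manager = MemoryManager::with_granularity(
        4 * 1024 * 1024, // 4 MB limit
        PermitGranularity::Megabyte,
        NoOpMetrics::default(),
    );

    // Hold the whole budget so the quota is exhausted.
    let _all = manager.acquire(4 * 1024 * 1024).await.unwrap();

    // Fail policy: reports MemoryLimitExceeded immediately.
    let failed = manager
        .acquire_with_policy(1024 * 1024, OnExhaustedPolicy::Fail)
        .await;
    assert!(failed.is_err());

    // Wait policy: blocks up to the timeout, then reports MemoryAcquireTimeout.
    let timed_out = manager
        .acquire_with_policy(
            1024 * 1024,
            OnExhaustedPolicy::Wait {
                timeout: Duration::from_millis(50),
            },
        )
        .await;
    assert!(timed_out.is_err());
}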
|
||||
impl<M: MemoryMetrics> MemoryQuota<M> {
|
||||
pub(crate) fn bytes_to_permits(&self, bytes: u64) -> u32 {
|
||||
self.granularity.bytes_to_permits(bytes)
|
||||
}
|
||||
|
||||
pub(crate) fn permits_to_bytes(&self, permits: u32) -> u64 {
|
||||
self.granularity.permits_to_bytes(permits)
|
||||
}
|
||||
|
||||
pub(crate) fn used_permits(&self) -> u32 {
|
||||
self.limit_permits
|
||||
.saturating_sub(self.available_permits_clamped())
|
||||
@@ -155,19 +216,7 @@ impl<M: MemoryMetrics> MemoryQuota<M> {
|
||||
}
|
||||
|
||||
pub(crate) fn update_in_use_metric(&self) {
|
||||
let bytes = permits_to_bytes(self.used_permits());
|
||||
let bytes = self.permits_to_bytes(self.used_permits());
|
||||
self.metrics.set_in_use(bytes as i64);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn bytes_to_permits(bytes: u64) -> u32 {
|
||||
bytes
|
||||
.saturating_add(PERMIT_GRANULARITY_BYTES - 1)
|
||||
.saturating_div(PERMIT_GRANULARITY_BYTES)
|
||||
.min(Semaphore::MAX_PERMITS as u64)
|
||||
.min(u32::MAX as u64) as u32
|
||||
}
|
||||
|
||||
pub(crate) fn permits_to_bytes(permits: u32) -> u64 {
|
||||
(permits as u64).saturating_mul(PERMIT_GRANULARITY_BYTES)
|
||||
}
|
||||
|
||||
@@ -14,7 +14,10 @@
|
||||
|
||||
use tokio::time::{Duration, sleep};
|
||||
|
||||
use crate::{MemoryManager, NoOpMetrics, PERMIT_GRANULARITY_BYTES};
|
||||
use crate::{MemoryManager, NoOpMetrics, PermitGranularity};
|
||||
|
||||
// Helper constant for tests - use default Megabyte granularity
|
||||
const PERMIT_GRANULARITY_BYTES: u64 = PermitGranularity::Megabyte.bytes();
|
||||
|
||||
#[test]
|
||||
fn test_try_acquire_unlimited() {
|
||||
|
||||
@@ -12,27 +12,10 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
use std::time::Duration;
|
||||
|
||||
use etcd_client::ConnectOptions;
|
||||
|
||||
/// Heartbeat interval time (is the basic unit of various time).
|
||||
pub const HEARTBEAT_INTERVAL_MILLIS: u64 = 3000;
|
||||
|
||||
/// The frontend will also send heartbeats to Metasrv, sending an empty
|
||||
/// heartbeat every HEARTBEAT_INTERVAL_MILLIS * 6 seconds.
|
||||
pub const FRONTEND_HEARTBEAT_INTERVAL_MILLIS: u64 = HEARTBEAT_INTERVAL_MILLIS * 6;
|
||||
|
||||
/// The lease seconds of a region. It's set by 3 heartbeat intervals
|
||||
/// (HEARTBEAT_INTERVAL_MILLIS × 3), plus some extra buffer (1 second).
|
||||
pub const REGION_LEASE_SECS: u64 =
|
||||
Duration::from_millis(HEARTBEAT_INTERVAL_MILLIS * 3).as_secs() + 1;
|
||||
|
||||
/// When creating table or region failover, a target node needs to be selected.
|
||||
/// If the node's lease has expired, the `Selector` will not select it.
|
||||
pub const DATANODE_LEASE_SECS: u64 = REGION_LEASE_SECS;
|
||||
|
||||
pub const FLOWNODE_LEASE_SECS: u64 = DATANODE_LEASE_SECS;
|
||||
pub const BASE_HEARTBEAT_INTERVAL: Duration = Duration::from_secs(3);
|
||||
|
||||
/// The lease seconds of metasrv leader.
|
||||
pub const META_LEASE_SECS: u64 = 5;
|
||||
@@ -52,14 +35,6 @@ pub const HEARTBEAT_CHANNEL_KEEP_ALIVE_INTERVAL_SECS: Duration = Duration::from_
|
||||
/// The keep-alive timeout of the heartbeat channel.
|
||||
pub const HEARTBEAT_CHANNEL_KEEP_ALIVE_TIMEOUT_SECS: Duration = Duration::from_secs(5);
|
||||
|
||||
/// The default options for the etcd client.
|
||||
pub fn default_etcd_client_options() -> ConnectOptions {
|
||||
ConnectOptions::new()
|
||||
.with_keep_alive_while_idle(true)
|
||||
.with_keep_alive(Duration::from_secs(15), Duration::from_secs(5))
|
||||
.with_connect_timeout(Duration::from_secs(10))
|
||||
}
|
||||
|
||||
/// The default mailbox round-trip timeout.
|
||||
pub const MAILBOX_RTT_SECS: u64 = 1;
|
||||
|
||||
@@ -68,3 +43,60 @@ pub const TOPIC_STATS_REPORT_INTERVAL_SECS: u64 = 15;
|
||||
|
||||
/// The retention seconds of topic stats.
|
||||
pub const TOPIC_STATS_RETENTION_SECS: u64 = TOPIC_STATS_REPORT_INTERVAL_SECS * 100;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
/// The distributed time constants.
|
||||
pub struct DistributedTimeConstants {
|
||||
pub heartbeat_interval: Duration,
|
||||
pub frontend_heartbeat_interval: Duration,
|
||||
pub region_lease: Duration,
|
||||
pub datanode_lease: Duration,
|
||||
pub flownode_lease: Duration,
|
||||
}
|
||||
|
||||
/// The frontend heartbeat interval is six times the base heartbeat interval.
pub fn frontend_heartbeat_interval(base_heartbeat_interval: Duration) -> Duration {
    base_heartbeat_interval * 6
}

impl DistributedTimeConstants {
    /// Create a new DistributedTimeConstants from the heartbeat interval.
    pub fn from_heartbeat_interval(heartbeat_interval: Duration) -> Self {
        let region_lease = heartbeat_interval * 3 + Duration::from_secs(1);
        let datanode_lease = region_lease;
        let flownode_lease = datanode_lease;
        Self {
            heartbeat_interval,
            frontend_heartbeat_interval: frontend_heartbeat_interval(heartbeat_interval),
            region_lease,
            datanode_lease,
            flownode_lease,
        }
    }
}

impl Default for DistributedTimeConstants {
    fn default() -> Self {
        Self::from_heartbeat_interval(BASE_HEARTBEAT_INTERVAL)
    }
}
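A quick worked check of the arithmetic above, as a hedged sketch of a test that could live in this module: region, datanode, and flownode leases are three heartbeats plus a one-second buffer, and the frontend heartbeat is six base intervals.

#[cfg(test)]
mod lease_math_tests {
    use std::time::Duration;

    use super::*;

    #[test]
    fn lease_durations_follow_the_heartbeat_interval() {
        // Default 3s heartbeat: 3 * 3s + 1s = 10s leases, 6 * 3s = 18s frontend interval.
        let defaults = DistributedTimeConstants::default();
        assert_eq!(defaults.region_lease, Duration::from_secs(10));
        assert_eq!(defaults.datanode_lease, Duration::from_secs(10));
        assert_eq!(defaults.flownode_lease, Duration::from_secs(10));
        assert_eq!(defaults.frontend_heartbeat_interval, Duration::from_secs(18));

        // A 1s heartbeat scales everything down: 4s leases, 6s frontend interval.
        let fast = DistributedTimeConstants::from_heartbeat_interval(Duration::from_secs(1));
        assert_eq!(fast.region_lease, Duration::from_secs(4));
        assert_eq!(fast.frontend_heartbeat_interval, Duration::from_secs(6));
    }
}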
|
||||
static DEFAULT_DISTRIBUTED_TIME_CONSTANTS: OnceLock<DistributedTimeConstants> = OnceLock::new();
|
||||
|
||||
/// Get the default distributed time constants.
|
||||
pub fn default_distributed_time_constants() -> &'static DistributedTimeConstants {
|
||||
DEFAULT_DISTRIBUTED_TIME_CONSTANTS.get_or_init(Default::default)
|
||||
}
|
||||
|
||||
/// Initialize the default distributed time constants.
|
||||
pub fn init_distributed_time_constants(base_heartbeat_interval: Duration) {
|
||||
let distributed_time_constants =
|
||||
DistributedTimeConstants::from_heartbeat_interval(base_heartbeat_interval);
|
||||
DEFAULT_DISTRIBUTED_TIME_CONSTANTS
|
||||
.set(distributed_time_constants)
|
||||
.expect("Failed to set default distributed time constants");
|
||||
common_telemetry::info!(
|
||||
"Initialized default distributed time constants: {:#?}",
|
||||
distributed_time_constants
|
||||
);
|
||||
}
|
||||
|
||||
@@ -848,7 +848,7 @@ impl PgStore {
|
||||
.context(CreatePostgresPoolSnafu)?,
|
||||
};
|
||||
|
||||
Self::with_pg_pool(pool, None, table_name, max_txn_ops).await
|
||||
Self::with_pg_pool(pool, None, table_name, max_txn_ops, false).await
|
||||
}
|
||||
|
||||
/// Create [PgStore] impl of [KvBackendRef] from url (backward compatibility).
|
||||
@@ -862,6 +862,7 @@ impl PgStore {
|
||||
schema_name: Option<&str>,
|
||||
table_name: &str,
|
||||
max_txn_ops: usize,
|
||||
auto_create_schema: bool,
|
||||
) -> Result<KvBackendRef> {
|
||||
// Ensure the postgres metadata backend is ready to use.
|
||||
let client = match pool.get().await {
|
||||
@@ -873,9 +874,23 @@ impl PgStore {
|
||||
.fail();
|
||||
}
|
||||
};
|
||||
|
||||
// Automatically create schema if enabled and schema_name is provided.
|
||||
if auto_create_schema
|
||||
&& let Some(schema) = schema_name
|
||||
&& !schema.is_empty()
|
||||
{
|
||||
let create_schema_sql = format!("CREATE SCHEMA IF NOT EXISTS \"{}\"", schema);
|
||||
client
|
||||
.execute(&create_schema_sql, &[])
|
||||
.await
|
||||
.with_context(|_| PostgresExecutionSnafu {
|
||||
sql: create_schema_sql.clone(),
|
||||
})?;
|
||||
}
|
||||
|
||||
let template_factory = PgSqlTemplateFactory::new(schema_name, table_name);
|
||||
let sql_template_set = template_factory.build();
|
||||
// Do not attempt to create schema implicitly.
|
||||
client
|
||||
.execute(&sql_template_set.create_table_statement, &[])
|
||||
.await
|
||||
@@ -959,7 +974,7 @@ mod tests {
|
||||
let Some(pool) = build_pg15_pool().await else {
|
||||
return;
|
||||
};
|
||||
let res = PgStore::with_pg_pool(pool, None, "pg15_public_should_fail", 128).await;
|
||||
let res = PgStore::with_pg_pool(pool, None, "pg15_public_should_fail", 128, false).await;
|
||||
assert!(
|
||||
res.is_err(),
|
||||
"creating table in public should fail for test_user"
|
||||
@@ -1214,4 +1229,249 @@ mod tests {
|
||||
let t = PgSqlTemplateFactory::format_table_ident(Some(""), "test_table");
|
||||
assert_eq!(t, "\"test_table\"");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_auto_create_schema_enabled() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
maybe_skip_postgres_integration_test!();
|
||||
let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap();
|
||||
let mut cfg = Config::new();
|
||||
cfg.url = Some(endpoints);
|
||||
let pool = cfg
|
||||
.create_pool(Some(Runtime::Tokio1), NoTls)
|
||||
.context(CreatePostgresPoolSnafu)
|
||||
.unwrap();
|
||||
|
||||
let schema_name = "test_auto_create_enabled";
|
||||
let table_name = "test_table";
|
||||
|
||||
// Drop the schema if it exists to start clean
|
||||
let client = pool.get().await.unwrap();
|
||||
let _ = client
|
||||
.execute(
|
||||
&format!("DROP SCHEMA IF EXISTS \"{}\" CASCADE", schema_name),
|
||||
&[],
|
||||
)
|
||||
.await;
|
||||
|
||||
// Create store with auto_create_schema enabled
|
||||
let _ = PgStore::with_pg_pool(pool.clone(), Some(schema_name), table_name, 128, true)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Verify schema was created
|
||||
let row = client
|
||||
.query_one(
|
||||
"SELECT schema_name FROM information_schema.schemata WHERE schema_name = $1",
|
||||
&[&schema_name],
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let created_schema: String = row.get(0);
|
||||
assert_eq!(created_schema, schema_name);
|
||||
|
||||
// Verify table was created in the schema
|
||||
let row = client
|
||||
.query_one(
|
||||
"SELECT table_schema, table_name FROM information_schema.tables WHERE table_schema = $1 AND table_name = $2",
|
||||
&[&schema_name, &table_name],
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let created_table_schema: String = row.get(0);
|
||||
let created_table_name: String = row.get(1);
|
||||
assert_eq!(created_table_schema, schema_name);
|
||||
assert_eq!(created_table_name, table_name);
|
||||
|
||||
// Cleanup
|
||||
let _ = client
|
||||
.execute(
|
||||
&format!("DROP SCHEMA IF EXISTS \"{}\" CASCADE", schema_name),
|
||||
&[],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_auto_create_schema_disabled() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
maybe_skip_postgres_integration_test!();
|
||||
let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap();
|
||||
let mut cfg = Config::new();
|
||||
cfg.url = Some(endpoints);
|
||||
let pool = cfg
|
||||
.create_pool(Some(Runtime::Tokio1), NoTls)
|
||||
.context(CreatePostgresPoolSnafu)
|
||||
.unwrap();
|
||||
|
||||
let schema_name = "test_auto_create_disabled";
|
||||
let table_name = "test_table";
|
||||
|
||||
// Drop the schema if it exists to start clean
|
||||
let client = pool.get().await.unwrap();
|
||||
let _ = client
|
||||
.execute(
|
||||
&format!("DROP SCHEMA IF EXISTS \"{}\" CASCADE", schema_name),
|
||||
&[],
|
||||
)
|
||||
.await;
|
||||
|
||||
// Try to create store with auto_create_schema disabled (should fail)
|
||||
let result =
|
||||
PgStore::with_pg_pool(pool.clone(), Some(schema_name), table_name, 128, false).await;
|
||||
|
||||
// Verify it failed because schema doesn't exist
|
||||
assert!(
|
||||
result.is_err(),
|
||||
"Expected error when schema doesn't exist and auto_create_schema is disabled"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_auto_create_schema_already_exists() {
|
||||
        common_telemetry::init_default_ut_logging();
        maybe_skip_postgres_integration_test!();
        let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap();
        let mut cfg = Config::new();
        cfg.url = Some(endpoints);
        let pool = cfg
            .create_pool(Some(Runtime::Tokio1), NoTls)
            .context(CreatePostgresPoolSnafu)
            .unwrap();

        let schema_name = "test_auto_create_existing";
        let table_name = "test_table";

        // Manually create the schema first
        let client = pool.get().await.unwrap();
        let _ = client
            .execute(
                &format!("DROP SCHEMA IF EXISTS \"{}\" CASCADE", schema_name),
                &[],
            )
            .await;
        client
            .execute(&format!("CREATE SCHEMA \"{}\"", schema_name), &[])
            .await
            .unwrap();

        // Create store with auto_create_schema enabled (should succeed idempotently)
        let _ = PgStore::with_pg_pool(pool.clone(), Some(schema_name), table_name, 128, true)
            .await
            .unwrap();

        // Verify schema still exists
        let row = client
            .query_one(
                "SELECT schema_name FROM information_schema.schemata WHERE schema_name = $1",
                &[&schema_name],
            )
            .await
            .unwrap();
        let created_schema: String = row.get(0);
        assert_eq!(created_schema, schema_name);

        // Verify table was created in the schema
        let row = client
            .query_one(
                "SELECT table_schema, table_name FROM information_schema.tables WHERE table_schema = $1 AND table_name = $2",
                &[&schema_name, &table_name],
            )
            .await
            .unwrap();
        let created_table_schema: String = row.get(0);
        let created_table_name: String = row.get(1);
        assert_eq!(created_table_schema, schema_name);
        assert_eq!(created_table_name, table_name);

        // Cleanup
        let _ = client
            .execute(
                &format!("DROP SCHEMA IF EXISTS \"{}\" CASCADE", schema_name),
                &[],
            )
            .await;
    }

    #[tokio::test]
    async fn test_auto_create_schema_no_schema_name() {
        common_telemetry::init_default_ut_logging();
        maybe_skip_postgres_integration_test!();
        let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap();
        let mut cfg = Config::new();
        cfg.url = Some(endpoints);
        let pool = cfg
            .create_pool(Some(Runtime::Tokio1), NoTls)
            .context(CreatePostgresPoolSnafu)
            .unwrap();

        let table_name = "test_table_no_schema";

        // Create store with auto_create_schema enabled but no schema name (should succeed)
        // This should create the table in the default schema (public)
        let _ = PgStore::with_pg_pool(pool.clone(), None, table_name, 128, true)
            .await
            .unwrap();

        // Verify table was created in public schema
        let client = pool.get().await.unwrap();
        let row = client
            .query_one(
                "SELECT table_schema, table_name FROM information_schema.tables WHERE table_name = $1",
                &[&table_name],
            )
            .await
            .unwrap();
        let created_table_schema: String = row.get(0);
        let created_table_name: String = row.get(1);
        assert_eq!(created_table_name, table_name);
        // Verify it's in public schema (or whichever is the default)
        assert!(created_table_schema == "public" || !created_table_schema.is_empty());

        // Cleanup
        let _ = client
            .execute(&format!("DROP TABLE IF EXISTS \"{}\"", table_name), &[])
            .await;
    }

    #[tokio::test]
    async fn test_auto_create_schema_with_empty_schema_name() {
        common_telemetry::init_default_ut_logging();
        maybe_skip_postgres_integration_test!();
        let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap();
        let mut cfg = Config::new();
        cfg.url = Some(endpoints);
        let pool = cfg
            .create_pool(Some(Runtime::Tokio1), NoTls)
            .context(CreatePostgresPoolSnafu)
            .unwrap();

        let table_name = "test_table_empty_schema";

        // Create store with auto_create_schema enabled but empty schema name (should succeed)
        // This should create the table in the default schema (public)
        let _ = PgStore::with_pg_pool(pool.clone(), Some(""), table_name, 128, true)
            .await
            .unwrap();

        // Verify table was created in public schema
        let client = pool.get().await.unwrap();
        let row = client
            .query_one(
                "SELECT table_schema, table_name FROM information_schema.tables WHERE table_name = $1",
                &[&table_name],
            )
            .await
            .unwrap();
        let created_table_schema: String = row.get(0);
        let created_table_name: String = row.get(1);
        assert_eq!(created_table_name, table_name);
        // Verify it's in public schema (or whichever is the default)
        assert!(created_table_schema == "public" || !created_table_schema.is_empty());

        // Cleanup
        let _ = client
            .execute(&format!("DROP TABLE IF EXISTS \"{}\"", table_name), &[])
            .await;
    }
}

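For reference, the construction path these tests exercise boils down to the following sketch (not part of the diff; it only recombines the pool setup and `PgStore::with_pg_pool` call shown above, with placeholder schema and table names):

    // Sketch: build a deadpool-postgres pool and open the metadata store with
    // schema auto-creation enabled. `128` mirrors the max-ops argument used in
    // the tests above; "my_schema" / "my_table" are placeholders.
    let mut cfg = Config::new();
    cfg.url = Some(std::env::var("GT_POSTGRES_ENDPOINTS").unwrap());
    let pool = cfg
        .create_pool(Some(Runtime::Tokio1), NoTls)
        .context(CreatePostgresPoolSnafu)
        .unwrap();
    // `Some("my_schema")` plus `true` creates the schema if it is missing and
    // reuses it if it already exists; `None` or `Some("")` falls back to the
    // default (public) schema, as the tests above verify.
    let _store = PgStore::with_pg_pool(pool, Some("my_schema"), "my_table", 128, true)
        .await
        .unwrap();
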
@@ -14,7 +14,7 @@

use common_telemetry::{debug, error, info};
use common_wal::config::kafka::common::{
    DEFAULT_BACKOFF_CONFIG, DEFAULT_CONNECT_TIMEOUT, KafkaConnectionConfig, KafkaTopicConfig,
    DEFAULT_BACKOFF_CONFIG, KafkaConnectionConfig, KafkaTopicConfig,
};
use rskafka::client::error::Error as RsKafkaError;
use rskafka::client::error::ProtocolError::TopicAlreadyExists;
@@ -211,7 +211,8 @@ pub async fn build_kafka_client(connection: &KafkaConnectionConfig) -> Result<Cl
    // Builds an kafka controller client for creating topics.
    let mut builder = ClientBuilder::new(connection.broker_endpoints.clone())
        .backoff_config(DEFAULT_BACKOFF_CONFIG)
        .connect_timeout(Some(DEFAULT_CONNECT_TIMEOUT));
        .connect_timeout(Some(connection.connect_timeout))
        .timeout(Some(connection.timeout));
    if let Some(sasl) = &connection.sasl {
        builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
    };

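A sketch of the resulting call site (not from the diff): the hard-coded DEFAULT_CONNECT_TIMEOUT is gone, so both timeouts now come from the connection config; the 3-second values below are only the defaults introduced later in this change set.

    // Assumed caller-side usage; `build_kafka_client` and the config fields are
    // taken from this diff, the concrete values are illustrative.
    let connection = KafkaConnectionConfig {
        connect_timeout: Duration::from_secs(3),
        timeout: Duration::from_secs(3),
        ..Default::default()
    };
    let client = build_kafka_client(&connection).await?;
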
@@ -5,10 +5,12 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
arrow-schema.workspace = true
|
||||
common-base.workspace = true
|
||||
common-decimal.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-time.workspace = true
|
||||
datafusion-sql.workspace = true
|
||||
datatypes.workspace = true
|
||||
|
||||
@@ -14,11 +14,12 @@
|
||||
|
||||
use std::str::FromStr;
|
||||
|
||||
use arrow_schema::extension::ExtensionType;
|
||||
use common_time::Timestamp;
|
||||
use common_time::timezone::Timezone;
|
||||
use datatypes::json::JsonStructureSettings;
|
||||
use datatypes::extension::json::JsonExtensionType;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::ColumnDefaultConstraint;
|
||||
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
|
||||
use datatypes::types::{JsonFormat, parse_string_to_jsonb, parse_string_to_vector_type_value};
|
||||
use datatypes::value::{OrderedF32, OrderedF64, Value};
|
||||
use snafu::{OptionExt, ResultExt, ensure};
|
||||
@@ -124,13 +125,14 @@ pub(crate) fn sql_number_to_value(data_type: &ConcreteDataType, n: &str) -> Resu
/// If `auto_string_to_numeric` is true, tries to cast the string value to numeric values,
/// and returns error if the cast fails.
pub fn sql_value_to_value(
    column_name: &str,
    data_type: &ConcreteDataType,
    column_schema: &ColumnSchema,
    sql_val: &SqlValue,
    timezone: Option<&Timezone>,
    unary_op: Option<UnaryOperator>,
    auto_string_to_numeric: bool,
) -> Result<Value> {
    let column_name = &column_schema.name;
    let data_type = &column_schema.data_type;
    let mut value = match sql_val {
        SqlValue::Number(n, _) => sql_number_to_value(data_type, n)?,
        SqlValue::Null => Value::Null,
@@ -146,13 +148,9 @@ pub fn sql_value_to_value(

            (*b).into()
        }
        SqlValue::DoubleQuotedString(s) | SqlValue::SingleQuotedString(s) => parse_string_to_value(
            column_name,
            s.clone(),
            data_type,
            timezone,
            auto_string_to_numeric,
        )?,
        SqlValue::DoubleQuotedString(s) | SqlValue::SingleQuotedString(s) => {
            parse_string_to_value(column_schema, s.clone(), timezone, auto_string_to_numeric)?
        }
        SqlValue::HexStringLiteral(s) => {
            // Should not directly write binary into json column
            ensure!(
@@ -244,12 +242,12 @@ pub fn sql_value_to_value(
}

pub(crate) fn parse_string_to_value(
    column_name: &str,
    column_schema: &ColumnSchema,
    s: String,
    data_type: &ConcreteDataType,
    timezone: Option<&Timezone>,
    auto_string_to_numeric: bool,
) -> Result<Value> {
    let data_type = &column_schema.data_type;
    if auto_string_to_numeric && let Some(value) = auto_cast_to_numeric(&s, data_type)? {
        return Ok(value);
    }
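Call sites adapt by building a `ColumnSchema` that carries both the column name and its type, as the later hunk in `parse_column_default_constraint` does; a minimal before/after sketch (the `true` nullable flag here is illustrative):

    // Before: name and type were passed separately.
    // let v = sql_value_to_value("col", &ConcreteDataType::int32_datatype(), &sql_val, None, None, false)?;
    // After: the ColumnSchema bundles name + type (and, for JSON columns, extension metadata).
    let column_schema = ColumnSchema::new("col", ConcreteDataType::int32_datatype(), true);
    let v = sql_value_to_value(&column_schema, &sql_val, None, None, false)?;
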
@@ -257,7 +255,7 @@ pub(crate) fn parse_string_to_value(
|
||||
ensure!(
|
||||
data_type.is_stringifiable(),
|
||||
ColumnTypeMismatchSnafu {
|
||||
column_name,
|
||||
column_name: column_schema.name.clone(),
|
||||
expect: data_type.clone(),
|
||||
actual: ConcreteDataType::string_datatype(),
|
||||
}
|
||||
@@ -303,23 +301,21 @@ pub(crate) fn parse_string_to_value(
|
||||
}
|
||||
}
|
||||
ConcreteDataType::Binary(_) => Ok(Value::Binary(s.as_bytes().into())),
|
||||
ConcreteDataType::Json(j) => {
|
||||
match &j.format {
|
||||
JsonFormat::Jsonb => {
|
||||
let v = parse_string_to_jsonb(&s).context(DatatypeSnafu)?;
|
||||
Ok(Value::Binary(v.into()))
|
||||
}
|
||||
JsonFormat::Native(_inner) => {
|
||||
// Always use the structured version at this level.
|
||||
let serde_json_value =
|
||||
serde_json::from_str(&s).context(DeserializeSnafu { json: s })?;
|
||||
let json_structure_settings = JsonStructureSettings::Structured(None);
|
||||
json_structure_settings
|
||||
.encode(serde_json_value)
|
||||
.context(DatatypeSnafu)
|
||||
}
|
||||
ConcreteDataType::Json(j) => match &j.format {
|
||||
JsonFormat::Jsonb => {
|
||||
let v = parse_string_to_jsonb(&s).context(DatatypeSnafu)?;
|
||||
Ok(Value::Binary(v.into()))
|
||||
}
|
||||
}
|
||||
JsonFormat::Native(_) => {
|
||||
let extension_type: Option<JsonExtensionType> =
|
||||
column_schema.extension_type().context(DatatypeSnafu)?;
|
||||
let json_structure_settings = extension_type
|
||||
.and_then(|x| x.metadata().json_structure_settings.clone())
|
||||
.unwrap_or_default();
|
||||
let v = serde_json::from_str(&s).context(DeserializeSnafu { json: s })?;
|
||||
json_structure_settings.encode(v).context(DatatypeSnafu)
|
||||
}
|
||||
},
|
||||
ConcreteDataType::Vector(d) => {
|
||||
let v = parse_string_to_vector_type_value(&s, Some(d.dim)).context(DatatypeSnafu)?;
|
||||
Ok(Value::Binary(v.into()))
|
||||
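In short, the rewritten `JsonFormat::Native` branch resolves its encoding settings from the column itself instead of always using `Structured(None)` (summary comment, same names as in the diff):

    // Resolution order in the Native branch above:
    // 1. read the column's JsonExtensionType metadata, if present;
    // 2. use its json_structure_settings, if set;
    // 3. otherwise fall back to JsonStructureSettings::default().
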
@@ -417,305 +413,265 @@ mod test {
|
||||
|
||||
use super::*;
|
||||
|
||||
macro_rules! call_parse_string_to_value {
|
||||
($column_name: expr, $input: expr, $data_type: expr) => {
|
||||
call_parse_string_to_value!($column_name, $input, $data_type, None)
|
||||
};
|
||||
($column_name: expr, $input: expr, $data_type: expr, timezone = $timezone: expr) => {
|
||||
call_parse_string_to_value!($column_name, $input, $data_type, Some($timezone))
|
||||
};
|
||||
($column_name: expr, $input: expr, $data_type: expr, $timezone: expr) => {{
|
||||
let column_schema = ColumnSchema::new($column_name, $data_type, true);
|
||||
parse_string_to_value(&column_schema, $input, $timezone, true)
|
||||
}};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_to_value_auto_numeric() {
|
||||
fn test_string_to_value_auto_numeric() -> Result<()> {
|
||||
// Test string to boolean with auto cast
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"true".to_string(),
|
||||
&ConcreteDataType::boolean_datatype(),
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.unwrap();
|
||||
ConcreteDataType::boolean_datatype()
|
||||
)?;
|
||||
assert_eq!(Value::Boolean(true), result);
|
||||
|
||||
// Test invalid string to boolean with auto cast
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"not_a_boolean".to_string(),
|
||||
&ConcreteDataType::boolean_datatype(),
|
||||
None,
|
||||
true,
|
||||
ConcreteDataType::boolean_datatype()
|
||||
);
|
||||
assert!(result.is_err());
|
||||
|
||||
// Test string to int8
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"42".to_string(),
|
||||
&ConcreteDataType::int8_datatype(),
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.unwrap();
|
||||
ConcreteDataType::int8_datatype()
|
||||
)?;
|
||||
assert_eq!(Value::Int8(42), result);
|
||||
|
||||
// Test invalid string to int8 with auto cast
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"not_an_int8".to_string(),
|
||||
&ConcreteDataType::int8_datatype(),
|
||||
None,
|
||||
true,
|
||||
ConcreteDataType::int8_datatype()
|
||||
);
|
||||
assert!(result.is_err());
|
||||
|
||||
// Test string to int16
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"1000".to_string(),
|
||||
&ConcreteDataType::int16_datatype(),
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.unwrap();
|
||||
ConcreteDataType::int16_datatype()
|
||||
)?;
|
||||
assert_eq!(Value::Int16(1000), result);
|
||||
|
||||
// Test invalid string to int16 with auto cast
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"not_an_int16".to_string(),
|
||||
&ConcreteDataType::int16_datatype(),
|
||||
None,
|
||||
true,
|
||||
ConcreteDataType::int16_datatype()
|
||||
);
|
||||
assert!(result.is_err());
|
||||
|
||||
// Test string to int32
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"100000".to_string(),
|
||||
&ConcreteDataType::int32_datatype(),
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.unwrap();
|
||||
ConcreteDataType::int32_datatype()
|
||||
)?;
|
||||
assert_eq!(Value::Int32(100000), result);
|
||||
|
||||
// Test invalid string to int32 with auto cast
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"not_an_int32".to_string(),
|
||||
&ConcreteDataType::int32_datatype(),
|
||||
None,
|
||||
true,
|
||||
ConcreteDataType::int32_datatype()
|
||||
);
|
||||
assert!(result.is_err());
|
||||
|
||||
// Test string to int64
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"1000000".to_string(),
|
||||
&ConcreteDataType::int64_datatype(),
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.unwrap();
|
||||
ConcreteDataType::int64_datatype()
|
||||
)?;
|
||||
assert_eq!(Value::Int64(1000000), result);
|
||||
|
||||
// Test invalid string to int64 with auto cast
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"not_an_int64".to_string(),
|
||||
&ConcreteDataType::int64_datatype(),
|
||||
None,
|
||||
true,
|
||||
ConcreteDataType::int64_datatype()
|
||||
);
|
||||
assert!(result.is_err());
|
||||
|
||||
// Test string to uint8
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"200".to_string(),
|
||||
&ConcreteDataType::uint8_datatype(),
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.unwrap();
|
||||
ConcreteDataType::uint8_datatype()
|
||||
)?;
|
||||
assert_eq!(Value::UInt8(200), result);
|
||||
|
||||
// Test invalid string to uint8 with auto cast
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"not_a_uint8".to_string(),
|
||||
&ConcreteDataType::uint8_datatype(),
|
||||
None,
|
||||
true,
|
||||
ConcreteDataType::uint8_datatype()
|
||||
);
|
||||
assert!(result.is_err());
|
||||
|
||||
// Test string to uint16
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"60000".to_string(),
|
||||
&ConcreteDataType::uint16_datatype(),
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.unwrap();
|
||||
ConcreteDataType::uint16_datatype()
|
||||
)?;
|
||||
assert_eq!(Value::UInt16(60000), result);
|
||||
|
||||
// Test invalid string to uint16 with auto cast
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"not_a_uint16".to_string(),
|
||||
&ConcreteDataType::uint16_datatype(),
|
||||
None,
|
||||
true,
|
||||
ConcreteDataType::uint16_datatype()
|
||||
);
|
||||
assert!(result.is_err());
|
||||
|
||||
// Test string to uint32
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"4000000000".to_string(),
|
||||
&ConcreteDataType::uint32_datatype(),
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.unwrap();
|
||||
ConcreteDataType::uint32_datatype()
|
||||
)?;
|
||||
assert_eq!(Value::UInt32(4000000000), result);
|
||||
|
||||
// Test invalid string to uint32 with auto cast
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"not_a_uint32".to_string(),
|
||||
&ConcreteDataType::uint32_datatype(),
|
||||
None,
|
||||
true,
|
||||
ConcreteDataType::uint32_datatype()
|
||||
);
|
||||
assert!(result.is_err());
|
||||
|
||||
// Test string to uint64
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"18446744073709551615".to_string(),
|
||||
&ConcreteDataType::uint64_datatype(),
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.unwrap();
|
||||
ConcreteDataType::uint64_datatype()
|
||||
)?;
|
||||
assert_eq!(Value::UInt64(18446744073709551615), result);
|
||||
|
||||
// Test invalid string to uint64 with auto cast
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"not_a_uint64".to_string(),
|
||||
&ConcreteDataType::uint64_datatype(),
|
||||
None,
|
||||
true,
|
||||
ConcreteDataType::uint64_datatype()
|
||||
);
|
||||
assert!(result.is_err());
|
||||
|
||||
// Test string to float32
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"3.5".to_string(),
|
||||
&ConcreteDataType::float32_datatype(),
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.unwrap();
|
||||
ConcreteDataType::float32_datatype()
|
||||
)?;
|
||||
assert_eq!(Value::Float32(OrderedF32::from(3.5)), result);
|
||||
|
||||
// Test invalid string to float32 with auto cast
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"not_a_float32".to_string(),
|
||||
&ConcreteDataType::float32_datatype(),
|
||||
None,
|
||||
true,
|
||||
ConcreteDataType::float32_datatype()
|
||||
);
|
||||
assert!(result.is_err());
|
||||
|
||||
// Test string to float64
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"3.5".to_string(),
|
||||
&ConcreteDataType::float64_datatype(),
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.unwrap();
|
||||
ConcreteDataType::float64_datatype()
|
||||
)?;
|
||||
assert_eq!(Value::Float64(OrderedF64::from(3.5)), result);
|
||||
|
||||
// Test invalid string to float64 with auto cast
|
||||
let result = parse_string_to_value(
|
||||
let result = call_parse_string_to_value!(
|
||||
"col",
|
||||
"not_a_float64".to_string(),
|
||||
&ConcreteDataType::float64_datatype(),
|
||||
None,
|
||||
true,
|
||||
ConcreteDataType::float64_datatype()
|
||||
);
|
||||
assert!(result.is_err());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sql_value_to_value() {
|
||||
let sql_val = SqlValue::Null;
|
||||
assert_eq!(
|
||||
Value::Null,
|
||||
sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::float64_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
macro_rules! call_sql_value_to_value {
|
||||
($column_name: expr, $data_type: expr, $sql_value: expr) => {
|
||||
call_sql_value_to_value!($column_name, $data_type, $sql_value, None, None, false)
|
||||
};
|
||||
($column_name: expr, $data_type: expr, $sql_value: expr, timezone = $timezone: expr) => {
|
||||
call_sql_value_to_value!(
|
||||
$column_name,
|
||||
$data_type,
|
||||
$sql_value,
|
||||
Some($timezone),
|
||||
None,
|
||||
false
|
||||
)
|
||||
.unwrap()
|
||||
};
|
||||
($column_name: expr, $data_type: expr, $sql_value: expr, unary_op = $unary_op: expr) => {
|
||||
call_sql_value_to_value!(
|
||||
$column_name,
|
||||
$data_type,
|
||||
$sql_value,
|
||||
None,
|
||||
Some($unary_op),
|
||||
false
|
||||
)
|
||||
};
|
||||
($column_name: expr, $data_type: expr, $sql_value: expr, auto_string_to_numeric) => {
|
||||
call_sql_value_to_value!($column_name, $data_type, $sql_value, None, None, true)
|
||||
};
|
||||
($column_name: expr, $data_type: expr, $sql_value: expr, $timezone: expr, $unary_op: expr, $auto_string_to_numeric: expr) => {{
|
||||
let column_schema = ColumnSchema::new($column_name, $data_type, true);
|
||||
sql_value_to_value(
|
||||
&column_schema,
|
||||
$sql_value,
|
||||
$timezone,
|
||||
$unary_op,
|
||||
$auto_string_to_numeric,
|
||||
)
|
||||
}};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sql_value_to_value() -> Result<()> {
|
||||
let sql_val = SqlValue::Null;
|
||||
assert_eq!(
|
||||
Value::Null,
|
||||
call_sql_value_to_value!("a", ConcreteDataType::float64_datatype(), &sql_val)?
|
||||
);
|
||||
|
||||
let sql_val = SqlValue::Boolean(true);
|
||||
assert_eq!(
|
||||
Value::Boolean(true),
|
||||
sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::boolean_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
false
|
||||
)
|
||||
.unwrap()
|
||||
call_sql_value_to_value!("a", ConcreteDataType::boolean_datatype(), &sql_val)?
|
||||
);
|
||||
|
||||
let sql_val = SqlValue::Number("3.0".to_string(), false);
|
||||
assert_eq!(
|
||||
Value::Float64(OrderedFloat(3.0)),
|
||||
sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::float64_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
false
|
||||
)
|
||||
.unwrap()
|
||||
call_sql_value_to_value!("a", ConcreteDataType::float64_datatype(), &sql_val)?
|
||||
);
|
||||
|
||||
let sql_val = SqlValue::Number("3.0".to_string(), false);
|
||||
let v = sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::boolean_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
);
|
||||
let v = call_sql_value_to_value!("a", ConcreteDataType::boolean_datatype(), &sql_val);
|
||||
assert!(v.is_err());
|
||||
assert!(format!("{v:?}").contains("Failed to parse number '3.0' to boolean column type"));
|
||||
|
||||
let sql_val = SqlValue::Boolean(true);
|
||||
let v = sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::float64_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
);
|
||||
let v = call_sql_value_to_value!("a", ConcreteDataType::float64_datatype(), &sql_val);
|
||||
assert!(v.is_err());
|
||||
assert!(
|
||||
format!("{v:?}").contains(
|
||||
@@ -725,41 +681,18 @@ mod test {
|
||||
);
|
||||
|
||||
let sql_val = SqlValue::HexStringLiteral("48656c6c6f20776f726c6421".to_string());
|
||||
let v = sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::binary_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
let v = call_sql_value_to_value!("a", ConcreteDataType::binary_datatype(), &sql_val)?;
|
||||
assert_eq!(Value::Binary(Bytes::from(b"Hello world!".as_slice())), v);
|
||||
|
||||
let sql_val = SqlValue::DoubleQuotedString("MorningMyFriends".to_string());
|
||||
let v = sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::binary_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
let v = call_sql_value_to_value!("a", ConcreteDataType::binary_datatype(), &sql_val)?;
|
||||
assert_eq!(
|
||||
Value::Binary(Bytes::from(b"MorningMyFriends".as_slice())),
|
||||
v
|
||||
);
|
||||
|
||||
let sql_val = SqlValue::HexStringLiteral("9AF".to_string());
|
||||
let v = sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::binary_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
);
|
||||
let v = call_sql_value_to_value!("a", ConcreteDataType::binary_datatype(), &sql_val);
|
||||
assert!(v.is_err());
|
||||
assert!(
|
||||
format!("{v:?}").contains("odd number of digits"),
|
||||
@@ -767,38 +700,16 @@ mod test {
|
||||
);
|
||||
|
||||
let sql_val = SqlValue::HexStringLiteral("AG".to_string());
|
||||
let v = sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::binary_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
);
|
||||
let v = call_sql_value_to_value!("a", ConcreteDataType::binary_datatype(), &sql_val);
|
||||
assert!(v.is_err());
|
||||
assert!(format!("{v:?}").contains("invalid character"), "v is {v:?}",);
|
||||
|
||||
let sql_val = SqlValue::DoubleQuotedString("MorningMyFriends".to_string());
|
||||
let v = sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::json_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
);
|
||||
let v = call_sql_value_to_value!("a", ConcreteDataType::json_datatype(), &sql_val);
|
||||
assert!(v.is_err());
|
||||
|
||||
let sql_val = SqlValue::DoubleQuotedString(r#"{"a":"b"}"#.to_string());
|
||||
let v = sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::json_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
let v = call_sql_value_to_value!("a", ConcreteDataType::json_datatype(), &sql_val)?;
|
||||
assert_eq!(
|
||||
Value::Binary(Bytes::from(
|
||||
jsonb::parse_value(r#"{"a":"b"}"#.as_bytes())
|
||||
@@ -808,16 +719,15 @@ mod test {
|
||||
)),
|
||||
v
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_json_to_jsonb() {
|
||||
match parse_string_to_value(
|
||||
match call_parse_string_to_value!(
|
||||
"json_col",
|
||||
r#"{"a": "b"}"#.to_string(),
|
||||
&ConcreteDataType::json_datatype(),
|
||||
None,
|
||||
false,
|
||||
ConcreteDataType::json_datatype()
|
||||
) {
|
||||
Ok(Value::Binary(b)) => {
|
||||
assert_eq!(
|
||||
@@ -833,12 +743,10 @@ mod test {
|
||||
}
|
||||
|
||||
assert!(
|
||||
parse_string_to_value(
|
||||
call_parse_string_to_value!(
|
||||
"json_col",
|
||||
r#"Nicola Kovac is the best rifler in the world"#.to_string(),
|
||||
&ConcreteDataType::json_datatype(),
|
||||
None,
|
||||
false,
|
||||
ConcreteDataType::json_datatype()
|
||||
)
|
||||
.is_err()
|
||||
)
|
||||
@@ -878,13 +786,10 @@ mod test {
|
||||
|
||||
#[test]
|
||||
fn test_parse_date_literal() {
|
||||
let value = sql_value_to_value(
|
||||
let value = call_sql_value_to_value!(
|
||||
"date",
|
||||
&ConcreteDataType::date_datatype(),
|
||||
&SqlValue::DoubleQuotedString("2022-02-22".to_string()),
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
ConcreteDataType::date_datatype(),
|
||||
&SqlValue::DoubleQuotedString("2022-02-22".to_string())
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(ConcreteDataType::date_datatype(), value.data_type());
|
||||
@@ -895,13 +800,11 @@ mod test {
|
||||
}
|
||||
|
||||
// with timezone
|
||||
let value = sql_value_to_value(
|
||||
let value = call_sql_value_to_value!(
|
||||
"date",
|
||||
&ConcreteDataType::date_datatype(),
|
||||
ConcreteDataType::date_datatype(),
|
||||
&SqlValue::DoubleQuotedString("2022-02-22".to_string()),
|
||||
Some(&Timezone::from_tz_string("+07:00").unwrap()),
|
||||
None,
|
||||
false,
|
||||
timezone = &Timezone::from_tz_string("+07:00").unwrap()
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(ConcreteDataType::date_datatype(), value.data_type());
|
||||
@@ -913,16 +816,12 @@ mod test {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_timestamp_literal() {
|
||||
match parse_string_to_value(
|
||||
fn test_parse_timestamp_literal() -> Result<()> {
|
||||
match call_parse_string_to_value!(
|
||||
"timestamp_col",
|
||||
"2022-02-22T00:01:01+08:00".to_string(),
|
||||
&ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap()
|
||||
{
|
||||
ConcreteDataType::timestamp_millisecond_datatype()
|
||||
)? {
|
||||
Value::Timestamp(ts) => {
|
||||
assert_eq!(1645459261000, ts.value());
|
||||
assert_eq!(TimeUnit::Millisecond, ts.unit());
|
||||
@@ -932,15 +831,11 @@ mod test {
|
||||
}
|
||||
}
|
||||
|
||||
match parse_string_to_value(
|
||||
match call_parse_string_to_value!(
|
||||
"timestamp_col",
|
||||
"2022-02-22T00:01:01+08:00".to_string(),
|
||||
&ConcreteDataType::timestamp_datatype(TimeUnit::Second),
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap()
|
||||
{
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Second)
|
||||
)? {
|
||||
Value::Timestamp(ts) => {
|
||||
assert_eq!(1645459261, ts.value());
|
||||
assert_eq!(TimeUnit::Second, ts.unit());
|
||||
@@ -950,15 +845,11 @@ mod test {
|
||||
}
|
||||
}
|
||||
|
||||
match parse_string_to_value(
|
||||
match call_parse_string_to_value!(
|
||||
"timestamp_col",
|
||||
"2022-02-22T00:01:01+08:00".to_string(),
|
||||
&ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond),
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap()
|
||||
{
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond)
|
||||
)? {
|
||||
Value::Timestamp(ts) => {
|
||||
assert_eq!(1645459261000000, ts.value());
|
||||
assert_eq!(TimeUnit::Microsecond, ts.unit());
|
||||
@@ -968,15 +859,11 @@ mod test {
|
||||
}
|
||||
}
|
||||
|
||||
match parse_string_to_value(
|
||||
match call_parse_string_to_value!(
|
||||
"timestamp_col",
|
||||
"2022-02-22T00:01:01+08:00".to_string(),
|
||||
&ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap()
|
||||
{
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond)
|
||||
)? {
|
||||
Value::Timestamp(ts) => {
|
||||
assert_eq!(1645459261000000000, ts.value());
|
||||
assert_eq!(TimeUnit::Nanosecond, ts.unit());
|
||||
@@ -987,26 +874,21 @@ mod test {
|
||||
}
|
||||
|
||||
assert!(
|
||||
parse_string_to_value(
|
||||
call_parse_string_to_value!(
|
||||
"timestamp_col",
|
||||
"2022-02-22T00:01:01+08".to_string(),
|
||||
&ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
|
||||
None,
|
||||
false,
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond)
|
||||
)
|
||||
.is_err()
|
||||
);
|
||||
|
||||
// with timezone
|
||||
match parse_string_to_value(
|
||||
match call_parse_string_to_value!(
|
||||
"timestamp_col",
|
||||
"2022-02-22T00:01:01".to_string(),
|
||||
&ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
|
||||
Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap()),
|
||||
false,
|
||||
)
|
||||
.unwrap()
|
||||
{
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
|
||||
timezone = &Timezone::from_tz_string("Asia/Shanghai").unwrap()
|
||||
)? {
|
||||
Value::Timestamp(ts) => {
|
||||
assert_eq!(1645459261000000000, ts.value());
|
||||
assert_eq!("2022-02-21 16:01:01+0000", ts.to_iso8601_string());
|
||||
@@ -1016,51 +898,42 @@ mod test {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_placeholder_value() {
|
||||
assert!(
|
||||
sql_value_to_value(
|
||||
call_sql_value_to_value!(
|
||||
"test",
|
||||
&ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
&SqlValue::Placeholder("default".into())
|
||||
)
|
||||
.is_err()
|
||||
);
|
||||
assert!(
|
||||
call_sql_value_to_value!(
|
||||
"test",
|
||||
ConcreteDataType::string_datatype(),
|
||||
&SqlValue::Placeholder("default".into()),
|
||||
None,
|
||||
None,
|
||||
false
|
||||
unary_op = UnaryOperator::Minus
|
||||
)
|
||||
.is_err()
|
||||
);
|
||||
assert!(
|
||||
sql_value_to_value(
|
||||
call_sql_value_to_value!(
|
||||
"test",
|
||||
&ConcreteDataType::string_datatype(),
|
||||
&SqlValue::Placeholder("default".into()),
|
||||
None,
|
||||
Some(UnaryOperator::Minus),
|
||||
false
|
||||
)
|
||||
.is_err()
|
||||
);
|
||||
assert!(
|
||||
sql_value_to_value(
|
||||
"test",
|
||||
&ConcreteDataType::uint16_datatype(),
|
||||
ConcreteDataType::uint16_datatype(),
|
||||
&SqlValue::Number("3".into(), false),
|
||||
None,
|
||||
Some(UnaryOperator::Minus),
|
||||
false
|
||||
unary_op = UnaryOperator::Minus
|
||||
)
|
||||
.is_err()
|
||||
);
|
||||
assert!(
|
||||
sql_value_to_value(
|
||||
call_sql_value_to_value!(
|
||||
"test",
|
||||
&ConcreteDataType::uint16_datatype(),
|
||||
&SqlValue::Number("3".into(), false),
|
||||
None,
|
||||
None,
|
||||
false
|
||||
ConcreteDataType::uint16_datatype(),
|
||||
&SqlValue::Number("3".into(), false)
|
||||
)
|
||||
.is_ok()
|
||||
);
|
||||
@@ -1070,77 +943,60 @@ mod test {
|
||||
fn test_auto_string_to_numeric() {
|
||||
// Test with auto_string_to_numeric=true
|
||||
let sql_val = SqlValue::SingleQuotedString("123".to_string());
|
||||
let v = sql_value_to_value(
|
||||
let v = call_sql_value_to_value!(
|
||||
"a",
|
||||
&ConcreteDataType::int32_datatype(),
|
||||
ConcreteDataType::int32_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
true,
|
||||
auto_string_to_numeric
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(Value::Int32(123), v);
|
||||
|
||||
// Test with a float string
|
||||
let sql_val = SqlValue::SingleQuotedString("3.5".to_string());
|
||||
let v = sql_value_to_value(
|
||||
let v = call_sql_value_to_value!(
|
||||
"a",
|
||||
&ConcreteDataType::float64_datatype(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
true,
|
||||
auto_string_to_numeric
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(Value::Float64(OrderedFloat(3.5)), v);
|
||||
|
||||
// Test with auto_string_to_numeric=false
|
||||
let sql_val = SqlValue::SingleQuotedString("123".to_string());
|
||||
let v = sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::int32_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
);
|
||||
let v = call_sql_value_to_value!("a", ConcreteDataType::int32_datatype(), &sql_val);
|
||||
assert!(v.is_err());
|
||||
|
||||
// Test with an invalid numeric string but auto_string_to_numeric=true
|
||||
// Should return an error now with the new auto_cast_to_numeric behavior
|
||||
let sql_val = SqlValue::SingleQuotedString("not_a_number".to_string());
|
||||
let v = sql_value_to_value(
|
||||
let v = call_sql_value_to_value!(
|
||||
"a",
|
||||
&ConcreteDataType::int32_datatype(),
|
||||
ConcreteDataType::int32_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
true,
|
||||
auto_string_to_numeric
|
||||
);
|
||||
assert!(v.is_err());
|
||||
|
||||
// Test with boolean type
|
||||
let sql_val = SqlValue::SingleQuotedString("true".to_string());
|
||||
let v = sql_value_to_value(
|
||||
let v = call_sql_value_to_value!(
|
||||
"a",
|
||||
&ConcreteDataType::boolean_datatype(),
|
||||
ConcreteDataType::boolean_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
true,
|
||||
auto_string_to_numeric
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(Value::Boolean(true), v);
|
||||
|
||||
// Non-numeric types should still be handled normally
|
||||
let sql_val = SqlValue::SingleQuotedString("hello".to_string());
|
||||
let v = sql_value_to_value(
|
||||
let v = call_sql_value_to_value!(
|
||||
"a",
|
||||
&ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
true,
|
||||
auto_string_to_numeric
|
||||
);
|
||||
assert!(v.is_ok());
|
||||
}
|
||||
|
||||
@@ -14,8 +14,8 @@
|
||||
|
||||
use common_time::timezone::Timezone;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::ColumnDefaultConstraint;
|
||||
use datatypes::schema::constraint::{CURRENT_TIMESTAMP, CURRENT_TIMESTAMP_FN};
|
||||
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
|
||||
use snafu::ensure;
|
||||
use sqlparser::ast::ValueWithSpan;
|
||||
pub use sqlparser::ast::{
|
||||
@@ -47,9 +47,12 @@ pub fn parse_column_default_constraint(
|
||||
);
|
||||
|
||||
let default_constraint = match &opt.option {
|
||||
ColumnOption::Default(Expr::Value(v)) => ColumnDefaultConstraint::Value(
|
||||
sql_value_to_value(column_name, data_type, &v.value, timezone, None, false)?,
|
||||
),
|
||||
ColumnOption::Default(Expr::Value(v)) => {
|
||||
let schema = ColumnSchema::new(column_name, data_type.clone(), true);
|
||||
ColumnDefaultConstraint::Value(sql_value_to_value(
|
||||
&schema, &v.value, timezone, None, false,
|
||||
)?)
|
||||
}
|
||||
ColumnOption::Default(Expr::Function(func)) => {
|
||||
let mut func = format!("{func}").to_lowercase();
|
||||
// normalize CURRENT_TIMESTAMP to CURRENT_TIMESTAMP()
|
||||
@@ -80,8 +83,7 @@ pub fn parse_column_default_constraint(
|
||||
|
||||
if let Expr::Value(v) = &**expr {
|
||||
let value = sql_value_to_value(
|
||||
column_name,
|
||||
data_type,
|
||||
&ColumnSchema::new(column_name, data_type.clone(), true),
|
||||
&v.value,
|
||||
timezone,
|
||||
Some(*op),
|
||||
|
||||
@@ -71,6 +71,7 @@ pub fn convert_metric_to_write_request(
|
||||
timestamp,
|
||||
}],
|
||||
exemplars: vec![],
|
||||
histograms: vec![],
|
||||
}),
|
||||
MetricType::GAUGE => timeseries.push(TimeSeries {
|
||||
labels: convert_label(m.get_label(), mf_name, None),
|
||||
@@ -79,6 +80,7 @@ pub fn convert_metric_to_write_request(
|
||||
timestamp,
|
||||
}],
|
||||
exemplars: vec![],
|
||||
histograms: vec![],
|
||||
}),
|
||||
MetricType::HISTOGRAM => {
|
||||
let h = m.get_histogram();
|
||||
@@ -97,6 +99,7 @@ pub fn convert_metric_to_write_request(
|
||||
timestamp,
|
||||
}],
|
||||
exemplars: vec![],
|
||||
histograms: vec![],
|
||||
});
|
||||
if upper_bound.is_sign_positive() && upper_bound.is_infinite() {
|
||||
inf_seen = true;
|
||||
@@ -114,6 +117,7 @@ pub fn convert_metric_to_write_request(
|
||||
timestamp,
|
||||
}],
|
||||
exemplars: vec![],
|
||||
histograms: vec![],
|
||||
});
|
||||
}
|
||||
timeseries.push(TimeSeries {
|
||||
@@ -127,6 +131,7 @@ pub fn convert_metric_to_write_request(
|
||||
timestamp,
|
||||
}],
|
||||
exemplars: vec![],
|
||||
histograms: vec![],
|
||||
});
|
||||
timeseries.push(TimeSeries {
|
||||
labels: convert_label(
|
||||
@@ -139,6 +144,7 @@ pub fn convert_metric_to_write_request(
|
||||
timestamp,
|
||||
}],
|
||||
exemplars: vec![],
|
||||
histograms: vec![],
|
||||
});
|
||||
}
|
||||
MetricType::SUMMARY => {
|
||||
@@ -155,6 +161,7 @@ pub fn convert_metric_to_write_request(
|
||||
timestamp,
|
||||
}],
|
||||
exemplars: vec![],
|
||||
histograms: vec![],
|
||||
});
|
||||
}
|
||||
timeseries.push(TimeSeries {
|
||||
@@ -168,6 +175,7 @@ pub fn convert_metric_to_write_request(
|
||||
timestamp,
|
||||
}],
|
||||
exemplars: vec![],
|
||||
histograms: vec![],
|
||||
});
|
||||
timeseries.push(TimeSeries {
|
||||
labels: convert_label(
|
||||
@@ -180,6 +188,7 @@ pub fn convert_metric_to_write_request(
|
||||
timestamp,
|
||||
}],
|
||||
exemplars: vec![],
|
||||
histograms: vec![],
|
||||
});
|
||||
}
|
||||
MetricType::UNTYPED => {
|
||||
@@ -274,7 +283,7 @@ mod test {
|
||||
|
||||
assert_eq!(
|
||||
format!("{:?}", write_quest.timeseries),
|
||||
r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }]"#
|
||||
r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }]"#
|
||||
);
|
||||
|
||||
let gauge_opts = Opts::new("test_gauge", "test help")
|
||||
@@ -288,7 +297,7 @@ mod test {
|
||||
let write_quest = convert_metric_to_write_request(mf, None, 0);
|
||||
assert_eq!(
|
||||
format!("{:?}", write_quest.timeseries),
|
||||
r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_gauge" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 42.0, timestamp: 0 }], exemplars: [] }]"#
|
||||
r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_gauge" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 42.0, timestamp: 0 }], exemplars: [], histograms: [] }]"#
|
||||
);
|
||||
}
|
||||
|
||||
@@ -305,20 +314,20 @@ mod test {
|
||||
.iter()
|
||||
.map(|x| format!("{:?}", x))
|
||||
.collect();
|
||||
let ans = r#"TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.005" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.01" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.025" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.05" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.1" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.25" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "1" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "2.5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "10" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "+Inf" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_sum" }, Label { name: "a", value: "1" }], samples: [Sample { value: 0.25, timestamp: 0 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_count" }, Label { name: "a", value: "1" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }"#;
|
||||
let ans = r#"TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.005" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.01" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.025" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.05" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.1" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.25" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "1" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "2.5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "10" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "+Inf" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_sum" }, Label { name: "a", value: "1" }], samples: [Sample { value: 0.25, timestamp: 0 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_count" }, Label { name: "a", value: "1" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }"#;
|
||||
assert_eq!(write_quest_str.join("\n"), ans);
|
||||
}
|
||||
|
||||
@@ -355,10 +364,10 @@ TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_count" },
|
||||
.iter()
|
||||
.map(|x| format!("{:?}", x))
|
||||
.collect();
|
||||
let ans = r#"TimeSeries { labels: [Label { name: "__name__", value: "test_summary" }, Label { name: "quantile", value: "50" }], samples: [Sample { value: 3.0, timestamp: 20 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_summary" }, Label { name: "quantile", value: "100" }], samples: [Sample { value: 5.0, timestamp: 20 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_summary_sum" }], samples: [Sample { value: 15.0, timestamp: 20 }], exemplars: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_summary_count" }], samples: [Sample { value: 5.0, timestamp: 20 }], exemplars: [] }"#;
|
||||
let ans = r#"TimeSeries { labels: [Label { name: "__name__", value: "test_summary" }, Label { name: "quantile", value: "50" }], samples: [Sample { value: 3.0, timestamp: 20 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_summary" }, Label { name: "quantile", value: "100" }], samples: [Sample { value: 5.0, timestamp: 20 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_summary_sum" }], samples: [Sample { value: 15.0, timestamp: 20 }], exemplars: [], histograms: [] }
|
||||
TimeSeries { labels: [Label { name: "__name__", value: "test_summary_count" }], samples: [Sample { value: 5.0, timestamp: 20 }], exemplars: [], histograms: [] }"#;
|
||||
assert_eq!(write_quest_str.join("\n"), ans);
|
||||
}
|
||||
|
||||
@@ -385,11 +394,11 @@ TimeSeries { labels: [Label { name: "__name__", value: "test_summary_count" }],
|
||||
let write_quest2 = convert_metric_to_write_request(mf, Some(&filter), 0);
|
||||
assert_eq!(
|
||||
format!("{:?}", write_quest1.timeseries),
|
||||
r#"[TimeSeries { labels: [Label { name: "__name__", value: "filter_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }, TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 2.0, timestamp: 0 }], exemplars: [] }]"#
|
||||
r#"[TimeSeries { labels: [Label { name: "__name__", value: "filter_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }, TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 2.0, timestamp: 0 }], exemplars: [], histograms: [] }]"#
|
||||
);
|
||||
assert_eq!(
|
||||
format!("{:?}", write_quest2.timeseries),
|
||||
r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 2.0, timestamp: 0 }], exemplars: [] }]"#
|
||||
r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 2.0, timestamp: 0 }], exemplars: [], histograms: [] }]"#
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -206,6 +206,8 @@ mod tests {
|
||||
client_cert_path: None,
|
||||
client_key_path: None,
|
||||
}),
|
||||
connect_timeout: Duration::from_secs(3),
|
||||
timeout: Duration::from_secs(3),
|
||||
},
|
||||
kafka_topic: KafkaTopicConfig {
|
||||
num_topics: 32,
|
||||
@@ -239,6 +241,8 @@ mod tests {
|
||||
client_cert_path: None,
|
||||
client_key_path: None,
|
||||
}),
|
||||
connect_timeout: Duration::from_secs(3),
|
||||
timeout: Duration::from_secs(3),
|
||||
},
|
||||
max_batch_bytes: ReadableSize::mb(1),
|
||||
consumer_wait_timeout: Duration::from_millis(100),
|
||||
|
||||
@@ -36,9 +36,6 @@ pub const DEFAULT_BACKOFF_CONFIG: BackoffConfig = BackoffConfig {
    deadline: Some(Duration::from_secs(3)),
};

/// The default connect timeout for kafka client.
pub const DEFAULT_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);

/// Default interval for auto WAL pruning.
pub const DEFAULT_AUTO_PRUNE_INTERVAL: Duration = Duration::from_mins(30);
/// Default limit for concurrent auto pruning tasks.
@@ -167,6 +164,12 @@ pub struct KafkaConnectionConfig {
    pub sasl: Option<KafkaClientSasl>,
    /// Client TLS config
    pub tls: Option<KafkaClientTls>,
    /// The connect timeout for kafka client.
    #[serde(with = "humantime_serde")]
    pub connect_timeout: Duration,
    /// The timeout for kafka client.
    #[serde(with = "humantime_serde")]
    pub timeout: Duration,
}

impl Default for KafkaConnectionConfig {
@@ -175,6 +178,8 @@ impl Default for KafkaConnectionConfig {
            broker_endpoints: vec![BROKER_ENDPOINT.to_string()],
            sasl: None,
            tls: None,
            connect_timeout: Duration::from_secs(3),
            timeout: Duration::from_secs(3),
        }
    }
}

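A quick sanity sketch of the new fields (not part of the diff): both timeouts default to 3 seconds, and the `humantime_serde` attribute suggests config files can spell them as humantime strings such as "3s" (assumption based on the serde attribute, not verified here).

    let conn = KafkaConnectionConfig::default();
    assert_eq!(conn.connect_timeout, Duration::from_secs(3));
    assert_eq!(conn.timeout, Duration::from_secs(3));
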
@@ -26,9 +26,9 @@ use std::sync::Arc;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Map, Value as Json};
|
||||
use snafu::{ResultExt, ensure};
|
||||
use snafu::{OptionExt, ResultExt, ensure};
|
||||
|
||||
use crate::error::{self, Error};
|
||||
use crate::error::{self, InvalidJsonSnafu, Result, SerializeSnafu};
|
||||
use crate::json::value::{JsonValue, JsonVariant};
|
||||
use crate::types::json_type::{JsonNativeType, JsonNumberType, JsonObjectType};
|
||||
use crate::types::{StructField, StructType};
|
||||
@@ -71,7 +71,7 @@ impl JsonStructureSettings {
|
||||
pub const RAW_FIELD: &'static str = "_raw";
|
||||
|
||||
/// Decode an encoded StructValue back into a serde_json::Value.
|
||||
pub fn decode(&self, value: Value) -> Result<Json, Error> {
|
||||
pub fn decode(&self, value: Value) -> Result<Json> {
|
||||
let context = JsonContext {
|
||||
key_path: String::new(),
|
||||
settings: self,
|
||||
@@ -82,7 +82,7 @@ impl JsonStructureSettings {
|
||||
/// Decode a StructValue that was encoded with current settings back into a fully structured StructValue.
|
||||
/// This is useful for reconstructing the original structure from encoded data, especially when
|
||||
/// unstructured encoding was used for some fields.
|
||||
pub fn decode_struct(&self, struct_value: StructValue) -> Result<StructValue, Error> {
|
||||
pub fn decode_struct(&self, struct_value: StructValue) -> Result<StructValue> {
|
||||
let context = JsonContext {
|
||||
key_path: String::new(),
|
||||
settings: self,
|
||||
@@ -91,7 +91,11 @@ impl JsonStructureSettings {
|
||||
}
|
||||
|
||||
/// Encode a serde_json::Value into a Value::Json using current settings.
|
||||
pub fn encode(&self, json: Json) -> Result<Value, Error> {
|
||||
pub fn encode(&self, json: Json) -> Result<Value> {
|
||||
if let Some(json_struct) = self.json_struct() {
|
||||
return encode_by_struct(json_struct, json);
|
||||
}
|
||||
|
||||
let context = JsonContext {
|
||||
key_path: String::new(),
|
||||
settings: self,
|
||||
@@ -104,13 +108,21 @@ impl JsonStructureSettings {
|
||||
&self,
|
||||
json: Json,
|
||||
data_type: Option<&JsonNativeType>,
|
||||
) -> Result<Value, Error> {
|
||||
) -> Result<Value> {
|
||||
let context = JsonContext {
|
||||
key_path: String::new(),
|
||||
settings: self,
|
||||
};
|
||||
encode_json_with_context(json, data_type, &context).map(|v| Value::Json(Box::new(v)))
|
||||
}
|
||||
|
||||
fn json_struct(&self) -> Option<&StructType> {
|
||||
match &self {
|
||||
JsonStructureSettings::Structured(fields) => fields.as_ref(),
|
||||
JsonStructureSettings::PartialUnstructuredByKey { fields, .. } => fields.as_ref(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for JsonStructureSettings {
|
||||
@@ -144,12 +156,54 @@ impl<'a> JsonContext<'a> {
    }
}

fn encode_by_struct(json_struct: &StructType, mut json: Json) -> Result<Value> {
    let Some(json_object) = json.as_object_mut() else {
        return InvalidJsonSnafu {
            value: "expect JSON object when struct is provided",
        }
        .fail();
    };
    let mut encoded = BTreeMap::new();

    fn extract_field(json_object: &mut Map<String, Json>, field: &str) -> Result<Option<Json>> {
        let (first, rest) = field.split_once('.').unwrap_or((field, ""));

        if rest.is_empty() {
            Ok(json_object.remove(first))
        } else {
            let Some(value) = json_object.get_mut(first) else {
                return Ok(None);
            };
            let json_object = value.as_object_mut().with_context(|| InvalidJsonSnafu {
                value: format!(r#"expect "{}" an object"#, first),
            })?;
            extract_field(json_object, rest)
        }
    }

    let fields = json_struct.fields();
    for field in fields.iter() {
        let Some(field_value) = extract_field(json_object, field.name())? else {
            continue;
        };
        let field_type: JsonNativeType = field.data_type().into();
        let field_value = try_convert_to_expected_type(field_value, &field_type)?;
        encoded.insert(field.name().to_string(), field_value);
    }

    let rest = serde_json::to_string(json_object).context(SerializeSnafu)?;
    encoded.insert(JsonStructureSettings::RAW_FIELD.to_string(), rest.into());

    let value: JsonValue = encoded.into();
    Ok(Value::Json(Box::new(value)))
}

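To make `encode_by_struct` concrete, a hedged walk-through on an invented document (the field name and the `json_struct` value are assumptions, not from the diff):

    // Suppose `json_struct: StructType` declares a single field named "user.id".
    // Dotted names descend into nested objects, so for this input:
    //   {"user": {"id": 7, "name": "x"}, "tag": "a"}
    // the loop extracts "user.id" (converted to the field's JSON native type),
    // and everything left over is serialized into the reserved "_raw" field:
    //   {"user.id": 7, "_raw": "{\"tag\":\"a\",\"user\":{\"name\":\"x\"}}"}
    let settings = JsonStructureSettings::Structured(Some(json_struct));
    let encoded = settings.encode(serde_json::json!({
        "user": { "id": 7, "name": "x" },
        "tag": "a",
    }))?;
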
/// Main encoding function with key path tracking
|
||||
pub fn encode_json_with_context<'a>(
|
||||
json: Json,
|
||||
data_type: Option<&JsonNativeType>,
|
||||
context: &JsonContext<'a>,
|
||||
) -> Result<JsonValue, Error> {
|
||||
) -> Result<JsonValue> {
|
||||
// Check if the entire encoding should be unstructured
|
||||
if matches!(context.settings, JsonStructureSettings::UnstructuredRaw) {
|
||||
let json_string = json.to_string();
|
||||
@@ -215,7 +269,7 @@ fn encode_json_object_with_context<'a>(
|
||||
mut json_object: Map<String, Json>,
|
||||
fields: Option<&JsonObjectType>,
|
||||
context: &JsonContext<'a>,
|
||||
) -> Result<JsonValue, Error> {
|
||||
) -> Result<JsonValue> {
|
||||
let mut object = BTreeMap::new();
|
||||
// First, process fields from the provided schema in their original order
|
||||
if let Some(fields) = fields {
|
||||
@@ -248,7 +302,7 @@ fn encode_json_array_with_context<'a>(
|
||||
json_array: Vec<Json>,
|
||||
item_type: Option<&JsonNativeType>,
|
||||
context: &JsonContext<'a>,
|
||||
) -> Result<JsonValue, Error> {
|
||||
) -> Result<JsonValue> {
|
||||
let json_array_len = json_array.len();
|
||||
let mut items = Vec::with_capacity(json_array_len);
|
||||
let mut element_type = item_type.cloned();
|
||||
@@ -286,7 +340,7 @@ fn encode_json_value_with_context<'a>(
|
||||
json: Json,
|
||||
expected_type: Option<&JsonNativeType>,
|
||||
context: &JsonContext<'a>,
|
||||
) -> Result<JsonValue, Error> {
|
||||
) -> Result<JsonValue> {
|
||||
// Check if current key should be treated as unstructured
|
||||
if context.is_unstructured_key() {
|
||||
return Ok(json.to_string().into());
|
||||
@@ -301,7 +355,7 @@ fn encode_json_value_with_context<'a>(
|
||||
if let Some(expected) = expected_type
|
||||
&& let Ok(value) = try_convert_to_expected_type(i, expected)
|
||||
{
|
||||
return Ok(value);
|
||||
return Ok(value.into());
|
||||
}
|
||||
Ok(i.into())
|
||||
} else if let Some(u) = n.as_u64() {
|
||||
@@ -309,7 +363,7 @@ fn encode_json_value_with_context<'a>(
|
||||
if let Some(expected) = expected_type
|
||||
&& let Ok(value) = try_convert_to_expected_type(u, expected)
|
||||
{
|
||||
return Ok(value);
|
||||
return Ok(value.into());
|
||||
}
|
||||
if u <= i64::MAX as u64 {
|
||||
Ok((u as i64).into())
|
||||
@@ -321,7 +375,7 @@ fn encode_json_value_with_context<'a>(
|
||||
if let Some(expected) = expected_type
|
||||
&& let Ok(value) = try_convert_to_expected_type(f, expected)
|
||||
{
|
||||
return Ok(value);
|
||||
return Ok(value.into());
|
||||
}
|
||||
|
||||
// Default to f64 for floating point numbers
|
||||
@@ -335,7 +389,7 @@ fn encode_json_value_with_context<'a>(
|
||||
if let Some(expected) = expected_type
|
||||
&& let Ok(value) = try_convert_to_expected_type(s.as_str(), expected)
|
||||
{
|
||||
return Ok(value);
|
||||
return Ok(value.into());
|
||||
}
|
||||
Ok(s.into())
|
||||
}
|
||||
@@ -345,10 +399,7 @@ fn encode_json_value_with_context<'a>(
|
||||
}
|
||||
|
||||
/// Main decoding function with key path tracking
pub fn decode_value_with_context<'a>(
value: Value,
context: &JsonContext<'a>,
) -> Result<Json, Error> {
pub fn decode_value_with_context(value: Value, context: &JsonContext) -> Result<Json> {
// Check if the entire decoding should be unstructured
if matches!(context.settings, JsonStructureSettings::UnstructuredRaw) {
return decode_unstructured_value(value);
@@ -370,7 +421,7 @@ pub fn decode_value_with_context<'a>(
fn decode_struct_with_context<'a>(
struct_value: StructValue,
context: &JsonContext<'a>,
) -> Result<Json, Error> {
) -> Result<Json> {
let mut json_object = Map::with_capacity(struct_value.len());

let (items, fields) = struct_value.into_parts();
@@ -385,10 +436,7 @@ fn decode_struct_with_context<'a>(
}

/// Decode a list value to JSON array
fn decode_list_with_context<'a>(
list_value: ListValue,
context: &JsonContext<'a>,
) -> Result<Json, Error> {
fn decode_list_with_context(list_value: ListValue, context: &JsonContext) -> Result<Json> {
let mut json_array = Vec::with_capacity(list_value.len());

let data_items = list_value.take_items();
@@ -403,7 +451,7 @@ fn decode_list_with_context<'a>(
}

/// Decode unstructured value (stored as string)
fn decode_unstructured_value(value: Value) -> Result<Json, Error> {
fn decode_unstructured_value(value: Value) -> Result<Json> {
match value {
// Handle expected format: StructValue with single _raw field
Value::Struct(struct_value) => {
@@ -443,7 +491,7 @@ fn decode_unstructured_value(value: Value) -> Result<Json, Error> {
}

/// Decode primitive value to JSON
fn decode_primitive_value(value: Value) -> Result<Json, Error> {
fn decode_primitive_value(value: Value) -> Result<Json> {
match value {
Value::Null => Ok(Json::Null),
Value::Boolean(b) => Ok(Json::Bool(b)),
@@ -487,7 +535,7 @@ fn decode_primitive_value(value: Value) -> Result<Json, Error> {
fn decode_struct_with_settings<'a>(
struct_value: StructValue,
context: &JsonContext<'a>,
) -> Result<StructValue, Error> {
) -> Result<StructValue> {
// Check if we can return the struct directly (Structured case)
if matches!(context.settings, JsonStructureSettings::Structured(_)) {
return Ok(struct_value);
@@ -567,7 +615,7 @@ fn decode_struct_with_settings<'a>(
fn decode_list_with_settings<'a>(
list_value: ListValue,
context: &JsonContext<'a>,
) -> Result<ListValue, Error> {
) -> Result<ListValue> {
let mut items = Vec::with_capacity(list_value.len());

let (data_items, datatype) = list_value.into_parts();
@@ -592,7 +640,7 @@ fn decode_list_with_settings<'a>(
}

/// Helper function to decode a struct that was encoded with UnstructuredRaw settings
fn decode_unstructured_raw_struct(struct_value: StructValue) -> Result<StructValue, Error> {
fn decode_unstructured_raw_struct(struct_value: StructValue) -> Result<StructValue> {
// For UnstructuredRaw, the struct must have exactly one field named "_raw"
if struct_value.struct_type().fields().len() == 1 {
let field = &struct_value.struct_type().fields()[0];
@@ -636,12 +684,9 @@ fn decode_unstructured_raw_struct(struct_value: StructValue) -> Result<StructVal
}

/// Helper function to try converting a value to an expected type
fn try_convert_to_expected_type<T>(
value: T,
expected_type: &JsonNativeType,
) -> Result<JsonValue, Error>
fn try_convert_to_expected_type<T>(value: T, expected_type: &JsonNativeType) -> Result<JsonVariant>
where
T: Into<JsonValue>,
T: Into<JsonVariant>,
{
let value = value.into();
let cast_error = || {
@@ -650,7 +695,7 @@ where
}
.fail()
};
let actual_type = value.json_type().native_type();
let actual_type = &value.native_type();
match (actual_type, expected_type) {
(x, y) if x == y => Ok(value),
(JsonNativeType::Number(x), JsonNativeType::Number(y)) => match (x, y) {
@@ -691,6 +736,107 @@ mod tests {
use crate::data_type::ConcreteDataType;
use crate::types::ListType;

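Throughout these hunks the explicit `, Error` is dropped from the return types. That presumably relies on a crate-local `Result` alias with a default error parameter; a minimal standalone sketch of that pattern (the type names are stand-ins, not this crate's definitions):

#[derive(Debug)]
pub struct Error; // stand-in for the module's error type

// With a default type parameter, `Result<Json>` means `Result<Json, Error>`.
pub type Result<T, E = Error> = std::result::Result<T, E>;

fn decode() -> Result<i64> {
    Ok(42)
}

fn main() {
    assert_eq!(decode().unwrap(), 42);
}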
#[test]
|
||||
fn test_encode_by_struct() {
|
||||
let json_struct: StructType = [
|
||||
StructField::new("s", ConcreteDataType::string_datatype(), true),
|
||||
StructField::new("foo.i", ConcreteDataType::int64_datatype(), true),
|
||||
StructField::new("x.y.z", ConcreteDataType::boolean_datatype(), true),
|
||||
]
|
||||
.into();
|
||||
|
||||
let json = json!({
|
||||
"s": "hello",
|
||||
"t": "world",
|
||||
"foo": {
|
||||
"i": 1,
|
||||
"j": 2
|
||||
},
|
||||
"x": {
|
||||
"y": {
|
||||
"z": true
|
||||
}
|
||||
}
|
||||
});
|
||||
let value = encode_by_struct(&json_struct, json).unwrap();
|
||||
assert_eq!(
|
||||
value.to_string(),
|
||||
r#"Json({ _raw: {"foo":{"j":2},"t":"world","x":{"y":{}}}, foo.i: 1, s: hello, x.y.z: true })"#
|
||||
);
|
||||
|
||||
let json = json!({
|
||||
"t": "world",
|
||||
"foo": {
|
||||
"i": 1,
|
||||
"j": 2
|
||||
},
|
||||
"x": {
|
||||
"y": {
|
||||
"z": true
|
||||
}
|
||||
}
|
||||
});
|
||||
let value = encode_by_struct(&json_struct, json).unwrap();
|
||||
assert_eq!(
|
||||
value.to_string(),
|
||||
r#"Json({ _raw: {"foo":{"j":2},"t":"world","x":{"y":{}}}, foo.i: 1, x.y.z: true })"#
|
||||
);
|
||||
|
||||
let json = json!({
|
||||
"s": 1234,
|
||||
"foo": {
|
||||
"i": 1,
|
||||
"j": 2
|
||||
},
|
||||
"x": {
|
||||
"y": {
|
||||
"z": true
|
||||
}
|
||||
}
|
||||
});
|
||||
let value = encode_by_struct(&json_struct, json).unwrap();
|
||||
assert_eq!(
|
||||
value.to_string(),
|
||||
r#"Json({ _raw: {"foo":{"j":2},"x":{"y":{}}}, foo.i: 1, s: 1234, x.y.z: true })"#
|
||||
);
|
||||
|
||||
let json = json!({
|
||||
"s": "hello",
|
||||
"t": "world",
|
||||
"foo": {
|
||||
"i": "bar",
|
||||
"j": 2
|
||||
},
|
||||
"x": {
|
||||
"y": {
|
||||
"z": true
|
||||
}
|
||||
}
|
||||
});
|
||||
let result = encode_by_struct(&json_struct, json);
|
||||
assert_eq!(
|
||||
result.unwrap_err().to_string(),
|
||||
"Cannot cast value bar to Number(I64)"
|
||||
);
|
||||
|
||||
let json = json!({
|
||||
"s": "hello",
|
||||
"t": "world",
|
||||
"foo": {
|
||||
"i": 1,
|
||||
"j": 2
|
||||
},
|
||||
"x": {
|
||||
"y": "z"
|
||||
}
|
||||
});
|
||||
let result = encode_by_struct(&json_struct, json);
|
||||
assert_eq!(
|
||||
result.unwrap_err().to_string(),
|
||||
r#"Invalid JSON: expect "y" an object"#
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encode_json_null() {
|
||||
let json = Json::Null;
|
||||
|
||||
@@ -82,6 +82,18 @@ impl From<f64> for JsonNumber {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Number> for JsonNumber {
|
||||
fn from(n: Number) -> Self {
|
||||
if let Some(i) = n.as_i64() {
|
||||
i.into()
|
||||
} else if let Some(i) = n.as_u64() {
|
||||
i.into()
|
||||
} else {
|
||||
n.as_f64().unwrap_or(f64::NAN).into()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for JsonNumber {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
@@ -109,7 +121,28 @@ pub enum JsonVariant {
|
||||
}
|
||||
|
||||
impl JsonVariant {
|
||||
fn native_type(&self) -> JsonNativeType {
|
||||
pub(crate) fn as_i64(&self) -> Option<i64> {
|
||||
match self {
|
||||
JsonVariant::Number(n) => n.as_i64(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_u64(&self) -> Option<u64> {
|
||||
match self {
|
||||
JsonVariant::Number(n) => n.as_u64(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_f64(&self) -> Option<f64> {
|
||||
match self {
|
||||
JsonVariant::Number(n) => Some(n.as_f64()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn native_type(&self) -> JsonNativeType {
|
||||
match self {
|
||||
JsonVariant::Null => JsonNativeType::Null,
|
||||
JsonVariant::Bool(_) => JsonNativeType::Bool,
|
||||
@@ -205,6 +238,32 @@ impl<K: Into<String>, V: Into<JsonVariant>, const N: usize> From<[(K, V); N]> fo
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Value> for JsonVariant {
|
||||
fn from(v: serde_json::Value) -> Self {
|
||||
fn helper(v: serde_json::Value) -> JsonVariant {
|
||||
match v {
|
||||
serde_json::Value::Null => JsonVariant::Null,
|
||||
serde_json::Value::Bool(b) => b.into(),
|
||||
serde_json::Value::Number(n) => n.into(),
|
||||
serde_json::Value::String(s) => s.into(),
|
||||
serde_json::Value::Array(array) => {
|
||||
JsonVariant::Array(array.into_iter().map(helper).collect())
|
||||
}
|
||||
serde_json::Value::Object(object) => {
|
||||
JsonVariant::Object(object.into_iter().map(|(k, v)| (k, helper(v))).collect())
|
||||
}
|
||||
}
|
||||
}
|
||||
helper(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BTreeMap<String, JsonVariant>> for JsonVariant {
|
||||
fn from(v: BTreeMap<String, JsonVariant>) -> Self {
|
||||
Self::Object(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for JsonVariant {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
@@ -277,24 +336,11 @@ impl JsonValue {
|
||||
}
|
||||
|
||||
pub(crate) fn as_i64(&self) -> Option<i64> {
|
||||
match self.json_variant {
|
||||
JsonVariant::Number(n) => n.as_i64(),
|
||||
_ => None,
|
||||
}
|
||||
self.json_variant.as_i64()
|
||||
}
|
||||
|
||||
pub(crate) fn as_u64(&self) -> Option<u64> {
|
||||
match self.json_variant {
|
||||
JsonVariant::Number(n) => n.as_u64(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_f64(&self) -> Option<f64> {
|
||||
match self.json_variant {
|
||||
JsonVariant::Number(n) => Some(n.as_f64()),
|
||||
_ => None,
|
||||
}
|
||||
self.json_variant.as_u64()
|
||||
}
|
||||
|
||||
pub(crate) fn as_f64_lossy(&self) -> Option<f64> {
|
||||
|
||||
@@ -122,9 +122,9 @@ pub struct StructField {
|
||||
}
|
||||
|
||||
impl StructField {
|
||||
pub fn new(name: String, data_type: ConcreteDataType, nullable: bool) -> Self {
|
||||
pub fn new<T: Into<String>>(name: T, data_type: ConcreteDataType, nullable: bool) -> Self {
|
||||
StructField {
|
||||
name,
|
||||
name: name.into(),
|
||||
data_type,
|
||||
nullable,
|
||||
metadata: BTreeMap::new(),
|
||||
|
||||
@@ -15,7 +15,7 @@
//! Frontend client for running a flow as a batching task: a time-window-aware query triggered on every tick configured by the user

use std::collections::HashMap;
use std::sync::{Arc, Weak};
use std::sync::{Arc, Mutex, Weak};
use std::time::SystemTime;

use api::v1::greptime_request::Request;
@@ -38,6 +38,7 @@ use servers::query_handler::grpc::GrpcQueryHandler;
use session::context::{QueryContextBuilder, QueryContextRef};
use session::hints::READ_PREFERENCE_HINT;
use snafu::{OptionExt, ResultExt};
use tokio::sync::SetOnce;

use crate::batching_mode::BatchingModeOptions;
use crate::error::{
@@ -75,7 +76,19 @@ impl<E: ErrorExt + Send + Sync + 'static, T: GrpcQueryHandler<Error = E> + Send
}
}

type HandlerMutable = Arc<std::sync::Mutex<Option<Weak<dyn GrpcQueryHandlerWithBoxedError>>>>;
#[derive(Debug, Clone)]
pub struct HandlerMutable {
handler: Arc<Mutex<Option<Weak<dyn GrpcQueryHandlerWithBoxedError>>>>,
is_initialized: Arc<SetOnce<()>>,
}

impl HandlerMutable {
pub async fn set_handler(&self, handler: Weak<dyn GrpcQueryHandlerWithBoxedError>) {
*self.handler.lock().unwrap() = Some(handler);
// Ignore the error, as we allow the handler to be set multiple times.
let _ = self.is_initialized.set(());
}
}

/// A simple frontend client able to execute sql using grpc protocol
///
@@ -100,7 +113,11 @@ pub enum FrontendClient {
impl FrontendClient {
/// Create a new empty frontend client, with a `HandlerMutable` to set the grpc handler later
pub fn from_empty_grpc_handler(query: QueryOptions) -> (Self, HandlerMutable) {
let handler = Arc::new(std::sync::Mutex::new(None));
let is_initialized = Arc::new(SetOnce::new());
let handler = HandlerMutable {
handler: Arc::new(Mutex::new(None)),
is_initialized,
};
(
Self::Standalone {
database_client: handler.clone(),
@@ -110,23 +127,13 @@ impl FrontendClient {
)
}

/// Check if the frontend client is initialized.
///
/// In distributed mode, it is always initialized.
/// In standalone mode, it checks if the database client is set.
pub fn is_initialized(&self) -> bool {
match self {
FrontendClient::Distributed { .. } => true,
FrontendClient::Standalone {
database_client, ..
} => {
let guard = database_client.lock();
if let Ok(guard) = guard {
guard.is_some()
} else {
false
}
}
/// Waits until the frontend client is initialized.
pub async fn wait_initialized(&self) {
if let FrontendClient::Standalone {
database_client, ..
} = self
{
database_client.is_initialized.wait().await;
}
}

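The initialization gate above replaces polling `is_initialized()` with an awaitable `wait_initialized()`. A minimal sketch of the same pattern in isolation, assuming the `SetOnce` API used in this diff (`new`, `set`, `wait`) and a tokio runtime with the multi-thread and macros features enabled:

use std::sync::Arc;
use tokio::sync::SetOnce;

#[tokio::main]
async fn main() {
    let ready: Arc<SetOnce<()>> = Arc::new(SetOnce::new());

    let waiter = {
        let ready = ready.clone();
        tokio::spawn(async move {
            // Suspends until another task marks the gate as set.
            ready.wait().await;
        })
    };

    // Setting twice would return an error, which the diff deliberately ignores.
    let _ = ready.set(());
    waiter.await.unwrap();
}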
@@ -158,8 +165,14 @@ impl FrontendClient {
|
||||
grpc_handler: Weak<dyn GrpcQueryHandlerWithBoxedError>,
|
||||
query: QueryOptions,
|
||||
) -> Self {
|
||||
let is_initialized = Arc::new(SetOnce::new_with(Some(())));
|
||||
let handler = HandlerMutable {
|
||||
handler: Arc::new(Mutex::new(Some(grpc_handler))),
|
||||
is_initialized: is_initialized.clone(),
|
||||
};
|
||||
|
||||
Self::Standalone {
|
||||
database_client: Arc::new(std::sync::Mutex::new(Some(grpc_handler))),
|
||||
database_client: handler,
|
||||
query,
|
||||
}
|
||||
}
|
||||
@@ -341,6 +354,7 @@ impl FrontendClient {
|
||||
{
|
||||
let database_client = {
|
||||
database_client
|
||||
.handler
|
||||
.lock()
|
||||
.map_err(|e| {
|
||||
UnexpectedSnafu {
|
||||
@@ -418,6 +432,7 @@ impl FrontendClient {
|
||||
{
|
||||
let database_client = {
|
||||
database_client
|
||||
.handler
|
||||
.lock()
|
||||
.map_err(|e| {
|
||||
UnexpectedSnafu {
|
||||
@@ -480,3 +495,73 @@ impl std::fmt::Display for PeerDesc {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::time::Duration;
|
||||
|
||||
use common_query::Output;
|
||||
use tokio::time::timeout;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct NoopHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl GrpcQueryHandlerWithBoxedError for NoopHandler {
|
||||
async fn do_query(
|
||||
&self,
|
||||
_query: Request,
|
||||
_ctx: QueryContextRef,
|
||||
) -> std::result::Result<Output, BoxedError> {
|
||||
Ok(Output::new_with_affected_rows(0))
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn wait_initialized() {
|
||||
let (client, handler_mut) =
|
||||
FrontendClient::from_empty_grpc_handler(QueryOptions::default());
|
||||
|
||||
assert!(
|
||||
timeout(Duration::from_millis(50), client.wait_initialized())
|
||||
.await
|
||||
.is_err()
|
||||
);
|
||||
|
||||
let handler: Arc<dyn GrpcQueryHandlerWithBoxedError> = Arc::new(NoopHandler);
|
||||
handler_mut.set_handler(Arc::downgrade(&handler)).await;
|
||||
|
||||
timeout(Duration::from_secs(1), client.wait_initialized())
|
||||
.await
|
||||
.expect("wait_initialized should complete after handler is set");
|
||||
|
||||
timeout(Duration::from_millis(10), client.wait_initialized())
|
||||
.await
|
||||
.expect("wait_initialized should be a no-op once initialized");
|
||||
|
||||
let handler: Arc<dyn GrpcQueryHandlerWithBoxedError> = Arc::new(NoopHandler);
|
||||
let client =
|
||||
FrontendClient::from_grpc_handler(Arc::downgrade(&handler), QueryOptions::default());
|
||||
assert!(
|
||||
timeout(Duration::from_millis(10), client.wait_initialized())
|
||||
.await
|
||||
.is_ok()
|
||||
);
|
||||
|
||||
let meta_client = Arc::new(MetaClient::default());
|
||||
let client = FrontendClient::from_meta_client(
|
||||
meta_client,
|
||||
None,
|
||||
QueryOptions::default(),
|
||||
BatchingModeOptions::default(),
|
||||
)
|
||||
.unwrap();
|
||||
assert!(
|
||||
timeout(Duration::from_millis(10), client.wait_initialized())
|
||||
.await
|
||||
.is_ok()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -490,7 +490,6 @@ impl<'a> FlownodeServiceBuilder<'a> {
|
||||
let config = GrpcServerConfig {
|
||||
max_recv_message_size: opts.grpc.max_recv_message_size.as_bytes() as usize,
|
||||
max_send_message_size: opts.grpc.max_send_message_size.as_bytes() as usize,
|
||||
max_total_message_memory: opts.grpc.max_total_message_memory.as_bytes() as usize,
|
||||
tls: opts.grpc.tls.clone(),
|
||||
max_connection_age: opts.grpc.max_connection_age,
|
||||
};
|
||||
|
||||
@@ -32,6 +32,7 @@ common-frontend.workspace = true
|
||||
common-function.workspace = true
|
||||
common-grpc.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-memory-manager.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-options.workspace = true
|
||||
common-procedure.workspace = true
|
||||
|
||||
@@ -357,14 +357,6 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to acquire more permits from limiter"))]
|
||||
AcquireLimiter {
|
||||
#[snafu(source)]
|
||||
error: tokio::sync::AcquireError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Service suspended"))]
|
||||
Suspended {
|
||||
#[snafu(implicit)]
|
||||
@@ -449,8 +441,6 @@ impl ErrorExt for Error {
|
||||
|
||||
Error::StatementTimeout { .. } => StatusCode::Cancelled,
|
||||
|
||||
Error::AcquireLimiter { .. } => StatusCode::Internal,
|
||||
|
||||
Error::Suspended { .. } => StatusCode::Suspended,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ use std::sync::Arc;
use common_base::readable_size::ReadableSize;
use common_config::config::Configurable;
use common_event_recorder::EventRecorderOptions;
use common_memory_manager::OnExhaustedPolicy;
use common_options::datanode::DatanodeClientOptions;
use common_options::memory::MemoryOptions;
use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, TracingOptions};
@@ -45,6 +46,12 @@ pub struct FrontendOptions {
pub default_timezone: Option<String>,
pub default_column_prefix: Option<String>,
pub heartbeat: HeartbeatOptions,
/// Maximum total memory for all concurrent write request bodies and messages (HTTP, gRPC, Flight).
/// Set to 0 to disable the limit. Default: "0" (unlimited)
pub max_in_flight_write_bytes: ReadableSize,
/// Policy when write bytes quota is exhausted.
/// Options: "wait" (default, 10s), "wait(<duration>)", "fail"
pub write_bytes_exhausted_policy: OnExhaustedPolicy,
pub http: HttpOptions,
pub grpc: GrpcOptions,
/// The internal gRPC options for the frontend service.
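To illustrate the semantics documented on the two new fields (a byte quota where 0 means unlimited, plus a wait-or-fail exhaustion policy), here is a small runnable sketch using stand-in types; `OnExhausted` below is an illustration, not the crate's `OnExhaustedPolicy`:

use std::time::Duration;

// Illustrative stand-in for the documented policy values: "wait", "wait(<duration>)", "fail".
enum OnExhausted {
    Wait(Duration),
    Fail,
}

fn admit(in_flight: u64, incoming: u64, limit: u64, policy: &OnExhausted) -> &'static str {
    // A limit of 0 disables the quota entirely.
    if limit == 0 || in_flight + incoming <= limit {
        return "admit";
    }
    match policy {
        OnExhausted::Wait(_) => "queue until bytes are released",
        OnExhausted::Fail => "reject immediately",
    }
}

fn main() {
    let policy = OnExhausted::Wait(Duration::from_secs(10));
    assert_eq!(admit(0, 1024, 0, &policy), "admit"); // unlimited
    assert_eq!(admit(900, 200, 1000, &policy), "queue until bytes are released");
}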
@@ -63,7 +70,6 @@ pub struct FrontendOptions {
|
||||
pub user_provider: Option<String>,
|
||||
pub tracing: TracingOptions,
|
||||
pub query: QueryOptions,
|
||||
pub max_in_flight_write_bytes: Option<ReadableSize>,
|
||||
pub slow_query: SlowQueryOptions,
|
||||
pub memory: MemoryOptions,
|
||||
/// The event recorder options.
|
||||
@@ -77,6 +83,8 @@ impl Default for FrontendOptions {
|
||||
default_timezone: None,
|
||||
default_column_prefix: None,
|
||||
heartbeat: HeartbeatOptions::frontend_default(),
|
||||
max_in_flight_write_bytes: ReadableSize(0),
|
||||
write_bytes_exhausted_policy: OnExhaustedPolicy::default(),
|
||||
http: HttpOptions::default(),
|
||||
grpc: GrpcOptions::default(),
|
||||
internal_grpc: None,
|
||||
@@ -93,7 +101,6 @@ impl Default for FrontendOptions {
|
||||
user_provider: None,
|
||||
tracing: TracingOptions::default(),
|
||||
query: QueryOptions::default(),
|
||||
max_in_flight_write_bytes: None,
|
||||
slow_query: SlowQueryOptions::default(),
|
||||
memory: MemoryOptions::default(),
|
||||
event_recorder: EventRecorderOptions::default(),
|
||||
@@ -157,7 +164,6 @@ mod tests {
|
||||
use common_error::from_header_to_err_code_msg;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_grpc::channel_manager::ChannelManager;
|
||||
use common_meta::distributed_time_constants::FRONTEND_HEARTBEAT_INTERVAL_MILLIS;
|
||||
use common_meta::heartbeat::handler::HandlerGroupExecutor;
|
||||
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
|
||||
use common_meta::heartbeat::handler::suspend::SuspendHandler;
|
||||
@@ -400,6 +406,10 @@ mod tests {
|
||||
..Default::default()
|
||||
},
|
||||
meta_client: Some(meta_client_options.clone()),
|
||||
heartbeat: HeartbeatOptions {
|
||||
interval: Duration::from_secs(1),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -409,7 +419,8 @@ mod tests {
|
||||
let meta_client = create_meta_client(&meta_client_options, server.clone()).await;
|
||||
let frontend = create_frontend(&options, meta_client).await?;
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(FRONTEND_HEARTBEAT_INTERVAL_MILLIS)).await;
|
||||
let frontend_heartbeat_interval = options.heartbeat.interval;
|
||||
tokio::time::sleep(frontend_heartbeat_interval).await;
|
||||
// initial state: not suspend:
|
||||
assert!(!frontend.instance.is_suspended());
|
||||
verify_suspend_state_by_http(&frontend, Ok(r#"[{"records":{"schema":{"column_schemas":[{"name":"Int64(1)","data_type":"Int64"}]},"rows":[[1]],"total_rows":1}}]"#)).await;
|
||||
@@ -426,7 +437,7 @@ mod tests {
|
||||
|
||||
// make heartbeat server returned "suspend" instruction,
|
||||
server.suspend.store(true, Ordering::Relaxed);
|
||||
tokio::time::sleep(Duration::from_millis(FRONTEND_HEARTBEAT_INTERVAL_MILLIS)).await;
|
||||
tokio::time::sleep(frontend_heartbeat_interval).await;
|
||||
// ... then the frontend is suspended:
|
||||
assert!(frontend.instance.is_suspended());
|
||||
verify_suspend_state_by_http(
|
||||
@@ -442,7 +453,7 @@ mod tests {
|
||||
|
||||
// make heartbeat server NOT returned "suspend" instruction,
|
||||
server.suspend.store(false, Ordering::Relaxed);
|
||||
tokio::time::sleep(Duration::from_millis(FRONTEND_HEARTBEAT_INTERVAL_MILLIS)).await;
|
||||
tokio::time::sleep(frontend_heartbeat_interval).await;
|
||||
// ... then frontend's suspend state is cleared:
|
||||
assert!(!frontend.instance.is_suspended());
|
||||
verify_suspend_state_by_http(&frontend, Ok(r#"[{"records":{"schema":{"column_schemas":[{"name":"Int64(1)","data_type":"Int64"}]},"rows":[[1]],"total_rows":1}}]"#)).await;
|
||||
|
||||
@@ -97,7 +97,6 @@ use crate::error::{
|
||||
ParseSqlSnafu, PermissionSnafu, PlanStatementSnafu, Result, SqlExecInterceptedSnafu,
|
||||
StatementTimeoutSnafu, TableOperationSnafu,
|
||||
};
|
||||
use crate::limiter::LimiterRef;
|
||||
use crate::stream_wrapper::CancellableStreamWrapper;
|
||||
|
||||
lazy_static! {
|
||||
@@ -118,7 +117,6 @@ pub struct Instance {
|
||||
deleter: DeleterRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
event_recorder: Option<EventRecorderRef>,
|
||||
limiter: Option<LimiterRef>,
|
||||
process_manager: ProcessManagerRef,
|
||||
slow_query_options: SlowQueryOptions,
|
||||
suspend: Arc<AtomicBool>,
|
||||
|
||||
@@ -49,7 +49,6 @@ use crate::events::EventHandlerImpl;
|
||||
use crate::frontend::FrontendOptions;
|
||||
use crate::instance::Instance;
|
||||
use crate::instance::region_query::FrontendRegionQueryHandler;
|
||||
use crate::limiter::Limiter;
|
||||
|
||||
/// The frontend [`Instance`] builder.
|
||||
pub struct FrontendBuilder {
|
||||
@@ -248,14 +247,6 @@ impl FrontendBuilder {
|
||||
self.options.event_recorder.ttl,
|
||||
))));
|
||||
|
||||
// Create the limiter if the max_in_flight_write_bytes is set.
|
||||
let limiter = self
|
||||
.options
|
||||
.max_in_flight_write_bytes
|
||||
.map(|max_in_flight_write_bytes| {
|
||||
Arc::new(Limiter::new(max_in_flight_write_bytes.as_bytes() as usize))
|
||||
});
|
||||
|
||||
Ok(Instance {
|
||||
catalog_manager: self.catalog_manager,
|
||||
pipeline_operator,
|
||||
@@ -266,7 +257,6 @@ impl FrontendBuilder {
|
||||
deleter,
|
||||
table_metadata_manager: Arc::new(TableMetadataManager::new(kv_backend)),
|
||||
event_recorder: Some(event_recorder),
|
||||
limiter,
|
||||
process_manager,
|
||||
otlp_metrics_table_legacy_cache: DashMap::new(),
|
||||
slow_query_options: self.options.slow_query.clone(),
|
||||
|
||||
@@ -71,12 +71,6 @@ impl GrpcQueryHandler for Instance {
|
||||
.check_permission(ctx.current_user(), PermissionReq::GrpcRequest(&request))
|
||||
.context(PermissionSnafu)?;
|
||||
|
||||
let _guard = if let Some(limiter) = &self.limiter {
|
||||
Some(limiter.limit_request(&request).await?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let output = match request {
|
||||
Request::Inserts(requests) => self.handle_inserts(requests, ctx.clone()).await?,
|
||||
Request::RowInserts(requests) => {
|
||||
|
||||
@@ -22,7 +22,7 @@ use common_error::ext::BoxedError;
|
||||
use common_time::Timestamp;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use servers::error::{
|
||||
AuthSnafu, CatalogSnafu, Error, OtherSnafu, TimestampOverflowSnafu, UnexpectedResultSnafu,
|
||||
AuthSnafu, CatalogSnafu, Error, TimestampOverflowSnafu, UnexpectedResultSnafu,
|
||||
};
|
||||
use servers::influxdb::InfluxdbRequest;
|
||||
use servers::interceptor::{LineProtocolInterceptor, LineProtocolInterceptorRef};
|
||||
@@ -59,18 +59,6 @@ impl InfluxdbLineProtocolHandler for Instance {
|
||||
.post_lines_conversion(requests, ctx.clone())
|
||||
.await?;
|
||||
|
||||
let _guard = if let Some(limiter) = &self.limiter {
|
||||
Some(
|
||||
limiter
|
||||
.limit_row_inserts(&requests)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(OtherSnafu)?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
self.handle_influx_row_inserts(requests, ctx)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
|
||||
@@ -23,8 +23,7 @@ use datatypes::timestamp::TimestampNanosecond;
|
||||
use pipeline::pipeline_operator::PipelineOperator;
|
||||
use pipeline::{Pipeline, PipelineInfo, PipelineVersion};
|
||||
use servers::error::{
|
||||
AuthSnafu, Error as ServerError, ExecuteGrpcRequestSnafu, OtherSnafu, PipelineSnafu,
|
||||
Result as ServerResult,
|
||||
AuthSnafu, Error as ServerError, ExecuteGrpcRequestSnafu, PipelineSnafu, Result as ServerResult,
|
||||
};
|
||||
use servers::interceptor::{LogIngestInterceptor, LogIngestInterceptorRef};
|
||||
use servers::query_handler::PipelineHandler;
|
||||
@@ -124,18 +123,6 @@ impl Instance {
|
||||
log: RowInsertRequests,
|
||||
ctx: QueryContextRef,
|
||||
) -> ServerResult<Output> {
|
||||
let _guard = if let Some(limiter) = &self.limiter {
|
||||
Some(
|
||||
limiter
|
||||
.limit_row_inserts(&log)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(OtherSnafu)?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
self.inserter
|
||||
.handle_log_inserts(log, ctx, self.statement_executor.as_ref())
|
||||
.await
|
||||
@@ -148,18 +135,6 @@ impl Instance {
|
||||
rows: RowInsertRequests,
|
||||
ctx: QueryContextRef,
|
||||
) -> ServerResult<Output> {
|
||||
let _guard = if let Some(limiter) = &self.limiter {
|
||||
Some(
|
||||
limiter
|
||||
.limit_row_inserts(&rows)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(OtherSnafu)?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
self.inserter
|
||||
.handle_trace_inserts(rows, ctx, self.statement_executor.as_ref())
|
||||
.await
|
||||
|
||||
@@ -16,7 +16,7 @@ use async_trait::async_trait;
|
||||
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_telemetry::tracing;
|
||||
use servers::error::{self as server_error, AuthSnafu, ExecuteGrpcQuerySnafu, OtherSnafu};
|
||||
use servers::error::{self as server_error, AuthSnafu, ExecuteGrpcQuerySnafu};
|
||||
use servers::opentsdb::codec::DataPoint;
|
||||
use servers::opentsdb::data_point_to_grpc_row_insert_requests;
|
||||
use servers::query_handler::OpentsdbProtocolHandler;
|
||||
@@ -41,18 +41,6 @@ impl OpentsdbProtocolHandler for Instance {
|
||||
|
||||
let (requests, _) = data_point_to_grpc_row_insert_requests(data_points)?;
|
||||
|
||||
let _guard = if let Some(limiter) = &self.limiter {
|
||||
Some(
|
||||
limiter
|
||||
.limit_row_inserts(&requests)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(OtherSnafu)?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// OpenTSDB is single value.
|
||||
let output = self
|
||||
.handle_row_inserts(requests, ctx, true, true)
|
||||
|
||||
@@ -24,7 +24,7 @@ use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
|
||||
use otel_arrow_rust::proto::opentelemetry::collector::metrics::v1::ExportMetricsServiceRequest;
|
||||
use pipeline::{GreptimePipelineParams, PipelineWay};
|
||||
use servers::error::{self, AuthSnafu, OtherSnafu, Result as ServerResult};
|
||||
use servers::error::{self, AuthSnafu, Result as ServerResult};
|
||||
use servers::http::prom_store::PHYSICAL_TABLE_PARAM;
|
||||
use servers::interceptor::{OpenTelemetryProtocolInterceptor, OpenTelemetryProtocolInterceptorRef};
|
||||
use servers::otlp;
|
||||
@@ -83,18 +83,6 @@ impl OpenTelemetryProtocolHandler for Instance {
|
||||
ctx
|
||||
};
|
||||
|
||||
let _guard = if let Some(limiter) = &self.limiter {
|
||||
Some(
|
||||
limiter
|
||||
.limit_row_inserts(&requests)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(OtherSnafu)?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// If the user uses the legacy path, it is by default without metric engine.
|
||||
if metric_ctx.is_legacy || !metric_ctx.with_metric_engine {
|
||||
self.handle_row_inserts(requests, ctx, false, false)
|
||||
@@ -191,18 +179,6 @@ impl OpenTelemetryProtocolHandler for Instance {
|
||||
)
|
||||
.await?;
|
||||
|
||||
let _guard = if let Some(limiter) = &self.limiter {
|
||||
Some(
|
||||
limiter
|
||||
.limit_ctx_req(&opt_req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(OtherSnafu)?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let mut outputs = vec![];
|
||||
|
||||
for (temp_ctx, requests) in opt_req.as_req_iter(ctx) {
|
||||
|
||||
@@ -175,18 +175,6 @@ impl PromStoreProtocolHandler for Instance {
|
||||
.get::<PromStoreProtocolInterceptorRef<servers::error::Error>>();
|
||||
interceptor_ref.pre_write(&request, ctx.clone())?;
|
||||
|
||||
let _guard = if let Some(limiter) = &self.limiter {
|
||||
Some(
|
||||
limiter
|
||||
.limit_row_inserts(&request)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::OtherSnafu)?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let output = if with_metric_engine {
|
||||
let physical_table = ctx
|
||||
.extension(PHYSICAL_TABLE_PARAM)
|
||||
|
||||
@@ -19,7 +19,6 @@ pub mod events;
|
||||
pub mod frontend;
|
||||
pub mod heartbeat;
|
||||
pub mod instance;
|
||||
pub(crate) mod limiter;
|
||||
pub(crate) mod metrics;
|
||||
pub mod server;
|
||||
pub mod service_config;
|
||||
|
||||
@@ -1,332 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::column::Values;
|
||||
use api::v1::greptime_request::Request;
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::{
|
||||
Decimal128, InsertRequests, IntervalMonthDayNano, JsonValue, RowInsertRequest,
|
||||
RowInsertRequests, json_value,
|
||||
};
|
||||
use pipeline::ContextReq;
|
||||
use snafu::ResultExt;
|
||||
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
|
||||
|
||||
use crate::error::{AcquireLimiterSnafu, Result};
|
||||
|
||||
pub(crate) type LimiterRef = Arc<Limiter>;
|
||||
|
||||
/// A frontend request limiter that controls the total size of in-flight write
|
||||
/// requests.
|
||||
pub(crate) struct Limiter {
|
||||
max_in_flight_write_bytes: usize,
|
||||
byte_counter: Arc<Semaphore>,
|
||||
}
|
||||
|
||||
impl Limiter {
|
||||
pub fn new(max_in_flight_write_bytes: usize) -> Self {
|
||||
Self {
|
||||
byte_counter: Arc::new(Semaphore::new(max_in_flight_write_bytes)),
|
||||
max_in_flight_write_bytes,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn limit_request(&self, request: &Request) -> Result<OwnedSemaphorePermit> {
|
||||
let size = match request {
|
||||
Request::Inserts(requests) => self.insert_requests_data_size(requests),
|
||||
Request::RowInserts(requests) => {
|
||||
self.rows_insert_requests_data_size(requests.inserts.iter())
|
||||
}
|
||||
_ => 0,
|
||||
};
|
||||
self.limit_in_flight_write_bytes(size).await
|
||||
}
|
||||
|
||||
pub async fn limit_row_inserts(
|
||||
&self,
|
||||
requests: &RowInsertRequests,
|
||||
) -> Result<OwnedSemaphorePermit> {
|
||||
let size = self.rows_insert_requests_data_size(requests.inserts.iter());
|
||||
self.limit_in_flight_write_bytes(size).await
|
||||
}
|
||||
|
||||
pub async fn limit_ctx_req(&self, opt_req: &ContextReq) -> Result<OwnedSemaphorePermit> {
|
||||
let size = self.rows_insert_requests_data_size(opt_req.ref_all_req());
|
||||
self.limit_in_flight_write_bytes(size).await
|
||||
}
|
||||
|
||||
/// Await until more inflight bytes are available
|
||||
pub async fn limit_in_flight_write_bytes(&self, bytes: usize) -> Result<OwnedSemaphorePermit> {
|
||||
self.byte_counter
|
||||
.clone()
|
||||
.acquire_many_owned(bytes as u32)
|
||||
.await
|
||||
.context(AcquireLimiterSnafu)
|
||||
}
|
||||
|
||||
/// Returns the current in-flight write bytes.
|
||||
#[allow(dead_code)]
|
||||
pub fn in_flight_write_bytes(&self) -> usize {
|
||||
self.max_in_flight_write_bytes - self.byte_counter.available_permits()
|
||||
}
|
||||
|
||||
fn insert_requests_data_size(&self, request: &InsertRequests) -> usize {
|
||||
let mut size: usize = 0;
|
||||
for insert in &request.inserts {
|
||||
for column in &insert.columns {
|
||||
if let Some(values) = &column.values {
|
||||
size += Self::size_of_column_values(values);
|
||||
}
|
||||
}
|
||||
}
|
||||
size
|
||||
}
|
||||
|
||||
fn rows_insert_requests_data_size<'a>(
|
||||
&self,
|
||||
inserts: impl Iterator<Item = &'a RowInsertRequest>,
|
||||
) -> usize {
|
||||
let mut size: usize = 0;
|
||||
for insert in inserts {
|
||||
if let Some(rows) = &insert.rows {
|
||||
for row in &rows.rows {
|
||||
for value in &row.values {
|
||||
if let Some(value) = &value.value_data {
|
||||
size += Self::size_of_value_data(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
size
|
||||
}
|
||||
|
||||
fn size_of_column_values(values: &Values) -> usize {
|
||||
let mut size: usize = 0;
|
||||
size += values.i8_values.len() * size_of::<i32>();
|
||||
size += values.i16_values.len() * size_of::<i32>();
|
||||
size += values.i32_values.len() * size_of::<i32>();
|
||||
size += values.i64_values.len() * size_of::<i64>();
|
||||
size += values.u8_values.len() * size_of::<u32>();
|
||||
size += values.u16_values.len() * size_of::<u32>();
|
||||
size += values.u32_values.len() * size_of::<u32>();
|
||||
size += values.u64_values.len() * size_of::<u64>();
|
||||
size += values.f32_values.len() * size_of::<f32>();
|
||||
size += values.f64_values.len() * size_of::<f64>();
|
||||
size += values.bool_values.len() * size_of::<bool>();
|
||||
size += values
|
||||
.binary_values
|
||||
.iter()
|
||||
.map(|v| v.len() * size_of::<u8>())
|
||||
.sum::<usize>();
|
||||
size += values.string_values.iter().map(|v| v.len()).sum::<usize>();
|
||||
size += values.date_values.len() * size_of::<i32>();
|
||||
size += values.datetime_values.len() * size_of::<i64>();
|
||||
size += values.timestamp_second_values.len() * size_of::<i64>();
|
||||
size += values.timestamp_millisecond_values.len() * size_of::<i64>();
|
||||
size += values.timestamp_microsecond_values.len() * size_of::<i64>();
|
||||
size += values.timestamp_nanosecond_values.len() * size_of::<i64>();
|
||||
size += values.time_second_values.len() * size_of::<i64>();
|
||||
size += values.time_millisecond_values.len() * size_of::<i64>();
|
||||
size += values.time_microsecond_values.len() * size_of::<i64>();
|
||||
size += values.time_nanosecond_values.len() * size_of::<i64>();
|
||||
size += values.interval_year_month_values.len() * size_of::<i64>();
|
||||
size += values.interval_day_time_values.len() * size_of::<i64>();
|
||||
size += values.interval_month_day_nano_values.len() * size_of::<IntervalMonthDayNano>();
|
||||
size += values.decimal128_values.len() * size_of::<Decimal128>();
|
||||
size += values
|
||||
.list_values
|
||||
.iter()
|
||||
.map(|v| {
|
||||
v.items
|
||||
.iter()
|
||||
.map(|item| {
|
||||
item.value_data
|
||||
.as_ref()
|
||||
.map(Self::size_of_value_data)
|
||||
.unwrap_or(0)
|
||||
})
|
||||
.sum::<usize>()
|
||||
})
|
||||
.sum::<usize>();
|
||||
size += values
|
||||
.struct_values
|
||||
.iter()
|
||||
.map(|v| {
|
||||
v.items
|
||||
.iter()
|
||||
.map(|item| {
|
||||
item.value_data
|
||||
.as_ref()
|
||||
.map(Self::size_of_value_data)
|
||||
.unwrap_or(0)
|
||||
})
|
||||
.sum::<usize>()
|
||||
})
|
||||
.sum::<usize>();
|
||||
|
||||
size
|
||||
}
|
||||
|
||||
fn size_of_value_data(value: &ValueData) -> usize {
|
||||
match value {
|
||||
ValueData::I8Value(_) => size_of::<i32>(),
|
||||
ValueData::I16Value(_) => size_of::<i32>(),
|
||||
ValueData::I32Value(_) => size_of::<i32>(),
|
||||
ValueData::I64Value(_) => size_of::<i64>(),
|
||||
ValueData::U8Value(_) => size_of::<u32>(),
|
||||
ValueData::U16Value(_) => size_of::<u32>(),
|
||||
ValueData::U32Value(_) => size_of::<u32>(),
|
||||
ValueData::U64Value(_) => size_of::<u64>(),
|
||||
ValueData::F32Value(_) => size_of::<f32>(),
|
||||
ValueData::F64Value(_) => size_of::<f64>(),
|
||||
ValueData::BoolValue(_) => size_of::<bool>(),
|
||||
ValueData::BinaryValue(v) => v.len() * size_of::<u8>(),
|
||||
ValueData::StringValue(v) => v.len(),
|
||||
ValueData::DateValue(_) => size_of::<i32>(),
|
||||
ValueData::DatetimeValue(_) => size_of::<i64>(),
|
||||
ValueData::TimestampSecondValue(_) => size_of::<i64>(),
|
||||
ValueData::TimestampMillisecondValue(_) => size_of::<i64>(),
|
||||
ValueData::TimestampMicrosecondValue(_) => size_of::<i64>(),
|
||||
ValueData::TimestampNanosecondValue(_) => size_of::<i64>(),
|
||||
ValueData::TimeSecondValue(_) => size_of::<i64>(),
|
||||
ValueData::TimeMillisecondValue(_) => size_of::<i64>(),
|
||||
ValueData::TimeMicrosecondValue(_) => size_of::<i64>(),
|
||||
ValueData::TimeNanosecondValue(_) => size_of::<i64>(),
|
||||
ValueData::IntervalYearMonthValue(_) => size_of::<i32>(),
|
||||
ValueData::IntervalDayTimeValue(_) => size_of::<i64>(),
|
||||
ValueData::IntervalMonthDayNanoValue(_) => size_of::<IntervalMonthDayNano>(),
|
||||
ValueData::Decimal128Value(_) => size_of::<Decimal128>(),
|
||||
ValueData::ListValue(list_values) => list_values
|
||||
.items
|
||||
.iter()
|
||||
.map(|item| {
|
||||
item.value_data
|
||||
.as_ref()
|
||||
.map(Self::size_of_value_data)
|
||||
.unwrap_or(0)
|
||||
})
|
||||
.sum(),
|
||||
ValueData::StructValue(struct_values) => struct_values
|
||||
.items
|
||||
.iter()
|
||||
.map(|item| {
|
||||
item.value_data
|
||||
.as_ref()
|
||||
.map(Self::size_of_value_data)
|
||||
.unwrap_or(0)
|
||||
})
|
||||
.sum(),
|
||||
ValueData::JsonValue(v) => {
|
||||
fn calc(v: &JsonValue) -> usize {
|
||||
let Some(value) = v.value.as_ref() else {
|
||||
return 0;
|
||||
};
|
||||
match value {
|
||||
json_value::Value::Boolean(_) => size_of::<bool>(),
|
||||
json_value::Value::Int(_) => size_of::<i64>(),
|
||||
json_value::Value::Uint(_) => size_of::<u64>(),
|
||||
json_value::Value::Float(_) => size_of::<f64>(),
|
||||
json_value::Value::Str(s) => s.len(),
|
||||
json_value::Value::Array(array) => array.items.iter().map(calc).sum(),
|
||||
json_value::Value::Object(object) => object
|
||||
.entries
|
||||
.iter()
|
||||
.flat_map(|entry| {
|
||||
entry.value.as_ref().map(|v| entry.key.len() + calc(v))
|
||||
})
|
||||
.sum(),
|
||||
}
|
||||
}
|
||||
calc(v)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use api::v1::column::Values;
|
||||
use api::v1::greptime_request::Request;
|
||||
use api::v1::{Column, InsertRequest};
|
||||
|
||||
use super::*;
|
||||
|
||||
fn generate_request(size: usize) -> Request {
|
||||
let i8_values = vec![0; size / 4];
|
||||
Request::Inserts(InsertRequests {
|
||||
inserts: vec![InsertRequest {
|
||||
columns: vec![Column {
|
||||
values: Some(Values {
|
||||
i8_values,
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
}],
|
||||
..Default::default()
|
||||
}],
|
||||
})
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_limiter() {
|
||||
let limiter_ref: LimiterRef = Arc::new(Limiter::new(1024));
|
||||
let tasks_count = 10;
|
||||
let request_data_size = 100;
|
||||
let mut handles = vec![];
|
||||
|
||||
// Generate multiple requests to test the limiter.
|
||||
for _ in 0..tasks_count {
|
||||
let limiter = limiter_ref.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
let result = limiter
|
||||
.limit_request(&generate_request(request_data_size))
|
||||
.await;
|
||||
assert!(result.is_ok());
|
||||
});
|
||||
handles.push(handle);
|
||||
}
|
||||
|
||||
// Wait for all threads to complete.
|
||||
for handle in handles {
|
||||
handle.await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_in_flight_write_bytes() {
|
||||
let limiter_ref: LimiterRef = Arc::new(Limiter::new(1024));
|
||||
let req1 = generate_request(100);
|
||||
let result1 = limiter_ref
|
||||
.limit_request(&req1)
|
||||
.await
|
||||
.expect("failed to acquire permits");
|
||||
assert_eq!(limiter_ref.in_flight_write_bytes(), 100);
|
||||
|
||||
let req2 = generate_request(200);
|
||||
let result2 = limiter_ref
|
||||
.limit_request(&req2)
|
||||
.await
|
||||
.expect("failed to acquire permits");
|
||||
assert_eq!(limiter_ref.in_flight_write_bytes(), 300);
|
||||
|
||||
drop(result1);
|
||||
assert_eq!(limiter_ref.in_flight_write_bytes(), 200);
|
||||
|
||||
drop(result2);
|
||||
assert_eq!(limiter_ref.in_flight_write_bytes(), 0);
|
||||
}
|
||||
}
|
||||
@@ -40,6 +40,7 @@ use servers::otel_arrow::OtelArrowServiceHandler;
use servers::postgres::PostgresServer;
use servers::query_handler::grpc::ServerGrpcQueryHandlerAdapter;
use servers::query_handler::sql::ServerSqlQueryHandlerAdapter;
use servers::request_memory_limiter::ServerMemoryLimiter;
use servers::server::{Server, ServerHandlers};
use servers::tls::{ReloadableTlsServerConfig, maybe_watch_server_tls_config};
use snafu::ResultExt;
@@ -76,15 +77,25 @@ where
}
}

pub fn grpc_server_builder(&self, opts: &GrpcOptions) -> Result<GrpcServerBuilder> {
pub fn grpc_server_builder(
&self,
opts: &GrpcOptions,
request_memory_limiter: ServerMemoryLimiter,
) -> Result<GrpcServerBuilder> {
let builder = GrpcServerBuilder::new(opts.as_config(), common_runtime::global_runtime())
.with_memory_limiter(request_memory_limiter)
.with_tls_config(opts.tls.clone())
.context(error::InvalidTlsConfigSnafu)?;
Ok(builder)
}

pub fn http_server_builder(&self, opts: &FrontendOptions) -> HttpServerBuilder {
pub fn http_server_builder(
&self,
opts: &FrontendOptions,
request_memory_limiter: ServerMemoryLimiter,
) -> HttpServerBuilder {
let mut builder = HttpServerBuilder::new(opts.http.clone())
.with_memory_limiter(request_memory_limiter)
.with_sql_handler(ServerSqlQueryHandlerAdapter::arc(self.instance.clone()));

let validator = self.plugins.get::<LogValidatorRef>();

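Pulling the scattered hunks together, the intended wiring appears to be a single limiter shared by every protocol server. A fragment (not standalone code) assembled from the added lines in this file:

// One ServerMemoryLimiter instance is created per frontend and cloned into each builder.
let request_memory_limiter = ServerMemoryLimiter::new(
    opts.max_in_flight_write_bytes.as_bytes(),
    opts.write_bytes_exhausted_policy,
);
let grpc_builder = self.grpc_server_builder(&opts.grpc, request_memory_limiter.clone())?;
let http_builder = self.http_server_builder(&opts, request_memory_limiter.clone());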
@@ -169,11 +180,12 @@ where
|
||||
meta_client: &Option<MetaClientOptions>,
|
||||
name: Option<String>,
|
||||
external: bool,
|
||||
request_memory_limiter: ServerMemoryLimiter,
|
||||
) -> Result<GrpcServer> {
|
||||
let builder = if let Some(builder) = self.grpc_server_builder.take() {
|
||||
builder
|
||||
} else {
|
||||
self.grpc_server_builder(grpc)?
|
||||
self.grpc_server_builder(grpc, request_memory_limiter)?
|
||||
};
|
||||
|
||||
let user_provider = if external {
|
||||
@@ -235,11 +247,16 @@ where
|
||||
Ok(grpc_server)
|
||||
}
|
||||
|
||||
fn build_http_server(&mut self, opts: &FrontendOptions, toml: String) -> Result<HttpServer> {
|
||||
fn build_http_server(
|
||||
&mut self,
|
||||
opts: &FrontendOptions,
|
||||
toml: String,
|
||||
request_memory_limiter: ServerMemoryLimiter,
|
||||
) -> Result<HttpServer> {
|
||||
let builder = if let Some(builder) = self.http_server_builder.take() {
|
||||
builder
|
||||
} else {
|
||||
self.http_server_builder(opts)
|
||||
self.http_server_builder(opts, request_memory_limiter)
|
||||
};
|
||||
|
||||
let http_server = builder
|
||||
@@ -257,6 +274,12 @@ where
|
||||
let toml = opts.to_toml().context(TomlFormatSnafu)?;
|
||||
let opts: FrontendOptions = opts.into();
|
||||
|
||||
// Create request memory limiter for all server protocols
|
||||
let request_memory_limiter = ServerMemoryLimiter::new(
|
||||
opts.max_in_flight_write_bytes.as_bytes(),
|
||||
opts.write_bytes_exhausted_policy,
|
||||
);
|
||||
|
||||
let handlers = ServerHandlers::default();
|
||||
|
||||
let user_provider = self.plugins.get::<UserProviderRef>();
|
||||
@@ -264,7 +287,13 @@ where
|
||||
{
|
||||
// Always init GRPC server
|
||||
let grpc_addr = parse_addr(&opts.grpc.bind_addr)?;
|
||||
let grpc_server = self.build_grpc_server(&opts.grpc, &opts.meta_client, None, true)?;
|
||||
let grpc_server = self.build_grpc_server(
|
||||
&opts.grpc,
|
||||
&opts.meta_client,
|
||||
None,
|
||||
true,
|
||||
request_memory_limiter.clone(),
|
||||
)?;
|
||||
handlers.insert((Box::new(grpc_server), grpc_addr));
|
||||
}
|
||||
|
||||
@@ -276,6 +305,7 @@ where
|
||||
&opts.meta_client,
|
||||
Some("INTERNAL_GRPC_SERVER".to_string()),
|
||||
false,
|
||||
request_memory_limiter.clone(),
|
||||
)?;
|
||||
handlers.insert((Box::new(grpc_server), grpc_addr));
|
||||
}
|
||||
@@ -284,7 +314,8 @@ where
|
||||
// Always init HTTP server
|
||||
let http_options = &opts.http;
|
||||
let http_addr = parse_addr(&http_options.addr)?;
|
||||
let http_server = self.build_http_server(&opts, toml)?;
|
||||
let http_server =
|
||||
self.build_http_server(&opts, toml, request_memory_limiter.clone())?;
|
||||
handlers.insert((Box::new(http_server), http_addr));
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ use std::collections::HashMap;
use std::sync::Arc;

use common_wal::config::kafka::DatanodeKafkaConfig;
use common_wal::config::kafka::common::{DEFAULT_BACKOFF_CONFIG, DEFAULT_CONNECT_TIMEOUT};
use common_wal::config::kafka::common::DEFAULT_BACKOFF_CONFIG;
use dashmap::DashMap;
use rskafka::client::ClientBuilder;
use rskafka::client::partition::{Compression, PartitionClient, UnknownTopicHandling};
@@ -79,7 +79,8 @@ impl ClientManager {
// Sets backoff config for the top-level kafka client and all clients constructed by it.
let mut builder = ClientBuilder::new(config.connection.broker_endpoints.clone())
.backoff_config(DEFAULT_BACKOFF_CONFIG)
.connect_timeout(Some(DEFAULT_CONNECT_TIMEOUT));
.connect_timeout(Some(config.connection.connect_timeout))
.timeout(Some(config.connection.timeout));
if let Some(sasl) = &config.connection.sasl {
builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
};

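For readability, the post-change builder chain in one piece, assembled from the added lines above (a fragment, not standalone; the `connection.connect_timeout` and `connection.timeout` fields are taken on trust from this diff):

// Timeouts now come from the WAL connection config rather than hard-coded constants.
let mut builder = ClientBuilder::new(config.connection.broker_endpoints.clone())
    .backoff_config(DEFAULT_BACKOFF_CONFIG)
    .connect_timeout(Some(config.connection.connect_timeout)) // TCP connect deadline
    .timeout(Some(config.connection.timeout));                // per-request deadline
if let Some(sasl) = &config.connection.sasl {
    builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
}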
@@ -14,7 +14,6 @@
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::meta::cluster_server::ClusterServer;
|
||||
use api::v1::meta::heartbeat_server::HeartbeatServer;
|
||||
@@ -60,11 +59,6 @@ use crate::service::admin::admin_axum_router;
|
||||
use crate::utils::etcd::create_etcd_client_with_tls;
|
||||
use crate::{Result, error};
|
||||
|
||||
/// The default keep-alive interval for gRPC.
|
||||
const DEFAULT_GRPC_KEEP_ALIVE_INTERVAL: Duration = Duration::from_secs(10);
|
||||
/// The default keep-alive timeout for gRPC.
|
||||
const DEFAULT_GRPC_KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
pub struct MetasrvInstance {
|
||||
metasrv: Arc<Metasrv>,
|
||||
|
||||
@@ -255,8 +249,8 @@ pub fn router(metasrv: Arc<Metasrv>) -> Router {
|
||||
// for admin services
|
||||
.accept_http1(true)
|
||||
// For quick network failures detection.
|
||||
.http2_keepalive_interval(Some(DEFAULT_GRPC_KEEP_ALIVE_INTERVAL))
|
||||
.http2_keepalive_timeout(Some(DEFAULT_GRPC_KEEP_ALIVE_TIMEOUT));
|
||||
.http2_keepalive_interval(Some(metasrv.options().grpc.http2_keep_alive_interval))
|
||||
.http2_keepalive_timeout(Some(metasrv.options().grpc.http2_keep_alive_timeout));
|
||||
let router = add_compressed_service!(router, HeartbeatServer::from_arc(metasrv.clone()));
|
||||
let router = add_compressed_service!(router, StoreServer::from_arc(metasrv.clone()));
|
||||
let router = add_compressed_service!(router, ClusterServer::from_arc(metasrv.clone()));
|
||||
@@ -273,8 +267,12 @@ pub async fn metasrv_builder(
|
||||
(Some(kv_backend), _) => (kv_backend, None),
|
||||
(None, BackendImpl::MemoryStore) => (Arc::new(MemoryKvBackend::new()) as _, None),
|
||||
(None, BackendImpl::EtcdStore) => {
|
||||
let etcd_client =
|
||||
create_etcd_client_with_tls(&opts.store_addrs, opts.backend_tls.as_ref()).await?;
|
||||
let etcd_client = create_etcd_client_with_tls(
|
||||
&opts.store_addrs,
|
||||
&opts.backend_client,
|
||||
opts.backend_tls.as_ref(),
|
||||
)
|
||||
.await?;
|
||||
let kv_backend = EtcdStore::with_etcd_client(etcd_client.clone(), opts.max_txn_ops);
|
||||
let election = EtcdElection::with_etcd_client(
|
||||
&opts.grpc.server_addr,
|
||||
@@ -341,6 +339,7 @@ pub async fn metasrv_builder(
|
||||
opts.meta_schema_name.as_deref(),
|
||||
&opts.meta_table_name,
|
||||
opts.max_txn_ops,
|
||||
opts.auto_create_schema,
|
||||
)
|
||||
.await
|
||||
.context(error::KvBackendSnafu)?;
|
||||
|
||||
@@ -16,13 +16,9 @@ pub mod lease;
|
||||
pub mod node_info;
|
||||
pub mod utils;
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::meta::heartbeat_request::NodeWorkloads;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::distributed_time_constants::{
|
||||
DATANODE_LEASE_SECS, FLOWNODE_LEASE_SECS, FRONTEND_HEARTBEAT_INTERVAL_MILLIS,
|
||||
};
|
||||
use common_meta::distributed_time_constants::default_distributed_time_constants;
|
||||
use common_meta::error::Result;
|
||||
use common_meta::peer::{Peer, PeerDiscovery, PeerResolver};
|
||||
use common_meta::{DatanodeId, FlownodeId};
|
||||
@@ -38,7 +34,7 @@ impl PeerDiscovery for MetaPeerClient {
|
||||
utils::alive_frontends(
|
||||
&DefaultSystemTimer,
|
||||
self,
|
||||
Duration::from_millis(FRONTEND_HEARTBEAT_INTERVAL_MILLIS),
|
||||
default_distributed_time_constants().frontend_heartbeat_interval,
|
||||
)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
@@ -52,7 +48,7 @@ impl PeerDiscovery for MetaPeerClient {
|
||||
utils::alive_datanodes(
|
||||
&DefaultSystemTimer,
|
||||
self,
|
||||
Duration::from_secs(DATANODE_LEASE_SECS),
|
||||
default_distributed_time_constants().datanode_lease,
|
||||
filter,
|
||||
)
|
||||
.await
|
||||
@@ -67,7 +63,7 @@ impl PeerDiscovery for MetaPeerClient {
|
||||
utils::alive_flownodes(
|
||||
&DefaultSystemTimer,
|
||||
self,
|
||||
Duration::from_secs(FLOWNODE_LEASE_SECS),
|
||||
default_distributed_time_constants().flownode_lease,
|
||||
filter,
|
||||
)
|
||||
.await
|
||||
|
||||
@@ -102,7 +102,7 @@ mod tests {
|
||||
use api::v1::meta::heartbeat_request::NodeWorkloads;
|
||||
use api::v1::meta::{DatanodeWorkloads, FlownodeWorkloads};
|
||||
use common_meta::cluster::{FrontendStatus, NodeInfo, NodeInfoKey, NodeStatus, Role};
|
||||
use common_meta::distributed_time_constants::FRONTEND_HEARTBEAT_INTERVAL_MILLIS;
|
||||
use common_meta::distributed_time_constants::default_distributed_time_constants;
|
||||
use common_meta::kv_backend::ResettableKvBackendRef;
|
||||
use common_meta::peer::{Peer, PeerDiscovery};
|
||||
use common_meta::rpc::store::PutRequest;
|
||||
@@ -473,8 +473,10 @@ mod tests {
|
||||
let client = create_meta_peer_client();
|
||||
let in_memory = client.memory_backend();
|
||||
|
||||
let frontend_heartbeat_interval =
|
||||
default_distributed_time_constants().frontend_heartbeat_interval;
|
||||
let last_activity_ts =
|
||||
current_time_millis() - FRONTEND_HEARTBEAT_INTERVAL_MILLIS as i64 - 1000;
|
||||
current_time_millis() - frontend_heartbeat_interval.as_millis() as i64 - 1000;
|
||||
let active_frontend_node = NodeInfo {
|
||||
peer: Peer {
|
||||
id: 0,
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
use std::collections::VecDeque;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_meta::distributed_time_constants;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
const FIRST_HEARTBEAT_ESTIMATE_MILLIS: i64 = 1000;
|
||||
@@ -79,9 +78,7 @@ impl Default for PhiAccrualFailureDetectorOptions {
|
||||
Self {
|
||||
threshold: 8_f32,
|
||||
min_std_deviation: Duration::from_millis(100),
|
||||
acceptable_heartbeat_pause: Duration::from_secs(
|
||||
distributed_time_constants::DATANODE_LEASE_SECS,
|
||||
),
|
||||
acceptable_heartbeat_pause: Duration::from_secs(10),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,7 +134,7 @@ mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_meta::datanode::{RegionManifestInfo, RegionStat, Stat};
|
||||
use common_meta::distributed_time_constants;
|
||||
use common_meta::distributed_time_constants::default_distributed_time_constants;
|
||||
use common_meta::key::TableMetadataManager;
|
||||
use common_meta::key::table_route::TableRouteValue;
|
||||
use common_meta::key::test_utils::new_test_table_info;
|
||||
@@ -236,7 +236,7 @@ mod test {
|
||||
let opening_region_keeper = Arc::new(MemoryRegionKeeper::default());
|
||||
|
||||
let handler = RegionLeaseHandler::new(
|
||||
distributed_time_constants::REGION_LEASE_SECS,
|
||||
default_distributed_time_constants().region_lease.as_secs(),
|
||||
table_metadata_manager.clone(),
|
||||
opening_region_keeper.clone(),
|
||||
None,
|
||||
@@ -266,7 +266,7 @@ mod test {
|
||||
|
||||
assert_eq!(
|
||||
acc.region_lease.as_ref().unwrap().lease_seconds,
|
||||
distributed_time_constants::REGION_LEASE_SECS
|
||||
default_distributed_time_constants().region_lease.as_secs()
|
||||
);
|
||||
|
||||
assert_region_lease(
|
||||
@@ -300,7 +300,7 @@ mod test {
|
||||
|
||||
assert_eq!(
|
||||
acc.region_lease.as_ref().unwrap().lease_seconds,
|
||||
distributed_time_constants::REGION_LEASE_SECS
|
||||
default_distributed_time_constants().region_lease.as_secs()
|
||||
);
|
||||
|
||||
assert_region_lease(
|
||||
@@ -379,7 +379,7 @@ mod test {
|
||||
});
|
||||
|
||||
let handler = RegionLeaseHandler::new(
|
||||
distributed_time_constants::REGION_LEASE_SECS,
|
||||
default_distributed_time_constants().region_lease.as_secs(),
|
||||
table_metadata_manager.clone(),
|
||||
Default::default(),
|
||||
None,
|
||||
@@ -461,7 +461,7 @@ mod test {
|
||||
..Default::default()
|
||||
});
|
||||
let handler = RegionLeaseHandler::new(
|
||||
distributed_time_constants::REGION_LEASE_SECS,
|
||||
default_distributed_time_constants().region_lease.as_secs(),
|
||||
table_metadata_manager.clone(),
|
||||
Default::default(),
|
||||
None,
|
||||
|
||||
@@ -27,7 +27,7 @@ use common_event_recorder::EventRecorderOptions;
|
||||
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
|
||||
use common_meta::cache_invalidator::CacheInvalidatorRef;
|
||||
use common_meta::ddl_manager::DdlManagerRef;
|
||||
use common_meta::distributed_time_constants;
|
||||
use common_meta::distributed_time_constants::{self, default_distributed_time_constants};
|
||||
use common_meta::key::TableMetadataManagerRef;
|
||||
use common_meta::key::runtime_switch::RuntimeSwitchManagerRef;
|
||||
use common_meta::kv_backend::{KvBackendRef, ResettableKvBackend, ResettableKvBackendRef};
|
||||
@@ -121,6 +121,27 @@ impl Default for StatsPersistenceOptions {
    }
}

#[derive(Clone, PartialEq, Serialize, Deserialize, Debug)]
#[serde(default)]
pub struct BackendClientOptions {
    #[serde(with = "humantime_serde")]
    pub keep_alive_timeout: Duration,
    #[serde(with = "humantime_serde")]
    pub keep_alive_interval: Duration,
    #[serde(with = "humantime_serde")]
    pub connect_timeout: Duration,
}

impl Default for BackendClientOptions {
    fn default() -> Self {
        Self {
            keep_alive_interval: Duration::from_secs(10),
            keep_alive_timeout: Duration::from_secs(3),
            connect_timeout: Duration::from_secs(3),
        }
    }
}
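Because the three durations above use `humantime_serde` and the struct is `#[serde(default)]`, a partial config entry is enough to override a single field. Below is a minimal, hedged sketch of that behaviour; it assumes the `serde`, `humantime_serde`, and `toml` crates and is not code from this changeset.

```rust
use std::time::Duration;

use serde::Deserialize;

// Hypothetical copy of the struct above so the sketch compiles standalone.
#[derive(Debug, Deserialize)]
#[serde(default)]
struct BackendClientOptions {
    #[serde(with = "humantime_serde")]
    keep_alive_timeout: Duration,
    #[serde(with = "humantime_serde")]
    keep_alive_interval: Duration,
    #[serde(with = "humantime_serde")]
    connect_timeout: Duration,
}

impl Default for BackendClientOptions {
    fn default() -> Self {
        Self {
            keep_alive_interval: Duration::from_secs(10),
            keep_alive_timeout: Duration::from_secs(3),
            connect_timeout: Duration::from_secs(3),
        }
    }
}

fn main() {
    // Only the keep-alive interval is overridden; the other fields keep their defaults.
    let opts: BackendClientOptions =
        toml::from_str(r#"keep_alive_interval = "30s""#).unwrap();
    assert_eq!(opts.keep_alive_interval, Duration::from_secs(30));
    assert_eq!(opts.keep_alive_timeout, Duration::from_secs(3));
}
```

Given the `backend_client` field added to `MetasrvOptions` below, such settings would typically live under a `[backend_client]` table in the metasrv configuration.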
#[derive(Clone, PartialEq, Serialize, Deserialize)]
#[serde(default)]
pub struct MetasrvOptions {
@@ -136,12 +157,20 @@ pub struct MetasrvOptions {
    /// Only applicable when using PostgreSQL or MySQL as the metadata store
    #[serde(default)]
    pub backend_tls: Option<TlsOption>,
    /// The backend client options.
    /// Currently, only applicable when using etcd as the metadata store.
    #[serde(default)]
    pub backend_client: BackendClientOptions,
    /// The type of selector.
    pub selector: SelectorType,
    /// Whether to use the memory store.
    pub use_memory_store: bool,
    /// Whether to enable region failover.
    pub enable_region_failover: bool,
    /// The base heartbeat interval.
    ///
    /// This value is used to calculate the distributed time constants for components.
    /// e.g., the region lease time is `heartbeat_interval * 3 + Duration::from_secs(1)`.
    #[serde(with = "humantime_serde")]
    pub heartbeat_interval: Duration,
    /// The delay before starting region failure detection.
    /// This delay helps prevent Metasrv from triggering unnecessary region failovers before all Datanodes are fully started.
    /// Especially useful when the cluster is not deployed with GreptimeDB Operator and maintenance mode is not enabled.
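The doc comment on `heartbeat_interval` gives the derivation only in prose; the following is a small illustrative sketch of that arithmetic (not code from this changeset).

```rust
use std::time::Duration;

/// The relationship described above: region lease = heartbeat_interval * 3 + 1s.
fn region_lease(heartbeat_interval: Duration) -> Duration {
    heartbeat_interval * 3 + Duration::from_secs(1)
}

fn main() {
    // For example, a 3s base heartbeat interval yields a 10s region lease.
    assert_eq!(region_lease(Duration::from_secs(3)), Duration::from_secs(10));
}
```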
@@ -202,6 +231,9 @@ pub struct MetasrvOptions {
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
/// Optional PostgreSQL schema for metadata table (defaults to current search_path if empty).
|
||||
pub meta_schema_name: Option<String>,
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
/// Automatically create PostgreSQL schema if it doesn't exist (default: true).
|
||||
pub auto_create_schema: bool,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub node_max_idle_time: Duration,
|
||||
/// The event recorder options.
|
||||
@@ -219,7 +251,6 @@ impl fmt::Debug for MetasrvOptions {
|
||||
.field("store_addrs", &self.sanitize_store_addrs())
|
||||
.field("backend_tls", &self.backend_tls)
|
||||
.field("selector", &self.selector)
|
||||
.field("use_memory_store", &self.use_memory_store)
|
||||
.field("enable_region_failover", &self.enable_region_failover)
|
||||
.field(
|
||||
"allow_region_failover_on_local_wal",
|
||||
@@ -240,7 +271,9 @@ impl fmt::Debug for MetasrvOptions {
|
||||
.field("tracing", &self.tracing)
|
||||
.field("backend", &self.backend)
|
||||
.field("event_recorder", &self.event_recorder)
|
||||
.field("stats_persistence", &self.stats_persistence);
|
||||
.field("stats_persistence", &self.stats_persistence)
|
||||
.field("heartbeat_interval", &self.heartbeat_interval)
|
||||
.field("backend_client", &self.backend_client);
|
||||
|
||||
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
|
||||
debug_struct.field("meta_table_name", &self.meta_table_name);
|
||||
@@ -268,8 +301,8 @@ impl Default for MetasrvOptions {
|
||||
store_addrs: vec!["127.0.0.1:2379".to_string()],
|
||||
backend_tls: None,
|
||||
selector: SelectorType::default(),
|
||||
use_memory_store: false,
|
||||
enable_region_failover: false,
|
||||
heartbeat_interval: distributed_time_constants::BASE_HEARTBEAT_INTERVAL,
|
||||
region_failure_detector_initialization_delay: Duration::from_secs(10 * 60),
|
||||
allow_region_failover_on_local_wal: false,
|
||||
grpc: GrpcOptions {
|
||||
@@ -303,10 +336,13 @@ impl Default for MetasrvOptions {
|
||||
meta_election_lock_id: common_meta::kv_backend::DEFAULT_META_ELECTION_LOCK_ID,
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
meta_schema_name: None,
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
auto_create_schema: true,
|
||||
node_max_idle_time: Duration::from_secs(24 * 60 * 60),
|
||||
event_recorder: EventRecorderOptions::default(),
|
||||
stats_persistence: StatsPersistenceOptions::default(),
|
||||
gc: GcSchedulerOptions::default(),
|
||||
backend_client: BackendClientOptions::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -747,7 +783,7 @@ impl Metasrv {
|
||||
&DefaultSystemTimer,
|
||||
self.meta_peer_client.as_ref(),
|
||||
peer_id,
|
||||
Duration::from_secs(distributed_time_constants::DATANODE_LEASE_SECS),
|
||||
default_distributed_time_constants().datanode_lease,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ use common_meta::ddl::{
|
||||
DdlContext, NoopRegionFailureDetectorControl, RegionFailureDetectorControllerRef,
|
||||
};
|
||||
use common_meta::ddl_manager::{DdlManager, DdlManagerConfiguratorRef};
|
||||
use common_meta::distributed_time_constants::{self};
|
||||
use common_meta::distributed_time_constants::default_distributed_time_constants;
|
||||
use common_meta::key::TableMetadataManager;
|
||||
use common_meta::key::flow::FlowMetadataManager;
|
||||
use common_meta::key::flow::flow_state::FlowStateManager;
|
||||
@@ -513,7 +513,7 @@ impl MetasrvBuilder {
|
||||
Some(handler_group_builder) => handler_group_builder,
|
||||
None => {
|
||||
let region_lease_handler = RegionLeaseHandler::new(
|
||||
distributed_time_constants::REGION_LEASE_SECS,
|
||||
default_distributed_time_constants().region_lease.as_secs(),
|
||||
table_metadata_manager.clone(),
|
||||
memory_region_keeper.clone(),
|
||||
customized_region_lease_renewer,
|
||||
|
||||
@@ -921,7 +921,7 @@ mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_meta::distributed_time_constants::REGION_LEASE_SECS;
|
||||
use common_meta::distributed_time_constants::default_distributed_time_constants;
|
||||
use common_meta::instruction::Instruction;
|
||||
use common_meta::key::test_utils::new_test_table_info;
|
||||
use common_meta::rpc::router::{Region, RegionRoute};
|
||||
@@ -1192,8 +1192,10 @@ mod tests {
|
||||
.run_once()
|
||||
.await;
|
||||
|
||||
let region_lease = default_distributed_time_constants().region_lease.as_secs();
|
||||
|
||||
// Ensure it didn't run into the slow path.
|
||||
assert!(timer.elapsed().as_secs() < REGION_LEASE_SECS / 2);
|
||||
assert!(timer.elapsed().as_secs() < region_lease / 2);
|
||||
|
||||
runner.suite.verify_table_metadata().await;
|
||||
}
|
||||
@@ -1539,8 +1541,9 @@ mod tests {
|
||||
.run_once()
|
||||
.await;
|
||||
|
||||
let region_lease = default_distributed_time_constants().region_lease.as_secs();
|
||||
// Ensure it didn't run into the slow path.
|
||||
assert!(timer.elapsed().as_secs() < REGION_LEASE_SECS);
|
||||
assert!(timer.elapsed().as_secs() < region_lease);
|
||||
runner.suite.verify_table_metadata().await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,11 +13,10 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::meta::MailboxMessage;
|
||||
use common_meta::RegionIdent;
|
||||
use common_meta::distributed_time_constants::REGION_LEASE_SECS;
|
||||
use common_meta::distributed_time_constants::default_distributed_time_constants;
|
||||
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
|
||||
use common_procedure::{Context as ProcedureContext, Status};
|
||||
use common_telemetry::{info, warn};
|
||||
@@ -30,9 +29,6 @@ use crate::procedure::region_migration::migration_end::RegionMigrationEnd;
|
||||
use crate::procedure::region_migration::{Context, State};
|
||||
use crate::service::mailbox::Channel;
|
||||
|
||||
/// Uses lease time of a region as the timeout of closing a downgraded region.
|
||||
const CLOSE_DOWNGRADED_REGION_TIMEOUT: Duration = Duration::from_secs(REGION_LEASE_SECS);
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct CloseDowngradedRegion;
|
||||
|
||||
@@ -112,7 +108,7 @@ impl CloseDowngradedRegion {
|
||||
let ch = Channel::Datanode(downgrade_leader_datanode.id);
|
||||
let receiver = ctx
|
||||
.mailbox
|
||||
.send(&ch, msg, CLOSE_DOWNGRADED_REGION_TIMEOUT)
|
||||
.send(&ch, msg, default_distributed_time_constants().region_lease)
|
||||
.await?;
|
||||
|
||||
match receiver.await {
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::time::Duration;
|
||||
|
||||
use api::v1::meta::MailboxMessage;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::distributed_time_constants::REGION_LEASE_SECS;
|
||||
use common_meta::distributed_time_constants::default_distributed_time_constants;
|
||||
use common_meta::instruction::{
|
||||
DowngradeRegion, DowngradeRegionReply, DowngradeRegionsReply, Instruction, InstructionReply,
|
||||
};
|
||||
@@ -64,7 +64,7 @@ impl State for DowngradeLeaderRegion {
|
||||
let now = Instant::now();
|
||||
// Ensures the `leader_region_lease_deadline` must exist after recovering.
|
||||
ctx.volatile_ctx
|
||||
.set_leader_region_lease_deadline(Duration::from_secs(REGION_LEASE_SECS));
|
||||
.set_leader_region_lease_deadline(default_distributed_time_constants().region_lease);
|
||||
|
||||
match self.downgrade_region_with_retry(ctx).await {
|
||||
Ok(_) => {
|
||||
@@ -277,14 +277,14 @@ impl DowngradeLeaderRegion {
|
||||
if let Some(last_connection_at) = last_connection_at {
|
||||
let now = current_time_millis();
|
||||
let elapsed = now - last_connection_at;
|
||||
let region_lease = Duration::from_secs(REGION_LEASE_SECS);
|
||||
let region_lease = default_distributed_time_constants().region_lease;
|
||||
|
||||
// It's safe to update the region leader lease deadline here because:
|
||||
// 1. The old region leader has already been marked as downgraded in metadata,
|
||||
// which means any attempts to renew its lease will be rejected.
|
||||
// 2. The pusher disconnect time record only gets removed when the datanode (from_peer)
|
||||
// establishes a new heartbeat connection stream.
|
||||
if elapsed >= (REGION_LEASE_SECS * 1000) as i64 {
|
||||
if elapsed >= (region_lease.as_secs() * 1000) as i64 {
|
||||
ctx.volatile_ctx.reset_leader_region_lease_deadline();
|
||||
info!(
|
||||
"Datanode {}({}) has been disconnected for longer than the region lease period ({:?}), reset leader region lease deadline to None, region: {:?}",
|
||||
@@ -697,7 +697,8 @@ mod tests {
|
||||
let procedure_ctx = new_procedure_context();
|
||||
let (next, _) = state.next(&mut ctx, &procedure_ctx).await.unwrap();
|
||||
let elapsed = timer.elapsed().as_secs();
|
||||
assert!(elapsed < REGION_LEASE_SECS / 2);
|
||||
let region_lease = default_distributed_time_constants().region_lease.as_secs();
|
||||
assert!(elapsed < region_lease / 2);
|
||||
assert_eq!(
|
||||
ctx.volatile_ctx
|
||||
.leader_region_last_entry_ids
|
||||
|
||||
@@ -14,11 +14,10 @@
|
||||
|
||||
use std::any::Any;
|
||||
use std::ops::Div;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::meta::MailboxMessage;
|
||||
use common_meta::RegionIdent;
|
||||
use common_meta::distributed_time_constants::REGION_LEASE_SECS;
|
||||
use common_meta::distributed_time_constants::default_distributed_time_constants;
|
||||
use common_meta::instruction::{Instruction, InstructionReply, OpenRegion, SimpleReply};
|
||||
use common_meta::key::datanode_table::RegionInfo;
|
||||
use common_procedure::{Context as ProcedureContext, Status};
|
||||
@@ -33,9 +32,6 @@ use crate::procedure::region_migration::flush_leader_region::PreFlushRegion;
|
||||
use crate::procedure::region_migration::{Context, State};
|
||||
use crate::service::mailbox::Channel;
|
||||
|
||||
/// Uses lease time of a region as the timeout of opening a candidate region.
|
||||
const OPEN_CANDIDATE_REGION_TIMEOUT: Duration = Duration::from_secs(REGION_LEASE_SECS);
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct OpenCandidateRegion;
|
||||
|
||||
@@ -157,7 +153,9 @@ impl OpenCandidateRegion {
|
||||
.context(error::ExceededDeadlineSnafu {
|
||||
operation: "Open candidate region",
|
||||
})?;
|
||||
let operation_timeout = operation_timeout.div(2).max(OPEN_CANDIDATE_REGION_TIMEOUT);
|
||||
let operation_timeout = operation_timeout
|
||||
.div(2)
|
||||
.max(default_distributed_time_constants().region_lease);
|
||||
let ch = Channel::Datanode(candidate.id);
|
||||
let now = Instant::now();
|
||||
let receiver = ctx.mailbox.send(&ch, msg, operation_timeout).await?;
|
||||
|
||||
@@ -99,6 +99,7 @@ impl heartbeat_server::Heartbeat for Metasrv {
|
||||
error!("Client disconnected: broken pipe");
|
||||
break;
|
||||
}
|
||||
error!(err; "Sending heartbeat response error");
|
||||
|
||||
if tx.send(Err(err)).await.is_err() {
|
||||
info!("ReceiverStream was dropped; shutting down");
|
||||
@@ -12,17 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use common_meta::distributed_time_constants::default_etcd_client_options;
use common_meta::kv_backend::etcd::create_etcd_tls_options;
use etcd_client::Client;
use etcd_client::{Client, ConnectOptions};
use servers::tls::{TlsMode, TlsOption};
use snafu::ResultExt;

use crate::error::{self, BuildTlsOptionsSnafu, Result};
use crate::metasrv::BackendClientOptions;

/// Creates an etcd client with TLS configuration.
pub async fn create_etcd_client_with_tls(
    store_addrs: &[String],
    client_options: &BackendClientOptions,
    tls_config: Option<&TlsOption>,
) -> Result<Client> {
    let etcd_endpoints = store_addrs
@@ -31,7 +32,12 @@ pub async fn create_etcd_client_with_tls(
        .filter(|x| !x.is_empty())
        .collect::<Vec<_>>();

    let mut connect_options = default_etcd_client_options();
    let mut connect_options = ConnectOptions::new()
        .with_keep_alive_while_idle(true)
        .with_keep_alive(
            client_options.keep_alive_interval,
            client_options.keep_alive_timeout,
        );
    if let Some(tls_config) = tls_config
        && let Some(tls_options) = create_etcd_tls_options(&convert_tls_option(tls_config))
            .context(BuildTlsOptionsSnafu)?
|
||||
) -> Result<()> {
|
||||
ensure!(!value.is_null(), IndexEncodeNullSnafu);
|
||||
|
||||
if field.data_type().is_string() {
|
||||
if field.encode_data_type().is_string() {
|
||||
let value = value
|
||||
.try_into_string()
|
||||
.context(FieldTypeMismatchSnafu)?
@@ -57,15 +57,20 @@ impl SortField {
        &self.data_type
    }

    pub fn estimated_size(&self) -> usize {
    /// Returns the physical data type to encode of the field.
    ///
    /// For example, a dictionary field will be encoded as its value type.
    pub fn encode_data_type(&self) -> &ConcreteDataType {
        match &self.data_type {
            ConcreteDataType::Dictionary(dict_type) => {
                Self::estimated_size_by_type(dict_type.value_type())
            }
            data_type => Self::estimated_size_by_type(data_type),
            ConcreteDataType::Dictionary(dict_type) => dict_type.value_type(),
            _ => &self.data_type,
        }
    }

    pub fn estimated_size(&self) -> usize {
        Self::estimated_size_by_type(self.encode_data_type())
    }

    fn estimated_size_by_type(data_type: &ConcreteDataType) -> usize {
        match data_type {
            ConcreteDataType::Boolean(_) => 2,
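The rule this refactor centralizes is small but easy to drift when repeated in several methods. The toy sketch below shows the same dictionary-to-value-type mapping in isolation, using a hypothetical `DataType` enum rather than GreptimeDB's `ConcreteDataType`.

```rust
// Hypothetical stand-in for ConcreteDataType, only for illustration.
#[derive(Debug, PartialEq, Clone)]
enum DataType {
    String,
    UInt32,
    Dictionary { key: Box<DataType>, value: Box<DataType> },
}

/// Dictionary columns are encoded using their value type; every other type is encoded as-is.
fn encode_data_type(dt: &DataType) -> &DataType {
    match dt {
        DataType::Dictionary { value, .. } => value,
        other => other,
    }
}

fn main() {
    let dict = DataType::Dictionary {
        key: Box::new(DataType::UInt32),
        value: Box::new(DataType::String),
    };
    // A Dictionary<UInt32, String> column is serialized like a plain String column.
    assert_eq!(encode_data_type(&dict), &DataType::String);
    assert_eq!(encode_data_type(&DataType::UInt32), &DataType::UInt32);
}
```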
@@ -98,12 +103,7 @@ impl SortField {
|
||||
serializer: &mut Serializer<&mut Vec<u8>>,
|
||||
value: &ValueRef,
|
||||
) -> Result<()> {
|
||||
match self.data_type() {
|
||||
ConcreteDataType::Dictionary(dict_type) => {
|
||||
Self::serialize_by_type(dict_type.value_type(), serializer, value)
|
||||
}
|
||||
data_type => Self::serialize_by_type(data_type, serializer, value),
|
||||
}
|
||||
Self::serialize_by_type(self.encode_data_type(), serializer, value)
|
||||
}
|
||||
|
||||
fn serialize_by_type(
|
||||
@@ -194,12 +194,7 @@ impl SortField {
|
||||
|
||||
/// Deserialize a value from the deserializer.
|
||||
pub fn deserialize<B: Buf>(&self, deserializer: &mut Deserializer<B>) -> Result<Value> {
|
||||
match &self.data_type {
|
||||
ConcreteDataType::Dictionary(dict_type) => {
|
||||
Self::deserialize_by_type(dict_type.value_type(), deserializer)
|
||||
}
|
||||
data_type => Self::deserialize_by_type(data_type, deserializer),
|
||||
}
|
||||
Self::deserialize_by_type(self.encode_data_type(), deserializer)
|
||||
}
|
||||
|
||||
fn deserialize_by_type<B: Buf>(
|
||||
@@ -301,12 +296,7 @@ impl SortField {
|
||||
return Ok(1);
|
||||
}
|
||||
|
||||
match &self.data_type {
|
||||
ConcreteDataType::Dictionary(dict_type) => {
|
||||
Self::skip_deserialize_by_type(dict_type.value_type(), bytes, deserializer)
|
||||
}
|
||||
data_type => Self::skip_deserialize_by_type(data_type, bytes, deserializer),
|
||||
}
|
||||
Self::skip_deserialize_by_type(self.encode_data_type(), bytes, deserializer)
|
||||
}
|
||||
|
||||
fn skip_deserialize_by_type(
|
||||
|
||||
@@ -25,7 +25,7 @@ use tokio::sync::mpsc;
|
||||
use crate::compaction::compactor::{CompactionRegion, Compactor};
|
||||
use crate::compaction::memory_manager::{CompactionMemoryGuard, CompactionMemoryManager};
|
||||
use crate::compaction::picker::{CompactionTask, PickerOutput};
|
||||
use crate::error::{CompactRegionSnafu, CompactionMemoryExhaustedSnafu, MemoryAcquireFailedSnafu};
|
||||
use crate::error::{CompactRegionSnafu, CompactionMemoryExhaustedSnafu};
|
||||
use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
|
||||
use crate::metrics::{COMPACTION_FAILURE_COUNT, COMPACTION_MEMORY_WAIT, COMPACTION_STAGE_ELAPSED};
|
||||
use crate::region::RegionRoleState;
|
||||
@@ -95,80 +95,16 @@ impl CompactionTaskImpl {
|
||||
async fn acquire_memory_with_policy(&self) -> error::Result<CompactionMemoryGuard> {
|
||||
let region_id = self.compaction_region.region_id;
|
||||
let requested_bytes = self.estimated_memory_bytes;
|
||||
let limit_bytes = self.memory_manager.limit_bytes();
|
||||
let policy = self.memory_policy;
|
||||
|
||||
if limit_bytes > 0 && requested_bytes > limit_bytes {
|
||||
warn!(
|
||||
"Compaction for region {} requires {} bytes but limit is {} bytes; cannot satisfy request",
|
||||
region_id, requested_bytes, limit_bytes
|
||||
);
|
||||
return Err(CompactionMemoryExhaustedSnafu {
|
||||
let _timer = COMPACTION_MEMORY_WAIT.start_timer();
|
||||
self.memory_manager
|
||||
.acquire_with_policy(requested_bytes, policy)
|
||||
.await
|
||||
.context(CompactionMemoryExhaustedSnafu {
|
||||
region_id,
|
||||
required_bytes: requested_bytes,
|
||||
limit_bytes,
|
||||
policy: "exceed_limit".to_string(),
|
||||
}
|
||||
.build());
|
||||
}
|
||||
|
||||
match self.memory_policy {
|
||||
OnExhaustedPolicy::Wait {
|
||||
timeout: wait_timeout,
|
||||
} => {
|
||||
let timer = COMPACTION_MEMORY_WAIT.start_timer();
|
||||
|
||||
match tokio::time::timeout(
|
||||
wait_timeout,
|
||||
self.memory_manager.acquire(requested_bytes),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(guard)) => {
|
||||
timer.observe_duration();
|
||||
Ok(guard)
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
timer.observe_duration();
|
||||
Err(e).with_context(|_| MemoryAcquireFailedSnafu {
|
||||
region_id,
|
||||
policy: format!("wait_timeout({}ms)", wait_timeout.as_millis()),
|
||||
})
|
||||
}
|
||||
Err(_) => {
|
||||
timer.observe_duration();
|
||||
warn!(
|
||||
"Compaction for region {} waited {:?} for {} bytes but timed out",
|
||||
region_id, wait_timeout, requested_bytes
|
||||
);
|
||||
CompactionMemoryExhaustedSnafu {
|
||||
region_id,
|
||||
required_bytes: requested_bytes,
|
||||
limit_bytes,
|
||||
policy: format!("wait_timeout({}ms)", wait_timeout.as_millis()),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
OnExhaustedPolicy::Fail => {
|
||||
// Try to acquire, fail immediately if not available
|
||||
self.memory_manager
|
||||
.try_acquire(requested_bytes)
|
||||
.ok_or_else(|| {
|
||||
warn!(
|
||||
"Compaction memory exhausted for region {} (policy=fail, need {} bytes, limit {} bytes)",
|
||||
region_id, requested_bytes, limit_bytes
|
||||
);
|
||||
CompactionMemoryExhaustedSnafu {
|
||||
region_id,
|
||||
required_bytes: requested_bytes,
|
||||
limit_bytes,
|
||||
policy: "fail".to_string(),
|
||||
}
|
||||
.build()
|
||||
})
|
||||
}
|
||||
}
|
||||
policy: format!("{policy:?}"),
|
||||
})
|
||||
}
|
||||
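The deleted branches above are the clearest view of what `acquire_with_policy` now encapsulates: either wait for compaction memory up to a timeout or fail immediately. Below is a rough, hypothetical sketch of that split using a plain tokio semaphore; it is not the `CompactionMemoryManager` API and the types are placeholders.

```rust
use std::sync::Arc;
use std::time::Duration;

use tokio::sync::{OwnedSemaphorePermit, Semaphore};

enum OnExhaustedPolicy {
    Wait { timeout: Duration },
    Fail,
}

/// Acquire `bytes` permits from a byte-budget semaphore according to the policy.
async fn acquire(
    budget: Arc<Semaphore>,
    bytes: u32,
    policy: OnExhaustedPolicy,
) -> Result<OwnedSemaphorePermit, String> {
    match policy {
        // Wait for memory to become available, but give up after the timeout.
        OnExhaustedPolicy::Wait { timeout } => {
            tokio::time::timeout(timeout, budget.acquire_many_owned(bytes))
                .await
                .map_err(|_| "timed out waiting for compaction memory".to_string())?
                .map_err(|e| e.to_string())
        }
        // Fail fast if the memory is not immediately available.
        OnExhaustedPolicy::Fail => budget.try_acquire_many_owned(bytes).map_err(|e| e.to_string()),
    }
}

#[tokio::main]
async fn main() {
    let budget = Arc::new(Semaphore::new(1024));
    let permit = acquire(budget, 512, OnExhaustedPolicy::Fail).await.unwrap();
    drop(permit); // releasing the permit returns the bytes to the budget
}
```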
|
||||
/// Remove expired ssts files, update manifest immediately
|
||||
|
||||
@@ -872,9 +872,9 @@ StorageSstEntry { file_path: "test/11_0000000002/index/<file_id>.puffin", file_s
|
||||
StorageSstEntry { file_path: "test/22_0000000042/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
|
||||
StorageSstEntry { file_path: "test/22_0000000042/index/<file_id>.puffin", file_size: None, last_modified_ms: None, node_id: None }"#).await;
|
||||
test_list_ssts_with_format(true, r#"
|
||||
ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/11_0000000001/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/11_0000000001/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/11_0000000002/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/11_0000000002/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/22_0000000042/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/22_0000000042/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"#,
|
||||
ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/11_0000000001/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/11_0000000001/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/11_0000000002/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/11_0000000002/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/22_0000000042/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/22_0000000042/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"#,
|
||||
r#"
|
||||
StorageSstEntry { file_path: "test/11_0000000001/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
|
||||
StorageSstEntry { file_path: "test/11_0000000001/index/<file_id>.puffin", file_size: None, last_modified_ms: None, node_id: None }
|
||||
|
||||
@@ -1042,20 +1042,8 @@ pub enum Error {
|
||||
#[snafu(display("Manual compaction is override by following operations."))]
|
||||
ManualCompactionOverride {},
|
||||
|
||||
#[snafu(display(
|
||||
"Compaction memory limit exceeded for region {region_id}: required {required_bytes} bytes, limit {limit_bytes} bytes (policy: {policy})",
|
||||
))]
|
||||
#[snafu(display("Compaction memory exhausted for region {region_id} (policy: {policy})",))]
|
||||
CompactionMemoryExhausted {
|
||||
region_id: RegionId,
|
||||
required_bytes: u64,
|
||||
limit_bytes: u64,
|
||||
policy: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to acquire memory for region {region_id} (policy: {policy})"))]
|
||||
MemoryAcquireFailed {
|
||||
region_id: RegionId,
|
||||
policy: String,
|
||||
#[snafu(source)]
|
||||
@@ -1359,9 +1347,7 @@ impl ErrorExt for Error {
|
||||
|
||||
ManualCompactionOverride {} => StatusCode::Cancelled,
|
||||
|
||||
CompactionMemoryExhausted { .. } => StatusCode::RuntimeResourcesExhausted,
|
||||
|
||||
MemoryAcquireFailed { source, .. } => source.status_code(),
|
||||
CompactionMemoryExhausted { source, .. } => source.status_code(),
|
||||
|
||||
IncompatibleWalProviderChange { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
|
||||
@@ -801,7 +801,8 @@ fn memtable_flat_sources(
|
||||
if last_iter_rows > min_flush_rows {
|
||||
let maybe_dedup = merge_and_dedup(
|
||||
&schema,
|
||||
options,
|
||||
options.append_mode,
|
||||
options.merge_mode(),
|
||||
field_column_start,
|
||||
std::mem::replace(&mut input_iters, Vec::with_capacity(num_ranges)),
|
||||
)?;
|
||||
@@ -813,7 +814,13 @@ fn memtable_flat_sources(
|
||||
|
||||
// Handle remaining iters.
|
||||
if !input_iters.is_empty() {
|
||||
let maybe_dedup = merge_and_dedup(&schema, options, field_column_start, input_iters)?;
|
||||
let maybe_dedup = merge_and_dedup(
|
||||
&schema,
|
||||
options.append_mode,
|
||||
options.merge_mode(),
|
||||
field_column_start,
|
||||
input_iters,
|
||||
)?;
|
||||
|
||||
flat_sources.sources.push(FlatSource::Iter(maybe_dedup));
|
||||
}
@@ -822,19 +829,64 @@
    Ok(flat_sources)
}

fn merge_and_dedup(
/// Merges multiple record batch iterators and applies deduplication based on the specified mode.
///
/// This function is used during the flush process to combine data from multiple memtable ranges
/// into a single stream while handling duplicate records according to the configured merge strategy.
///
/// # Arguments
///
/// * `schema` - The Arrow schema reference that defines the structure of the record batches
/// * `append_mode` - When true, no deduplication is performed and all records are preserved.
///   This is used for append-only workloads where duplicate handling is not required.
/// * `merge_mode` - The strategy used for deduplication when not in append mode:
///   - `MergeMode::LastRow`: Keeps the last record for each primary key
///   - `MergeMode::LastNonNull`: Keeps the last non-null values for each field
/// * `field_column_start` - The starting column index for fields in the record batch.
///   Used when `MergeMode::LastNonNull` to identify which columns
///   contain field values versus primary key columns.
/// * `input_iters` - A vector of record batch iterators to be merged and deduplicated
///
/// # Returns
///
/// Returns a boxed record batch iterator that yields the merged and potentially deduplicated
/// record batches.
///
/// # Behavior
///
/// 1. Creates a `FlatMergeIterator` to merge all input iterators in sorted order based on
///    primary key and timestamp
/// 2. If `append_mode` is true, returns the merge iterator directly without deduplication
/// 3. If `append_mode` is false, wraps the merge iterator with a `FlatDedupIterator` that
///    applies the specified merge mode:
///    - `LastRow`: Removes duplicate rows, keeping only the last one
///    - `LastNonNull`: Removes duplicates but preserves the last non-null value for each field
///
/// # Examples
///
/// ```ignore
/// let merged_iter = merge_and_dedup(
///     &schema,
///     false, // not append mode, apply dedup
///     MergeMode::LastRow,
///     2, // fields start at column 2 after primary key columns
///     vec![iter1, iter2, iter3],
/// )?;
/// ```
pub fn merge_and_dedup(
    schema: &SchemaRef,
    options: &RegionOptions,
    append_mode: bool,
    merge_mode: MergeMode,
    field_column_start: usize,
    input_iters: Vec<BoxedRecordBatchIterator>,
) -> Result<BoxedRecordBatchIterator> {
    let merge_iter = FlatMergeIterator::new(schema.clone(), input_iters, DEFAULT_READ_BATCH_SIZE)?;
    let maybe_dedup = if options.append_mode {
    let maybe_dedup = if append_mode {
        // No dedup in append mode
        Box::new(merge_iter) as _
    } else {
        // Dedup according to merge mode.
        match options.merge_mode() {
        match merge_mode {
            MergeMode::LastRow => {
                Box::new(FlatDedupIterator::new(merge_iter, FlatLastRow::new(false))) as _
            }
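To make the `LastRow` behaviour described above concrete without the mito2 iterator types, here is a standalone, hypothetical sketch over plain tuples. It assumes the input is already sorted by primary key and timestamp with the preferred duplicate last, which is an assumption of the sketch, not a claim about the real iterator ordering.

```rust
/// Rows are (primary_key, timestamp, value); duplicates share (primary_key, timestamp).
fn dedup_last_row(sorted_rows: Vec<(&str, i64, u64)>) -> Vec<(&str, i64, u64)> {
    let mut out: Vec<(&str, i64, u64)> = Vec::new();
    for row in sorted_rows {
        let is_dup = matches!(out.last(), Some(last) if last.0 == row.0 && last.1 == row.1);
        if is_dup {
            // Same key and timestamp as the previous row: keep the later duplicate.
            *out.last_mut().unwrap() = row;
        } else {
            out.push(row);
        }
    }
    out
}

fn main() {
    let rows = vec![("k1", 1, 10), ("k1", 1, 11), ("k1", 2, 12), ("k2", 1, 13)];
    assert_eq!(
        dedup_last_row(rows),
        vec![("k1", 1, 11), ("k1", 2, 12), ("k2", 1, 13)]
    );
}
```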
@@ -540,7 +540,7 @@ impl LocalGcWorker {
|
||||
fn filter_deletable_files(
|
||||
&self,
|
||||
entries: Vec<Entry>,
|
||||
in_use_filenames: &HashSet<&FileId>,
|
||||
in_use_filenames: &HashSet<FileId>,
|
||||
may_linger_filenames: &HashSet<&FileId>,
|
||||
eligible_for_removal: &HashSet<&FileId>,
|
||||
unknown_file_may_linger_until: chrono::DateTime<chrono::Utc>,
|
||||
@@ -641,9 +641,6 @@ impl LocalGcWorker {
|
||||
.flatten()
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
// in use filenames, include sst and index files
|
||||
let in_use_filenames = in_used.iter().collect::<HashSet<_>>();
|
||||
|
||||
// When full_file_listing is false, skip expensive list operations and only delete
|
||||
// files that are tracked in recently_removed_files
|
||||
if !self.full_file_listing {
|
||||
@@ -653,7 +650,7 @@ impl LocalGcWorker {
|
||||
// 3. Have passed the lingering time
|
||||
let files_to_delete: Vec<FileId> = eligible_for_removal
|
||||
.iter()
|
||||
.filter(|file_id| !in_use_filenames.contains(*file_id))
|
||||
.filter(|file_id| !in_used.contains(*file_id))
|
||||
.map(|&f| *f)
|
||||
.collect();
|
||||
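The fast path commented above ("skip expensive list operations and only delete files that are tracked in recently_removed_files") is essentially a set difference. A self-contained sketch of that filter, with plain integers standing in for `FileId` (hypothetical types, not the gc worker's API):

```rust
use std::collections::HashSet;

/// Keep only the removal candidates that are no longer referenced by any region.
fn files_to_delete(eligible_for_removal: &HashSet<u64>, in_use: &HashSet<u64>) -> Vec<u64> {
    eligible_for_removal
        .iter()
        .filter(|file_id| !in_use.contains(*file_id))
        .copied()
        .collect()
}

fn main() {
    let eligible: HashSet<u64> = [1, 2, 3].into_iter().collect();
    let in_use: HashSet<u64> = [2].into_iter().collect();
    let mut to_delete = files_to_delete(&eligible, &in_use);
    to_delete.sort_unstable();
    assert_eq!(to_delete, vec![1, 3]);
}
```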
|
||||
@@ -672,7 +669,7 @@ impl LocalGcWorker {
|
||||
let (all_unused_files_ready_for_delete, all_in_exist_linger_files) = self
|
||||
.filter_deletable_files(
|
||||
all_entries,
|
||||
&in_use_filenames,
|
||||
in_used,
|
||||
&may_linger_filenames,
|
||||
&eligible_for_removal,
|
||||
unknown_file_may_linger_until,
|
||||
|
||||
@@ -74,7 +74,7 @@ impl BulkIterContext {
|
||||
.collect();
|
||||
|
||||
let read_format = ReadFormat::new(
|
||||
region_metadata,
|
||||
region_metadata.clone(),
|
||||
projection,
|
||||
true,
|
||||
None,
|
||||
@@ -82,10 +82,18 @@ impl BulkIterContext {
|
||||
skip_auto_convert,
|
||||
)?;
|
||||
|
||||
let dyn_filters = predicate
|
||||
.as_ref()
|
||||
.map(|pred| pred.dyn_filters().clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
Ok(Self {
|
||||
base: RangeBase {
|
||||
filters: simple_filters,
|
||||
dyn_filters,
|
||||
read_format,
|
||||
prune_schema: region_metadata.schema.clone(),
|
||||
expected_metadata: Some(region_metadata),
|
||||
codec,
|
||||
// we don't need to compat batch since all batch in memtable have the same schema.
|
||||
compat_batch: None,
|
||||
|
||||
@@ -1181,7 +1181,9 @@ pub fn build_file_range_scan_stream(
|
||||
};
|
||||
for range in ranges {
|
||||
let build_reader_start = Instant::now();
|
||||
let reader = range.reader(stream_ctx.input.series_row_selector, fetch_metrics.as_deref()).await?;
|
||||
let Some(reader) = range.reader(stream_ctx.input.series_row_selector, fetch_metrics.as_deref()).await? else {
|
||||
continue;
|
||||
};
|
||||
let build_cost = build_reader_start.elapsed();
|
||||
part_metrics.inc_build_reader_cost(build_cost);
|
||||
let compat_batch = range.compat_batch();
|
||||
@@ -1239,7 +1241,7 @@ pub fn build_flat_file_range_scan_stream(
|
||||
};
|
||||
for range in ranges {
|
||||
let build_reader_start = Instant::now();
|
||||
let mut reader = range.flat_reader(fetch_metrics.as_deref()).await?;
|
||||
let Some(mut reader) = range.flat_reader(fetch_metrics.as_deref()).await? else { continue };
|
||||
let build_cost = build_reader_start.elapsed();
|
||||
part_metrics.inc_build_reader_cost(build_cost);
|
||||
|
||||
|
||||
@@ -95,21 +95,32 @@ mod tests {
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::OpType;
|
||||
use api::v1::{OpType, SemanticType};
|
||||
use common_function::function::FunctionRef;
|
||||
use common_function::function_factory::ScalarFunctionFactory;
|
||||
use common_function::scalars::matches::MatchesFunction;
|
||||
use common_function::scalars::matches_term::MatchesTermFunction;
|
||||
use common_time::Timestamp;
|
||||
use datafusion_common::{Column, ScalarValue};
|
||||
use datafusion_expr::expr::ScalarFunction;
|
||||
use datafusion_expr::{BinaryExpr, Expr, Literal, Operator, col, lit};
|
||||
use datatypes::arrow;
|
||||
use datatypes::arrow::array::{
|
||||
ArrayRef, BinaryDictionaryBuilder, RecordBatch, StringDictionaryBuilder,
|
||||
ArrayRef, BinaryDictionaryBuilder, RecordBatch, StringArray, StringDictionaryBuilder,
|
||||
TimestampMillisecondArray, UInt8Array, UInt64Array,
|
||||
};
|
||||
use datatypes::arrow::datatypes::{DataType, Field, Schema, UInt32Type};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{FulltextAnalyzer, FulltextBackend, FulltextOptions};
|
||||
use object_store::ObjectStore;
|
||||
use parquet::arrow::AsyncArrowWriter;
|
||||
use parquet::basic::{Compression, Encoding, ZstdLevel};
|
||||
use parquet::file::metadata::KeyValue;
|
||||
use parquet::file::properties::WriterProperties;
|
||||
use store_api::codec::PrimaryKeyEncoding;
|
||||
use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
|
||||
use store_api::region_request::PathType;
|
||||
use store_api::storage::{ColumnSchema, RegionId};
|
||||
use table::predicate::Predicate;
|
||||
use tokio_util::compat::FuturesAsyncWriteCompatExt;
|
||||
|
||||
@@ -122,6 +133,7 @@ mod tests {
|
||||
use crate::sst::file::{FileHandle, FileMeta, RegionFileId, RegionIndexId};
|
||||
use crate::sst::file_purger::NoopFilePurger;
|
||||
use crate::sst::index::bloom_filter::applier::BloomFilterIndexApplierBuilder;
|
||||
use crate::sst::index::fulltext_index::applier::builder::FulltextIndexApplierBuilder;
|
||||
use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder;
|
||||
use crate::sst::index::{IndexBuildType, Indexer, IndexerBuilder, IndexerBuilderImpl};
|
||||
use crate::sst::parquet::format::PrimaryKeyWriteFormat;
|
||||
@@ -133,11 +145,13 @@ mod tests {
|
||||
use crate::test_util::sst_util::{
|
||||
assert_parquet_metadata_eq, build_test_binary_test_region_metadata, new_batch_by_range,
|
||||
new_batch_with_binary, new_batch_with_custom_sequence, new_primary_key, new_source,
|
||||
sst_file_handle, sst_file_handle_with_file_id, sst_region_metadata,
|
||||
new_sparse_primary_key, sst_file_handle, sst_file_handle_with_file_id, sst_region_metadata,
|
||||
sst_region_metadata_with_encoding,
|
||||
};
|
||||
use crate::test_util::{TestEnv, check_reader_result};
|
||||
|
||||
const FILE_DIR: &str = "/";
|
||||
const REGION_ID: RegionId = RegionId::new(0, 0);
|
||||
|
||||
#[derive(Clone)]
|
||||
struct FixedPathProvider {
|
||||
@@ -1064,6 +1078,154 @@ mod tests {
|
||||
FlatSource::Iter(Box::new(batches.into_iter().map(Ok)))
|
||||
}
|
||||
|
||||
/// Creates a flat format RecordBatch for testing with sparse primary key encoding.
|
||||
/// Similar to `new_record_batch_by_range` but without individual primary key columns.
|
||||
fn new_record_batch_by_range_sparse(
|
||||
tags: &[&str],
|
||||
start: usize,
|
||||
end: usize,
|
||||
metadata: &Arc<RegionMetadata>,
|
||||
) -> RecordBatch {
|
||||
assert!(end >= start);
|
||||
let flat_schema = to_flat_sst_arrow_schema(
|
||||
metadata,
|
||||
&FlatSchemaOptions::from_encoding(PrimaryKeyEncoding::Sparse),
|
||||
);
|
||||
|
||||
let num_rows = end - start;
|
||||
let mut columns: Vec<ArrayRef> = Vec::new();
|
||||
|
||||
// NOTE: Individual primary key columns (tag_0, tag_1) are NOT included in sparse format
|
||||
|
||||
// Add field column (field_0)
|
||||
let field_values: Vec<u64> = (start..end).map(|v| v as u64).collect();
|
||||
columns.push(Arc::new(UInt64Array::from(field_values)) as ArrayRef);
|
||||
|
||||
// Add time index column (ts)
|
||||
let timestamps: Vec<i64> = (start..end).map(|v| v as i64).collect();
|
||||
columns.push(Arc::new(TimestampMillisecondArray::from(timestamps)) as ArrayRef);
|
||||
|
||||
// Add encoded primary key column using sparse encoding
|
||||
let table_id = 1u32; // Test table ID
|
||||
let tsid = 100u64; // Base TSID
|
||||
let pk = new_sparse_primary_key(tags, metadata, table_id, tsid);
|
||||
|
||||
let mut pk_builder = BinaryDictionaryBuilder::<UInt32Type>::new();
|
||||
for _ in 0..num_rows {
|
||||
pk_builder.append(&pk).unwrap();
|
||||
}
|
||||
columns.push(Arc::new(pk_builder.finish()) as ArrayRef);
|
||||
|
||||
// Add sequence column
|
||||
columns.push(Arc::new(UInt64Array::from_value(1000, num_rows)) as ArrayRef);
|
||||
|
||||
// Add op_type column
|
||||
columns.push(Arc::new(UInt8Array::from_value(OpType::Put as u8, num_rows)) as ArrayRef);
|
||||
|
||||
RecordBatch::try_new(flat_schema, columns).unwrap()
|
||||
}
|
||||
|
||||
/// Helper function to create IndexerBuilderImpl for tests.
|
||||
fn create_test_indexer_builder(
|
||||
env: &TestEnv,
|
||||
object_store: ObjectStore,
|
||||
file_path: RegionFilePathFactory,
|
||||
metadata: Arc<RegionMetadata>,
|
||||
row_group_size: usize,
|
||||
) -> IndexerBuilderImpl {
|
||||
let puffin_manager = env.get_puffin_manager().build(object_store, file_path);
|
||||
let intermediate_manager = env.get_intermediate_manager();
|
||||
|
||||
IndexerBuilderImpl {
|
||||
build_type: IndexBuildType::Flush,
|
||||
metadata,
|
||||
row_group_size,
|
||||
puffin_manager,
|
||||
write_cache_enabled: false,
|
||||
intermediate_manager,
|
||||
index_options: IndexOptions {
|
||||
inverted_index: InvertedIndexOptions {
|
||||
segment_row_count: 1,
|
||||
..Default::default()
|
||||
},
|
||||
},
|
||||
inverted_index_config: Default::default(),
|
||||
fulltext_index_config: Default::default(),
|
||||
bloom_filter_index_config: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to write flat SST and return SstInfo.
|
||||
async fn write_flat_sst(
|
||||
object_store: ObjectStore,
|
||||
metadata: Arc<RegionMetadata>,
|
||||
indexer_builder: IndexerBuilderImpl,
|
||||
file_path: RegionFilePathFactory,
|
||||
flat_source: FlatSource,
|
||||
write_opts: &WriteOptions,
|
||||
) -> SstInfo {
|
||||
let mut metrics = Metrics::new(WriteType::Flush);
|
||||
let mut writer = ParquetWriter::new_with_object_store(
|
||||
object_store,
|
||||
metadata,
|
||||
IndexConfig::default(),
|
||||
indexer_builder,
|
||||
file_path,
|
||||
&mut metrics,
|
||||
)
|
||||
.await;
|
||||
|
||||
writer
|
||||
.write_all_flat(flat_source, write_opts)
|
||||
.await
|
||||
.unwrap()
|
||||
.remove(0)
|
||||
}
|
||||
|
||||
/// Helper function to create FileHandle from SstInfo.
|
||||
fn create_file_handle_from_sst_info(
|
||||
info: &SstInfo,
|
||||
metadata: &Arc<RegionMetadata>,
|
||||
) -> FileHandle {
|
||||
FileHandle::new(
|
||||
FileMeta {
|
||||
region_id: metadata.region_id,
|
||||
file_id: info.file_id,
|
||||
time_range: info.time_range,
|
||||
level: 0,
|
||||
file_size: info.file_size,
|
||||
max_row_group_uncompressed_size: info.max_row_group_uncompressed_size,
|
||||
available_indexes: info.index_metadata.build_available_indexes(),
|
||||
indexes: info.index_metadata.build_indexes(),
|
||||
index_file_size: info.index_metadata.file_size,
|
||||
index_version: 0,
|
||||
num_row_groups: info.num_row_groups,
|
||||
num_rows: info.num_rows as u64,
|
||||
sequence: None,
|
||||
partition_expr: match &metadata.partition_expr {
|
||||
Some(json_str) => partition::expr::PartitionExpr::from_json_str(json_str)
|
||||
.expect("partition expression should be valid JSON"),
|
||||
None => None,
|
||||
},
|
||||
num_series: 0,
|
||||
},
|
||||
Arc::new(NoopFilePurger),
|
||||
)
|
||||
}
|
||||
|
||||
/// Helper function to create test cache with standard settings.
|
||||
fn create_test_cache() -> Arc<CacheManager> {
|
||||
Arc::new(
|
||||
CacheManager::builder()
|
||||
.index_result_cache_size(1024 * 1024)
|
||||
.index_metadata_size(1024 * 1024)
|
||||
.index_content_page_size(1024 * 1024)
|
||||
.index_content_size(1024 * 1024)
|
||||
.puffin_metadata_size(1024 * 1024)
|
||||
.build(),
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_write_flat_with_index() {
|
||||
let mut env = TestEnv::new().await;
|
||||
@@ -1238,4 +1400,709 @@ mod tests {
|
||||
assert_eq!(*override_batch, expected_batch);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_write_flat_read_with_inverted_index() {
|
||||
let mut env = TestEnv::new().await;
|
||||
let object_store = env.init_object_store_manager();
|
||||
let file_path = RegionFilePathFactory::new(FILE_DIR.to_string(), PathType::Bare);
|
||||
let metadata = Arc::new(sst_region_metadata());
|
||||
let row_group_size = 100;
|
||||
|
||||
// Create flat format RecordBatches with non-overlapping timestamp ranges
|
||||
// Each batch becomes one row group (row_group_size = 100)
|
||||
// Data: ts tag_0 tag_1
|
||||
// RG 0: 0-50 [a, d]
|
||||
// RG 0: 50-100 [b, d]
|
||||
// RG 1: 100-150 [c, d]
|
||||
// RG 1: 150-200 [c, f]
|
||||
let flat_batches = vec![
|
||||
new_record_batch_by_range(&["a", "d"], 0, 50),
|
||||
new_record_batch_by_range(&["b", "d"], 50, 100),
|
||||
new_record_batch_by_range(&["c", "d"], 100, 150),
|
||||
new_record_batch_by_range(&["c", "f"], 150, 200),
|
||||
];
|
||||
|
||||
let flat_source = new_flat_source_from_record_batches(flat_batches);
|
||||
|
||||
let write_opts = WriteOptions {
|
||||
row_group_size,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let indexer_builder = create_test_indexer_builder(
|
||||
&env,
|
||||
object_store.clone(),
|
||||
file_path.clone(),
|
||||
metadata.clone(),
|
||||
row_group_size,
|
||||
);
|
||||
|
||||
let info = write_flat_sst(
|
||||
object_store.clone(),
|
||||
metadata.clone(),
|
||||
indexer_builder,
|
||||
file_path.clone(),
|
||||
flat_source,
|
||||
&write_opts,
|
||||
)
|
||||
.await;
|
||||
assert_eq!(200, info.num_rows);
|
||||
assert!(info.file_size > 0);
|
||||
assert!(info.index_metadata.file_size > 0);
|
||||
|
||||
let handle = create_file_handle_from_sst_info(&info, &metadata);
|
||||
|
||||
let cache = create_test_cache();
|
||||
|
||||
// Test 1: Filter by tag_0 = "b"
|
||||
// Expected: Only rows with tag_0="b"
|
||||
let preds = vec![col("tag_0").eq(lit("b"))];
|
||||
let inverted_index_applier = InvertedIndexApplierBuilder::new(
|
||||
FILE_DIR.to_string(),
|
||||
PathType::Bare,
|
||||
object_store.clone(),
|
||||
&metadata,
|
||||
HashSet::from_iter([0]),
|
||||
env.get_puffin_manager(),
|
||||
)
|
||||
.with_puffin_metadata_cache(cache.puffin_metadata_cache().cloned())
|
||||
.with_inverted_index_cache(cache.inverted_index_cache().cloned())
|
||||
.build(&preds)
|
||||
.unwrap()
|
||||
.map(Arc::new);
|
||||
|
||||
let builder = ParquetReaderBuilder::new(
|
||||
FILE_DIR.to_string(),
|
||||
PathType::Bare,
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.inverted_index_appliers([inverted_index_applier.clone(), None])
|
||||
.cache(CacheStrategy::EnableAll(cache.clone()));
|
||||
|
||||
let mut metrics = ReaderMetrics::default();
|
||||
let (_context, selection) = builder.build_reader_input(&mut metrics).await.unwrap();
|
||||
|
||||
// Verify selection contains only RG 0 (tag_0="b", ts 0-100)
|
||||
assert_eq!(selection.row_group_count(), 1);
|
||||
assert_eq!(50, selection.get(0).unwrap().row_count());
|
||||
|
||||
// Verify filtering metrics
|
||||
assert_eq!(metrics.filter_metrics.rg_total, 2);
|
||||
assert_eq!(metrics.filter_metrics.rg_minmax_filtered, 1);
|
||||
assert_eq!(metrics.filter_metrics.rg_inverted_filtered, 0);
|
||||
assert_eq!(metrics.filter_metrics.rows_inverted_filtered, 50);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_write_flat_read_with_bloom_filter() {
|
||||
let mut env = TestEnv::new().await;
|
||||
let object_store = env.init_object_store_manager();
|
||||
let file_path = RegionFilePathFactory::new(FILE_DIR.to_string(), PathType::Bare);
|
||||
let metadata = Arc::new(sst_region_metadata());
|
||||
let row_group_size = 100;
|
||||
|
||||
// Create flat format RecordBatches with non-overlapping timestamp ranges
|
||||
// Each batch becomes one row group (row_group_size = 100)
|
||||
// Data: ts tag_0 tag_1
|
||||
// RG 0: 0-50 [a, d]
|
||||
// RG 0: 50-100 [b, e]
|
||||
// RG 1: 100-150 [c, d]
|
||||
// RG 1: 150-200 [c, f]
|
||||
let flat_batches = vec![
|
||||
new_record_batch_by_range(&["a", "d"], 0, 50),
|
||||
new_record_batch_by_range(&["b", "e"], 50, 100),
|
||||
new_record_batch_by_range(&["c", "d"], 100, 150),
|
||||
new_record_batch_by_range(&["c", "f"], 150, 200),
|
||||
];
|
||||
|
||||
let flat_source = new_flat_source_from_record_batches(flat_batches);
|
||||
|
||||
let write_opts = WriteOptions {
|
||||
row_group_size,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let indexer_builder = create_test_indexer_builder(
|
||||
&env,
|
||||
object_store.clone(),
|
||||
file_path.clone(),
|
||||
metadata.clone(),
|
||||
row_group_size,
|
||||
);
|
||||
|
||||
let info = write_flat_sst(
|
||||
object_store.clone(),
|
||||
metadata.clone(),
|
||||
indexer_builder,
|
||||
file_path.clone(),
|
||||
flat_source,
|
||||
&write_opts,
|
||||
)
|
||||
.await;
|
||||
assert_eq!(200, info.num_rows);
|
||||
assert!(info.file_size > 0);
|
||||
assert!(info.index_metadata.file_size > 0);
|
||||
|
||||
let handle = create_file_handle_from_sst_info(&info, &metadata);
|
||||
|
||||
let cache = create_test_cache();
|
||||
|
||||
// Filter by ts >= 50 AND ts < 200 AND tag_1 = "d"
|
||||
// Expected: RG 0 (ts 0-100) and RG 1 (ts 100-200), both have tag_1="d"
|
||||
let preds = vec![
|
||||
col("ts").gt_eq(lit(ScalarValue::TimestampMillisecond(Some(50), None))),
|
||||
col("ts").lt(lit(ScalarValue::TimestampMillisecond(Some(200), None))),
|
||||
col("tag_1").eq(lit("d")),
|
||||
];
|
||||
let bloom_filter_applier = BloomFilterIndexApplierBuilder::new(
|
||||
FILE_DIR.to_string(),
|
||||
PathType::Bare,
|
||||
object_store.clone(),
|
||||
&metadata,
|
||||
env.get_puffin_manager(),
|
||||
)
|
||||
.with_puffin_metadata_cache(cache.puffin_metadata_cache().cloned())
|
||||
.with_bloom_filter_index_cache(cache.bloom_filter_index_cache().cloned())
|
||||
.build(&preds)
|
||||
.unwrap()
|
||||
.map(Arc::new);
|
||||
|
||||
let builder = ParquetReaderBuilder::new(
|
||||
FILE_DIR.to_string(),
|
||||
PathType::Bare,
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.bloom_filter_index_appliers([None, bloom_filter_applier.clone()])
|
||||
.cache(CacheStrategy::EnableAll(cache.clone()));
|
||||
|
||||
let mut metrics = ReaderMetrics::default();
|
||||
let (_context, selection) = builder.build_reader_input(&mut metrics).await.unwrap();
|
||||
|
||||
// Verify selection contains RG 0 and RG 1
|
||||
assert_eq!(selection.row_group_count(), 2);
|
||||
assert_eq!(50, selection.get(0).unwrap().row_count());
|
||||
assert_eq!(50, selection.get(1).unwrap().row_count());
|
||||
|
||||
// Verify filtering metrics
|
||||
assert_eq!(metrics.filter_metrics.rg_total, 2);
|
||||
assert_eq!(metrics.filter_metrics.rg_minmax_filtered, 0);
|
||||
assert_eq!(metrics.filter_metrics.rg_bloom_filtered, 0);
|
||||
assert_eq!(metrics.filter_metrics.rows_bloom_filtered, 100);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_write_flat_read_with_inverted_index_sparse() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let mut env = TestEnv::new().await;
|
||||
let object_store = env.init_object_store_manager();
|
||||
let file_path = RegionFilePathFactory::new(FILE_DIR.to_string(), PathType::Bare);
|
||||
let metadata = Arc::new(sst_region_metadata_with_encoding(
|
||||
PrimaryKeyEncoding::Sparse,
|
||||
));
|
||||
let row_group_size = 100;
|
||||
|
||||
// Create flat format RecordBatches with non-overlapping timestamp ranges
|
||||
// Each batch becomes one row group (row_group_size = 100)
|
||||
// Data: ts tag_0 tag_1
|
||||
// RG 0: 0-50 [a, d]
|
||||
// RG 0: 50-100 [b, d]
|
||||
// RG 1: 100-150 [c, d]
|
||||
// RG 1: 150-200 [c, f]
|
||||
let flat_batches = vec![
|
||||
new_record_batch_by_range_sparse(&["a", "d"], 0, 50, &metadata),
|
||||
new_record_batch_by_range_sparse(&["b", "d"], 50, 100, &metadata),
|
||||
new_record_batch_by_range_sparse(&["c", "d"], 100, 150, &metadata),
|
||||
new_record_batch_by_range_sparse(&["c", "f"], 150, 200, &metadata),
|
||||
];
|
||||
|
||||
let flat_source = new_flat_source_from_record_batches(flat_batches);
|
||||
|
||||
let write_opts = WriteOptions {
|
||||
row_group_size,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let indexer_builder = create_test_indexer_builder(
|
||||
&env,
|
||||
object_store.clone(),
|
||||
file_path.clone(),
|
||||
metadata.clone(),
|
||||
row_group_size,
|
||||
);
|
||||
|
||||
let info = write_flat_sst(
|
||||
object_store.clone(),
|
||||
metadata.clone(),
|
||||
indexer_builder,
|
||||
file_path.clone(),
|
||||
flat_source,
|
||||
&write_opts,
|
||||
)
|
||||
.await;
|
||||
assert_eq!(200, info.num_rows);
|
||||
assert!(info.file_size > 0);
|
||||
assert!(info.index_metadata.file_size > 0);
|
||||
|
||||
let handle = create_file_handle_from_sst_info(&info, &metadata);
|
||||
|
||||
let cache = create_test_cache();
|
||||
|
||||
// Test 1: Filter by tag_0 = "b"
|
||||
// Expected: Only rows with tag_0="b"
|
||||
let preds = vec![col("tag_0").eq(lit("b"))];
|
||||
let inverted_index_applier = InvertedIndexApplierBuilder::new(
|
||||
FILE_DIR.to_string(),
|
||||
PathType::Bare,
|
||||
object_store.clone(),
|
||||
&metadata,
|
||||
HashSet::from_iter([0]),
|
||||
env.get_puffin_manager(),
|
||||
)
|
||||
.with_puffin_metadata_cache(cache.puffin_metadata_cache().cloned())
|
||||
.with_inverted_index_cache(cache.inverted_index_cache().cloned())
|
||||
.build(&preds)
|
||||
.unwrap()
|
||||
.map(Arc::new);
|
||||
|
||||
let builder = ParquetReaderBuilder::new(
|
||||
FILE_DIR.to_string(),
|
||||
PathType::Bare,
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.inverted_index_appliers([inverted_index_applier.clone(), None])
|
||||
.cache(CacheStrategy::EnableAll(cache.clone()));
|
||||
|
||||
let mut metrics = ReaderMetrics::default();
|
||||
let (_context, selection) = builder.build_reader_input(&mut metrics).await.unwrap();
|
||||
|
||||
// RG 0 has 50 matching rows (tag_0="b")
|
||||
assert_eq!(selection.row_group_count(), 1);
|
||||
assert_eq!(50, selection.get(0).unwrap().row_count());
|
||||
|
||||
// Verify filtering metrics
|
||||
// Note: With sparse encoding, tag columns aren't stored separately,
|
||||
// so minmax filtering on tags doesn't work (only inverted index)
|
||||
assert_eq!(metrics.filter_metrics.rg_total, 2);
|
||||
assert_eq!(metrics.filter_metrics.rg_minmax_filtered, 0); // No minmax stats for tags in sparse format
|
||||
assert_eq!(metrics.filter_metrics.rg_inverted_filtered, 1);
|
||||
assert_eq!(metrics.filter_metrics.rows_inverted_filtered, 150);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_write_flat_read_with_bloom_filter_sparse() {
|
||||
let mut env = TestEnv::new().await;
|
||||
let object_store = env.init_object_store_manager();
|
||||
let file_path = RegionFilePathFactory::new(FILE_DIR.to_string(), PathType::Bare);
|
||||
let metadata = Arc::new(sst_region_metadata_with_encoding(
|
||||
PrimaryKeyEncoding::Sparse,
|
||||
));
|
||||
let row_group_size = 100;
|
||||
|
||||
// Create flat format RecordBatches with non-overlapping timestamp ranges
|
||||
// Each batch becomes one row group (row_group_size = 100)
|
||||
// Data: ts tag_0 tag_1
|
||||
// RG 0: 0-50 [a, d]
|
||||
// RG 0: 50-100 [b, e]
|
||||
// RG 1: 100-150 [c, d]
|
||||
// RG 1: 150-200 [c, f]
|
||||
let flat_batches = vec![
|
||||
new_record_batch_by_range_sparse(&["a", "d"], 0, 50, &metadata),
|
||||
new_record_batch_by_range_sparse(&["b", "e"], 50, 100, &metadata),
|
||||
new_record_batch_by_range_sparse(&["c", "d"], 100, 150, &metadata),
|
||||
new_record_batch_by_range_sparse(&["c", "f"], 150, 200, &metadata),
|
||||
];
|
||||
|
||||
let flat_source = new_flat_source_from_record_batches(flat_batches);
|
||||
|
||||
let write_opts = WriteOptions {
|
||||
row_group_size,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let indexer_builder = create_test_indexer_builder(
|
||||
&env,
|
||||
object_store.clone(),
|
||||
file_path.clone(),
|
||||
metadata.clone(),
|
||||
row_group_size,
|
||||
);
|
||||
|
||||
let info = write_flat_sst(
|
||||
object_store.clone(),
|
||||
metadata.clone(),
|
||||
indexer_builder,
|
||||
file_path.clone(),
|
||||
flat_source,
|
||||
&write_opts,
|
||||
)
|
||||
.await;
|
||||
assert_eq!(200, info.num_rows);
|
||||
assert!(info.file_size > 0);
|
||||
assert!(info.index_metadata.file_size > 0);
|
||||
|
||||
let handle = create_file_handle_from_sst_info(&info, &metadata);
|
||||
|
||||
let cache = create_test_cache();
|
||||
|
||||
// Filter by ts >= 50 AND ts < 200 AND tag_1 = "d"
|
||||
// Expected: RG 0 (ts 0-100) and RG 1 (ts 100-200), both have tag_1="d"
|
||||
let preds = vec![
|
||||
col("ts").gt_eq(lit(ScalarValue::TimestampMillisecond(Some(50), None))),
|
||||
col("ts").lt(lit(ScalarValue::TimestampMillisecond(Some(200), None))),
|
||||
col("tag_1").eq(lit("d")),
|
||||
];
|
||||
let bloom_filter_applier = BloomFilterIndexApplierBuilder::new(
|
||||
FILE_DIR.to_string(),
|
||||
PathType::Bare,
|
||||
object_store.clone(),
|
||||
&metadata,
|
||||
env.get_puffin_manager(),
|
||||
)
|
||||
.with_puffin_metadata_cache(cache.puffin_metadata_cache().cloned())
|
||||
.with_bloom_filter_index_cache(cache.bloom_filter_index_cache().cloned())
|
||||
.build(&preds)
|
||||
.unwrap()
|
||||
.map(Arc::new);
|
||||
|
||||
let builder = ParquetReaderBuilder::new(
|
||||
FILE_DIR.to_string(),
|
||||
PathType::Bare,
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.bloom_filter_index_appliers([None, bloom_filter_applier.clone()])
|
||||
.cache(CacheStrategy::EnableAll(cache.clone()));
|
||||
|
||||
let mut metrics = ReaderMetrics::default();
|
||||
let (_context, selection) = builder.build_reader_input(&mut metrics).await.unwrap();
|
||||
|
||||
// Verify selection contains RG 0 and RG 1
|
||||
assert_eq!(selection.row_group_count(), 2);
|
||||
assert_eq!(50, selection.get(0).unwrap().row_count());
|
||||
assert_eq!(50, selection.get(1).unwrap().row_count());
|
||||
|
||||
// Verify filtering metrics
|
||||
assert_eq!(metrics.filter_metrics.rg_total, 2);
|
||||
assert_eq!(metrics.filter_metrics.rg_minmax_filtered, 0);
|
||||
assert_eq!(metrics.filter_metrics.rg_bloom_filtered, 0);
|
||||
assert_eq!(metrics.filter_metrics.rows_bloom_filtered, 100);
|
||||
}
|
||||
|
||||
/// Creates region metadata for testing fulltext indexes.
|
||||
/// Schema: tag_0, text_bloom, text_tantivy, field_0, ts
|
||||
fn fulltext_region_metadata() -> RegionMetadata {
|
||||
let mut builder = RegionMetadataBuilder::new(REGION_ID);
|
||||
builder
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
"tag_0".to_string(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
semantic_type: SemanticType::Tag,
|
||||
column_id: 0,
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
"text_bloom".to_string(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
)
|
||||
.with_fulltext_options(FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Bloom,
|
||||
granularity: 1,
|
||||
false_positive_rate_in_10000: 50,
|
||||
})
|
||||
.unwrap(),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 1,
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
"text_tantivy".to_string(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
)
|
||||
.with_fulltext_options(FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Tantivy,
|
||||
granularity: 1,
|
||||
false_positive_rate_in_10000: 50,
|
||||
})
|
||||
.unwrap(),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 2,
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
"field_0".to_string(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
true,
|
||||
),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 3,
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
"ts".to_string(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
),
|
||||
semantic_type: SemanticType::Timestamp,
|
||||
column_id: 4,
|
||||
})
|
||||
.primary_key(vec![0]);
|
||||
builder.build().unwrap()
|
||||
}
|
||||
|
||||
/// Creates a flat format RecordBatch with string fields for fulltext testing.
|
||||
fn new_fulltext_record_batch_by_range(
|
||||
tag: &str,
|
||||
text_bloom: &str,
|
||||
text_tantivy: &str,
|
||||
start: usize,
|
||||
end: usize,
|
||||
) -> RecordBatch {
|
||||
assert!(end >= start);
|
||||
let metadata = Arc::new(fulltext_region_metadata());
|
||||
let flat_schema = to_flat_sst_arrow_schema(&metadata, &FlatSchemaOptions::default());
|
||||
|
||||
let num_rows = end - start;
|
||||
let mut columns = Vec::new();
|
||||
|
||||
// Add primary key column (tag_0) as dictionary array
|
||||
let mut tag_builder = StringDictionaryBuilder::<UInt32Type>::new();
|
||||
for _ in 0..num_rows {
|
||||
tag_builder.append_value(tag);
|
||||
}
|
||||
columns.push(Arc::new(tag_builder.finish()) as ArrayRef);
|
||||
|
||||
// Add text_bloom field (fulltext with bloom backend)
|
||||
let text_bloom_values: Vec<_> = (0..num_rows).map(|_| text_bloom).collect();
|
||||
columns.push(Arc::new(StringArray::from(text_bloom_values)));
|
||||
|
||||
// Add text_tantivy field (fulltext with tantivy backend)
|
||||
let text_tantivy_values: Vec<_> = (0..num_rows).map(|_| text_tantivy).collect();
|
||||
columns.push(Arc::new(StringArray::from(text_tantivy_values)));
|
||||
|
||||
// Add field column (field_0)
|
||||
let field_values: Vec<u64> = (start..end).map(|v| v as u64).collect();
|
||||
columns.push(Arc::new(UInt64Array::from(field_values)));
|
||||
|
||||
// Add time index column (ts)
|
||||
let timestamps: Vec<i64> = (start..end).map(|v| v as i64).collect();
|
||||
columns.push(Arc::new(TimestampMillisecondArray::from(timestamps)));
|
||||
|
||||
// Add encoded primary key column
|
||||
let pk = new_primary_key(&[tag]);
|
||||
let mut pk_builder = BinaryDictionaryBuilder::<UInt32Type>::new();
|
||||
for _ in 0..num_rows {
|
||||
pk_builder.append(&pk).unwrap();
|
||||
}
|
||||
columns.push(Arc::new(pk_builder.finish()));
|
||||
|
||||
// Add sequence column
|
||||
columns.push(Arc::new(UInt64Array::from_value(1000, num_rows)));
|
||||
|
||||
// Add op_type column
|
||||
columns.push(Arc::new(UInt8Array::from_value(
|
||||
OpType::Put as u8,
|
||||
num_rows,
|
||||
)));
|
||||
|
||||
RecordBatch::try_new(flat_schema, columns).unwrap()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_write_flat_read_with_fulltext_index() {
|
||||
let mut env = TestEnv::new().await;
|
||||
let object_store = env.init_object_store_manager();
|
||||
let file_path = RegionFilePathFactory::new(FILE_DIR.to_string(), PathType::Bare);
|
||||
let metadata = Arc::new(fulltext_region_metadata());
|
||||
let row_group_size = 50;
|
||||
|
||||
// Create flat format RecordBatches with different text content
|
||||
// RG 0: 0-50 tag="a", bloom="hello world", tantivy="quick brown fox"
|
||||
// RG 1: 50-100 tag="b", bloom="hello world", tantivy="quick brown fox"
|
||||
// RG 2: 100-150 tag="c", bloom="goodbye world", tantivy="lazy dog"
|
||||
// RG 3: 150-200 tag="d", bloom="goodbye world", tantivy="lazy dog"
|
||||
let flat_batches = vec![
|
||||
new_fulltext_record_batch_by_range("a", "hello world", "quick brown fox", 0, 50),
|
||||
new_fulltext_record_batch_by_range("b", "hello world", "quick brown fox", 50, 100),
|
||||
new_fulltext_record_batch_by_range("c", "goodbye world", "lazy dog", 100, 150),
|
||||
new_fulltext_record_batch_by_range("d", "goodbye world", "lazy dog", 150, 200),
|
||||
];
|
||||
|
||||
let flat_source = new_flat_source_from_record_batches(flat_batches);
|
||||
|
||||
let write_opts = WriteOptions {
|
||||
row_group_size,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let indexer_builder = create_test_indexer_builder(
|
||||
&env,
|
||||
object_store.clone(),
|
||||
file_path.clone(),
|
||||
metadata.clone(),
|
||||
row_group_size,
|
||||
);
|
||||
|
||||
let mut info = write_flat_sst(
|
||||
object_store.clone(),
|
||||
metadata.clone(),
|
||||
indexer_builder,
|
||||
file_path.clone(),
|
||||
flat_source,
|
||||
&write_opts,
|
||||
)
|
||||
.await;
|
||||
assert_eq!(200, info.num_rows);
|
||||
assert!(info.file_size > 0);
|
||||
assert!(info.index_metadata.file_size > 0);
|
||||
|
||||
// Verify fulltext indexes were created
|
||||
assert!(info.index_metadata.fulltext_index.index_size > 0);
|
||||
assert_eq!(info.index_metadata.fulltext_index.row_count, 200);
|
||||
// text_bloom (column_id 1) and text_tantivy (column_id 2)
|
||||
info.index_metadata.fulltext_index.columns.sort_unstable();
|
||||
assert_eq!(info.index_metadata.fulltext_index.columns, vec![1, 2]);
|
||||
|
||||
assert_eq!(
|
||||
(
|
||||
Timestamp::new_millisecond(0),
|
||||
Timestamp::new_millisecond(199)
|
||||
),
|
||||
info.time_range
|
||||
);
|
||||
|
||||
let handle = create_file_handle_from_sst_info(&info, &metadata);
|
||||
|
||||
let cache = create_test_cache();
|
||||
|
||||
// Helper functions to create fulltext function expressions
|
||||
let matches_func = || {
|
||||
Arc::new(
|
||||
ScalarFunctionFactory::from(Arc::new(MatchesFunction::default()) as FunctionRef)
|
||||
.provide(Default::default()),
|
||||
)
|
||||
};
|
||||
|
||||
let matches_term_func = || {
|
||||
Arc::new(
|
||||
ScalarFunctionFactory::from(
|
||||
Arc::new(MatchesTermFunction::default()) as FunctionRef,
|
||||
)
|
||||
.provide(Default::default()),
|
||||
)
|
||||
};
|
||||
|
||||
// Test 1: Filter by text_bloom field using matches_term (bloom backend)
|
||||
// Expected: RG 0 and RG 1 (rows 0-100), which contain the "hello" term
|
||||
let preds = vec![Expr::ScalarFunction(ScalarFunction {
|
||||
args: vec![col("text_bloom"), "hello".lit()],
|
||||
func: matches_term_func(),
|
||||
})];
|
||||
|
||||
let fulltext_applier = FulltextIndexApplierBuilder::new(
|
||||
FILE_DIR.to_string(),
|
||||
PathType::Bare,
|
||||
object_store.clone(),
|
||||
env.get_puffin_manager(),
|
||||
&metadata,
|
||||
)
|
||||
.with_puffin_metadata_cache(cache.puffin_metadata_cache().cloned())
|
||||
.with_bloom_filter_cache(cache.bloom_filter_index_cache().cloned())
|
||||
.build(&preds)
|
||||
.unwrap()
|
||||
.map(Arc::new);
|
||||
|
||||
let builder = ParquetReaderBuilder::new(
|
||||
FILE_DIR.to_string(),
|
||||
PathType::Bare,
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.fulltext_index_appliers([None, fulltext_applier.clone()])
|
||||
.cache(CacheStrategy::EnableAll(cache.clone()));
|
||||
|
||||
let mut metrics = ReaderMetrics::default();
|
||||
let (_context, selection) = builder.build_reader_input(&mut metrics).await.unwrap();
|
||||
|
||||
// Verify selection contains RG 0 and RG 1 (text_bloom="hello world")
|
||||
assert_eq!(selection.row_group_count(), 2);
|
||||
assert_eq!(50, selection.get(0).unwrap().row_count());
|
||||
assert_eq!(50, selection.get(1).unwrap().row_count());
|
||||
|
||||
// Verify filtering metrics
|
||||
assert_eq!(metrics.filter_metrics.rg_total, 4);
|
||||
assert_eq!(metrics.filter_metrics.rg_minmax_filtered, 0);
|
||||
assert_eq!(metrics.filter_metrics.rg_fulltext_filtered, 2);
|
||||
assert_eq!(metrics.filter_metrics.rows_fulltext_filtered, 100);
|
||||
|
||||
// Test 2: Filter by text_tantivy field using matches (tantivy backend)
|
||||
// Expected: RG 2 and RG 3 (rows 100-200), which match the "lazy" query
|
||||
let preds = vec![Expr::ScalarFunction(ScalarFunction {
|
||||
args: vec![col("text_tantivy"), "lazy".lit()],
|
||||
func: matches_func(),
|
||||
})];
|
||||
|
||||
let fulltext_applier = FulltextIndexApplierBuilder::new(
|
||||
FILE_DIR.to_string(),
|
||||
PathType::Bare,
|
||||
object_store.clone(),
|
||||
env.get_puffin_manager(),
|
||||
&metadata,
|
||||
)
|
||||
.with_puffin_metadata_cache(cache.puffin_metadata_cache().cloned())
|
||||
.with_bloom_filter_cache(cache.bloom_filter_index_cache().cloned())
|
||||
.build(&preds)
|
||||
.unwrap()
|
||||
.map(Arc::new);
|
||||
|
||||
let builder = ParquetReaderBuilder::new(
|
||||
FILE_DIR.to_string(),
|
||||
PathType::Bare,
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.fulltext_index_appliers([None, fulltext_applier.clone()])
|
||||
.cache(CacheStrategy::EnableAll(cache.clone()));
|
||||
|
||||
let mut metrics = ReaderMetrics::default();
|
||||
let (_context, selection) = builder.build_reader_input(&mut metrics).await.unwrap();
|
||||
|
||||
// Verify selection contains RG 2 and RG 3 (text_tantivy="lazy dog")
|
||||
assert_eq!(selection.row_group_count(), 2);
|
||||
assert_eq!(50, selection.get(2).unwrap().row_count());
|
||||
assert_eq!(50, selection.get(3).unwrap().row_count());
|
||||
|
||||
// Verify filtering metrics
|
||||
assert_eq!(metrics.filter_metrics.rg_total, 4);
|
||||
assert_eq!(metrics.filter_metrics.rg_minmax_filtered, 0);
|
||||
assert_eq!(metrics.filter_metrics.rg_fulltext_filtered, 2);
|
||||
assert_eq!(metrics.filter_metrics.rows_fulltext_filtered, 100);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,15 +21,19 @@ use std::sync::Arc;
|
||||
|
||||
use api::v1::{OpType, SemanticType};
|
||||
use common_telemetry::error;
|
||||
use datafusion::physical_plan::expressions::DynamicFilterPhysicalExpr;
|
||||
use datatypes::arrow::array::{ArrayRef, BooleanArray};
|
||||
use datatypes::arrow::buffer::BooleanBuffer;
|
||||
use datatypes::arrow::record_batch::RecordBatch;
|
||||
use datatypes::schema::Schema;
|
||||
use mito_codec::row_converter::{CompositeValues, PrimaryKeyCodec};
|
||||
use parquet::arrow::arrow_reader::RowSelection;
|
||||
use parquet::file::metadata::ParquetMetaData;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::codec::PrimaryKeyEncoding;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::storage::{ColumnId, TimeSeriesRowSelector};
|
||||
use table::predicate::Predicate;
|
||||
|
||||
use crate::error::{
|
||||
ComputeArrowSnafu, DataTypeMismatchSnafu, DecodeSnafu, DecodeStatsSnafu, RecordBatchSnafu,
|
||||
@@ -46,6 +50,7 @@ use crate::sst::parquet::reader::{
|
||||
FlatRowGroupReader, MaybeFilter, RowGroupReader, RowGroupReaderBuilder, SimpleFilterContext,
|
||||
};
|
||||
use crate::sst::parquet::row_group::ParquetFetchMetrics;
|
||||
use crate::sst::parquet::stats::RowGroupPruningStats;
|
||||
|
||||
/// Checks if a row group contains delete operations by examining the min value of op_type column.
|
||||
///
|
||||
@@ -114,12 +119,62 @@ impl FileRange {
|
||||
row_selection.row_count() == rows_in_group as usize
|
||||
}
|
||||
|
||||
/// Performs pruning before reading the [FileRange].
/// It uses the latest dynamic filters together with row group statistics to prune the range.
///
/// Returns false if the entire range is pruned and can be skipped.
fn in_dynamic_filter_range(&self) -> bool {
|
||||
if self.context.base.dyn_filters.is_empty() {
|
||||
return true;
|
||||
}
|
||||
let curr_row_group = self
|
||||
.context
|
||||
.reader_builder
|
||||
.parquet_metadata()
|
||||
.row_group(self.row_group_idx);
|
||||
let read_format = self.context.read_format();
|
||||
let prune_schema = &self.context.base.prune_schema;
|
||||
let stats = RowGroupPruningStats::new(
|
||||
std::slice::from_ref(curr_row_group),
|
||||
read_format,
|
||||
self.context.base.expected_metadata.clone(),
|
||||
self.compute_skip_fields(),
|
||||
);
|
||||
|
||||
// not costly to create a predicate here since dynamic filters are wrapped in Arc
|
||||
let pred = Predicate::new(vec![]).with_dyn_filters(self.context.base.dyn_filters.clone());
|
||||
|
||||
pred.prune_with_stats(&stats, prune_schema.arrow_schema())
|
||||
.first()
|
||||
.cloned()
|
||||
.unwrap_or(true) // unexpected; don't skip, just in case
|
||||
}
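// A minimal, self-contained sketch of the pruning idea in
// `in_dynamic_filter_range` (an illustration under assumptions, not the real
// DataFusion types): an ascending TopK publishes a dynamic filter shaped like
// `ts < threshold`, and a row group whose minimum timestamp already fails that
// predicate cannot contribute rows, so the whole range can be skipped.
fn row_group_may_match(min_ts: i64, threshold: i64) -> bool {
    min_ts < threshold
}

fn prune_row_groups(min_ts_per_group: &[i64], threshold: i64) -> Vec<usize> {
    min_ts_per_group
        .iter()
        .copied()
        .enumerate()
        .filter(|&(_, min_ts)| row_group_may_match(min_ts, threshold))
        .map(|(idx, _)| idx)
        .collect()
}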
|
||||
|
||||
fn compute_skip_fields(&self) -> bool {
|
||||
match self.context.base.pre_filter_mode {
|
||||
PreFilterMode::All => false,
|
||||
PreFilterMode::SkipFields => true,
|
||||
PreFilterMode::SkipFieldsOnDelete => {
|
||||
// Check if this specific row group contains delete op
|
||||
row_group_contains_delete(
|
||||
self.context.reader_builder.parquet_metadata(),
|
||||
self.row_group_idx,
|
||||
self.context.reader_builder.file_path(),
|
||||
)
|
||||
.unwrap_or(true)
|
||||
}
|
||||
}
|
||||
}
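// A rough, self-contained sketch of the `row_group_contains_delete` idea used
// above (the numeric encoding here is an assumption for illustration, not the
// actual OpType definition): if delete markers sort below puts in the op_type
// column, a row-group min statistic below Put implies at least one delete row,
// so field columns cannot be skipped safely for that row group.
#[repr(u8)]
enum SketchOpType {
    // Hypothetical encoding used only for this illustration.
    Delete = 0,
    Put = 1,
}

fn sketch_row_group_contains_delete(min_op_type: u8) -> bool {
    // Any min value below the Put marker can only come from a delete row.
    min_op_type < SketchOpType::Put as u8
}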
|
||||
|
||||
/// Returns a reader to read the [FileRange].
|
||||
pub(crate) async fn reader(
|
||||
&self,
|
||||
selector: Option<TimeSeriesRowSelector>,
|
||||
fetch_metrics: Option<&ParquetFetchMetrics>,
|
||||
) -> Result<PruneReader> {
|
||||
) -> Result<Option<PruneReader>> {
|
||||
if !self.in_dynamic_filter_range() {
|
||||
return Ok(None);
|
||||
}
|
||||
let parquet_reader = self
|
||||
.context
|
||||
.reader_builder
|
||||
@@ -170,14 +225,17 @@ impl FileRange {
|
||||
)
|
||||
};
|
||||
|
||||
Ok(prune_reader)
|
||||
Ok(Some(prune_reader))
|
||||
}
|
||||
|
||||
/// Creates a flat reader that returns RecordBatch.
|
||||
pub(crate) async fn flat_reader(
|
||||
&self,
|
||||
fetch_metrics: Option<&ParquetFetchMetrics>,
|
||||
) -> Result<FlatPruneReader> {
|
||||
) -> Result<Option<FlatPruneReader>> {
|
||||
if !self.in_dynamic_filter_range() {
|
||||
return Ok(None);
|
||||
}
|
||||
let parquet_reader = self
|
||||
.context
|
||||
.reader_builder
|
||||
@@ -198,7 +256,7 @@ impl FileRange {
|
||||
skip_fields,
|
||||
);
|
||||
|
||||
Ok(flat_prune_reader)
|
||||
Ok(Some(flat_prune_reader))
|
||||
}
|
||||
|
||||
/// Returns the helper to compat batches.
|
||||
@@ -224,22 +282,10 @@ pub(crate) type FileRangeContextRef = Arc<FileRangeContext>;
|
||||
|
||||
impl FileRangeContext {
|
||||
/// Creates a new [FileRangeContext].
|
||||
pub(crate) fn new(
|
||||
reader_builder: RowGroupReaderBuilder,
|
||||
filters: Vec<SimpleFilterContext>,
|
||||
read_format: ReadFormat,
|
||||
codec: Arc<dyn PrimaryKeyCodec>,
|
||||
pre_filter_mode: PreFilterMode,
|
||||
) -> Self {
|
||||
pub(crate) fn new(reader_builder: RowGroupReaderBuilder, base: RangeBase) -> Self {
|
||||
Self {
|
||||
reader_builder,
|
||||
base: RangeBase {
|
||||
filters,
|
||||
read_format,
|
||||
codec,
|
||||
compat_batch: None,
|
||||
pre_filter_mode,
|
||||
},
|
||||
base,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -323,8 +369,13 @@ pub enum PreFilterMode {
|
||||
pub(crate) struct RangeBase {
|
||||
/// Filters pushed down.
|
||||
pub(crate) filters: Vec<SimpleFilterContext>,
|
||||
/// Dynamic filter physical exprs.
|
||||
pub(crate) dyn_filters: Arc<Vec<DynamicFilterPhysicalExpr>>,
|
||||
/// Helper to read the SST.
|
||||
pub(crate) read_format: ReadFormat,
|
||||
pub(crate) expected_metadata: Option<RegionMetadataRef>,
|
||||
/// Schema used for pruning with dynamic filters.
|
||||
pub(crate) prune_schema: Arc<Schema>,
|
||||
/// Decoder for primary keys
|
||||
pub(crate) codec: Arc<dyn PrimaryKeyCodec>,
|
||||
/// Optional helper to compat batches.
|
||||
|
||||
@@ -62,7 +62,7 @@ use crate::sst::index::inverted_index::applier::{
|
||||
InvertedIndexApplierRef, InvertedIndexApplyMetrics,
|
||||
};
|
||||
use crate::sst::parquet::file_range::{
|
||||
FileRangeContext, FileRangeContextRef, PreFilterMode, row_group_contains_delete,
|
||||
FileRangeContext, FileRangeContextRef, PreFilterMode, RangeBase, row_group_contains_delete,
|
||||
};
|
||||
use crate::sst::parquet::format::{ReadFormat, need_override_sequence};
|
||||
use crate::sst::parquet::metadata::MetadataLoader;
|
||||
@@ -342,6 +342,12 @@ impl ParquetReaderBuilder {
|
||||
);
|
||||
}
|
||||
|
||||
let prune_schema = self
|
||||
.expected_metadata
|
||||
.as_ref()
|
||||
.map(|meta| meta.schema.clone())
|
||||
.unwrap_or_else(|| region_meta.schema.clone());
|
||||
|
||||
let reader_builder = RowGroupReaderBuilder {
|
||||
file_handle: self.file_handle.clone(),
|
||||
file_path,
|
||||
@@ -368,14 +374,26 @@ impl ParquetReaderBuilder {
|
||||
vec![]
|
||||
};
|
||||
|
||||
let dyn_filters = if let Some(predicate) = &self.predicate {
|
||||
predicate.dyn_filters().clone()
|
||||
} else {
|
||||
Arc::new(vec![])
|
||||
};
|
||||
|
||||
let codec = build_primary_key_codec(read_format.metadata());
|
||||
|
||||
let context = FileRangeContext::new(
|
||||
reader_builder,
|
||||
filters,
|
||||
read_format,
|
||||
codec,
|
||||
self.pre_filter_mode,
|
||||
RangeBase {
|
||||
filters,
|
||||
dyn_filters,
|
||||
read_format,
|
||||
expected_metadata: self.expected_metadata.clone(),
|
||||
prune_schema,
|
||||
codec,
|
||||
compat_batch: None,
|
||||
pre_filter_mode: self.pre_filter_mode,
|
||||
},
|
||||
);
|
||||
|
||||
metrics.build_cost += start.elapsed();
|
||||
|
||||
@@ -29,7 +29,7 @@ use tokio::sync::mpsc::Sender;
|
||||
use crate::access_layer::{AccessLayer, AccessLayerRef};
|
||||
use crate::cache::CacheManager;
|
||||
use crate::compaction::CompactionScheduler;
|
||||
use crate::compaction::memory_manager::{CompactionMemoryManager, new_compaction_memory_manager};
|
||||
use crate::compaction::memory_manager::new_compaction_memory_manager;
|
||||
use crate::config::MitoConfig;
|
||||
use crate::error::Result;
|
||||
use crate::flush::FlushScheduler;
|
||||
|
||||
@@ -27,6 +27,10 @@ use parquet::file::metadata::ParquetMetaData;
|
||||
use store_api::metadata::{
|
||||
ColumnMetadata, RegionMetadata, RegionMetadataBuilder, RegionMetadataRef,
|
||||
};
|
||||
use store_api::metric_engine_consts::{
|
||||
DATA_SCHEMA_TABLE_ID_COLUMN_NAME, DATA_SCHEMA_TSID_COLUMN_NAME,
|
||||
};
|
||||
use store_api::storage::consts::ReservedColumnId;
|
||||
use store_api::storage::{FileId, RegionId};
|
||||
|
||||
use crate::read::{Batch, BatchBuilder, Source};
|
||||
@@ -36,11 +40,44 @@ use crate::test_util::{VecBatchReader, new_batch_builder, new_noop_file_purger};
|
||||
/// Test region id.
|
||||
const REGION_ID: RegionId = RegionId::new(0, 0);
|
||||
|
||||
/// Creates a new region metadata for testing SSTs.
|
||||
/// Creates a new region metadata for testing SSTs with the specified encoding.
|
||||
///
|
||||
/// Schema: tag_0, tag_1, field_0, ts
|
||||
pub fn sst_region_metadata() -> RegionMetadata {
|
||||
/// Dense schema: tag_0, tag_1, field_0, ts
|
||||
/// Sparse schema: __table_id, __tsid, tag_0, tag_1, field_0, ts
|
||||
pub fn sst_region_metadata_with_encoding(
|
||||
encoding: store_api::codec::PrimaryKeyEncoding,
|
||||
) -> RegionMetadata {
|
||||
let mut builder = RegionMetadataBuilder::new(REGION_ID);
|
||||
|
||||
// For sparse encoding, add internal columns first
|
||||
if encoding == store_api::codec::PrimaryKeyEncoding::Sparse {
|
||||
builder
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
DATA_SCHEMA_TABLE_ID_COLUMN_NAME.to_string(),
|
||||
ConcreteDataType::uint32_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_skipping_options(SkippingIndexOptions {
|
||||
granularity: 1,
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap(),
|
||||
semantic_type: SemanticType::Tag,
|
||||
column_id: ReservedColumnId::table_id(),
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
DATA_SCHEMA_TSID_COLUMN_NAME.to_string(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
false,
|
||||
),
|
||||
semantic_type: SemanticType::Tag,
|
||||
column_id: ReservedColumnId::tsid(),
|
||||
});
|
||||
}
|
||||
|
||||
// Add user-defined columns (tag_0, tag_1, field_0, ts)
|
||||
builder
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
@@ -83,12 +120,32 @@ pub fn sst_region_metadata() -> RegionMetadata {
|
||||
),
|
||||
semantic_type: SemanticType::Timestamp,
|
||||
column_id: 3,
|
||||
})
|
||||
.primary_key(vec![0, 1]);
|
||||
});
|
||||
|
||||
// Set primary key based on encoding
|
||||
if encoding == store_api::codec::PrimaryKeyEncoding::Sparse {
|
||||
builder.primary_key(vec![
|
||||
ReservedColumnId::table_id(),
|
||||
ReservedColumnId::tsid(),
|
||||
0, // tag_0
|
||||
1, // tag_1
|
||||
]);
|
||||
} else {
|
||||
builder.primary_key(vec![0, 1]); // Dense: just user tags
|
||||
}
|
||||
|
||||
builder.primary_key_encoding(encoding);
|
||||
builder.build().unwrap()
|
||||
}
|
||||
|
||||
/// Encodes a primary key for specific tags.
|
||||
/// Creates a new region metadata for testing SSTs.
|
||||
///
|
||||
/// Schema: tag_0, tag_1, field_0, ts
|
||||
pub fn sst_region_metadata() -> RegionMetadata {
|
||||
sst_region_metadata_with_encoding(store_api::codec::PrimaryKeyEncoding::Dense)
|
||||
}
|
||||
|
||||
/// Encodes a primary key for specific tags using dense encoding.
|
||||
pub fn new_primary_key(tags: &[&str]) -> Vec<u8> {
|
||||
let fields = (0..tags.len())
|
||||
.map(|idx| {
|
||||
@@ -104,6 +161,31 @@ pub fn new_primary_key(tags: &[&str]) -> Vec<u8> {
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// Encodes a primary key for specific tags using sparse encoding.
|
||||
/// Includes the internal columns (table_id, tsid) required by the sparse format.
|
||||
pub fn new_sparse_primary_key(
|
||||
tags: &[&str],
|
||||
metadata: &Arc<RegionMetadata>,
|
||||
table_id: u32,
|
||||
tsid: u64,
|
||||
) -> Vec<u8> {
|
||||
use mito_codec::row_converter::PrimaryKeyCodec;
|
||||
|
||||
let codec = mito_codec::row_converter::SparsePrimaryKeyCodec::new(metadata);
|
||||
|
||||
// Sparse encoding requires internal columns first, then user tags
|
||||
let values = vec![
|
||||
(ReservedColumnId::table_id(), ValueRef::UInt32(table_id)),
|
||||
(ReservedColumnId::tsid(), ValueRef::UInt64(tsid)),
|
||||
(0, ValueRef::String(tags[0])), // tag_0
|
||||
(1, ValueRef::String(tags[1])), // tag_1
|
||||
];
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
codec.encode_value_refs(&values, &mut buffer).unwrap();
|
||||
buffer
|
||||
}
|
||||
|
||||
/// Creates a [Source] from `batches`.
|
||||
pub fn new_source(batches: &[Batch]) -> Source {
|
||||
let reader = VecBatchReader::new(batches);
|
||||
|
||||
@@ -16,7 +16,6 @@ use std::time::Duration;
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_base::secrets::{ExposeSecret, SecretString};
|
||||
use common_telemetry::tracing::warn;
|
||||
use opendal::services::{Azblob, Gcs, Oss, S3};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@@ -118,23 +117,25 @@ pub struct S3Connection {
|
||||
/// By default, opendal sends API requests to https://s3.us-east-1.amazonaws.com/bucket_name
/// When enabled, opendal sends API requests to https://bucket_name.s3.us-east-1.amazonaws.com
pub enable_virtual_host_style: bool,
/// Disable the EC2 metadata service.
/// By default, opendal uses the EC2 metadata service to load credentials from the instance metadata
/// when the access key id and secret access key are not provided.
/// If enabled, opendal will *NOT* use the EC2 metadata service.
pub disable_ec2_metadata: bool,
|
||||
}
|
||||
|
||||
impl From<&S3Connection> for S3 {
|
||||
fn from(connection: &S3Connection) -> Self {
|
||||
let root = util::normalize_dir(&connection.root);
|
||||
|
||||
let mut builder = S3::default().root(&root).bucket(&connection.bucket);
|
||||
let mut builder = S3::default()
|
||||
.root(&root)
|
||||
.bucket(&connection.bucket)
|
||||
.access_key_id(connection.access_key_id.expose_secret())
|
||||
.secret_access_key(connection.secret_access_key.expose_secret());
|
||||
|
||||
if !connection.access_key_id.expose_secret().is_empty()
|
||||
&& !connection.secret_access_key.expose_secret().is_empty()
|
||||
{
|
||||
builder = builder
|
||||
.access_key_id(connection.access_key_id.expose_secret())
|
||||
.secret_access_key(connection.secret_access_key.expose_secret());
|
||||
} else {
|
||||
warn!("No access key id or secret access key provided, using anonymous access");
|
||||
builder = builder.allow_anonymous().disable_ec2_metadata();
|
||||
if connection.disable_ec2_metadata {
|
||||
builder = builder.disable_ec2_metadata();
|
||||
}
|
||||
|
||||
if let Some(endpoint) = &connection.endpoint {
|
||||
|
||||
@@ -410,8 +410,7 @@ fn sql_value_to_value(
|
||||
})?
|
||||
} else {
|
||||
common_sql::convert::sql_value_to_value(
|
||||
column,
|
||||
&column_schema.data_type,
|
||||
column_schema,
|
||||
sql_val,
|
||||
timezone,
|
||||
None,
|
||||
|
||||
@@ -52,6 +52,7 @@ use common_time::Timestamp;
|
||||
use common_time::range::TimestampRange;
|
||||
use datafusion_expr::LogicalPlan;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use humantime::format_duration;
|
||||
use itertools::Itertools;
|
||||
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
|
||||
@@ -644,11 +645,20 @@ impl StatementExecutor {
|
||||
})?
|
||||
.unit();
|
||||
|
||||
let start_column = ColumnSchema::new(
|
||||
"range_start",
|
||||
ConcreteDataType::timestamp_datatype(time_unit),
|
||||
false,
|
||||
);
|
||||
let end_column = ColumnSchema::new(
|
||||
"range_end",
|
||||
ConcreteDataType::timestamp_datatype(time_unit),
|
||||
false,
|
||||
);
|
||||
let mut time_ranges = Vec::with_capacity(sql_values_time_range.len());
|
||||
for (start, end) in sql_values_time_range {
|
||||
let start = common_sql::convert::sql_value_to_value(
|
||||
"range_start",
|
||||
&ConcreteDataType::timestamp_datatype(time_unit),
|
||||
&start_column,
|
||||
start,
|
||||
Some(&query_ctx.timezone()),
|
||||
None,
|
||||
@@ -667,8 +677,7 @@ impl StatementExecutor {
|
||||
})?;
|
||||
|
||||
let end = common_sql::convert::sql_value_to_value(
|
||||
"range_end",
|
||||
&ConcreteDataType::timestamp_datatype(time_unit),
|
||||
&end_column,
|
||||
end,
|
||||
Some(&query_ctx.timezone()),
|
||||
None,
|
||||
|
||||
@@ -242,8 +242,12 @@ fn values_to_vectors_by_exact_types(
|
||||
args.iter()
|
||||
.zip(exact_types.iter())
|
||||
.map(|(value, data_type)| {
|
||||
let data_type = &ConcreteDataType::from_arrow_type(data_type);
|
||||
let value = sql_value_to_value(DUMMY_COLUMN, data_type, value, tz, None, false)
|
||||
let schema = ColumnSchema::new(
|
||||
DUMMY_COLUMN,
|
||||
ConcreteDataType::from_arrow_type(data_type),
|
||||
true,
|
||||
);
|
||||
let value = sql_value_to_value(&schema, value, tz, None, false)
|
||||
.context(error::SqlCommonSnafu)?;
|
||||
|
||||
Ok(value_to_vector(value))
|
||||
@@ -260,10 +264,12 @@ fn values_to_vectors_by_valid_types(
|
||||
args.iter()
|
||||
.map(|value| {
|
||||
for data_type in valid_types {
|
||||
let data_type = &ConcreteDataType::from_arrow_type(data_type);
|
||||
if let Ok(value) =
|
||||
sql_value_to_value(DUMMY_COLUMN, data_type, value, tz, None, false)
|
||||
{
|
||||
let schema = ColumnSchema::new(
|
||||
DUMMY_COLUMN,
|
||||
ConcreteDataType::from_arrow_type(data_type),
|
||||
true,
|
||||
);
|
||||
if let Ok(value) = sql_value_to_value(&schema, value, tz, None, false) {
|
||||
return Ok(value_to_vector(value));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ use common_time::{Timestamp, Timezone};
|
||||
use datafusion_common::tree_node::TreeNodeVisitor;
|
||||
use datafusion_expr::LogicalPlan;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{RawSchema, Schema};
|
||||
use datatypes::schema::{ColumnSchema, RawSchema, Schema};
|
||||
use datatypes::value::Value;
|
||||
use partition::expr::{Operand, PartitionExpr, RestrictedOp};
|
||||
use partition::multi_dim::MultiDimPartitionRule;
|
||||
@@ -2001,8 +2001,7 @@ fn convert_value(
|
||||
unary_op: Option<UnaryOperator>,
|
||||
) -> Result<Value> {
|
||||
sql_value_to_value(
|
||||
"<NONAME>",
|
||||
&data_type,
|
||||
&ColumnSchema::new("<NONAME>", data_type, true),
|
||||
value,
|
||||
Some(timezone),
|
||||
unary_op,
|
||||
|
||||
@@ -23,11 +23,16 @@ use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use arrow::array::ArrayRef;
|
||||
use arrow::array::{
|
||||
ArrayRef, AsArray, TimestampMicrosecondArray, TimestampMillisecondArray,
|
||||
TimestampNanosecondArray, TimestampSecondArray,
|
||||
};
|
||||
use arrow::compute::{concat, concat_batches, take_record_batch};
|
||||
use arrow_schema::SchemaRef;
|
||||
use arrow_schema::{Schema, SchemaRef};
|
||||
use common_recordbatch::{DfRecordBatch, DfSendableRecordBatchStream};
|
||||
use common_telemetry::warn;
|
||||
use common_time::Timestamp;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datafusion::common::arrow::compute::sort_to_indices;
|
||||
use datafusion::execution::memory_pool::{MemoryConsumer, MemoryReservation};
|
||||
use datafusion::execution::{RecordBatchStream, TaskContext};
|
||||
@@ -40,8 +45,9 @@ use datafusion::physical_plan::{
|
||||
DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties, TopK,
|
||||
TopKDynamicFilters,
|
||||
};
|
||||
use datafusion_common::tree_node::{Transformed, TreeNode};
|
||||
use datafusion_common::{DataFusionError, internal_err};
|
||||
use datafusion_physical_expr::expressions::{DynamicFilterPhysicalExpr, lit};
|
||||
use datafusion_physical_expr::expressions::{Column, DynamicFilterPhysicalExpr, lit};
|
||||
use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr};
|
||||
use futures::{Stream, StreamExt};
|
||||
use itertools::Itertools;
|
||||
@@ -347,6 +353,9 @@ struct PartSortStream {
|
||||
range_groups: Vec<(Timestamp, usize, usize)>,
|
||||
/// Current group being processed (index into range_groups).
|
||||
cur_group_idx: usize,
|
||||
/// Dynamic filter shared by all TopK instances. Note that `PartSortExec`/`PartSortStream`/`TopK` must share the same filter
/// so that updates from each `TopK` can be seen by the others (and by the table scan operator).
filter: Option<Arc<RwLock<TopKDynamicFilters>>>,
|
||||
}
|
||||
|
||||
impl PartSortStream {
|
||||
@@ -360,7 +369,7 @@ impl PartSortStream {
|
||||
filter: Option<Arc<RwLock<TopKDynamicFilters>>>,
|
||||
) -> datafusion_common::Result<Self> {
|
||||
let buffer = if let Some(limit) = limit {
|
||||
let Some(filter) = filter else {
|
||||
let Some(filter) = filter.clone() else {
|
||||
return internal_err!(
|
||||
"TopKDynamicFilters must be provided when limit is set at {}",
|
||||
snafu::location!()
|
||||
@@ -377,7 +386,7 @@ impl PartSortStream {
|
||||
context.session_config().batch_size(),
|
||||
context.runtime_env(),
|
||||
&sort.metrics,
|
||||
filter,
|
||||
filter.clone(),
|
||||
)?,
|
||||
0,
|
||||
)
|
||||
@@ -407,23 +416,11 @@ impl PartSortStream {
|
||||
root_metrics: sort.metrics.clone(),
|
||||
range_groups,
|
||||
cur_group_idx: 0,
|
||||
filter,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! ts_to_timestamp {
|
||||
($t:ty, $unit:expr, $arr:expr) => {{
|
||||
let arr = $arr
|
||||
.as_any()
|
||||
.downcast_ref::<arrow::array::PrimitiveArray<$t>>()
|
||||
.unwrap();
|
||||
|
||||
arr.iter()
|
||||
.map(|v| v.map(|v| Timestamp::new(v, common_time::timestamp::TimeUnit::from(&$unit))))
|
||||
.collect_vec()
|
||||
}};
|
||||
}
|
||||
|
||||
macro_rules! array_check_helper {
|
||||
($t:ty, $unit:expr, $arr:expr, $cur_range:expr, $min_max_idx:expr) => {{
|
||||
if $cur_range.start.unit().as_arrow_time_unit() != $unit
|
||||
@@ -546,9 +543,10 @@ impl PartSortStream {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// A temporary solution for stopping reads early when the current group does not overlap with any later group
/// Stop reading early when the current group does not overlap with any later group.
/// If there is no overlap, we can stop reading further input since the current top-k is final.
fn can_stop_early(&mut self) -> datafusion_common::Result<bool> {
/// Uses the dynamic filter to evaluate the next group's primary end.
fn can_stop_early(&mut self, schema: &Arc<Schema>) -> datafusion_common::Result<bool> {
|
||||
let topk_cnt = match &self.buffer {
|
||||
PartSortBuffer::Top(_, cnt) => *cnt,
|
||||
_ => return Ok(false),
|
||||
@@ -557,37 +555,74 @@ impl PartSortStream {
|
||||
if Some(topk_cnt) < self.limit {
|
||||
return Ok(false);
|
||||
}
|
||||
// else check if last value in topk is not in next group range
|
||||
let topk_buffer = self.sort_top_buffer()?;
|
||||
let min_batch = topk_buffer.slice(topk_buffer.num_rows() - 1, 1);
|
||||
let min_sort_column = self.expression.evaluate_to_sort_column(&min_batch)?.values;
|
||||
let last_val = downcast_ts_array!(
|
||||
min_sort_column.data_type() => (ts_to_timestamp, min_sort_column),
|
||||
_ => internal_err!(
|
||||
"Unsupported data type for sort column: {:?}",
|
||||
min_sort_column.data_type()
|
||||
)?,
|
||||
)[0];
|
||||
let Some(last_val) = last_val else {
|
||||
return Ok(false);
|
||||
};
|
||||
let next_group_primary_end = if self.cur_group_idx + 1 < self.range_groups.len() {
|
||||
self.range_groups[self.cur_group_idx + 1].0
|
||||
} else {
|
||||
// no next group
|
||||
return Ok(false);
|
||||
};
|
||||
let descending = self.expression.options.descending;
|
||||
let not_in_next_group_range = if descending {
|
||||
last_val >= next_group_primary_end
|
||||
} else {
|
||||
last_val < next_group_primary_end
|
||||
|
||||
// The dyn filter is updated based on the last value of the TopK heap (the "threshold").
// It's a max-heap for an ASC TopK operator,
// so the dyn filter can be used to prune the data range.
let filter = self
|
||||
.filter
|
||||
.as_ref()
|
||||
.expect("TopKDynamicFilters must be provided when limit is set");
|
||||
let filter = filter.read().expr().current()?;
|
||||
let mut ts_index = None;
|
||||
// invariant: the filter must reference only the time index column
|
||||
let filter = filter
|
||||
.transform_down(|c| {
|
||||
// rewrite all column's index as 0
|
||||
if let Some(column) = c.as_any().downcast_ref::<Column>() {
|
||||
ts_index = Some(column.index());
|
||||
Ok(Transformed::yes(
|
||||
Arc::new(Column::new(column.name(), 0)) as Arc<dyn PhysicalExpr>
|
||||
))
|
||||
} else {
|
||||
Ok(Transformed::no(c))
|
||||
}
|
||||
})?
|
||||
.data;
|
||||
let Some(ts_index) = ts_index else {
|
||||
return Ok(false); // dyn filter has not been updated with a column predicate yet; cannot decide, continue reading
|
||||
};
|
||||
|
||||
// refill topk buffer count
|
||||
self.push_buffer(topk_buffer)?;
|
||||
|
||||
Ok(not_in_next_group_range)
|
||||
let field = if schema.fields().len() <= ts_index {
|
||||
warn!(
|
||||
"Schema mismatch when evaluating dynamic filter for PartSortExec at {}, schema: {:?}, ts_index: {}",
|
||||
self.partition, schema, ts_index
|
||||
);
|
||||
return Ok(false); // schema mismatch, cannot decide, continue reading
|
||||
} else {
|
||||
schema.field(ts_index)
|
||||
};
|
||||
let schema = Arc::new(Schema::new(vec![field.clone()]));
|
||||
// Convert next_group_primary_end to a single-row array and evaluate the filter; if it evaluates to false, there is no overlap and we can stop early.
|
||||
let primary_end_array = match next_group_primary_end.unit() {
|
||||
TimeUnit::Second => Arc::new(TimestampSecondArray::from(vec![
|
||||
next_group_primary_end.value(),
|
||||
])) as ArrayRef,
|
||||
TimeUnit::Millisecond => Arc::new(TimestampMillisecondArray::from(vec![
|
||||
next_group_primary_end.value(),
|
||||
])) as ArrayRef,
|
||||
TimeUnit::Microsecond => Arc::new(TimestampMicrosecondArray::from(vec![
|
||||
next_group_primary_end.value(),
|
||||
])) as ArrayRef,
|
||||
TimeUnit::Nanosecond => Arc::new(TimestampNanosecondArray::from(vec![
|
||||
next_group_primary_end.value(),
|
||||
])) as ArrayRef,
|
||||
};
|
||||
let primary_end_batch = DfRecordBatch::try_new(schema, vec![primary_end_array])?;
|
||||
let res = filter.evaluate(&primary_end_batch)?;
|
||||
let array = res.into_array(primary_end_batch.num_rows())?;
|
||||
let filter = array.as_boolean().clone();
|
||||
let overlap = filter.iter().next().flatten();
|
||||
if let Some(false) = overlap {
|
||||
Ok(true)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
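// A simplified, self-contained sketch of the early-stop decision above (the
// shapes are assumptions, not the real DataFusion/TopK types): model the
// dynamic filter of an ascending TopK as `ts < threshold` and evaluate it at
// the next group's primary end; if that boundary value already fails the
// filter, the next group cannot improve the current top-k, so reading stops.
fn sketch_can_stop_early(
    topk_len: usize,
    limit: usize,
    threshold: Option<i64>,              // worst value currently kept by the TopK
    next_group_primary_end: Option<i64>, // boundary value of the next group
) -> bool {
    // The heap must be full and there must be a next group to compare against.
    let (Some(threshold), Some(next_end)) = (threshold, next_group_primary_end) else {
        return false;
    };
    if topk_len < limit {
        return false;
    }
    // `ts < threshold` evaluated at the next group's boundary; false means no overlap.
    !(next_end < threshold)
}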
|
||||
|
||||
/// Check if the given partition index is within the current group.
|
||||
@@ -740,9 +775,13 @@ impl PartSortStream {
|
||||
|
||||
/// Internal method for sorting `Top` buffer (with limit).
|
||||
fn sort_top_buffer(&mut self) -> datafusion_common::Result<DfRecordBatch> {
|
||||
let filter = Arc::new(RwLock::new(TopKDynamicFilters::new(Arc::new(
|
||||
DynamicFilterPhysicalExpr::new(vec![], lit(true)),
|
||||
))));
|
||||
let Some(filter) = self.filter.clone() else {
|
||||
return internal_err!(
|
||||
"TopKDynamicFilters must be provided when sorting with limit at {}",
|
||||
snafu::location!()
|
||||
);
|
||||
};
|
||||
|
||||
let new_top_buffer = TopK::try_new(
|
||||
self.partition,
|
||||
self.schema().clone(),
|
||||
@@ -879,7 +918,7 @@ impl PartSortStream {
|
||||
|
||||
// When TopK is fulfilled and we are switching to a new group, stop consuming further ranges if possible.
|
||||
// read from topk heap and determine whether we can stop earlier.
|
||||
if !in_same_group && self.can_stop_early()? {
|
||||
if !in_same_group && self.can_stop_early(&batch.schema())? {
|
||||
self.input_complete = true;
|
||||
self.evaluating_batch = None;
|
||||
return Ok(());
|
||||
@@ -1067,6 +1106,60 @@ mod test {
|
||||
use super::*;
|
||||
use crate::test_util::{MockInputExec, new_ts_array};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_can_stop_early_with_empty_topk_buffer() {
|
||||
let unit = TimeUnit::Millisecond;
|
||||
let schema = Arc::new(Schema::new(vec![Field::new(
|
||||
"ts",
|
||||
DataType::Timestamp(unit, None),
|
||||
false,
|
||||
)]));
|
||||
|
||||
// Build a minimal PartSortExec and stream, but inject a dynamic filter that
|
||||
// always evaluates to false so TopK will filter out all rows internally.
|
||||
let mock_input = Arc::new(MockInputExec::new(vec![vec![]], schema.clone()));
|
||||
let exec = PartSortExec::try_new(
|
||||
PhysicalSortExpr {
|
||||
expr: Arc::new(Column::new("ts", 0)),
|
||||
options: SortOptions {
|
||||
descending: true,
|
||||
..Default::default()
|
||||
},
|
||||
},
|
||||
Some(3),
|
||||
vec![vec![]],
|
||||
mock_input.clone(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let filter = Arc::new(RwLock::new(TopKDynamicFilters::new(Arc::new(
|
||||
DynamicFilterPhysicalExpr::new(vec![], lit(false)),
|
||||
))));
|
||||
|
||||
let input_stream = mock_input
|
||||
.execute(0, Arc::new(TaskContext::default()))
|
||||
.unwrap();
|
||||
let mut stream = PartSortStream::new(
|
||||
Arc::new(TaskContext::default()),
|
||||
&exec,
|
||||
Some(3),
|
||||
input_stream,
|
||||
vec![],
|
||||
0,
|
||||
Some(filter),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Push 3 rows so the external counter reaches `limit`, while TopK keeps no rows.
|
||||
let batch = DfRecordBatch::try_new(schema.clone(), vec![new_ts_array(unit, vec![1, 2, 3])])
|
||||
.unwrap();
|
||||
stream.push_buffer(batch).unwrap();
|
||||
|
||||
// The TopK result buffer is empty, so we cannot determine early-stop.
|
||||
// Ensure this path returns `Ok(false)` (and, importantly, does not panic).
|
||||
assert!(!stream.can_stop_early(&schema).unwrap());
|
||||
}
|
||||
|
||||
#[ignore = "hard to gen expected data correctly here, TODO(discord9): fix it later"]
|
||||
#[tokio::test]
|
||||
async fn fuzzy_test() {
|
||||
@@ -2033,12 +2126,11 @@ mod test {
|
||||
|
||||
// Group 1 (end=100) has 6 rows, TopK will keep top 4
|
||||
// Group 2 (end=98) has 3 rows - threshold (96) < 98, so next group
|
||||
// could theoretically have better values. But limit exhaustion stops us.
|
||||
// Note: Data values must not overlap between ranges to avoid ambiguity.
|
||||
// could theoretically have better values. Continue reading.
|
||||
let input_ranged_data = vec![
|
||||
(
|
||||
PartitionRange {
|
||||
start: Timestamp::new(70, unit.into()),
|
||||
start: Timestamp::new(90, unit.into()),
|
||||
end: Timestamp::new(100, unit.into()),
|
||||
num_rows: 6,
|
||||
identifier: 0,
|
||||
@@ -2825,4 +2917,88 @@ mod test {
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// First group: [0, 20), data: [0, 5, 15]
/// Second group: [10, 30), data: [21, 25, 29]
/// After the first group, call early stop manually and check whether the filter is updated.
|
||||
#[tokio::test]
|
||||
async fn test_early_stop_check_update_dyn_filter() {
|
||||
let unit = TimeUnit::Millisecond;
|
||||
let schema = Arc::new(Schema::new(vec![Field::new(
|
||||
"ts",
|
||||
DataType::Timestamp(unit, None),
|
||||
false,
|
||||
)]));
|
||||
|
||||
let mock_input = Arc::new(MockInputExec::new(vec![vec![]], schema.clone()));
|
||||
let exec = PartSortExec::try_new(
|
||||
PhysicalSortExpr {
|
||||
expr: Arc::new(Column::new("ts", 0)),
|
||||
options: SortOptions {
|
||||
descending: false,
|
||||
..Default::default()
|
||||
},
|
||||
},
|
||||
Some(3),
|
||||
vec![vec![
|
||||
PartitionRange {
|
||||
start: Timestamp::new(0, unit.into()),
|
||||
end: Timestamp::new(20, unit.into()),
|
||||
num_rows: 3,
|
||||
identifier: 1,
|
||||
},
|
||||
PartitionRange {
|
||||
start: Timestamp::new(10, unit.into()),
|
||||
end: Timestamp::new(30, unit.into()),
|
||||
num_rows: 3,
|
||||
identifier: 1,
|
||||
},
|
||||
]],
|
||||
mock_input.clone(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let filter = exec.filter.clone().unwrap();
|
||||
let input_stream = mock_input
|
||||
.execute(0, Arc::new(TaskContext::default()))
|
||||
.unwrap();
|
||||
let mut stream = PartSortStream::new(
|
||||
Arc::new(TaskContext::default()),
|
||||
&exec,
|
||||
Some(3),
|
||||
input_stream,
|
||||
vec![],
|
||||
0,
|
||||
Some(filter.clone()),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// initially, snapshot_generation is 1
|
||||
assert_eq!(filter.read().expr().snapshot_generation(), 1);
|
||||
let batch =
|
||||
DfRecordBatch::try_new(schema.clone(), vec![new_ts_array(unit, vec![0, 5, 15])])
|
||||
.unwrap();
|
||||
stream.push_buffer(batch).unwrap();
|
||||
|
||||
// after pushing first batch, snapshot_generation is updated to 2
|
||||
assert_eq!(filter.read().expr().snapshot_generation(), 2);
|
||||
assert!(!stream.can_stop_early(&schema).unwrap());
|
||||
// still 2, since the filter was not updated
|
||||
assert_eq!(filter.read().expr().snapshot_generation(), 2);
|
||||
|
||||
let _ = stream.sort_top_buffer().unwrap();
|
||||
|
||||
let batch =
|
||||
DfRecordBatch::try_new(schema.clone(), vec![new_ts_array(unit, vec![21, 25, 29])])
|
||||
.unwrap();
|
||||
stream.push_buffer(batch).unwrap();
|
||||
// still 2, since the filter was not updated
|
||||
assert_eq!(filter.read().expr().snapshot_generation(), 2);
|
||||
let new = stream.sort_top_buffer().unwrap();
|
||||
// still 2, since the filter was not updated
|
||||
assert_eq!(filter.read().expr().snapshot_generation(), 2);
|
||||
|
||||
// the dyn filter kicks in and filters out all rows >= 15 (the filter is rows < 15)
|
||||
assert_eq!(new.num_rows(), 0)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,6 +41,7 @@ common-frontend.workspace = true
|
||||
common-grpc.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-mem-prof = { workspace = true, optional = true }
|
||||
common-memory-manager.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-plugins.workspace = true
|
||||
common-pprof = { workspace = true, optional = true }
|
||||
@@ -87,7 +88,7 @@ operator.workspace = true
|
||||
otel-arrow-rust.workspace = true
|
||||
parking_lot.workspace = true
|
||||
pg_interval = "0.4"
|
||||
pgwire = { version = "0.36.3", default-features = false, features = [
|
||||
pgwire = { version = "0.37", default-features = false, features = [
|
||||
"server-api-ring",
|
||||
"pg-ext-types",
|
||||
] }
|
||||
|
||||
@@ -95,6 +95,13 @@ pub enum Error {
|
||||
error: tonic::transport::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Request memory limit exceeded"))]
|
||||
MemoryLimitExceeded {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_memory_manager::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("{} server is already started", server))]
|
||||
AlreadyStarted {
|
||||
server: String,
|
||||
@@ -785,6 +792,8 @@ impl ErrorExt for Error {
|
||||
Cancelled { .. } => StatusCode::Cancelled,
|
||||
|
||||
Suspended { .. } => StatusCode::Suspended,
|
||||
|
||||
MemoryLimitExceeded { .. } => StatusCode::RateLimited,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -52,7 +52,6 @@ use crate::error::{AlreadyStartedSnafu, InternalSnafu, Result, StartGrpcSnafu, T
|
||||
use crate::metrics::MetricsMiddlewareLayer;
|
||||
use crate::otel_arrow::{HeaderInterceptor, OtelArrowServiceHandler};
|
||||
use crate::query_handler::OpenTelemetryProtocolHandlerRef;
|
||||
use crate::request_limiter::RequestMemoryLimiter;
|
||||
use crate::server::Server;
|
||||
use crate::tls::TlsOption;
|
||||
|
||||
@@ -69,8 +68,6 @@ pub struct GrpcOptions {
|
||||
pub max_recv_message_size: ReadableSize,
|
||||
/// Max gRPC sending(encoding) message size
|
||||
pub max_send_message_size: ReadableSize,
|
||||
/// Maximum total memory for all concurrent gRPC request messages. 0 disables the limit.
|
||||
pub max_total_message_memory: ReadableSize,
|
||||
/// Compression mode in Arrow Flight service.
|
||||
pub flight_compression: FlightCompression,
|
||||
pub runtime_size: usize,
|
||||
@@ -81,6 +78,12 @@ pub struct GrpcOptions {
|
||||
/// Default to `None`, means infinite.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub max_connection_age: Option<Duration>,
|
||||
/// The HTTP/2 keep-alive interval.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub http2_keep_alive_interval: Duration,
|
||||
/// The HTTP/2 keep-alive timeout.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub http2_keep_alive_timeout: Duration,
|
||||
}
|
||||
|
||||
impl GrpcOptions {
|
||||
@@ -120,7 +123,6 @@ impl GrpcOptions {
|
||||
GrpcServerConfig {
|
||||
max_recv_message_size: self.max_recv_message_size.as_bytes() as usize,
|
||||
max_send_message_size: self.max_send_message_size.as_bytes() as usize,
|
||||
max_total_message_memory: self.max_total_message_memory.as_bytes() as usize,
|
||||
tls: self.tls.clone(),
|
||||
max_connection_age: self.max_connection_age,
|
||||
}
|
||||
@@ -139,11 +141,12 @@ impl Default for GrpcOptions {
|
||||
server_addr: String::new(),
|
||||
max_recv_message_size: DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE,
|
||||
max_send_message_size: DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
|
||||
max_total_message_memory: ReadableSize(0),
|
||||
flight_compression: FlightCompression::ArrowIpc,
|
||||
runtime_size: 8,
|
||||
tls: TlsOption::default(),
|
||||
max_connection_age: None,
|
||||
http2_keep_alive_interval: Duration::from_secs(10),
|
||||
http2_keep_alive_timeout: Duration::from_secs(3),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -159,11 +162,12 @@ impl GrpcOptions {
|
||||
server_addr: format!("127.0.0.1:{}", DEFAULT_INTERNAL_GRPC_ADDR_PORT),
|
||||
max_recv_message_size: DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE,
|
||||
max_send_message_size: DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
|
||||
max_total_message_memory: ReadableSize(0),
|
||||
flight_compression: FlightCompression::ArrowIpc,
|
||||
runtime_size: 8,
|
||||
tls: TlsOption::default(),
|
||||
max_connection_age: None,
|
||||
http2_keep_alive_interval: Duration::from_secs(10),
|
||||
http2_keep_alive_timeout: Duration::from_secs(3),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -224,7 +228,6 @@ pub struct GrpcServer {
|
||||
bind_addr: Option<SocketAddr>,
|
||||
name: Option<String>,
|
||||
config: GrpcServerConfig,
|
||||
memory_limiter: RequestMemoryLimiter,
|
||||
}
|
||||
|
||||
/// Grpc Server configuration
|
||||
@@ -234,8 +237,6 @@ pub struct GrpcServerConfig {
|
||||
pub max_recv_message_size: usize,
|
||||
// Max gRPC sending(encoding) message size
|
||||
pub max_send_message_size: usize,
|
||||
/// Maximum total memory for all concurrent gRPC request messages. 0 disables the limit.
|
||||
pub max_total_message_memory: usize,
|
||||
pub tls: TlsOption,
|
||||
/// Maximum time that a channel may exist.
|
||||
/// Useful when the server wants to control the reconnection of its clients.
|
||||
@@ -248,7 +249,6 @@ impl Default for GrpcServerConfig {
|
||||
Self {
|
||||
max_recv_message_size: DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE.as_bytes() as usize,
|
||||
max_send_message_size: DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE.as_bytes() as usize,
|
||||
max_total_message_memory: 0,
|
||||
tls: TlsOption::default(),
|
||||
max_connection_age: None,
|
||||
}
|
||||
@@ -288,11 +288,6 @@ impl GrpcServer {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get the memory limiter for monitoring current memory usage
|
||||
pub fn memory_limiter(&self) -> &RequestMemoryLimiter {
|
||||
&self.memory_limiter
|
||||
}
|
||||
}
|
||||
|
||||
pub struct HealthCheckHandler;
|
||||
|
||||
@@ -46,7 +46,7 @@ use crate::grpc::{GrpcServer, GrpcServerConfig};
|
||||
use crate::otel_arrow::{HeaderInterceptor, OtelArrowServiceHandler};
|
||||
use crate::prometheus_handler::PrometheusHandlerRef;
|
||||
use crate::query_handler::OpenTelemetryProtocolHandlerRef;
|
||||
use crate::request_limiter::RequestMemoryLimiter;
|
||||
use crate::request_memory_limiter::ServerMemoryLimiter;
|
||||
use crate::tls::TlsOption;
|
||||
|
||||
/// Add a gRPC service (`service`) to a `builder`([RoutesBuilder]).
|
||||
@@ -92,12 +92,14 @@ pub struct GrpcServerBuilder {
|
||||
HeaderInterceptor,
|
||||
>,
|
||||
>,
|
||||
memory_limiter: RequestMemoryLimiter,
|
||||
memory_limiter: ServerMemoryLimiter,
|
||||
}
|
||||
|
||||
impl GrpcServerBuilder {
|
||||
pub fn new(config: GrpcServerConfig, runtime: Runtime) -> Self {
|
||||
let memory_limiter = RequestMemoryLimiter::new(config.max_total_message_memory);
|
||||
// Create a default unlimited limiter (can be overridden with with_memory_limiter)
|
||||
let memory_limiter = ServerMemoryLimiter::default();
|
||||
|
||||
Self {
|
||||
name: None,
|
||||
config,
|
||||
@@ -109,6 +111,12 @@ impl GrpcServerBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
/// Set a global memory limiter for all server protocols.
|
||||
pub fn with_memory_limiter(mut self, limiter: ServerMemoryLimiter) -> Self {
|
||||
self.memory_limiter = limiter;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn config(&self) -> &GrpcServerConfig {
|
||||
&self.config
|
||||
}
|
||||
@@ -117,7 +125,7 @@ impl GrpcServerBuilder {
|
||||
&self.runtime
|
||||
}
|
||||
|
||||
pub fn memory_limiter(&self) -> &RequestMemoryLimiter {
|
||||
pub fn memory_limiter(&self) -> &ServerMemoryLimiter {
|
||||
&self.memory_limiter
|
||||
}
|
||||
|
||||
@@ -238,7 +246,6 @@ impl GrpcServerBuilder {
|
||||
bind_addr: None,
|
||||
name: self.name,
|
||||
config: self.config,
|
||||
memory_limiter: self.memory_limiter,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,8 +26,7 @@ use tonic::{Request, Response, Status, Streaming};
|
||||
use crate::grpc::greptime_handler::GreptimeRequestHandler;
|
||||
use crate::grpc::{TonicResult, cancellation};
|
||||
use crate::hint_headers;
|
||||
use crate::metrics::{METRIC_GRPC_MEMORY_USAGE_BYTES, METRIC_GRPC_REQUESTS_REJECTED_TOTAL};
|
||||
use crate::request_limiter::RequestMemoryLimiter;
|
||||
use crate::request_memory_limiter::ServerMemoryLimiter;
|
||||
|
||||
pub(crate) struct DatabaseService {
|
||||
handler: GreptimeRequestHandler,
|
||||
@@ -52,25 +51,12 @@ impl GreptimeDatabase for DatabaseService {
|
||||
remote_addr, hints
|
||||
);
|
||||
|
||||
let _guard = request
|
||||
.extensions()
|
||||
.get::<RequestMemoryLimiter>()
|
||||
.filter(|limiter| limiter.is_enabled())
|
||||
.and_then(|limiter| {
|
||||
let message_size = request.get_ref().encoded_len();
|
||||
limiter
|
||||
.try_acquire(message_size)
|
||||
.map(|guard| {
|
||||
guard.inspect(|g| {
|
||||
METRIC_GRPC_MEMORY_USAGE_BYTES.set(g.current_usage() as i64);
|
||||
})
|
||||
})
|
||||
.inspect_err(|_| {
|
||||
METRIC_GRPC_REQUESTS_REJECTED_TOTAL.inc();
|
||||
})
|
||||
.transpose()
|
||||
})
|
||||
.transpose()?;
|
||||
let _guard = if let Some(limiter) = request.extensions().get::<ServerMemoryLimiter>() {
|
||||
let message_size = request.get_ref().encoded_len() as u64;
|
||||
Some(limiter.acquire(message_size).await?)
|
||||
} else {
|
||||
None
|
||||
};
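// A minimal sketch of the acquire-guard pattern used here (an assumption about
// the design, not the actual ServerMemoryLimiter implementation): a byte
// budget backed by a tokio semaphore, where `acquire` waits until the request
// fits and the returned guard gives the bytes back when dropped.
use std::sync::Arc;
use tokio::sync::{OwnedSemaphorePermit, Semaphore};

#[derive(Clone)]
struct ByteBudget {
    semaphore: Arc<Semaphore>,
}

struct ByteGuard {
    _permit: OwnedSemaphorePermit, // permits are released on drop
}

impl ByteBudget {
    fn new(limit_bytes: u32) -> Self {
        Self {
            semaphore: Arc::new(Semaphore::new(limit_bytes as usize)),
        }
    }

    async fn acquire(&self, bytes: u32) -> ByteGuard {
        // Waits for capacity instead of rejecting the request outright.
        let permit = self
            .semaphore
            .clone()
            .acquire_many_owned(bytes)
            .await
            .expect("semaphore closed");
        ByteGuard { _permit: permit }
    }
}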
|
||||
|
||||
let handler = self.handler.clone();
|
||||
let request_future = async move {
|
||||
@@ -119,7 +105,7 @@ impl GreptimeDatabase for DatabaseService {
|
||||
remote_addr, hints
|
||||
);
|
||||
|
||||
let limiter = request.extensions().get::<RequestMemoryLimiter>().cloned();
|
||||
let limiter = request.extensions().get::<ServerMemoryLimiter>().cloned();
|
||||
|
||||
let handler = self.handler.clone();
|
||||
let request_future = async move {
|
||||
@@ -129,24 +115,12 @@ impl GreptimeDatabase for DatabaseService {
|
||||
while let Some(request) = stream.next().await {
|
||||
let request = request?;
|
||||
|
||||
let _guard = limiter
|
||||
.as_ref()
|
||||
.filter(|limiter| limiter.is_enabled())
|
||||
.and_then(|limiter| {
|
||||
let message_size = request.encoded_len();
|
||||
limiter
|
||||
.try_acquire(message_size)
|
||||
.map(|guard| {
|
||||
guard.inspect(|g| {
|
||||
METRIC_GRPC_MEMORY_USAGE_BYTES.set(g.current_usage() as i64);
|
||||
})
|
||||
})
|
||||
.inspect_err(|_| {
|
||||
METRIC_GRPC_REQUESTS_REJECTED_TOTAL.inc();
|
||||
})
|
||||
.transpose()
|
||||
})
|
||||
.transpose()?;
|
||||
let _guard = if let Some(limiter_ref) = &limiter {
|
||||
let message_size = request.encoded_len() as u64;
|
||||
Some(limiter_ref.acquire(message_size).await?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let output = handler.handle_request(request, hints.clone()).await?;
|
||||
match output.data {
|
||||
OutputData::AffectedRows(rows) => affected_rows += rows,
|
||||
|
||||
@@ -29,6 +29,7 @@ use bytes;
use bytes::Bytes;
use common_grpc::flight::do_put::{DoPutMetadata, DoPutResponse};
use common_grpc::flight::{FlightDecoder, FlightEncoder, FlightMessage};
use common_memory_manager::MemoryGuard;
use common_query::{Output, OutputData};
use common_recordbatch::DfRecordBatch;
use common_telemetry::debug;
@@ -39,7 +40,7 @@ use futures::{Stream, future, ready};
use futures_util::{StreamExt, TryStreamExt};
use prost::Message;
use session::context::{QueryContext, QueryContextRef};
use snafu::{ResultExt, ensure};
use snafu::{IntoError, ResultExt, ensure};
use table::table_name::TableName;
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
@@ -49,8 +50,8 @@ use crate::error::{InvalidParameterSnafu, Result, ToJsonSnafu};
pub use crate::grpc::flight::stream::FlightRecordBatchStream;
use crate::grpc::greptime_handler::{GreptimeRequestHandler, get_request_type};
use crate::grpc::{FlightCompression, TonicResult, context_auth};
use crate::metrics::{METRIC_GRPC_MEMORY_USAGE_BYTES, METRIC_GRPC_REQUESTS_REJECTED_TOTAL};
use crate::request_limiter::{RequestMemoryGuard, RequestMemoryLimiter};
use crate::request_memory_limiter::ServerMemoryLimiter;
use crate::request_memory_metrics::RequestMemoryMetrics;
use crate::{error, hint_headers};

pub type TonicStream<T> = Pin<Box<dyn Stream<Item = TonicResult<T>> + Send + 'static>>;
@@ -219,7 +220,7 @@ impl FlightCraft for GreptimeRequestHandler {
    ) -> TonicResult<Response<TonicStream<PutResult>>> {
        let (headers, extensions, stream) = request.into_parts();

        let limiter = extensions.get::<RequestMemoryLimiter>().cloned();
        let limiter = extensions.get::<ServerMemoryLimiter>().cloned();

        let query_ctx = context_auth::create_query_context_from_grpc_metadata(&headers)?;
        context_auth::check_auth(self.user_provider.clone(), &headers, query_ctx.clone()).await?;
@@ -260,7 +261,7 @@ pub struct PutRecordBatchRequest {
    pub record_batch: DfRecordBatch,
    pub schema_bytes: Bytes,
    pub flight_data: FlightData,
    pub(crate) _guard: Option<RequestMemoryGuard>,
    pub(crate) _guard: Option<MemoryGuard<RequestMemoryMetrics>>,
}

impl PutRecordBatchRequest {
@@ -270,28 +271,24 @@ impl PutRecordBatchRequest {
        request_id: i64,
        schema_bytes: Bytes,
        flight_data: FlightData,
        limiter: Option<&RequestMemoryLimiter>,
        limiter: Option<&ServerMemoryLimiter>,
    ) -> Result<Self> {
        let memory_usage = flight_data.data_body.len()
            + flight_data.app_metadata.len()
            + flight_data.data_header.len();

        let _guard = limiter
            .filter(|limiter| limiter.is_enabled())
            .map(|limiter| {
                limiter
                    .try_acquire(memory_usage)
                    .map(|guard| {
                        guard.inspect(|g| {
                            METRIC_GRPC_MEMORY_USAGE_BYTES.set(g.current_usage() as i64);
                        })
                    })
                    .inspect_err(|_| {
                        METRIC_GRPC_REQUESTS_REJECTED_TOTAL.inc();
                    })
            })
            .transpose()?
            .flatten();
        let _guard = if let Some(limiter) = limiter {
            let guard = limiter.try_acquire(memory_usage as u64).ok_or_else(|| {
                let inner_err = common_memory_manager::Error::MemoryLimitExceeded {
                    requested_bytes: memory_usage as u64,
                    limit_bytes: limiter.limit_bytes(),
                };
                error::MemoryLimitExceededSnafu.into_error(inner_err)
            })?;
            Some(guard)
        } else {
            None
        };

        Ok(Self {
            table_name,
@@ -308,7 +305,7 @@ pub struct PutRecordBatchRequestStream {
    flight_data_stream: Streaming<FlightData>,
    catalog: String,
    schema_name: String,
    limiter: Option<RequestMemoryLimiter>,
    limiter: Option<ServerMemoryLimiter>,
    // Client now lazily sends schema data so we cannot eagerly wait for it.
    // Instead, we need to decode while receiving record batches.
    state: StreamState,
@@ -331,7 +328,7 @@ impl PutRecordBatchRequestStream {
        flight_data_stream: Streaming<FlightData>,
        catalog: String,
        schema: String,
        limiter: Option<RequestMemoryLimiter>,
        limiter: Option<ServerMemoryLimiter>,
    ) -> TonicResult<Self> {
        Ok(Self {
            flight_data_stream,
@@ -395,7 +392,6 @@ impl Stream for PutRecordBatchRequestStream {

        match poll {
            Some(Ok(flight_data)) => {
                // Clone limiter once to avoid borrowing issues
                let limiter = self.limiter.clone();

                match &mut self.state {
@@ -18,15 +18,15 @@ use futures::future::BoxFuture;
use tonic::server::NamedService;
use tower::{Layer, Service};

use crate::request_limiter::RequestMemoryLimiter;
use crate::request_memory_limiter::ServerMemoryLimiter;

#[derive(Clone)]
pub struct MemoryLimiterExtensionLayer {
    limiter: RequestMemoryLimiter,
    limiter: ServerMemoryLimiter,
}

impl MemoryLimiterExtensionLayer {
    pub fn new(limiter: RequestMemoryLimiter) -> Self {
    pub fn new(limiter: ServerMemoryLimiter) -> Self {
        Self { limiter }
    }
}
@@ -45,7 +45,7 @@ impl<S> Layer<S> for MemoryLimiterExtensionLayer {
#[derive(Clone)]
pub struct MemoryLimiterExtensionService<S> {
    inner: S,
    limiter: RequestMemoryLimiter,
    limiter: ServerMemoryLimiter,
}

impl<S: NamedService> NamedService for MemoryLimiterExtensionService<S> {
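For context, this layer exists so that the limiter shows up in `request.extensions()` inside the gRPC handlers above. The following is a generic tower sketch of that "inject a shared value into every request's extensions" idea; it is illustrative only, the `InjectExtensionLayer`/`InjectExtensionService` names are hypothetical, and it is not the code of `MemoryLimiterExtensionService`.

// Minimal sketch: a tower layer that clones a shared value into every request's
// extensions so downstream handlers can fetch it by type.
use std::task::{Context, Poll};
use tower::{Layer, Service};

#[derive(Clone)]
struct InjectExtensionLayer<T> {
    value: T,
}

impl<S, T: Clone> Layer<S> for InjectExtensionLayer<T> {
    type Service = InjectExtensionService<S, T>;

    fn layer(&self, inner: S) -> Self::Service {
        InjectExtensionService { inner, value: self.value.clone() }
    }
}

#[derive(Clone)]
struct InjectExtensionService<S, T> {
    inner: S,
    value: T,
}

impl<S, T, B> Service<http::Request<B>> for InjectExtensionService<S, T>
where
    S: Service<http::Request<B>>,
    T: Clone + Send + Sync + 'static,
{
    type Response = S::Response;
    type Error = S::Error;
    type Future = S::Future;

    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        self.inner.poll_ready(cx)
    }

    fn call(&mut self, mut req: http::Request<B>) -> Self::Future {
        // Make the shared value retrievable later via req.extensions().get::<T>().
        req.extensions_mut().insert(self.value.clone());
        self.inner.call(req)
    }
}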
@@ -34,12 +34,10 @@ impl HeartbeatOptions {
    pub fn frontend_default() -> Self {
        Self {
            // Frontend can send heartbeat with a longer interval.
            interval: Duration::from_millis(
                distributed_time_constants::FRONTEND_HEARTBEAT_INTERVAL_MILLIS,
            ),
            retry_interval: Duration::from_millis(
                distributed_time_constants::HEARTBEAT_INTERVAL_MILLIS,
            interval: distributed_time_constants::frontend_heartbeat_interval(
                distributed_time_constants::BASE_HEARTBEAT_INTERVAL,
            ),
            retry_interval: distributed_time_constants::BASE_HEARTBEAT_INTERVAL,
        }
    }
}
@@ -47,10 +45,8 @@ impl HeartbeatOptions {
impl Default for HeartbeatOptions {
    fn default() -> Self {
        Self {
            interval: Duration::from_millis(distributed_time_constants::HEARTBEAT_INTERVAL_MILLIS),
            retry_interval: Duration::from_millis(
                distributed_time_constants::HEARTBEAT_INTERVAL_MILLIS,
            ),
            interval: distributed_time_constants::BASE_HEARTBEAT_INTERVAL,
            retry_interval: distributed_time_constants::BASE_HEARTBEAT_INTERVAL,
        }
    }
}
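These hunks swap millisecond integer constants for `Duration` constants and derive the frontend interval from a single base value. The sketch below only illustrates that shape; the real value of `BASE_HEARTBEAT_INTERVAL` and the factor applied by `frontend_heartbeat_interval` are not visible in this diff, so the numbers are placeholders.

// Shape-only sketch; the concrete value and factor below are placeholders, not
// GreptimeDB's real defaults.
use std::time::Duration;

pub const BASE_HEARTBEAT_INTERVAL: Duration = Duration::from_secs(3); // placeholder value

// Frontends can heartbeat less often, so their interval is derived from the base
// rather than being an independent magic number.
pub fn frontend_heartbeat_interval(base: Duration) -> Duration {
    base * 6 // placeholder factor
}

fn main() {
    let retry_interval = BASE_HEARTBEAT_INTERVAL;
    let frontend_interval = frontend_heartbeat_interval(BASE_HEARTBEAT_INTERVAL);
    assert!(frontend_interval > retry_interval);
    println!("retry every {retry_interval:?}, frontend heartbeat every {frontend_interval:?}");
}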
@@ -83,7 +83,7 @@ use crate::query_handler::{
    OpenTelemetryProtocolHandlerRef, OpentsdbProtocolHandlerRef, PipelineHandlerRef,
    PromStoreProtocolHandlerRef,
};
use crate::request_limiter::RequestMemoryLimiter;
use crate::request_memory_limiter::ServerMemoryLimiter;
use crate::server::Server;

pub mod authorize;
@@ -134,7 +134,7 @@ pub struct HttpServer {
    router: StdMutex<Router>,
    shutdown_tx: Mutex<Option<Sender<()>>>,
    user_provider: Option<UserProviderRef>,
    memory_limiter: RequestMemoryLimiter,
    memory_limiter: ServerMemoryLimiter,

    // plugins
    plugins: Plugins,
@@ -157,9 +157,6 @@ pub struct HttpOptions {

    pub body_limit: ReadableSize,

    /// Maximum total memory for all concurrent HTTP request bodies. 0 disables the limit.
    pub max_total_body_memory: ReadableSize,

    /// Validation mode while decoding Prometheus remote write requests.
    pub prom_validation_mode: PromValidationMode,

@@ -204,7 +201,6 @@ impl Default for HttpOptions {
            timeout: Duration::from_secs(0),
            disable_dashboard: false,
            body_limit: DEFAULT_BODY_LIMIT,
            max_total_body_memory: ReadableSize(0),
            cors_allowed_origins: Vec::new(),
            enable_cors: true,
            prom_validation_mode: PromValidationMode::Strict,
@@ -539,12 +535,12 @@ pub struct GreptimeOptionsConfigState {
    pub greptime_config_options: String,
}

#[derive(Default)]
pub struct HttpServerBuilder {
    options: HttpOptions,
    plugins: Plugins,
    user_provider: Option<UserProviderRef>,
    router: Router,
    memory_limiter: ServerMemoryLimiter,
}

impl HttpServerBuilder {
@@ -554,9 +550,16 @@ impl HttpServerBuilder {
            plugins: Plugins::default(),
            user_provider: None,
            router: Router::new(),
            memory_limiter: ServerMemoryLimiter::default(),
        }
    }

    /// Set a global memory limiter for all server protocols.
    pub fn with_memory_limiter(mut self, limiter: ServerMemoryLimiter) -> Self {
        self.memory_limiter = limiter;
        self
    }

    pub fn with_sql_handler(self, sql_handler: ServerSqlQueryHandlerRef) -> Self {
        let sql_router = HttpServer::route_sql(ApiState { sql_handler });

@@ -750,8 +753,6 @@ impl HttpServerBuilder {
    }

    pub fn build(self) -> HttpServer {
        let memory_limiter =
            RequestMemoryLimiter::new(self.options.max_total_body_memory.as_bytes() as usize);
        HttpServer {
            options: self.options,
            user_provider: self.user_provider,
@@ -759,7 +760,7 @@ impl HttpServerBuilder {
            plugins: self.plugins,
            router: StdMutex::new(self.router),
            bind_addr: None,
            memory_limiter,
            memory_limiter: self.memory_limiter,
        }
    }
}
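Taken together with the gRPC changes above, the HTTP builder no longer constructs its own limiter from `max_total_body_memory`; it accepts an externally built one, so a single budget can be shared across protocols. A hypothetical wiring sketch follows: `with_memory_limiter`, `build`, `ServerMemoryLimiter::default()` and `MemoryLimiterExtensionLayer::new` all appear in this diff, while `HttpServerBuilder::new(http_options)` and the surrounding glue are assumptions, not taken from it.

// Hypothetical wiring sketch, not part of this diff.
let memory_limiter = ServerMemoryLimiter::default();

let http_server = HttpServerBuilder::new(http_options)           // constructor name assumed
    .with_memory_limiter(memory_limiter.clone())                  // shown in the diff above
    .build();

let grpc_layer = MemoryLimiterExtensionLayer::new(memory_limiter.clone()); // shown above
// The layer clones the limiter into each request's extensions, which is where the
// GreptimeDatabase and Flight handlers read it back out.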
Some files were not shown because too many files have changed in this diff.