Compare commits


3 Commits

Author SHA1 Message Date
Copilot
16cce185fc chore: translate Chinese comments to English in bump-versions.ts (#7424)
* Initial plan

* chore: translate Chinese comments to English in bump-versions.ts

Co-authored-by: discord9 <55937128+discord9@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: discord9 <55937128+discord9@users.noreply.github.com>
2025-12-17 11:32:32 +08:00
discord9
35624ffdfe Merge branch 'main' into chore/manual_pre_release_docs 2025-12-17 11:05:42 +08:00
discord9
aab2c7343a chore: manually choose pre release docs?
Signed-off-by: discord9 <discord9@163.com>
2025-11-12 11:33:20 +08:00
183 changed files with 3174 additions and 8348 deletions

View File

@@ -51,7 +51,7 @@ runs:
run: |
helm upgrade \
--install my-greptimedb \
--set 'meta.backendStorage.etcd.endpoints[0]=${{ inputs.etcd-endpoints }}' \
--set meta.backendStorage.etcd.endpoints=${{ inputs.etcd-endpoints }} \
--set meta.enableRegionFailover=${{ inputs.enable-region-failover }} \
--set image.registry=${{ inputs.image-registry }} \
--set image.repository=${{ inputs.image-repository }} \

View File

@@ -49,17 +49,6 @@ function create_version() {
echo "GITHUB_REF_NAME is empty in push event" >&2
exit 1
fi
# For tag releases, ensure GITHUB_REF_NAME matches the version in Cargo.toml
CARGO_VERSION=$(grep '^version = ' Cargo.toml | cut -d '"' -f 2 | head -n 1)
EXPECTED_REF_NAME="v${CARGO_VERSION}"
if [ "$GITHUB_REF_NAME" != "$EXPECTED_REF_NAME" ]; then
echo "Error: GITHUB_REF_NAME '$GITHUB_REF_NAME' does not match Cargo.toml version 'v${CARGO_VERSION}'" >&2
echo "Expected tag name: '$EXPECTED_REF_NAME'" >&2
exit 1
fi
echo "$GITHUB_REF_NAME"
elif [ "$GITHUB_EVENT_NAME" = workflow_dispatch ]; then
echo "$NEXT_RELEASE_VERSION-$(git rev-parse --short HEAD)-$(date "+%Y%m%d-%s")"

View File

@@ -81,7 +81,7 @@ function deploy_greptimedb_cluster() {
--create-namespace \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
--set initializer.tag="$GREPTIMEDB_INITIALIZER_IMAGE_TAG" \
--set "meta.backendStorage.etcd.endpoints[0]=etcd.$install_namespace.svc.cluster.local:2379" \
--set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
--set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
-n "$install_namespace"
@@ -119,7 +119,7 @@ function deploy_greptimedb_cluster_with_s3_storage() {
--create-namespace \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
--set initializer.tag="$GREPTIMEDB_INITIALIZER_IMAGE_TAG" \
--set "meta.backendStorage.etcd.endpoints[0]=etcd.$install_namespace.svc.cluster.local:2379" \
--set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
--set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
--set objectStorage.s3.bucket="$AWS_CI_TEST_BUCKET" \
--set objectStorage.s3.region="$AWS_REGION" \

View File

@@ -1,154 +0,0 @@
name: Check Git Dependencies on Main Branch
on:
pull_request:
branches: [main]
paths:
- 'Cargo.toml'
push:
branches: [main]
paths:
- 'Cargo.toml'
jobs:
check-git-deps:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v6
- name: Check git dependencies
env:
WHITELIST_DEPS: "greptime-proto,meter-core,meter-macros"
run: |
#!/bin/bash
set -e
echo "Checking whitelisted git dependencies..."
# Function to check if a commit is on main branch
check_commit_on_main() {
local repo_url="$1"
local commit="$2"
local repo_name=$(basename "$repo_url" .git)
echo "Checking $repo_name"
echo "Repo: $repo_url"
echo "Commit: $commit"
# Create a temporary directory for cloning
local temp_dir=$(mktemp -d)
# Clone the repository
if git clone "$repo_url" "$temp_dir" 2>/dev/null; then
cd "$temp_dir"
# Try to determine the main branch name
local main_branch="main"
if ! git rev-parse --verify origin/main >/dev/null 2>&1; then
if git rev-parse --verify origin/master >/dev/null 2>&1; then
main_branch="master"
else
# Try to get the default branch
main_branch=$(git symbolic-ref refs/remotes/origin/HEAD | sed 's@^refs/remotes/origin/@@')
fi
fi
echo "Main branch: $main_branch"
# Check if commit exists
if git cat-file -e "$commit" 2>/dev/null; then
# Check if commit is on main branch
if git merge-base --is-ancestor "$commit" "origin/$main_branch" 2>/dev/null; then
echo "PASS: Commit $commit is on $main_branch branch"
cd - >/dev/null
rm -rf "$temp_dir"
return 0
else
echo "FAIL: Commit $commit is NOT on $main_branch branch"
# Try to find which branch contains this commit
local branch_name=$(git branch -r --contains "$commit" 2>/dev/null | head -1 | sed 's/^[[:space:]]*origin\///' | sed 's/[[:space:]]*$//')
if [[ -n "$branch_name" ]]; then
echo "Found on branch: $branch_name"
fi
cd - >/dev/null
rm -rf "$temp_dir"
return 1
fi
else
echo "FAIL: Commit $commit not found in repository"
cd - >/dev/null
rm -rf "$temp_dir"
return 1
fi
else
echo "FAIL: Failed to clone $repo_url"
rm -rf "$temp_dir"
return 1
fi
}
# Extract whitelisted git dependencies from Cargo.toml
echo "Extracting git dependencies from Cargo.toml..."
# Create temporary array to store dependencies
declare -a deps=()
# Build awk pattern from whitelist
IFS=',' read -ra WHITELIST <<< "$WHITELIST_DEPS"
awk_pattern=""
for dep in "${WHITELIST[@]}"; do
if [[ -n "$awk_pattern" ]]; then
awk_pattern="$awk_pattern|"
fi
awk_pattern="$awk_pattern$dep"
done
# Extract whitelisted dependencies
while IFS= read -r line; do
if [[ -n "$line" ]]; then
deps+=("$line")
fi
done < <(awk -v pattern="$awk_pattern" '
$0 ~ pattern ".*git = \"https:/" {
match($0, /git = "([^"]+)"/, arr)
git_url = arr[1]
if (match($0, /rev = "([^"]+)"/, rev_arr)) {
rev = rev_arr[1]
print git_url " " rev
} else {
# Check next line for rev
getline
if (match($0, /rev = "([^"]+)"/, rev_arr)) {
rev = rev_arr[1]
print git_url " " rev
}
}
}
' Cargo.toml)
echo "Found ${#deps[@]} dependencies to check:"
for dep in "${deps[@]}"; do
echo " $dep"
done
failed=0
for dep in "${deps[@]}"; do
read -r repo_url commit <<< "$dep"
if ! check_commit_on_main "$repo_url" "$commit"; then
failed=1
fi
done
echo "Check completed."
if [[ $failed -eq 1 ]]; then
echo "ERROR: Some git dependencies are not on their main branches!"
echo "Please update the commits to point to main branch commits."
exit 1
else
echo "SUCCESS: All git dependencies are on their main branches!"
fi

View File

@@ -102,30 +102,6 @@ like `feat`/`fix`/`docs`, with a concise summary of code change following. AVOID
All commit messages SHOULD adhere to the [Conventional Commits specification](https://conventionalcommits.org/).
## AI-Assisted contributions
We have the following policy for AI-assisted PRs:
- The PR author should **understand the core ideas** behind the implementation **end-to-end**, and be able to justify the design and code during review.
- **Call out unknowns and assumptions**. It's okay not to fully understand some bits of AI-generated code. You should comment on these cases and point them out to reviewers so that they can use their knowledge of the codebase to clear up any concerns. For example, you might comment "calling this function here seems to work but I'm not familiar with how it works internally, I wonder if there's a race condition if it is called concurrently".
### Why fully AI-generated PRs without understanding are not helpful
Today, AI tools cannot reliably make complex changes to DataFusion on their own, which is why we rely on pull requests and code review.
The purposes of code review are:
1. Finish the intended task.
2. Share knowledge between authors and reviewers, as a long-term investment in the project. For this reason, even if someone familiar with the codebase can finish a task quickly, we're still happy to help a new contributor work on it even if it takes longer.
An AI dump for an issue doesn't meet these purposes. Maintainers could finish the task faster by using AI directly, and submitters gain little knowledge if they act only as a pass-through AI proxy without understanding.
Please understand that reviewing capacity for the project is **very limited**, so large PRs that appear to lack the requisite understanding might not get reviewed and may eventually be closed or redirected.
### Better ways to contribute than an “AI dump”
It's recommended to write a high-quality issue with a clear problem statement and a minimal, reproducible example. This can make it easier for others to contribute.
## Getting Help
There are many ways to get help when you're stuck. It is recommended to ask for help by opening an issue, with a detailed description

205  Cargo.lock generated
View File

@@ -212,7 +212,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"
[[package]]
name = "api"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"arrow-schema",
"common-base",
@@ -733,7 +733,7 @@ dependencies = [
[[package]]
name = "auth"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"async-trait",
@@ -1383,7 +1383,7 @@ dependencies = [
[[package]]
name = "cache"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"catalog",
"common-error",
@@ -1418,7 +1418,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "catalog"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"arrow",
@@ -1763,7 +1763,7 @@ checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
[[package]]
name = "cli"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"async-stream",
"async-trait",
@@ -1786,7 +1786,6 @@ dependencies = [
"common-recordbatch",
"common-runtime",
"common-telemetry",
"common-test-util",
"common-time",
"common-version",
"common-wal",
@@ -1817,7 +1816,7 @@ dependencies = [
[[package]]
name = "client"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"arc-swap",
@@ -1850,7 +1849,7 @@ dependencies = [
"snafu 0.8.6",
"store-api",
"substrait 0.37.3",
"substrait 1.0.0-beta.3",
"substrait 1.0.0-beta.2",
"tokio",
"tokio-stream",
"tonic 0.13.1",
@@ -1890,7 +1889,7 @@ dependencies = [
[[package]]
name = "cmd"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"async-trait",
"auth",
@@ -2024,7 +2023,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
[[package]]
name = "common-base"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"anymap2",
"async-trait",
@@ -2048,14 +2047,14 @@ dependencies = [
[[package]]
name = "common-catalog"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"const_format",
]
[[package]]
name = "common-config"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"common-base",
"common-error",
@@ -2080,7 +2079,7 @@ dependencies = [
[[package]]
name = "common-datasource"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"arrow",
"arrow-schema",
@@ -2115,7 +2114,7 @@ dependencies = [
[[package]]
name = "common-decimal"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"bigdecimal 0.4.8",
"common-error",
@@ -2128,7 +2127,7 @@ dependencies = [
[[package]]
name = "common-error"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"common-macro",
"http 1.3.1",
@@ -2139,7 +2138,7 @@ dependencies = [
[[package]]
name = "common-event-recorder"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"async-trait",
@@ -2161,7 +2160,7 @@ dependencies = [
[[package]]
name = "common-frontend"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"async-trait",
@@ -2183,14 +2182,13 @@ dependencies = [
[[package]]
name = "common-function"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"ahash 0.8.12",
"api",
"approx 0.5.1",
"arc-swap",
"arrow",
"arrow-cast",
"arrow-schema",
"async-trait",
"bincode",
@@ -2221,7 +2219,6 @@ dependencies = [
"h3o",
"hyperloglogplus",
"jsonb",
"jsonpath-rust 0.7.5",
"memchr",
"mito-codec",
"nalgebra",
@@ -2245,7 +2242,7 @@ dependencies = [
[[package]]
name = "common-greptimedb-telemetry"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"async-trait",
"common-runtime",
@@ -2262,7 +2259,7 @@ dependencies = [
[[package]]
name = "common-grpc"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"arrow-flight",
@@ -2297,7 +2294,7 @@ dependencies = [
[[package]]
name = "common-grpc-expr"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"common-base",
@@ -2317,7 +2314,7 @@ dependencies = [
[[package]]
name = "common-macro"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"greptime-proto",
"once_cell",
@@ -2328,7 +2325,7 @@ dependencies = [
[[package]]
name = "common-mem-prof"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"anyhow",
"common-error",
@@ -2344,7 +2341,7 @@ dependencies = [
[[package]]
name = "common-memory-manager"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"common-error",
"common-macro",
@@ -2357,7 +2354,7 @@ dependencies = [
[[package]]
name = "common-meta"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"anymap2",
"api",
@@ -2429,7 +2426,7 @@ dependencies = [
[[package]]
name = "common-options"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"common-grpc",
"humantime-serde",
@@ -2438,11 +2435,11 @@ dependencies = [
[[package]]
name = "common-plugins"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
[[package]]
name = "common-pprof"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"common-error",
"common-macro",
@@ -2454,7 +2451,7 @@ dependencies = [
[[package]]
name = "common-procedure"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"async-stream",
@@ -2483,7 +2480,7 @@ dependencies = [
[[package]]
name = "common-procedure-test"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"async-trait",
"common-procedure",
@@ -2493,7 +2490,7 @@ dependencies = [
[[package]]
name = "common-query"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"async-trait",
@@ -2519,7 +2516,7 @@ dependencies = [
[[package]]
name = "common-recordbatch"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"arc-swap",
"common-base",
@@ -2543,7 +2540,7 @@ dependencies = [
[[package]]
name = "common-runtime"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"async-trait",
"clap 4.5.40",
@@ -2572,7 +2569,7 @@ dependencies = [
[[package]]
name = "common-session"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"serde",
"strum 0.27.1",
@@ -2580,14 +2577,12 @@ dependencies = [
[[package]]
name = "common-sql"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"arrow-schema",
"common-base",
"common-decimal",
"common-error",
"common-macro",
"common-telemetry",
"common-time",
"datafusion-sql",
"datatypes",
@@ -2600,7 +2595,7 @@ dependencies = [
[[package]]
name = "common-stat"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"common-base",
"common-runtime",
@@ -2615,7 +2610,7 @@ dependencies = [
[[package]]
name = "common-telemetry"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"backtrace",
"common-base",
@@ -2644,7 +2639,7 @@ dependencies = [
[[package]]
name = "common-test-util"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"client",
"common-grpc",
@@ -2657,7 +2652,7 @@ dependencies = [
[[package]]
name = "common-time"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"arrow",
"chrono",
@@ -2675,7 +2670,7 @@ dependencies = [
[[package]]
name = "common-version"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"build-data",
"cargo-manifest",
@@ -2686,7 +2681,7 @@ dependencies = [
[[package]]
name = "common-wal"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"common-base",
"common-error",
@@ -2709,7 +2704,7 @@ dependencies = [
[[package]]
name = "common-workload"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"common-telemetry",
"serde",
@@ -4017,7 +4012,7 @@ dependencies = [
[[package]]
name = "datanode"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"arrow-flight",
@@ -4081,7 +4076,7 @@ dependencies = [
[[package]]
name = "datatypes"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"arrow",
"arrow-array",
@@ -4638,9 +4633,8 @@ dependencies = [
[[package]]
name = "etcd-client"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88365f1a5671eb2f7fc240adb216786bc6494b38ce15f1d26ad6eaa303d5e822"
version = "0.15.0"
source = "git+https://github.com/GreptimeTeam/etcd-client?rev=f62df834f0cffda355eba96691fe1a9a332b75a7#f62df834f0cffda355eba96691fe1a9a332b75a7"
dependencies = [
"http 1.3.1",
"prost 0.13.5",
@@ -4756,7 +4750,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "file-engine"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"async-trait",
@@ -4888,7 +4882,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
[[package]]
name = "flow"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"arrow",
@@ -4957,7 +4951,7 @@ dependencies = [
"sql",
"store-api",
"strum 0.27.1",
"substrait 1.0.0-beta.3",
"substrait 1.0.0-beta.2",
"table",
"tokio",
"tonic 0.13.1",
@@ -5018,7 +5012,7 @@ checksum = "28dd6caf6059519a65843af8fe2a3ae298b14b80179855aeb4adc2c1934ee619"
[[package]]
name = "frontend"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"arc-swap",
@@ -5040,7 +5034,6 @@ dependencies = [
"common-function",
"common-grpc",
"common-macro",
"common-memory-manager",
"common-meta",
"common-options",
"common-procedure",
@@ -5466,7 +5459,7 @@ dependencies = [
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=173efe5ec62722089db7c531c0b0d470a072b915#173efe5ec62722089db7c531c0b0d470a072b915"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=0423fa30203187c75e2937a668df1da699c8b96c#0423fa30203187c75e2937a668df1da699c8b96c"
dependencies = [
"prost 0.13.5",
"prost-types 0.13.5",
@@ -6234,7 +6227,7 @@ dependencies = [
[[package]]
name = "index"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"async-trait",
"asynchronous-codec",
@@ -7175,7 +7168,7 @@ checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "log-query"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"chrono",
"common-error",
@@ -7187,7 +7180,7 @@ dependencies = [
[[package]]
name = "log-store"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"async-stream",
"async-trait",
@@ -7488,7 +7481,7 @@ dependencies = [
[[package]]
name = "meta-client"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"async-trait",
@@ -7516,7 +7509,7 @@ dependencies = [
[[package]]
name = "meta-srv"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"async-trait",
@@ -7616,7 +7609,7 @@ dependencies = [
[[package]]
name = "metric-engine"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"aquamarine",
@@ -7713,7 +7706,7 @@ dependencies = [
[[package]]
name = "mito-codec"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"bytes",
@@ -7738,7 +7731,7 @@ dependencies = [
[[package]]
name = "mito2"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"aquamarine",
@@ -8478,7 +8471,7 @@ dependencies = [
[[package]]
name = "object-store"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"anyhow",
"bytes",
@@ -8763,7 +8756,7 @@ dependencies = [
[[package]]
name = "operator"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"ahash 0.8.12",
"api",
@@ -8823,7 +8816,7 @@ dependencies = [
"sql",
"sqlparser",
"store-api",
"substrait 1.0.0-beta.3",
"substrait 1.0.0-beta.2",
"table",
"tokio",
"tokio-util",
@@ -9109,7 +9102,7 @@ dependencies = [
[[package]]
name = "partition"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"async-trait",
@@ -9325,9 +9318,9 @@ dependencies = [
[[package]]
name = "pgwire"
version = "0.37.0"
version = "0.36.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02d86d57e732d40382ceb9bfea80901d839bae8571aa11c06af9177aed9dfb6c"
checksum = "70a2bcdcc4b20a88e0648778ecf00415bbd5b447742275439c22176835056f99"
dependencies = [
"async-trait",
"base64 0.22.1",
@@ -9346,7 +9339,6 @@ dependencies = [
"ryu",
"serde",
"serde_json",
"smol_str",
"stringprep",
"thiserror 2.0.17",
"tokio",
@@ -9467,7 +9459,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pipeline"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"ahash 0.8.12",
"api",
@@ -9623,7 +9615,7 @@ dependencies = [
[[package]]
name = "plugins"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"auth",
"catalog",
@@ -9925,7 +9917,7 @@ dependencies = [
[[package]]
name = "promql"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"ahash 0.8.12",
"async-trait",
@@ -10208,7 +10200,7 @@ dependencies = [
[[package]]
name = "puffin"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"async-compression 0.4.19",
"async-trait",
@@ -10250,7 +10242,7 @@ dependencies = [
[[package]]
name = "query"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"ahash 0.8.12",
"api",
@@ -10317,7 +10309,7 @@ dependencies = [
"sql",
"sqlparser",
"store-api",
"substrait 1.0.0-beta.3",
"substrait 1.0.0-beta.2",
"table",
"tokio",
"tokio-stream",
@@ -11511,11 +11503,10 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
[[package]]
name = "serde"
version = "1.0.228"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
dependencies = [
"serde_core",
"serde_derive",
]
@@ -11529,20 +11520,11 @@ dependencies = [
"serde",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
"proc-macro2",
"quote",
@@ -11669,7 +11651,7 @@ dependencies = [
[[package]]
name = "servers"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"ahash 0.8.12",
"api",
@@ -11695,7 +11677,6 @@ dependencies = [
"common-grpc",
"common-macro",
"common-mem-prof",
"common-memory-manager",
"common-meta",
"common-plugins",
"common-pprof",
@@ -11798,7 +11779,7 @@ dependencies = [
[[package]]
name = "session"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"ahash 0.8.12",
"api",
@@ -12018,16 +11999,6 @@ dependencies = [
"serde",
]
[[package]]
name = "smol_str"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3498b0a27f93ef1402f20eefacfaa1691272ac4eca1cdc8c596cb0a245d6cbf5"
dependencies = [
"borsh",
"serde_core",
]
[[package]]
name = "snafu"
version = "0.7.5"
@@ -12142,7 +12113,7 @@ dependencies = [
[[package]]
name = "sql"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"arrow-buffer",
@@ -12202,7 +12173,7 @@ dependencies = [
[[package]]
name = "sqlness-runner"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"async-trait",
"clap 4.5.40",
@@ -12233,7 +12204,7 @@ dependencies = [
[[package]]
name = "sqlparser"
version = "0.58.0"
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=a0ce2bc6eb3e804532932f39833c32432f5c9a39#a0ce2bc6eb3e804532932f39833c32432f5c9a39"
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=4b519a5caa95472cc3988f5556813a583dd35af1#4b519a5caa95472cc3988f5556813a583dd35af1"
dependencies = [
"lazy_static",
"log",
@@ -12257,7 +12228,7 @@ dependencies = [
[[package]]
name = "sqlparser_derive"
version = "0.3.0"
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=a0ce2bc6eb3e804532932f39833c32432f5c9a39#a0ce2bc6eb3e804532932f39833c32432f5c9a39"
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=4b519a5caa95472cc3988f5556813a583dd35af1#4b519a5caa95472cc3988f5556813a583dd35af1"
dependencies = [
"proc-macro2",
"quote",
@@ -12479,7 +12450,7 @@ dependencies = [
[[package]]
name = "standalone"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"async-trait",
"catalog",
@@ -12488,7 +12459,6 @@ dependencies = [
"common-config",
"common-error",
"common-macro",
"common-memory-manager",
"common-meta",
"common-options",
"common-procedure",
@@ -12521,7 +12491,7 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "store-api"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"aquamarine",
@@ -12734,7 +12704,7 @@ dependencies = [
[[package]]
name = "substrait"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"async-trait",
"bytes",
@@ -12857,7 +12827,7 @@ dependencies = [
[[package]]
name = "table"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"async-trait",
@@ -13126,7 +13096,7 @@ checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
[[package]]
name = "tests-fuzz"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"arbitrary",
"async-trait",
@@ -13170,7 +13140,7 @@ dependencies = [
[[package]]
name = "tests-integration"
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
dependencies = [
"api",
"arrow-flight",
@@ -13191,7 +13161,6 @@ dependencies = [
"common-event-recorder",
"common-frontend",
"common-grpc",
"common-memory-manager",
"common-meta",
"common-procedure",
"common-query",
@@ -13246,7 +13215,7 @@ dependencies = [
"sqlx",
"standalone",
"store-api",
"substrait 1.0.0-beta.3",
"substrait 1.0.0-beta.2",
"table",
"tempfile",
"time",

View File

@@ -75,7 +75,7 @@ members = [
resolver = "2"
[workspace.package]
version = "1.0.0-beta.3"
version = "1.0.0-beta.2"
edition = "2024"
license = "Apache-2.0"
@@ -103,7 +103,6 @@ aquamarine = "0.6"
arrow = { version = "56.2", features = ["prettyprint"] }
arrow-array = { version = "56.2", default-features = false, features = ["chrono-tz"] }
arrow-buffer = "56.2"
arrow-cast = "56.2"
arrow-flight = "56.2"
arrow-ipc = { version = "56.2", default-features = false, features = ["lz4", "zstd"] }
arrow-schema = { version = "56.2", features = ["serde"] }
@@ -144,14 +143,14 @@ derive_builder = "0.20"
derive_more = { version = "2.1", features = ["full"] }
dotenv = "0.15"
either = "1.15"
etcd-client = { version = "0.16.1", features = [
etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62df834f0cffda355eba96691fe1a9a332b75a7", features = [
"tls",
"tls-roots",
] }
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "173efe5ec62722089db7c531c0b0d470a072b915" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "0423fa30203187c75e2937a668df1da699c8b96c" }
hex = "0.4"
http = "1"
humantime = "2.1"
@@ -333,7 +332,7 @@ datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.g
datafusion-datasource = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "a0ce2bc6eb3e804532932f39833c32432f5c9a39" } # branch = "v0.58.x"
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "4b519a5caa95472cc3988f5556813a583dd35af1" } # branch = "v0.58.x"
[profile.release]
debug = 1

View File

@@ -14,12 +14,11 @@
| --- | -----| ------- | ----------- |
| `default_timezone` | String | Unset | The default timezone of the server. |
| `default_column_prefix` | String | Unset | The default column prefix for auto-created time index and value columns. |
| `max_in_flight_write_bytes` | String | Unset | Maximum total memory for all concurrent write request bodies and messages (HTTP, gRPC, Flight).<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
| `write_bytes_exhausted_policy` | String | Unset | Policy when write bytes quota is exhausted.<br/>Options: "wait" (default, 10s timeout), "wait(<duration>)" (e.g., "wait(30s)"), "fail" |
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
| `max_concurrent_queries` | Integer | `0` | The maximum number of concurrent queries allowed to be executed. Zero means unlimited.<br/>NOTE: This setting affects scan_memory_limit's privileged tier allocation.<br/>When set, 70% of queries get privileged memory access (full scan_memory_limit).<br/>The remaining 30% get standard tier access (70% of scan_memory_limit). |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
| `max_in_flight_write_bytes` | String | Unset | The maximum in-flight write bytes. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
@@ -27,12 +26,14 @@
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
| `http.max_total_body_memory` | String | Unset | Maximum total memory for all concurrent HTTP request bodies.<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
| `http.prom_validation_mode` | String | `strict` | Whether to enable validation for Prometheus remote write requests.<br/>Available options:<br/>- strict: deny invalid UTF-8 strings (default).<br/>- lossy: allow invalid UTF-8 strings, replace invalid characters with REPLACEMENT_CHARACTER(U+FFFD).<br/>- unchecked: do not validate strings. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `grpc.max_total_message_memory` | String | Unset | Maximum total memory for all concurrent gRPC request messages.<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
| `grpc.max_connection_age` | String | Unset | The maximum connection age for gRPC connection.<br/>The value can be a human-readable time string. For example: `10m` for ten minutes or `1h` for one hour.<br/>Refer to https://grpc.io/docs/guides/keepalive/ for more details. |
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
| `grpc.tls.mode` | String | `disable` | TLS mode. |
@@ -82,8 +83,6 @@
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. |
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
| `wal.connect_timeout` | String | `3s` | The connect timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
| `wal.timeout` | String | `3s` | The timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
| `wal.num_topics` | Integer | `64` | Number of topics.<br/>**It's only used when the provider is `kafka`**. |
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default)<br/>**It's only used when the provider is `kafka`**. |
@@ -226,8 +225,7 @@
| --- | -----| ------- | ----------- |
| `default_timezone` | String | Unset | The default timezone of the server. |
| `default_column_prefix` | String | Unset | The default column prefix for auto-created time index and value columns. |
| `max_in_flight_write_bytes` | String | Unset | Maximum total memory for all concurrent write request bodies and messages (HTTP, gRPC, Flight).<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
| `write_bytes_exhausted_policy` | String | Unset | Policy when write bytes quota is exhausted.<br/>Options: "wait" (default, 10s timeout), "wait(<duration>)" (e.g., "wait(30s)"), "fail" |
| `max_in_flight_write_bytes` | String | Unset | The maximum in-flight write bytes. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
@@ -238,6 +236,7 @@
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
| `http.max_total_body_memory` | String | Unset | Maximum total memory for all concurrent HTTP request bodies.<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
| `http.prom_validation_mode` | String | `strict` | Whether to enable validation for Prometheus remote write requests.<br/>Available options:<br/>- strict: deny invalid UTF-8 strings (default).<br/>- lossy: allow invalid UTF-8 strings, replace invalid characters with REPLACEMENT_CHARACTER(U+FFFD).<br/>- unchecked: do not validate strings. |
@@ -245,6 +244,7 @@
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.server_addr` | String | `127.0.0.1:4001` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `grpc.max_total_message_memory` | String | Unset | Maximum total memory for all concurrent gRPC request messages.<br/>Set to 0 to disable the limit. Default: "0" (unlimited) |
| `grpc.flight_compression` | String | `arrow_ipc` | Compression mode for frontend side Arrow IPC service. Available options:<br/>- `none`: disable all compression<br/>- `transport`: only enable gRPC transport compression (zstd)<br/>- `arrow_ipc`: only enable Arrow IPC compression (lz4)<br/>- `all`: enable all compression.<br/>Default to `none` |
| `grpc.max_connection_age` | String | Unset | The maximum connection age for gRPC connection.<br/>The value can be a human-readable time string. For example: `10m` for ten minutes or `1h` for one hour.<br/>Refer to https://grpc.io/docs/guides/keepalive/ for more details. |
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
@@ -344,15 +344,14 @@
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
| `backend` | String | `etcd_store` | The datastore for meta server.<br/>Available values:<br/>- `etcd_store` (default value)<br/>- `memory_store`<br/>- `postgres_store`<br/>- `mysql_store` |
| `meta_table_name` | String | `greptime_metakv` | Table name in RDS to store metadata. Effect when using a RDS kvbackend.<br/>**Only used when backend is `postgres_store`.** |
| `meta_schema_name` | String | `greptime_schema` | Optional PostgreSQL schema for metadata table and election table name qualification.<br/>When PostgreSQL public schema is not writable (e.g., PostgreSQL 15+ with restricted public),<br/>set this to a writable schema. GreptimeDB will use `meta_schema_name`.`meta_table_name`.<br/>**Only used when backend is `postgres_store`.** |
| `auto_create_schema` | Bool | `true` | Automatically create PostgreSQL schema if it doesn't exist.<br/>When enabled, the system will execute `CREATE SCHEMA IF NOT EXISTS <schema_name>`<br/>before creating metadata tables. This is useful in production environments where<br/>manual schema creation may be restricted.<br/>Default is true.<br/>Note: The PostgreSQL user must have CREATE SCHEMA permission for this to work.<br/>**Only used when backend is `postgres_store`.** |
| `meta_schema_name` | String | `greptime_schema` | Optional PostgreSQL schema for metadata table and election table name qualification.<br/>When PostgreSQL public schema is not writable (e.g., PostgreSQL 15+ with restricted public),<br/>set this to a writable schema. GreptimeDB will use `meta_schema_name`.`meta_table_name`.<br/>GreptimeDB will NOT create the schema automatically; please ensure it exists or the user has permission.<br/>**Only used when backend is `postgres_store`.** |
| `meta_election_lock_id` | Integer | `1` | Advisory lock id in PostgreSQL for election. Effect when using PostgreSQL as kvbackend<br/>Only used when backend is `postgres_store`. |
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
| `region_failure_detector_initialization_delay` | String | `10m` | The delay before starting region failure detection.<br/>This delay helps prevent Metasrv from triggering unnecessary region failovers before all Datanodes are fully started.<br/>Especially useful when the cluster is not deployed with GreptimeDB Operator and maintenance mode is not enabled. |
| `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.<br/>**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
| `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
| `heartbeat_interval` | String | `3s` | Base heartbeat interval for calculating distributed time constants.<br/>The frontend heartbeat interval is 6 times of the base heartbeat interval.<br/>The flownode/datanode heartbeat interval is 1 times of the base heartbeat interval.<br/>e.g., If the base heartbeat interval is 3s, the frontend heartbeat interval is 18s, the flownode/datanode heartbeat interval is 3s.<br/>If you change this value, you need to change the heartbeat interval of the flownode/frontend/datanode accordingly. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
@@ -362,18 +361,12 @@
| `backend_tls.cert_path` | String | `""` | Path to client certificate file (for client authentication)<br/>Like "/path/to/client.crt" |
| `backend_tls.key_path` | String | `""` | Path to client private key file (for client authentication)<br/>Like "/path/to/client.key" |
| `backend_tls.ca_cert_path` | String | `""` | Path to CA certificate file (for server certificate verification)<br/>Required when using custom CAs or self-signed certificates<br/>Leave empty to use system root certificates only<br/>Like "/path/to/ca.crt" |
| `backend_client` | -- | -- | The backend client options.<br/>Currently, only applicable when using etcd as the metadata store. |
| `backend_client.keep_alive_timeout` | String | `3s` | The keep alive timeout for backend client. |
| `backend_client.keep_alive_interval` | String | `10s` | The keep alive interval for backend client. |
| `backend_client.connect_timeout` | String | `3s` | The connect timeout for backend client. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.bind_addr` | String | `127.0.0.1:3002` | The address to bind the gRPC server. |
| `grpc.server_addr` | String | `127.0.0.1:3002` | The communication server address for the frontend and datanode to connect to metasrv.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `bind_addr`. |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
| `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
| `grpc.http2_keep_alive_interval` | String | `10s` | The server side HTTP/2 keep-alive interval |
| `grpc.http2_keep_alive_timeout` | String | `3s` | The server side HTTP/2 keep-alive timeout. |
| `http` | -- | -- | The HTTP server options. |
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
@@ -483,8 +476,6 @@
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. |
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
| `wal.connect_timeout` | String | `3s` | The connect timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
| `wal.timeout` | String | `3s` | The timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
| `wal.max_batch_bytes` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
| `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |

View File

@@ -169,14 +169,6 @@ recovery_parallelism = 2
## **It's only used when the provider is `kafka`**.
broker_endpoints = ["127.0.0.1:9092"]
## The connect timeout for kafka client.
## **It's only used when the provider is `kafka`**.
#+ connect_timeout = "3s"
## The timeout for kafka client.
## **It's only used when the provider is `kafka`**.
#+ timeout = "3s"
## The max size of a single producer batch.
## Warning: Kafka has a default limit of 1MB per message in a topic.
## **It's only used when the provider is `kafka`**.
@@ -233,7 +225,6 @@ overwrite_entry_start_id = false
# endpoint = "https://s3.amazonaws.com"
# region = "us-west-2"
# enable_virtual_host_style = false
# disable_ec2_metadata = false
# Example of using Oss as the storage.
# [storage]

View File

@@ -6,15 +6,9 @@ default_timezone = "UTC"
## @toml2docs:none-default
default_column_prefix = "greptime"
## Maximum total memory for all concurrent write request bodies and messages (HTTP, gRPC, Flight).
## Set to 0 to disable the limit. Default: "0" (unlimited)
## The maximum in-flight write bytes.
## @toml2docs:none-default
#+ max_in_flight_write_bytes = "1GB"
## Policy when write bytes quota is exhausted.
## Options: "wait" (default, 10s timeout), "wait(<duration>)" (e.g., "wait(30s)"), "fail"
## @toml2docs:none-default
#+ write_bytes_exhausted_policy = "wait"
#+ max_in_flight_write_bytes = "500MB"
## The runtime options.
#+ [runtime]
@@ -41,6 +35,10 @@ timeout = "0s"
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
## Set to 0 to disable limit.
body_limit = "64MB"
## Maximum total memory for all concurrent HTTP request bodies.
## Set to 0 to disable the limit. Default: "0" (unlimited)
## @toml2docs:none-default
#+ max_total_body_memory = "1GB"
## HTTP CORS support, it's turned on by default
## This allows browser to access http APIs without CORS restrictions
enable_cors = true
@@ -64,6 +62,10 @@ bind_addr = "127.0.0.1:4001"
server_addr = "127.0.0.1:4001"
## The number of server worker threads.
runtime_size = 8
## Maximum total memory for all concurrent gRPC request messages.
## Set to 0 to disable the limit. Default: "0" (unlimited)
## @toml2docs:none-default
#+ max_total_message_memory = "1GB"
## Compression mode for frontend side Arrow IPC service. Available options:
## - `none`: disable all compression
## - `transport`: only enable gRPC transport compression (zstd)
@@ -129,6 +131,7 @@ key_path = ""
## For now, gRPC tls config does not support auto reload.
watch = false
## MySQL server options.
[mysql]
## Whether to enable.

View File

@@ -34,17 +34,10 @@ meta_table_name = "greptime_metakv"
## Optional PostgreSQL schema for metadata table and election table name qualification.
## When PostgreSQL public schema is not writable (e.g., PostgreSQL 15+ with restricted public),
## set this to a writable schema. GreptimeDB will use `meta_schema_name`.`meta_table_name`.
## GreptimeDB will NOT create the schema automatically; please ensure it exists or the user has permission.
## **Only used when backend is `postgres_store`.**
meta_schema_name = "greptime_schema"
## Automatically create PostgreSQL schema if it doesn't exist.
## When enabled, the system will execute `CREATE SCHEMA IF NOT EXISTS <schema_name>`
## before creating metadata tables. This is useful in production environments where
## manual schema creation may be restricted.
## Default is true.
## Note: The PostgreSQL user must have CREATE SCHEMA permission for this to work.
## **Only used when backend is `postgres_store`.**
auto_create_schema = true
meta_schema_name = "greptime_schema"
## Advisory lock id in PostgreSQL for election. Effect when using PostgreSQL as kvbackend
## Only used when backend is `postgres_store`.
@@ -57,6 +50,9 @@ meta_election_lock_id = 1
## For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
selector = "round_robin"
## Store data in memory.
use_memory_store = false
## Whether to enable region failover.
## This feature is only available on GreptimeDB running on cluster mode and
## - Using Remote WAL
@@ -75,13 +71,6 @@ allow_region_failover_on_local_wal = false
## Max allowed idle time before removing node info from metasrv memory.
node_max_idle_time = "24hours"
## Base heartbeat interval for calculating distributed time constants.
## The frontend heartbeat interval is 6 times of the base heartbeat interval.
## The flownode/datanode heartbeat interval is 1 times of the base heartbeat interval.
## e.g., If the base heartbeat interval is 3s, the frontend heartbeat interval is 18s, the flownode/datanode heartbeat interval is 3s.
## If you change this value, you need to change the heartbeat interval of the flownode/frontend/datanode accordingly.
#+ heartbeat_interval = "3s"
## Whether to enable greptimedb telemetry. Enabled by default.
#+ enable_telemetry = true
@@ -120,16 +109,6 @@ key_path = ""
## Like "/path/to/ca.crt"
ca_cert_path = ""
## The backend client options.
## Currently, only applicable when using etcd as the metadata store.
#+ [backend_client]
## The keep alive timeout for backend client.
#+ keep_alive_timeout = "3s"
## The keep alive interval for backend client.
#+ keep_alive_interval = "10s"
## The connect timeout for backend client.
#+ connect_timeout = "3s"
## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
@@ -144,10 +123,6 @@ runtime_size = 8
max_recv_message_size = "512MB"
## The maximum send message size for gRPC server.
max_send_message_size = "512MB"
## The server side HTTP/2 keep-alive interval
#+ http2_keep_alive_interval = "10s"
## The server side HTTP/2 keep-alive timeout.
#+ http2_keep_alive_timeout = "3s"
## The HTTP server options.
[http]

View File

@@ -6,16 +6,6 @@ default_timezone = "UTC"
## @toml2docs:none-default
default_column_prefix = "greptime"
## Maximum total memory for all concurrent write request bodies and messages (HTTP, gRPC, Flight).
## Set to 0 to disable the limit. Default: "0" (unlimited)
## @toml2docs:none-default
#+ max_in_flight_write_bytes = "1GB"
## Policy when write bytes quota is exhausted.
## Options: "wait" (default, 10s timeout), "wait(<duration>)" (e.g., "wait(30s)"), "fail"
## @toml2docs:none-default
#+ write_bytes_exhausted_policy = "wait"
## Initialize all regions in the background during the startup.
## By default, it provides services after all regions have been initialized.
init_regions_in_background = false
@@ -32,6 +22,10 @@ max_concurrent_queries = 0
## Enable telemetry to collect anonymous usage data. Enabled by default.
#+ enable_telemetry = true
## The maximum in-flight write bytes.
## @toml2docs:none-default
#+ max_in_flight_write_bytes = "500MB"
## The runtime options.
#+ [runtime]
## The number of threads to execute the runtime for global read operations.
@@ -49,6 +43,10 @@ timeout = "0s"
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
## Set to 0 to disable limit.
body_limit = "64MB"
## Maximum total memory for all concurrent HTTP request bodies.
## Set to 0 to disable the limit. Default: "0" (unlimited)
## @toml2docs:none-default
#+ max_total_body_memory = "1GB"
## HTTP CORS support, it's turned on by default
## This allows browser to access http APIs without CORS restrictions
enable_cors = true
@@ -69,6 +67,10 @@ prom_validation_mode = "strict"
bind_addr = "127.0.0.1:4001"
## The number of server worker threads.
runtime_size = 8
## Maximum total memory for all concurrent gRPC request messages.
## Set to 0 to disable the limit. Default: "0" (unlimited)
## @toml2docs:none-default
#+ max_total_message_memory = "1GB"
## The maximum connection age for gRPC connection.
## The value can be a human-readable time string. For example: `10m` for ten minutes or `1h` for one hour.
## Refer to https://grpc.io/docs/guides/keepalive/ for more details.
@@ -228,14 +230,6 @@ recovery_parallelism = 2
## **It's only used when the provider is `kafka`**.
broker_endpoints = ["127.0.0.1:9092"]
## The connect timeout for kafka client.
## **It's only used when the provider is `kafka`**.
#+ connect_timeout = "3s"
## The timeout for kafka client.
## **It's only used when the provider is `kafka`**.
#+ timeout = "3s"
## Automatically create topics for WAL.
## Set to `true` to automatically create topics for WAL.
## Otherwise, use topics named `topic_name_prefix_[0..num_topics)`
@@ -338,7 +332,6 @@ max_running_procedures = 128
# endpoint = "https://s3.amazonaws.com"
# region = "us-west-2"
# enable_virtual_host_style = false
# disable_ec2_metadata = false
# Example of using Oss as the storage.
# [storage]

View File

@@ -15,6 +15,7 @@
*/
import * as core from "@actions/core";
import semver from "semver";
import {obtainClient} from "@/common";
interface RepoConfig {
@@ -57,18 +58,28 @@ const REPO_CONFIGS: Record<string, RepoConfig> = {
return ['bump-nightly-version.yml', version];
}
const parts = version.split('.');
if (parts.length !== 3) {
throw new Error('Invalid version format');
// Parse the version using semver library
const parsedVersion = semver.parse(version);
if (!parsedVersion) {
throw new Error(`Invalid semantic version format: ${version}`);
}
// If patch version (last number) is 0, it's a major version
// Return only major.minor version
if (parts[2] === '0') {
return ['bump-version.yml', `${parts[0]}.${parts[1]}`];
// If there is a pre-release identifier, throw an error for manual determination
if (parsedVersion.prerelease && parsedVersion.prerelease.length > 0) {
throw new Error(
`Pre-release version "${version}" requires manual determination of which workflow to use.\n` +
`Please choose based on the specific situation:\n` +
` - bump-version.yml (for major.minor format, such as ${parsedVersion.major}.${parsedVersion.minor})\n` +
` - bump-patch-version.yml (for patch version, such as ${parsedVersion.major}.${parsedVersion.minor}.${parsedVersion.patch})\n` +
`Considerations include: Is this the first pre-release version? Is it close to the official release?`
);
}
// Original logic for official versions
if (parsedVersion.patch === 0) {
return ['bump-version.yml', `${parsedVersion.major}.${parsedVersion.minor}`];
}
// Otherwise it's a patch version, use full version
return ['bump-patch-version.yml', version];
}
}

View File

@@ -13,7 +13,8 @@
"conventional-commits-parser": "^5.0.0",
"dayjs": "^1.11.11",
"dotenv": "^16.4.5",
"lodash": "^4.17.21"
"lodash": "^4.17.21",
"semver": "^7.7.3"
},
"devDependencies": {
"@types/conventional-commits-parser": "^5.0.0",

872  cyborg/pnpm-lock.yaml generated

File diff suppressed because it is too large.

View File

@@ -1,94 +0,0 @@
---
Feature Name: Vector Index
Tracking Issue: TBD
Date: 2025-12-04
Author: "TBD"
---
# Summary
Introduce a per-SST approximate nearest neighbor (ANN) index for `VECTOR(dim)` columns with a pluggable engine. USearch HNSW is the initial engine, while the design keeps VSAG (default when linked) and future engines selectable at DDL or alter time and encoded in the index metadata. The index is built alongside SST creation and accelerates `ORDER BY vec_*_distance(column, <literal vector>) LIMIT k` queries, falling back to the existing brute-force path when an index is unavailable or ineligible.
# Motivation
Vector distances are currently computed with nalgebra across all rows (O(N)) before sorting, which does not scale to millions of vectors. An on-disk ANN index with sub-linear search reduces latency and compute cost for common RAG, semantic search, and recommendation workloads without changing SQL.
# Details
## Current Behavior
`VECTOR(dim)` values are stored as binary blobs. Queries call `vec_cos_distance`/`vec_l2sq_distance`/`vec_dot_product` via nalgebra for every row and then sort; there is no indexing or caching.
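For illustration, a minimal Rust sketch of this brute-force path, assuming a simplified in-memory row representation rather than the actual query-engine code: every row gets a distance, then the full candidate set is sorted and truncated to `k`.

```rust
/// Hypothetical stand-in for today's O(N) path: score every row, sort, keep k.
/// `rows` pairs an SST row id with its vector; names are illustrative only.
fn brute_force_top_k(rows: &[(u64, Vec<f32>)], query: &[f32], k: usize) -> Vec<(u64, f32)> {
    // Squared L2 distance, analogous to `vec_l2sq_distance`.
    fn l2sq(a: &[f32], b: &[f32]) -> f32 {
        a.iter().zip(b).map(|(x, y)| (x - y) * (x - y)).sum()
    }
    // Distance is computed for every row before any ordering happens.
    let mut scored: Vec<(u64, f32)> = rows
        .iter()
        .map(|(row_id, vec)| (*row_id, l2sq(vec, query)))
        .collect();
    // Sort the whole candidate set by distance, then keep only the first k.
    scored.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
    scored.truncate(k);
    scored
}
```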
## Index Eligibility and Configuration
Only `VECTOR(dim)` columns can be indexed. A column metadata flag follows the existing column-option pattern with an intentionally small surface area:
- `engine`: `vsag` (default when the binding is built) or `usearch`. If a configured engine is unavailable at runtime, the builder logs and falls back to `usearch` while leaving the option intact for future rebuilds.
- `metric`: `cosine` (default), `l2sq`, or `dot`; mismatches with query functions force brute-force execution.
- `m`: HNSW graph connectivity (higher = denser graph, more memory, better recall), default `16`.
- `ef_construct`: build-time expansion, default `128`.
- `ef_search`: query-time expansion, default `64`; engines may clamp values.
Option semantics mirror HNSW defaults so both USearch and VSAG can honor them; engine-specific tunables stay in reserved key-value pairs inside the blob header for forward compatibility.
DDL reuses column extensions similar to inverted/fulltext indexes:
```sql
CREATE TABLE embeddings (
ts TIMESTAMP TIME INDEX,
id STRING PRIMARY KEY,
vec VECTOR(384) VECTOR INDEX WITH (engine = 'vsag', metric = 'cosine', ef_search = 64)
);
```
Altering column options toggles the flag, can switch engines (for example `usearch` -> `vsag`), and triggers rebuilds through the existing alter/compaction flow. Engine choice stays in table metadata and each blob header; new SSTs use the configured engine while older SSTs remain readable under their recorded engine until compaction or a manual rebuild rewrites them.
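As a non-normative illustration of the option surface above, the per-column settings and their defaults could be modeled roughly as follows; the type and field names are assumptions for this sketch, not the actual column metadata types:
```rust
/// Illustrative only: names and layout are assumptions, not the real metadata types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VectorIndexEngine {
    Usearch,
    Vsag,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VectorIndexMetric {
    Cosine,
    L2Sq,
    Dot,
}

/// Per-column vector index options with the defaults listed above.
#[derive(Debug, Clone)]
pub struct VectorIndexOptions {
    pub engine: VectorIndexEngine,
    pub metric: VectorIndexMetric,
    pub m: u32,
    pub ef_construct: u32,
    pub ef_search: u32,
}

impl Default for VectorIndexOptions {
    fn default() -> Self {
        Self {
            // Would be `Vsag` when that binding is compiled in.
            engine: VectorIndexEngine::Usearch,
            metric: VectorIndexMetric::Cosine,
            m: 16,
            ef_construct: 128,
            ef_search: 64,
        }
    }
}
```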
## Storage and Format
- One vector index per indexed column per SST, stored as a Puffin blob with type `greptime-vector-index-v1`.
- Each blob records the engine (`usearch`, `vsag`, future values) and engine parameters in the header so readers can select the matching decoder. Mixed-engine SSTs remain readable because the engine id travels with the blob.
- USearch uses `f32` vectors and SST row offsets (`u64`) as keys; nulls and `OpType::Delete` rows are skipped. Row ids are the absolute SST ordinal so readers can derive `RowSelection` directly from parquet row group lengths without extra side tables.
- Blob layout:
- Header: version, column id, dimension, engine id, metric, `m`, `ef_construct`, `ef_search`, and reserved engine-specific key-value pairs.
- Counts: total rows written and indexed rows.
- Payload: USearch binary produced by `save_to_buffer`.
- An empty index (no eligible vectors) results in no available index entry for that column.
- `puffin_manager` registers the blob type so caches and readers discover it alongside inverted/fulltext/bloom blobs in the same index file.
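To make the layout above concrete, the blob can be pictured as the following shape; the field names and integer widths here are illustrative assumptions, not the normative `greptime-vector-index-v1` encoding:
```rust
/// Illustrative only: widths and names are assumptions, not the on-disk encoding.
pub struct VectorIndexBlobHeader {
    pub version: u16,
    pub column_id: u32,
    pub dimension: u32,
    pub engine: String, // "usearch", "vsag", ...
    pub metric: String, // "cosine", "l2sq", "dot"
    pub m: u32,
    pub ef_construct: u32,
    pub ef_search: u32,
    /// Reserved engine-specific key-value pairs for forward compatibility.
    pub reserved: Vec<(String, String)>,
}

pub struct VectorIndexBlob {
    pub header: VectorIndexBlobHeader,
    /// Total rows written vs. rows actually indexed.
    pub total_rows: u64,
    pub indexed_rows: u64,
    /// Engine payload, e.g. the buffer produced by USearch `save_to_buffer`.
    pub payload: Vec<u8>,
}
```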
## Row Visibility and Duplicates
- The indexer increments `row_offset` for every incoming row (including skipped/null/delete rows) so offsets stay aligned with parquet ordering across row groups.
- Only `OpType::Put` rows with the expected dimension are inserted; `OpType::Delete` and malformed rows are skipped but still advance `row_offset`, matching the data plane's visibility rules.
- Multiple versions of the same primary key remain in the graph; the read path intersects search hits with the standard mito2 deduplication/visibility pipeline (sequence-aware dedup, delete filtering, projection) before returning results.
- Searches overfetch beyond `k` to compensate for rows discarded by visibility checks and to avoid reissuing index reads.
## Build Path (mito2 write)
Extend `sst::index::Indexer` to optionally create a `VectorIndexer` when region metadata marks a column as vector-indexed, mirroring how inverted/fulltext/bloom filters attach to `IndexerBuilderImpl` in `mito2`.
The indexer consumes `Batch`/`RecordBatch` data and shares memory tracking and abort semantics with existing indexers:
- Maintain a running `row_offset` that follows SST write order and spans row groups so the search result can be turned into `RowSelection`.
- For each `OpType::Put`, if the vector is non-null and matches the declared dimension, insert into USearch with `row_offset` as the key; otherwise skip.
- Track memory with existing index build metrics; on failure, abort only the vector index while keeping SST writing unaffected.
Engine selection is table-driven: the builder picks the configured engine (default `vsag`, fallback `usearch` if `vsag` is not compiled in) and dispatches to the matching implementation. Unknown engines skip index build with a warning.
On `finish`, serialize the engine-tagged index into the Puffin writer and record `IndexType::Vector` metadata for the column. `IndexOutput` and `FileMeta::indexes/available_indexes` gain a vector entry so manifest updates and `RegionVersion` surface per-column presence, following patterns used by inverted/fulltext/bloom indexes. Planner/metadata validation ensures that mismatched dimensions only reduce the indexed-row count and do not break reads.
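A minimal sketch of the write-side bookkeeping described above, using stand-in types (`VectorRow`, `OpType`, `AnnEngine`) rather than the real mito2/index interfaces:
```rust
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum OpType {
    Put,
    Delete,
}

/// One incoming row: its op type and the (possibly null) vector payload.
pub struct VectorRow {
    pub op: OpType,
    pub vector: Option<Vec<f32>>,
}

/// Anything that can ingest (row_offset, vector) pairs, e.g. a USearch HNSW graph.
pub trait AnnEngine {
    fn add(&mut self, key: u64, vector: &[f32]);
}

pub struct VectorIndexer<E: AnnEngine> {
    engine: E,
    dimension: usize,
    /// Absolute SST ordinal; advances for every row, even skipped ones.
    row_offset: u64,
    total_rows: u64,
    indexed_rows: u64,
}

impl<E: AnnEngine> VectorIndexer<E> {
    pub fn update(&mut self, rows: &[VectorRow]) {
        for row in rows {
            // Only `Put` rows with the declared dimension are inserted; deletes,
            // nulls, and wrong-dimension vectors are skipped but still advance the offset.
            if row.op == OpType::Put {
                if let Some(vec) = &row.vector {
                    if vec.len() == self.dimension {
                        self.engine.add(self.row_offset, vec);
                        self.indexed_rows += 1;
                    }
                }
            }
            self.row_offset += 1;
            self.total_rows += 1;
        }
    }
}
```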
## Read Path (mito2 query)
A planner rule in `query` identifies eligible plans on mito2 tables: a single `ORDER BY vec_cos_distance|vec_l2sq_distance|vec_dot_product(<vector column>, <literal vector>)` in ascending order plus a `LIMIT`/`TopK`. The rule rejects plans with multiple sort keys, non-literal query vectors, or additional projections that would change the distance expression and falls back to brute-force in those cases.
For eligible scans, build a `VectorIndexScan` execution node that:
- Consults SST metadata for `IndexType::Vector`, loads the index via Puffin using the existing `mito2::cache::index` infrastructure, and dispatches to the engine declared in the blob header (USearch/VSAG/etc.).
- Runs the engine's `search` with an overfetch (for example 2×k) to tolerate rows filtered by deletes, dimension mismatches, or late-stage dedup; keys already match SST row offsets produced by the writer.
- Converts hits to `RowSelection` using parquet row group lengths and reuses the parquet reader so visibility, projection, and deduplication logic stay unchanged; distances are recomputed with `vec_*_distance` before the final trim to k to guarantee ordering and to merge distributed partial results deterministically.
Any unsupported shape, load error, or cache miss falls back to the current brute-force execution path.
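As a rough sketch of the hit-to-row-group mapping (the real implementation would build a parquet `RowSelection`), assuming hits are absolute SST row offsets and row group lengths come from parquet metadata:
```rust
/// Map sorted, deduplicated hit offsets onto (row_group_index, local_row) pairs
/// using the row count of each parquet row group.
fn hits_to_row_groups(mut hits: Vec<u64>, row_group_lengths: &[u64]) -> Vec<(usize, u64)> {
    hits.sort_unstable();
    hits.dedup();

    let mut out = Vec::with_capacity(hits.len());
    let mut group_start = 0u64;
    let mut group_idx = 0usize;

    for hit in hits {
        // Advance to the row group containing this absolute offset.
        while group_idx < row_group_lengths.len()
            && hit >= group_start + row_group_lengths[group_idx]
        {
            group_start += row_group_lengths[group_idx];
            group_idx += 1;
        }
        if group_idx < row_group_lengths.len() {
            out.push((group_idx, hit - group_start));
        }
        // Offsets past the last row group are ignored (should not happen in practice).
    }
    out
}

fn main() {
    // Two row groups of 4 rows each; hits at absolute offsets 1, 5 and 6.
    let selected = hits_to_row_groups(vec![5, 1, 6], &[4, 4]);
    assert_eq!(selected, vec![(0, 1), (1, 1), (1, 2)]);
}
```
The final trim to k happens only after visibility filtering and distance recomputation, as described above.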
## Lifecycle and Maintenance
Lifecycle piggybacks on the existing SST/index flow: rebuilds run where other secondary indexes do, graphs are always rebuilt from source rows (no HNSW merge), and cleanup/versioning/caching reuse the existing Puffin and index cache paths.
# Implementation Plan
1. Add the `usearch` dependency (wrapper module in `index` or `mito2`) and map minimal HNSW options; keep an engine trait that allows plugging VSAG without changing the rest of the pipeline.
2. Introduce `IndexType::Vector` and a column metadata key for vector index options (including `engine`); add SQL parser and `SHOW CREATE TABLE` support for `VECTOR INDEX WITH (...)`.
3. Implement `vector_index` build/read modules under `mito2` (and `index` if shared), including Puffin serialization that records engine id, blob-type registration with `puffin_manager`, and integration with the `Indexer` builder, `IndexOutput`, manifest updates, and compaction rebuild.
4. Extend the query planner/execution to detect eligible plans and drive a `RowSelection`-based ANN scan with a fallback path, dispatching by engine at read time and using existing Puffin and index caches.
5. Add unit tests for serialization/search correctness and an end-to-end test covering plan rewrite, cache usage, engine selection, and fallback; add a mixed-engine test to confirm old USearch blobs still serve after a VSAG switch.
6. Follow up with an optional VSAG engine binding (feature flag), validate parity with USearch on dense vectors, exercise alternative algorithms (for example PQ), and flip the default `engine` to `vsag` when the binding is present.
# Alternatives
- **VSAG (follow-up engine):** C++ library with HNSW and additional algorithms (for example SINDI for sparse vectors and PQ) targeting in-memory and disk-friendly search. Provides parameter generators and a roadmap for GPU-assisted build and graph compression. Compared to FAISS it is newer with fewer integrations but bundles sparse/dense coverage and out-of-core focus in one engine. Fits the pluggable-engine design and would become the default `engine = 'vsag'` when linked; USearch remains available for lighter dependencies.
- **FAISS:** Broad feature set (IVF/IVFPQ/PQ/HNSW, GPU acceleration, scalar filtering, pre/post filters) and battle-tested performance across datasets, but it requires a heavier C++/GPU toolchain, has no official Rust binding, and is less disk-centric than VSAG; integrating it would add more build/distribution burden than USearch/VSAG.
- **Do nothing:** Keep brute-force evaluation, which remains O(N) and unacceptable at scale.

View File

@@ -67,7 +67,6 @@ tracing-appender.workspace = true
[dev-dependencies]
common-meta = { workspace = true, features = ["testing"] }
common-test-util.workspace = true
common-version.workspace = true
serde.workspace = true
tempfile.workspace = true

View File

@@ -15,8 +15,5 @@
mod object_store;
mod store;
pub use object_store::{
ObjectStoreConfig, PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection,
PrefixedS3Connection, new_fs_object_store,
};
pub use object_store::{ObjectStoreConfig, new_fs_object_store};
pub use store::StoreConfig;

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_base::secrets::{ExposeSecret, SecretString};
use common_base::secrets::SecretString;
use common_error::ext::BoxedError;
use object_store::services::{Azblob, Fs, Gcs, Oss, S3};
use object_store::util::{with_instrument_layers, with_retry_layers};
@@ -22,69 +22,9 @@ use snafu::ResultExt;
use crate::error::{self};
/// Trait to convert CLI field types to target struct field types.
/// This enables `Option<SecretString>` (CLI) -> `SecretString` (target) conversions,
/// allowing us to distinguish "not provided" from "provided but empty".
trait IntoField<T> {
fn into_field(self) -> T;
}
/// Identity conversion for types that are the same.
impl<T> IntoField<T> for T {
fn into_field(self) -> T {
self
}
}
/// Convert `Option<SecretString>` to `SecretString`, using default for None.
impl IntoField<SecretString> for Option<SecretString> {
fn into_field(self) -> SecretString {
self.unwrap_or_default()
}
}
/// Trait for checking if a field is effectively empty.
///
/// **`is_empty()`**: Checks if the field has no meaningful value
/// - Used when backend is enabled to validate required fields
/// - `None`, `Some("")`, `false`, or `""` are considered empty
trait FieldValidator {
/// Check if the field is empty (has no meaningful value).
fn is_empty(&self) -> bool;
}
/// String fields: empty if the string is empty
impl FieldValidator for String {
fn is_empty(&self) -> bool {
self.is_empty()
}
}
/// Bool fields: false is considered "empty", true is "provided"
impl FieldValidator for bool {
fn is_empty(&self) -> bool {
!self
}
}
/// Option<String> fields: None or empty content is empty
impl FieldValidator for Option<String> {
fn is_empty(&self) -> bool {
self.as_ref().is_none_or(|s| s.is_empty())
}
}
/// Option<SecretString> fields: None or empty secret is empty
/// For secrets, Some("") is treated as "not provided" for both checks
impl FieldValidator for Option<SecretString> {
fn is_empty(&self) -> bool {
self.as_ref().is_none_or(|s| s.expose_secret().is_empty())
}
}
macro_rules! wrap_with_clap_prefix {
(
$new_name:ident, $prefix:literal, $enable_flag:literal, $base:ty, {
$new_name:ident, $prefix:literal, $base:ty, {
$( $( #[doc = $doc:expr] )? $( #[alias = $alias:literal] )? $field:ident : $type:ty $( = $default:expr )? ),* $(,)?
}
) => {
@@ -94,16 +34,15 @@ macro_rules! wrap_with_clap_prefix {
$(
$( #[doc = $doc] )?
$( #[clap(alias = $alias)] )?
#[clap(long, requires = $enable_flag $(, default_value_t = $default )? )]
pub [<$prefix $field>]: $type,
#[clap(long $(, default_value_t = $default )? )]
[<$prefix $field>]: $type,
)*
}
impl From<$new_name> for $base {
fn from(w: $new_name) -> Self {
Self {
// Use into_field() to handle Option<SecretString> -> SecretString conversion
$( $field: w.[<$prefix $field>].into_field() ),*
$( $field: w.[<$prefix $field>] ),*
}
}
}
@@ -111,90 +50,9 @@ macro_rules! wrap_with_clap_prefix {
};
}
/// Macro for declarative backend validation.
///
/// # Validation Rules
///
/// For each storage backend (S3, OSS, GCS, Azblob), this function validates:
/// **When backend is enabled** (e.g., `--s3`): All required fields must be non-empty
///
/// Note: When backend is disabled, clap's `requires` attribute ensures no configuration
/// fields can be provided at parse time.
///
/// # Syntax
///
/// ```ignore
/// validate_backend!(
/// enable: self.enable_s3,
/// name: "S3",
/// required: [(field1, "name1"), (field2, "name2"), ...],
/// custom_validator: |missing| { ... } // optional
/// )
/// ```
///
/// # Arguments
///
/// - `enable`: Boolean expression indicating if backend is enabled
/// - `name`: Human-readable backend name for error messages
/// - `required`: Array of (field_ref, field_name) tuples for required fields
/// - `custom_validator`: Optional closure for complex validation logic
///
/// # Example
///
/// ```ignore
/// validate_backend!(
/// enable: self.enable_s3,
/// name: "S3",
/// required: [
/// (&self.s3.s3_bucket, "bucket"),
/// (&self.s3.s3_access_key_id, "access key ID"),
/// ]
/// )
/// ```
macro_rules! validate_backend {
(
enable: $enable:expr,
name: $backend_name:expr,
required: [ $( ($field:expr, $field_name:expr) ),* $(,)? ]
$(, custom_validator: $custom_validator:expr)?
) => {{
if $enable {
// Check required fields when backend is enabled
let mut missing = Vec::new();
$(
if FieldValidator::is_empty($field) {
missing.push($field_name);
}
)*
// Run custom validation if provided
$(
$custom_validator(&mut missing);
)?
if !missing.is_empty() {
return Err(BoxedError::new(
error::MissingConfigSnafu {
msg: format!(
"{} {} must be set when --{} is enabled.",
$backend_name,
missing.join(", "),
$backend_name.to_lowercase()
),
}
.build(),
));
}
}
Ok(())
}};
}
wrap_with_clap_prefix! {
PrefixedAzblobConnection,
"azblob-",
"enable_azblob",
AzblobConnection,
{
#[doc = "The container of the object store."]
@@ -202,9 +60,9 @@ wrap_with_clap_prefix! {
#[doc = "The root of the object store."]
root: String = Default::default(),
#[doc = "The account name of the object store."]
account_name: Option<SecretString>,
account_name: SecretString = Default::default(),
#[doc = "The account key of the object store."]
account_key: Option<SecretString>,
account_key: SecretString = Default::default(),
#[doc = "The endpoint of the object store."]
endpoint: String = Default::default(),
#[doc = "The SAS token of the object store."]
@@ -212,33 +70,9 @@ wrap_with_clap_prefix! {
}
}
impl PrefixedAzblobConnection {
pub fn validate(&self) -> Result<(), BoxedError> {
validate_backend!(
enable: true,
name: "AzBlob",
required: [
(&self.azblob_container, "container"),
(&self.azblob_root, "root"),
(&self.azblob_account_name, "account name"),
(&self.azblob_endpoint, "endpoint"),
],
custom_validator: |missing: &mut Vec<&str>| {
// account_key is only required if sas_token is not provided
if self.azblob_sas_token.is_none()
&& self.azblob_account_key.is_empty()
{
missing.push("account key (when sas_token is not provided)");
}
}
)
}
}
wrap_with_clap_prefix! {
PrefixedS3Connection,
"s3-",
"enable_s3",
S3Connection,
{
#[doc = "The bucket of the object store."]
@@ -246,39 +80,21 @@ wrap_with_clap_prefix! {
#[doc = "The root of the object store."]
root: String = Default::default(),
#[doc = "The access key ID of the object store."]
access_key_id: Option<SecretString>,
access_key_id: SecretString = Default::default(),
#[doc = "The secret access key of the object store."]
secret_access_key: Option<SecretString>,
secret_access_key: SecretString = Default::default(),
#[doc = "The endpoint of the object store."]
endpoint: Option<String>,
#[doc = "The region of the object store."]
region: Option<String>,
#[doc = "Enable virtual host style for the object store."]
enable_virtual_host_style: bool = Default::default(),
#[doc = "Disable EC2 metadata service for the object store."]
disable_ec2_metadata: bool = Default::default(),
}
}
impl PrefixedS3Connection {
pub fn validate(&self) -> Result<(), BoxedError> {
validate_backend!(
enable: true,
name: "S3",
required: [
(&self.s3_bucket, "bucket"),
(&self.s3_access_key_id, "access key ID"),
(&self.s3_secret_access_key, "secret access key"),
(&self.s3_region, "region"),
]
)
}
}
wrap_with_clap_prefix! {
PrefixedOssConnection,
"oss-",
"enable_oss",
OssConnection,
{
#[doc = "The bucket of the object store."]
@@ -286,33 +102,17 @@ wrap_with_clap_prefix! {
#[doc = "The root of the object store."]
root: String = Default::default(),
#[doc = "The access key ID of the object store."]
access_key_id: Option<SecretString>,
access_key_id: SecretString = Default::default(),
#[doc = "The access key secret of the object store."]
access_key_secret: Option<SecretString>,
access_key_secret: SecretString = Default::default(),
#[doc = "The endpoint of the object store."]
endpoint: String = Default::default(),
}
}
impl PrefixedOssConnection {
pub fn validate(&self) -> Result<(), BoxedError> {
validate_backend!(
enable: true,
name: "OSS",
required: [
(&self.oss_bucket, "bucket"),
(&self.oss_access_key_id, "access key ID"),
(&self.oss_access_key_secret, "access key secret"),
(&self.oss_endpoint, "endpoint"),
]
)
}
}
wrap_with_clap_prefix! {
PrefixedGcsConnection,
"gcs-",
"enable_gcs",
GcsConnection,
{
#[doc = "The root of the object store."]
@@ -322,72 +122,40 @@ wrap_with_clap_prefix! {
#[doc = "The scope of the object store."]
scope: String = Default::default(),
#[doc = "The credential path of the object store."]
credential_path: Option<SecretString>,
credential_path: SecretString = Default::default(),
#[doc = "The credential of the object store."]
credential: Option<SecretString>,
credential: SecretString = Default::default(),
#[doc = "The endpoint of the object store."]
endpoint: String = Default::default(),
}
}
impl PrefixedGcsConnection {
pub fn validate(&self) -> Result<(), BoxedError> {
validate_backend!(
enable: true,
name: "GCS",
required: [
(&self.gcs_bucket, "bucket"),
(&self.gcs_root, "root"),
(&self.gcs_scope, "scope"),
]
// No custom_validator needed: GCS supports Application Default Credentials (ADC)
// where neither credential_path nor credential is required.
// Endpoint is also optional (defaults to https://storage.googleapis.com).
)
}
}
/// Common config for object store.
///
/// # Dependency Enforcement
///
/// Each backend's configuration fields (e.g., `--s3-bucket`) requires its corresponding
/// enable flag (e.g., `--s3`) to be present. This is enforced by `clap` at parse time
/// using the `requires` attribute.
///
/// For example, attempting to use `--s3-bucket my-bucket` without `--s3` will result in:
/// ```text
/// error: The argument '--s3-bucket <BUCKET>' requires '--s3'
/// ```
///
/// This ensures that users cannot accidentally provide backend-specific configuration
/// without explicitly enabling that backend.
/// common config for object store.
#[derive(clap::Parser, Debug, Clone, PartialEq, Default)]
#[clap(group(clap::ArgGroup::new("storage_backend").required(false).multiple(false)))]
pub struct ObjectStoreConfig {
/// Whether to use S3 object store.
#[clap(long = "s3", group = "storage_backend")]
#[clap(long, alias = "s3")]
pub enable_s3: bool,
#[clap(flatten)]
pub s3: PrefixedS3Connection,
/// Whether to use OSS.
#[clap(long = "oss", group = "storage_backend")]
#[clap(long, alias = "oss")]
pub enable_oss: bool,
#[clap(flatten)]
pub oss: PrefixedOssConnection,
/// Whether to use GCS.
#[clap(long = "gcs", group = "storage_backend")]
#[clap(long, alias = "gcs")]
pub enable_gcs: bool,
#[clap(flatten)]
pub gcs: PrefixedGcsConnection,
/// Whether to use Azure Blob.
#[clap(long = "azblob", group = "storage_backend")]
#[clap(long, alias = "azblob")]
pub enable_azblob: bool,
#[clap(flatten)]
@@ -405,66 +173,52 @@ pub fn new_fs_object_store(root: &str) -> std::result::Result<ObjectStore, Boxed
Ok(with_instrument_layers(object_store, false))
}
macro_rules! gen_object_store_builder {
($method:ident, $field:ident, $conn_type:ty, $service_type:ty) => {
pub fn $method(&self) -> Result<ObjectStore, BoxedError> {
let config = <$conn_type>::from(self.$field.clone());
common_telemetry::info!(
"Building object store with {}: {:?}",
stringify!($field),
config
);
let object_store = ObjectStore::new(<$service_type>::from(&config))
.context(error::InitBackendSnafu)
.map_err(BoxedError::new)?
.finish();
Ok(with_instrument_layers(
with_retry_layers(object_store),
false,
))
}
};
}
impl ObjectStoreConfig {
gen_object_store_builder!(build_s3, s3, S3Connection, S3);
gen_object_store_builder!(build_oss, oss, OssConnection, Oss);
gen_object_store_builder!(build_gcs, gcs, GcsConnection, Gcs);
gen_object_store_builder!(build_azblob, azblob, AzblobConnection, Azblob);
pub fn validate(&self) -> Result<(), BoxedError> {
if self.enable_s3 {
self.s3.validate()?;
}
if self.enable_oss {
self.oss.validate()?;
}
if self.enable_gcs {
self.gcs.validate()?;
}
if self.enable_azblob {
self.azblob.validate()?;
}
Ok(())
}
/// Builds the object store from the config.
pub fn build(&self) -> Result<Option<ObjectStore>, BoxedError> {
self.validate()?;
if self.enable_s3 {
self.build_s3().map(Some)
let object_store = if self.enable_s3 {
let s3 = S3Connection::from(self.s3.clone());
common_telemetry::info!("Building object store with s3: {:?}", s3);
Some(
ObjectStore::new(S3::from(&s3))
.context(error::InitBackendSnafu)
.map_err(BoxedError::new)?
.finish(),
)
} else if self.enable_oss {
self.build_oss().map(Some)
let oss = OssConnection::from(self.oss.clone());
common_telemetry::info!("Building object store with oss: {:?}", oss);
Some(
ObjectStore::new(Oss::from(&oss))
.context(error::InitBackendSnafu)
.map_err(BoxedError::new)?
.finish(),
)
} else if self.enable_gcs {
self.build_gcs().map(Some)
let gcs = GcsConnection::from(self.gcs.clone());
common_telemetry::info!("Building object store with gcs: {:?}", gcs);
Some(
ObjectStore::new(Gcs::from(&gcs))
.context(error::InitBackendSnafu)
.map_err(BoxedError::new)?
.finish(),
)
} else if self.enable_azblob {
self.build_azblob().map(Some)
let azblob = AzblobConnection::from(self.azblob.clone());
common_telemetry::info!("Building object store with azblob: {:?}", azblob);
Some(
ObjectStore::new(Azblob::from(&azblob))
.context(error::InitBackendSnafu)
.map_err(BoxedError::new)?
.finish(),
)
} else {
Ok(None)
}
None
};
let object_store = object_store
.map(|object_store| with_instrument_layers(with_retry_layers(object_store), false));
Ok(object_store)
}
}

View File

@@ -19,7 +19,7 @@ use common_error::ext::BoxedError;
use common_meta::kv_backend::KvBackendRef;
use common_meta::kv_backend::chroot::ChrootKvBackend;
use common_meta::kv_backend::etcd::EtcdStore;
use meta_srv::metasrv::{BackendClientOptions, BackendImpl};
use meta_srv::metasrv::BackendImpl;
use meta_srv::utils::etcd::create_etcd_client_with_tls;
use servers::tls::{TlsMode, TlsOption};
@@ -61,12 +61,6 @@ pub struct StoreConfig {
#[cfg(feature = "pg_kvbackend")]
#[clap(long)]
pub meta_schema_name: Option<String>,
/// Automatically create PostgreSQL schema if it doesn't exist (default: true).
#[cfg(feature = "pg_kvbackend")]
#[clap(long, default_value_t = true)]
pub auto_create_schema: bool,
/// TLS mode for backend store connections (etcd, PostgreSQL, MySQL)
#[clap(long = "backend-tls-mode", value_enum, default_value = "disable")]
pub backend_tls_mode: TlsMode,
@@ -118,13 +112,9 @@ impl StoreConfig {
let kvbackend = match self.backend {
BackendImpl::EtcdStore => {
let tls_config = self.tls_config();
let etcd_client = create_etcd_client_with_tls(
store_addrs,
&BackendClientOptions::default(),
tls_config.as_ref(),
)
.await
.map_err(BoxedError::new)?;
let etcd_client = create_etcd_client_with_tls(store_addrs, tls_config.as_ref())
.await
.map_err(BoxedError::new)?;
Ok(EtcdStore::with_etcd_client(etcd_client, max_txn_ops))
}
#[cfg(feature = "pg_kvbackend")]
@@ -144,7 +134,6 @@ impl StoreConfig {
schema_name,
table_name,
max_txn_ops,
self.auto_create_schema,
)
.await
.map_err(BoxedError::new)?)

View File

@@ -14,7 +14,6 @@
mod export;
mod import;
mod storage_export;
use clap::Subcommand;
use client::DEFAULT_CATALOG_NAME;

File diff suppressed because it is too large

View File

@@ -1,373 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::path::PathBuf;
use common_base::secrets::{ExposeSecret, SecretString};
use common_error::ext::BoxedError;
use crate::common::{
PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection, PrefixedS3Connection,
};
/// Helper function to extract secret string from Option<SecretString>.
/// Returns empty string if None.
fn expose_optional_secret(secret: &Option<SecretString>) -> &str {
secret
.as_ref()
.map(|s| s.expose_secret().as_str())
.unwrap_or("")
}
/// Helper function to format root path with leading slash if non-empty.
fn format_root_path(root: &str) -> String {
if root.is_empty() {
String::new()
} else {
format!("/{}", root)
}
}
/// Helper function to mask multiple secrets in a string.
fn mask_secrets(mut sql: String, secrets: &[&str]) -> String {
for secret in secrets {
if !secret.is_empty() {
sql = sql.replace(secret, "[REDACTED]");
}
}
sql
}
/// Helper function to format storage URI.
fn format_uri(scheme: &str, bucket: &str, root: &str, path: &str) -> String {
let root = format_root_path(root);
format!("{}://{}{}/{}", scheme, bucket, root, path)
}
/// Trait for storage backends that can be used for data export.
pub trait StorageExport: Send + Sync {
/// Generate the storage path for COPY DATABASE command.
/// Returns (path, connection_string) where connection_string includes CONNECTION clause.
fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String);
/// Format the output path for logging purposes.
fn format_output_path(&self, file_path: &str) -> String;
/// Mask sensitive information in SQL commands for safe logging.
fn mask_sensitive_info(&self, sql: &str) -> String;
}
macro_rules! define_backend {
($name:ident, $config:ty) => {
#[derive(Clone)]
pub struct $name {
config: $config,
}
impl $name {
pub fn new(config: $config) -> Result<Self, BoxedError> {
config.validate()?;
Ok(Self { config })
}
}
};
}
/// Local file system storage backend.
#[derive(Clone)]
pub struct FsBackend {
output_dir: String,
}
impl FsBackend {
pub fn new(output_dir: String) -> Self {
Self { output_dir }
}
}
impl StorageExport for FsBackend {
fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
if self.output_dir.is_empty() {
unreachable!("output_dir must be set when not using remote storage")
}
let path = PathBuf::from(&self.output_dir)
.join(catalog)
.join(format!("{schema}/"))
.to_string_lossy()
.to_string();
(path, String::new())
}
fn format_output_path(&self, file_path: &str) -> String {
format!("{}/{}", self.output_dir, file_path)
}
fn mask_sensitive_info(&self, sql: &str) -> String {
sql.to_string()
}
}
define_backend!(S3Backend, PrefixedS3Connection);
impl StorageExport for S3Backend {
fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
let s3_path = format_uri(
"s3",
&self.config.s3_bucket,
&self.config.s3_root,
&format!("{}/{}/", catalog, schema),
);
let mut connection_options = vec![
format!(
"ACCESS_KEY_ID='{}'",
expose_optional_secret(&self.config.s3_access_key_id)
),
format!(
"SECRET_ACCESS_KEY='{}'",
expose_optional_secret(&self.config.s3_secret_access_key)
),
];
if let Some(region) = &self.config.s3_region {
connection_options.push(format!("REGION='{}'", region));
}
if let Some(endpoint) = &self.config.s3_endpoint {
connection_options.push(format!("ENDPOINT='{}'", endpoint));
}
let connection_str = format!(" CONNECTION ({})", connection_options.join(", "));
(s3_path, connection_str)
}
fn format_output_path(&self, file_path: &str) -> String {
format_uri(
"s3",
&self.config.s3_bucket,
&self.config.s3_root,
file_path,
)
}
fn mask_sensitive_info(&self, sql: &str) -> String {
mask_secrets(
sql.to_string(),
&[
expose_optional_secret(&self.config.s3_access_key_id),
expose_optional_secret(&self.config.s3_secret_access_key),
],
)
}
}
define_backend!(OssBackend, PrefixedOssConnection);
impl StorageExport for OssBackend {
fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
let oss_path = format_uri(
"oss",
&self.config.oss_bucket,
&self.config.oss_root,
&format!("{}/{}/", catalog, schema),
);
let connection_options = [
format!(
"ACCESS_KEY_ID='{}'",
expose_optional_secret(&self.config.oss_access_key_id)
),
format!(
"ACCESS_KEY_SECRET='{}'",
expose_optional_secret(&self.config.oss_access_key_secret)
),
];
let connection_str = format!(" CONNECTION ({})", connection_options.join(", "));
(oss_path, connection_str)
}
fn format_output_path(&self, file_path: &str) -> String {
format_uri(
"oss",
&self.config.oss_bucket,
&self.config.oss_root,
file_path,
)
}
fn mask_sensitive_info(&self, sql: &str) -> String {
mask_secrets(
sql.to_string(),
&[
expose_optional_secret(&self.config.oss_access_key_id),
expose_optional_secret(&self.config.oss_access_key_secret),
],
)
}
}
define_backend!(GcsBackend, PrefixedGcsConnection);
impl StorageExport for GcsBackend {
fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
let gcs_path = format_uri(
"gcs",
&self.config.gcs_bucket,
&self.config.gcs_root,
&format!("{}/{}/", catalog, schema),
);
let mut connection_options = Vec::new();
let credential_path = expose_optional_secret(&self.config.gcs_credential_path);
if !credential_path.is_empty() {
connection_options.push(format!("CREDENTIAL_PATH='{}'", credential_path));
}
let credential = expose_optional_secret(&self.config.gcs_credential);
if !credential.is_empty() {
connection_options.push(format!("CREDENTIAL='{}'", credential));
}
if !self.config.gcs_endpoint.is_empty() {
connection_options.push(format!("ENDPOINT='{}'", self.config.gcs_endpoint));
}
let connection_str = if connection_options.is_empty() {
String::new()
} else {
format!(" CONNECTION ({})", connection_options.join(", "))
};
(gcs_path, connection_str)
}
fn format_output_path(&self, file_path: &str) -> String {
format_uri(
"gcs",
&self.config.gcs_bucket,
&self.config.gcs_root,
file_path,
)
}
fn mask_sensitive_info(&self, sql: &str) -> String {
mask_secrets(
sql.to_string(),
&[
expose_optional_secret(&self.config.gcs_credential_path),
expose_optional_secret(&self.config.gcs_credential),
],
)
}
}
define_backend!(AzblobBackend, PrefixedAzblobConnection);
impl StorageExport for AzblobBackend {
fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
let azblob_path = format_uri(
"azblob",
&self.config.azblob_container,
&self.config.azblob_root,
&format!("{}/{}/", catalog, schema),
);
let mut connection_options = vec![
format!(
"ACCOUNT_NAME='{}'",
expose_optional_secret(&self.config.azblob_account_name)
),
format!(
"ACCOUNT_KEY='{}'",
expose_optional_secret(&self.config.azblob_account_key)
),
];
if let Some(sas_token) = &self.config.azblob_sas_token {
connection_options.push(format!("SAS_TOKEN='{}'", sas_token));
}
let connection_str = format!(" CONNECTION ({})", connection_options.join(", "));
(azblob_path, connection_str)
}
fn format_output_path(&self, file_path: &str) -> String {
format_uri(
"azblob",
&self.config.azblob_container,
&self.config.azblob_root,
file_path,
)
}
fn mask_sensitive_info(&self, sql: &str) -> String {
mask_secrets(
sql.to_string(),
&[
expose_optional_secret(&self.config.azblob_account_name),
expose_optional_secret(&self.config.azblob_account_key),
],
)
}
}
#[derive(Clone)]
pub enum StorageType {
Fs(FsBackend),
S3(S3Backend),
Oss(OssBackend),
Gcs(GcsBackend),
Azblob(AzblobBackend),
}
impl StorageExport for StorageType {
fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
match self {
StorageType::Fs(backend) => backend.get_storage_path(catalog, schema),
StorageType::S3(backend) => backend.get_storage_path(catalog, schema),
StorageType::Oss(backend) => backend.get_storage_path(catalog, schema),
StorageType::Gcs(backend) => backend.get_storage_path(catalog, schema),
StorageType::Azblob(backend) => backend.get_storage_path(catalog, schema),
}
}
fn format_output_path(&self, file_path: &str) -> String {
match self {
StorageType::Fs(backend) => backend.format_output_path(file_path),
StorageType::S3(backend) => backend.format_output_path(file_path),
StorageType::Oss(backend) => backend.format_output_path(file_path),
StorageType::Gcs(backend) => backend.format_output_path(file_path),
StorageType::Azblob(backend) => backend.format_output_path(file_path),
}
}
fn mask_sensitive_info(&self, sql: &str) -> String {
match self {
StorageType::Fs(backend) => backend.mask_sensitive_info(sql),
StorageType::S3(backend) => backend.mask_sensitive_info(sql),
StorageType::Oss(backend) => backend.mask_sensitive_info(sql),
StorageType::Gcs(backend) => backend.mask_sensitive_info(sql),
StorageType::Azblob(backend) => backend.mask_sensitive_info(sql),
}
}
}
impl StorageType {
/// Returns true if the storage backend is remote (not local filesystem).
pub fn is_remote_storage(&self) -> bool {
!matches!(self, StorageType::Fs(_))
}
}

View File

@@ -253,6 +253,12 @@ pub enum Error {
error: ObjectStoreError,
},
#[snafu(display("S3 config need be set"))]
S3ConfigNotSet {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Output directory not set"))]
OutputDirNotSet {
#[snafu(implicit)]
@@ -358,9 +364,9 @@ impl ErrorExt for Error {
Error::Other { source, .. } => source.status_code(),
Error::OpenDal { .. } | Error::InitBackend { .. } => StatusCode::Internal,
Error::OutputDirNotSet { .. } | Error::EmptyStoreAddrs { .. } => {
StatusCode::InvalidArguments
}
Error::S3ConfigNotSet { .. }
| Error::OutputDirNotSet { .. }
| Error::EmptyStoreAddrs { .. } => StatusCode::InvalidArguments,
Error::BuildRuntime { source, .. } => source.status_code(),

View File

@@ -20,7 +20,6 @@ use async_trait::async_trait;
use clap::Parser;
use common_base::Plugins;
use common_config::Configurable;
use common_meta::distributed_time_constants::init_distributed_time_constants;
use common_telemetry::info;
use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
use common_version::{short_version, verbose_version};
@@ -155,6 +154,8 @@ pub struct StartCommand {
#[clap(short, long)]
selector: Option<String>,
#[clap(long)]
use_memory_store: Option<bool>,
#[clap(long)]
enable_region_failover: Option<bool>,
#[clap(long)]
http_addr: Option<String>,
@@ -184,6 +185,7 @@ impl Debug for StartCommand {
.field("store_addrs", &self.sanitize_store_addrs())
.field("config_file", &self.config_file)
.field("selector", &self.selector)
.field("use_memory_store", &self.use_memory_store)
.field("enable_region_failover", &self.enable_region_failover)
.field("http_addr", &self.http_addr)
.field("http_timeout", &self.http_timeout)
@@ -265,6 +267,10 @@ impl StartCommand {
.context(error::UnsupportedSelectorTypeSnafu { selector_type })?;
}
if let Some(use_memory_store) = self.use_memory_store {
opts.use_memory_store = use_memory_store;
}
if let Some(enable_region_failover) = self.enable_region_failover {
opts.enable_region_failover = enable_region_failover;
}
@@ -321,7 +327,6 @@ impl StartCommand {
log_versions(verbose_version(), short_version(), APP_NAME);
maybe_activate_heap_profile(&opts.component.memory);
create_resource_limit_metrics(APP_NAME);
init_distributed_time_constants(opts.component.heartbeat_interval);
info!("Metasrv start command: {:#?}", self);
@@ -384,6 +389,7 @@ mod tests {
server_addr = "127.0.0.1:3002"
store_addr = "127.0.0.1:2379"
selector = "LeaseBased"
use_memory_store = false
[logging]
level = "debug"
@@ -462,6 +468,7 @@ mod tests {
server_addr = "127.0.0.1:3002"
datanode_lease_secs = 15
selector = "LeaseBased"
use_memory_store = false
[http]
addr = "127.0.0.1:4000"

View File

@@ -552,8 +552,9 @@ impl StartCommand {
let grpc_handler = fe_instance.clone() as Arc<dyn GrpcQueryHandlerWithBoxedError>;
let weak_grpc_handler = Arc::downgrade(&grpc_handler);
frontend_instance_handler
.set_handler(weak_grpc_handler)
.await;
.lock()
.unwrap()
.replace(weak_grpc_handler);
// set the frontend invoker for flownode
let flow_streaming_engine = flownode.flow_engine().streaming_engine();

View File

@@ -59,6 +59,15 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to canonicalize path: {}", path))]
CanonicalizePath {
path: String,
#[snafu(source)]
error: std::io::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid path '{}': expected a file, not a directory", path))]
InvalidPath {
path: String,
@@ -73,7 +82,8 @@ impl ErrorExt for Error {
Error::TomlFormat { .. }
| Error::LoadLayeredConfig { .. }
| Error::FileWatch { .. }
| Error::InvalidPath { .. } => StatusCode::InvalidArguments,
| Error::InvalidPath { .. }
| Error::CanonicalizePath { .. } => StatusCode::InvalidArguments,
Error::SerdeJson { .. } => StatusCode::Unexpected,
}
}

View File

@@ -30,7 +30,7 @@ use common_telemetry::{error, info, warn};
use notify::{EventKind, RecursiveMode, Watcher};
use snafu::ResultExt;
use crate::error::{FileWatchSnafu, InvalidPathSnafu, Result};
use crate::error::{CanonicalizePathSnafu, FileWatchSnafu, InvalidPathSnafu, Result};
/// Configuration for the file watcher behavior.
#[derive(Debug, Clone, Default)]
@@ -41,10 +41,15 @@ pub struct FileWatcherConfig {
impl FileWatcherConfig {
pub fn new() -> Self {
Default::default()
Self::default()
}
pub fn include_remove_events(mut self) -> Self {
pub fn with_modify_and_create(mut self) -> Self {
self.include_remove_events = false;
self
}
pub fn with_remove_events(mut self) -> Self {
self.include_remove_events = true;
self
}
@@ -88,8 +93,11 @@ impl FileWatcherBuilder {
path: path.display().to_string(),
}
);
self.file_paths.push(path.to_path_buf());
// Canonicalize the path for reliable comparison with event paths
let canonical = path.canonicalize().context(CanonicalizePathSnafu {
path: path.display().to_string(),
})?;
self.file_paths.push(canonical);
Ok(self)
}
@@ -136,6 +144,7 @@ impl FileWatcherBuilder {
}
let config = self.config;
let watched_files: HashSet<PathBuf> = self.file_paths.iter().cloned().collect();
info!(
"Spawning file watcher for paths: {:?} (watching parent directories)",
@@ -156,7 +165,25 @@ impl FileWatcherBuilder {
continue;
}
info!(?event.kind, ?event.paths, "Detected folder change");
// Check if any of the event paths match our watched files
let is_watched_file = event.paths.iter().any(|event_path| {
// Try to canonicalize the event path for comparison
// If the file was deleted, canonicalize will fail, so we also
// compare the raw path
if let Ok(canonical) = event_path.canonicalize()
&& watched_files.contains(&canonical)
{
return true;
}
// For deleted files, compare using the raw path
watched_files.contains(event_path)
});
if !is_watched_file {
continue;
}
info!(?event.kind, ?event.paths, "Detected file change");
callback();
}
Err(err) => {
@@ -274,4 +301,55 @@ mod tests {
"Watcher should have detected file recreation"
);
}
#[test]
fn test_file_watcher_ignores_other_files() {
common_telemetry::init_default_ut_logging();
let dir = create_temp_dir("test_file_watcher_other");
let watched_file = dir.path().join("watched.txt");
let other_file = dir.path().join("other.txt");
// Create both files
std::fs::write(&watched_file, "watched content").unwrap();
std::fs::write(&other_file, "other content").unwrap();
let counter = Arc::new(AtomicUsize::new(0));
let counter_clone = counter.clone();
FileWatcherBuilder::new()
.watch_path(&watched_file)
.unwrap()
.config(FileWatcherConfig::new())
.spawn(move || {
counter_clone.fetch_add(1, Ordering::SeqCst);
})
.unwrap();
// Give watcher time to start
std::thread::sleep(Duration::from_millis(100));
// Modify the other file - should NOT trigger callback
std::fs::write(&other_file, "modified other content").unwrap();
// Wait for potential event
std::thread::sleep(Duration::from_millis(500));
assert_eq!(
counter.load(Ordering::SeqCst),
0,
"Watcher should not have detected changes to other files"
);
// Now modify the watched file - SHOULD trigger callback
std::fs::write(&watched_file, "modified watched content").unwrap();
// Wait for the event to be processed
std::thread::sleep(Duration::from_millis(500));
assert!(
counter.load(Ordering::SeqCst) >= 1,
"Watcher should have detected change to watched file"
);
}
}

View File

@@ -27,7 +27,6 @@ const SECRET_ACCESS_KEY: &str = "secret_access_key";
const SESSION_TOKEN: &str = "session_token";
const REGION: &str = "region";
const ENABLE_VIRTUAL_HOST_STYLE: &str = "enable_virtual_host_style";
const DISABLE_EC2_METADATA: &str = "disable_ec2_metadata";
pub fn is_supported_in_s3(key: &str) -> bool {
[
@@ -37,7 +36,6 @@ pub fn is_supported_in_s3(key: &str) -> bool {
SESSION_TOKEN,
REGION,
ENABLE_VIRTUAL_HOST_STYLE,
DISABLE_EC2_METADATA,
]
.contains(&key)
}
@@ -84,21 +82,6 @@ pub fn build_s3_backend(
}
}
if let Some(disable_str) = connection.get(DISABLE_EC2_METADATA) {
let disable = disable_str.as_str().parse::<bool>().map_err(|e| {
error::InvalidConnectionSnafu {
msg: format!(
"failed to parse the option {}={}, {}",
DISABLE_EC2_METADATA, disable_str, e
),
}
.build()
})?;
if disable {
builder = builder.disable_ec2_metadata();
}
}
// TODO(weny): Consider finding a better way to eliminate duplicate code.
Ok(ObjectStore::new(builder)
.context(error::BuildBackendSnafu)?
@@ -126,7 +109,6 @@ mod tests {
assert!(is_supported_in_s3(SESSION_TOKEN));
assert!(is_supported_in_s3(REGION));
assert!(is_supported_in_s3(ENABLE_VIRTUAL_HOST_STYLE));
assert!(is_supported_in_s3(DISABLE_EC2_METADATA));
assert!(!is_supported_in_s3("foo"))
}
}

View File

@@ -17,10 +17,9 @@ ahash.workspace = true
api.workspace = true
arc-swap = "1.0"
arrow.workspace = true
arrow-cast.workspace = true
arrow-schema.workspace = true
async-trait.workspace = true
bincode = "=1.3.3"
bincode = "1.3"
catalog.workspace = true
chrono.workspace = true
common-base.workspace = true
@@ -47,7 +46,6 @@ geohash = { version = "0.13", optional = true }
h3o = { version = "0.6", optional = true }
hyperloglogplus = "0.4"
jsonb.workspace = true
jsonpath-rust = "0.7.5"
memchr = "2.7"
mito-codec.workspace = true
nalgebra.workspace = true

View File

@@ -13,24 +13,17 @@
// limitations under the License.
use std::fmt::{self, Display};
use std::str::FromStr;
use std::sync::Arc;
use arrow::array::{ArrayRef, BinaryViewArray, StringViewArray, StructArray};
use arrow::compute;
use arrow::datatypes::{Float64Type, Int64Type, UInt64Type};
use datafusion_common::DataFusionError;
use datafusion_common::arrow::array::{
Array, AsArray, BinaryViewBuilder, BooleanBuilder, Float64Builder, Int64Builder,
StringViewBuilder,
};
use datafusion_common::arrow::datatypes::DataType;
use datafusion_common::{DataFusionError, Result};
use datafusion_expr::type_coercion::aggregates::STRINGS;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
use datatypes::arrow_array::string_array_value_at_index;
use datatypes::json::JsonStructureSettings;
use jsonpath_rust::JsonPath;
use serde_json::Value;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature};
use crate::function::{Function, extract_args};
use crate::helper;
@@ -165,7 +158,11 @@ impl JsonGetString {
impl Default for JsonGetString {
fn default() -> Self {
Self {
signature: Signature::any(2, Volatility::Immutable),
// TODO(LFC): Use a more clear type here instead of "Binary" for Json input, once we have a "Json" type.
signature: helper::one_of_sigs2(
vec![DataType::Binary, DataType::BinaryView],
vec![DataType::Utf8, DataType::Utf8View],
),
}
}
}
@@ -175,7 +172,7 @@ impl Function for JsonGetString {
Self::NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
Ok(DataType::Utf8View)
}
@@ -183,203 +180,33 @@ impl Function for JsonGetString {
&self.signature
}
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
let [arg0, arg1] = extract_args(self.name(), &args)?;
let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
let jsons = arg0.as_binary_view();
let arg1 = compute::cast(&arg1, &DataType::Utf8View)?;
let paths = arg1.as_string_view();
let result = match arg0.data_type() {
DataType::Binary | DataType::LargeBinary | DataType::BinaryView => {
let arg0 = compute::cast(&arg0, &DataType::BinaryView)?;
let jsons = arg0.as_binary_view();
jsonb_get_string(jsons, paths)?
}
DataType::Struct(_) => {
let jsons = arg0.as_struct();
json_struct_get_string(jsons, paths)?
}
_ => {
return Err(DataFusionError::Execution(format!(
"{} not supported argument type {}",
Self::NAME,
arg0.data_type(),
)));
}
};
let size = jsons.len();
let mut builder = StringViewBuilder::with_capacity(size);
Ok(ColumnarValue::Array(result))
}
}
fn jsonb_get_string(jsons: &BinaryViewArray, paths: &StringViewArray) -> Result<ArrayRef> {
let size = jsons.len();
let mut builder = StringViewBuilder::with_capacity(size);
for i in 0..size {
let json = jsons.is_valid(i).then(|| jsons.value(i));
let path = paths.is_valid(i).then(|| paths.value(i));
let result = match (json, path) {
(Some(json), Some(path)) => {
get_json_by_path(json, path).and_then(|json| jsonb::to_str(&json).ok())
}
_ => None,
};
builder.append_option(result);
}
Ok(Arc::new(builder.finish()))
}
fn json_struct_get_string(jsons: &StructArray, paths: &StringViewArray) -> Result<ArrayRef> {
let size = jsons.len();
let mut builder = StringViewBuilder::with_capacity(size);
for i in 0..size {
if jsons.is_null(i) || paths.is_null(i) {
builder.append_null();
continue;
}
let path = paths.value(i);
// naively assume the JSON path is our kind of indexing to the field, by removing its "root"
let field_path = path.replace("$.", "");
let column = jsons.column_by_name(&field_path);
if let Some(column) = column {
if let Some(v) = string_array_value_at_index(column, i) {
builder.append_value(v);
} else {
builder.append_value(arrow_cast::display::array_value_to_string(column, i)?);
}
} else {
let Some(raw) = jsons
.column_by_name(JsonStructureSettings::RAW_FIELD)
.and_then(|x| string_array_value_at_index(x, i))
else {
builder.append_null();
continue;
};
let path: JsonPath<Value> = JsonPath::try_from(path).map_err(|e| {
DataFusionError::Execution(format!("{path} is not a valid JSON path: {e}"))
})?;
// the wanted field is not retrievable from the JSON struct columns directly, we have
// to combine everything (columns and the "_raw") into a complete JSON value to find it
let value = json_struct_to_value(raw, jsons, i)?;
match path.find(&value) {
Value::Null => builder.append_null(),
Value::Array(values) => match values.as_slice() {
[] => builder.append_null(),
[x] => {
if let Some(s) = x.as_str() {
builder.append_value(s)
} else {
builder.append_value(x.to_string())
}
}
x => builder.append_value(
x.iter()
.map(|v| v.to_string())
.collect::<Vec<_>>()
.join(", "),
),
},
// Safety: guarded by the returns of `path.find` as documented
_ => unreachable!(),
}
}
}
Ok(Arc::new(builder.finish()))
}
fn json_struct_to_value(raw: &str, jsons: &StructArray, i: usize) -> Result<Value> {
let Ok(mut json) = Value::from_str(raw) else {
return Err(DataFusionError::Internal(format!(
"inner field '{}' is not a valid JSON string",
JsonStructureSettings::RAW_FIELD
)));
};
for (column_name, column) in jsons.column_names().into_iter().zip(jsons.columns()) {
if column_name == JsonStructureSettings::RAW_FIELD {
continue;
}
let (json_pointer, field) = if let Some((json_object, field)) = column_name.rsplit_once(".")
{
let json_pointer = format!("/{}", json_object.replace(".", "/"));
(json_pointer, field)
} else {
("".to_string(), column_name)
};
let Some(json_object) = json
.pointer_mut(&json_pointer)
.and_then(|x| x.as_object_mut())
else {
return Err(DataFusionError::Internal(format!(
"value at JSON pointer '{}' is not an object",
json_pointer
)));
};
macro_rules! insert {
($column: ident, $i: ident, $json_object: ident, $field: ident) => {{
if let Some(value) = $column
.is_valid($i)
.then(|| serde_json::Value::from($column.value($i)))
{
$json_object.insert($field.to_string(), value);
for i in 0..size {
let json = jsons.is_valid(i).then(|| jsons.value(i));
let path = paths.is_valid(i).then(|| paths.value(i));
let result = match (json, path) {
(Some(json), Some(path)) => {
get_json_by_path(json, path).and_then(|json| jsonb::to_str(&json).ok())
}
}};
_ => None,
};
builder.append_option(result);
}
match column.data_type() {
// boolean => Value::Bool
DataType::Boolean => {
let column = column.as_boolean();
insert!(column, i, json_object, field);
}
// int => Value::Number
DataType::Int64 => {
let column = column.as_primitive::<Int64Type>();
insert!(column, i, json_object, field);
}
DataType::UInt64 => {
let column = column.as_primitive::<UInt64Type>();
insert!(column, i, json_object, field);
}
DataType::Float64 => {
let column = column.as_primitive::<Float64Type>();
insert!(column, i, json_object, field);
}
// string => Value::String
DataType::Utf8 => {
let column = column.as_string::<i32>();
insert!(column, i, json_object, field);
}
DataType::LargeUtf8 => {
let column = column.as_string::<i64>();
insert!(column, i, json_object, field);
}
DataType::Utf8View => {
let column = column.as_string_view();
insert!(column, i, json_object, field);
}
// other => Value::Array and Value::Object
_ => {
return Err(DataFusionError::NotImplemented(format!(
"{} is not yet supported to be executed with field {} of datatype {}",
JsonGetString::NAME,
column_name,
column.data_type()
)));
}
}
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
}
Ok(json)
}
impl Display for JsonGetString {
@@ -469,13 +296,11 @@ impl Display for JsonGetObject {
mod tests {
use std::sync::Arc;
use arrow::array::{Float64Array, Int64Array, StructArray};
use arrow_schema::Field;
use datafusion_common::ScalarValue;
use datafusion_common::arrow::array::{BinaryArray, BinaryViewArray, StringArray};
use datafusion_common::arrow::datatypes::{Float64Type, Int64Type};
use datatypes::types::parse_string_to_jsonb;
use serde_json::json;
use super::*;
@@ -649,123 +474,42 @@ mod tests {
r#"{"a": "d", "b": {"c": "e"}, "c": "f"}"#,
r#"{"a": "g", "b": "h", "c": {"a": "g"}}"#,
];
let paths = vec!["$.a.b", "$.a", ""];
let results = [Some("a"), Some("d"), None];
// complete JSON is:
// {
// "kind": "foo",
// "payload": {
// "code": 404,
// "success": false,
// "result": {
// "error": "not found",
// "time_cost": 1.234
// }
// }
// }
let json_struct: ArrayRef = Arc::new(StructArray::new(
vec![
Field::new("kind", DataType::Utf8, true),
Field::new("payload.code", DataType::Int64, true),
Field::new("payload.result.time_cost", DataType::Float64, true),
Field::new(JsonStructureSettings::RAW_FIELD, DataType::Utf8View, true),
]
.into(),
vec![
Arc::new(StringArray::from_iter([Some("foo")])) as ArrayRef,
Arc::new(Int64Array::from_iter([Some(404)])),
Arc::new(Float64Array::from_iter([Some(1.234)])),
Arc::new(StringViewArray::from_iter([Some(
json! ({
"payload": {
"success": false,
"result": {
"error": "not found"
}
}
})
.to_string(),
)])),
],
None,
));
let paths = vec![
"$.a.b",
"$.a",
"",
"$.kind",
"$.payload.code",
"$.payload.result.time_cost",
"$.payload",
"$.payload.success",
"$.payload.result",
"$.payload.result.error",
"$.payload.result.not-exists",
"$.payload.not-exists",
"$.not-exists",
"$",
];
let expects = [
Some("a"),
Some("d"),
None,
Some("foo"),
Some("404"),
Some("1.234"),
Some(
r#"{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}"#,
),
Some("false"),
Some(r#"{"error":"not found","time_cost":1.234}"#),
Some("not found"),
None,
None,
None,
Some(
r#"{"kind":"foo","payload":{"code":404,"result":{"error":"not found","time_cost":1.234},"success":false}}"#,
),
];
let mut jsons = json_strings
let jsonbs = json_strings
.iter()
.map(|s| {
let value = jsonb::parse_value(s.as_bytes()).unwrap();
Arc::new(BinaryArray::from_iter_values([value.to_vec()])) as ArrayRef
value.to_vec()
})
.collect::<Vec<_>>();
let json_struct_arrays =
std::iter::repeat_n(json_struct, expects.len() - jsons.len()).collect::<Vec<_>>();
jsons.extend(json_struct_arrays);
for i in 0..jsons.len() {
let json = &jsons[i];
let path = paths[i];
let expect = expects[i];
let args = ScalarFunctionArgs {
args: vec![
ColumnarValue::Array(Arc::new(BinaryArray::from_iter_values(jsonbs))),
ColumnarValue::Array(Arc::new(StringArray::from_iter_values(paths))),
],
arg_fields: vec![],
number_rows: 3,
return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
config_options: Arc::new(Default::default()),
};
let result = json_get_string
.invoke_with_args(args)
.and_then(|x| x.to_array(3))
.unwrap();
let vector = result.as_string_view();
let args = ScalarFunctionArgs {
args: vec![
ColumnarValue::Array(json.clone()),
ColumnarValue::Scalar(path.into()),
],
arg_fields: vec![],
number_rows: 1,
return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
config_options: Arc::new(Default::default()),
};
let result = json_get_string
.invoke_with_args(args)
.and_then(|x| x.to_array(1))
.unwrap();
let result = result.as_string_view();
assert_eq!(1, result.len());
let actual = result.is_valid(0).then(|| result.value(0));
assert_eq!(actual, expect);
assert_eq!(3, vector.len());
for (i, gt) in results.iter().enumerate() {
let result = vector.is_valid(i).then(|| vector.value(i));
assert_eq!(*gt, result);
}
}
#[test]
fn test_json_get_object() -> Result<()> {
fn test_json_get_object() -> datafusion_common::Result<()> {
let udf = JsonGetObject::default();
assert_eq!("json_get_object", udf.name());
assert_eq!(

View File

@@ -13,7 +13,6 @@
// limitations under the License.
use std::any::Any;
use std::time::Duration;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
@@ -36,14 +35,6 @@ pub enum Error {
#[snafu(display("Memory semaphore unexpectedly closed"))]
MemorySemaphoreClosed,
#[snafu(display(
"Timeout waiting for memory quota: requested {requested_bytes} bytes, waited {waited:?}"
))]
MemoryAcquireTimeout {
requested_bytes: u64,
waited: Duration,
},
}
impl ErrorExt for Error {
@@ -53,7 +44,6 @@ impl ErrorExt for Error {
match self {
MemoryLimitExceeded { .. } => StatusCode::RuntimeResourcesExhausted,
MemorySemaphoreClosed => StatusCode::Unexpected,
MemoryAcquireTimeout { .. } => StatusCode::RuntimeResourcesExhausted,
}
}

View File

@@ -1,168 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
/// Memory permit granularity for different use cases.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PermitGranularity {
/// 1 KB per permit
///
/// Use for:
/// - HTTP/gRPC request limiting (small, high-concurrency operations)
/// - Small batch operations
/// - Scenarios requiring fine-grained fairness
Kilobyte,
/// 1 MB per permit (default)
///
/// Use for:
/// - Query execution memory management
/// - Compaction memory control
/// - Large, long-running operations
#[default]
Megabyte,
}
impl PermitGranularity {
/// Returns the number of bytes per permit.
#[inline]
pub const fn bytes(self) -> u64 {
match self {
Self::Kilobyte => 1024,
Self::Megabyte => 1024 * 1024,
}
}
/// Returns a human-readable string representation.
pub const fn as_str(self) -> &'static str {
match self {
Self::Kilobyte => "1KB",
Self::Megabyte => "1MB",
}
}
/// Converts bytes to permits based on this granularity.
///
/// Rounds up to ensure the requested bytes are fully covered.
/// Clamped to Semaphore::MAX_PERMITS.
#[inline]
pub fn bytes_to_permits(self, bytes: u64) -> u32 {
use tokio::sync::Semaphore;
let granularity_bytes = self.bytes();
bytes
.saturating_add(granularity_bytes - 1)
.saturating_div(granularity_bytes)
.min(Semaphore::MAX_PERMITS as u64)
.min(u32::MAX as u64) as u32
}
/// Converts permits to bytes based on this granularity.
#[inline]
pub fn permits_to_bytes(self, permits: u32) -> u64 {
(permits as u64).saturating_mul(self.bytes())
}
}
impl fmt::Display for PermitGranularity {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.as_str())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_bytes_to_permits_kilobyte() {
let granularity = PermitGranularity::Kilobyte;
// Exact multiples
assert_eq!(granularity.bytes_to_permits(1024), 1);
assert_eq!(granularity.bytes_to_permits(2048), 2);
assert_eq!(granularity.bytes_to_permits(10 * 1024), 10);
// Rounds up
assert_eq!(granularity.bytes_to_permits(1), 1);
assert_eq!(granularity.bytes_to_permits(1025), 2);
assert_eq!(granularity.bytes_to_permits(2047), 2);
}
#[test]
fn test_bytes_to_permits_megabyte() {
let granularity = PermitGranularity::Megabyte;
// Exact multiples
assert_eq!(granularity.bytes_to_permits(1024 * 1024), 1);
assert_eq!(granularity.bytes_to_permits(2 * 1024 * 1024), 2);
// Rounds up
assert_eq!(granularity.bytes_to_permits(1), 1);
assert_eq!(granularity.bytes_to_permits(1024), 1);
assert_eq!(granularity.bytes_to_permits(1024 * 1024 + 1), 2);
}
#[test]
fn test_bytes_to_permits_zero_bytes() {
assert_eq!(PermitGranularity::Kilobyte.bytes_to_permits(0), 0);
assert_eq!(PermitGranularity::Megabyte.bytes_to_permits(0), 0);
}
#[test]
fn test_bytes_to_permits_clamps_to_maximum() {
use tokio::sync::Semaphore;
let max_permits = (Semaphore::MAX_PERMITS as u64).min(u32::MAX as u64) as u32;
assert_eq!(
PermitGranularity::Kilobyte.bytes_to_permits(u64::MAX),
max_permits
);
assert_eq!(
PermitGranularity::Megabyte.bytes_to_permits(u64::MAX),
max_permits
);
}
#[test]
fn test_permits_to_bytes() {
assert_eq!(PermitGranularity::Kilobyte.permits_to_bytes(1), 1024);
assert_eq!(PermitGranularity::Kilobyte.permits_to_bytes(10), 10 * 1024);
assert_eq!(PermitGranularity::Megabyte.permits_to_bytes(1), 1024 * 1024);
assert_eq!(
PermitGranularity::Megabyte.permits_to_bytes(10),
10 * 1024 * 1024
);
}
#[test]
fn test_round_trip_conversion() {
// Kilobyte: bytes -> permits -> bytes (should round up)
let kb = PermitGranularity::Kilobyte;
let permits = kb.bytes_to_permits(1500);
let bytes = kb.permits_to_bytes(permits);
assert!(bytes >= 1500); // Must cover original request
assert_eq!(bytes, 2048); // 2KB
// Megabyte: bytes -> permits -> bytes (should round up)
let mb = PermitGranularity::Megabyte;
let permits = mb.bytes_to_permits(1500);
let bytes = mb.permits_to_bytes(permits);
assert!(bytes >= 1500);
assert_eq!(bytes, 1024 * 1024); // 1MB
}
}
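The round-up rule implemented by bytes_to_permits above can be reproduced with plain integer arithmetic. The following standalone sketch is illustrative only: it hard-codes a 1 KB granularity and omits the Semaphore::MAX_PERMITS clamp shown in the original code.

// Illustrative sketch only: fixed 1 KB granularity, no MAX_PERMITS clamp.
fn bytes_to_permits_1kb(bytes: u64) -> u64 {
    const GRANULARITY: u64 = 1024; // 1 KB per permit
    // Round up so the requested bytes are fully covered by whole permits.
    bytes.saturating_add(GRANULARITY - 1) / GRANULARITY
}

fn main() {
    assert_eq!(bytes_to_permits_1kb(0), 0); // zero bytes need no permits
    assert_eq!(bytes_to_permits_1kb(1), 1); // anything non-zero needs at least one permit
    assert_eq!(bytes_to_permits_1kb(1024), 1); // exact multiple
    assert_eq!(bytes_to_permits_1kb(1025), 2); // spills into a second permit
}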

View File

@@ -17,7 +17,7 @@ use std::{fmt, mem};
use common_telemetry::debug;
use tokio::sync::{OwnedSemaphorePermit, TryAcquireError};
use crate::manager::{MemoryMetrics, MemoryQuota};
use crate::manager::{MemoryMetrics, MemoryQuota, bytes_to_permits, permits_to_bytes};
/// Guard representing a slice of reserved memory.
pub struct MemoryGuard<M: MemoryMetrics> {
@@ -49,9 +49,7 @@ impl<M: MemoryMetrics> MemoryGuard<M> {
pub fn granted_bytes(&self) -> u64 {
match &self.state {
GuardState::Unlimited => 0,
GuardState::Limited { permit, quota } => {
quota.permits_to_bytes(permit.num_permits() as u32)
}
GuardState::Limited { permit, .. } => permits_to_bytes(permit.num_permits() as u32),
}
}
@@ -67,7 +65,7 @@ impl<M: MemoryMetrics> MemoryGuard<M> {
return true;
}
let additional_permits = quota.bytes_to_permits(bytes);
let additional_permits = bytes_to_permits(bytes);
match quota
.semaphore
@@ -101,12 +99,11 @@ impl<M: MemoryMetrics> MemoryGuard<M> {
return true;
}
let release_permits = quota.bytes_to_permits(bytes);
let release_permits = bytes_to_permits(bytes);
match permit.split(release_permits as usize) {
Some(released_permit) => {
let released_bytes =
quota.permits_to_bytes(released_permit.num_permits() as u32);
let released_bytes = permits_to_bytes(released_permit.num_permits() as u32);
drop(released_permit);
quota.update_in_use_metric();
debug!("Early released {} bytes from memory guard", released_bytes);
@@ -124,7 +121,7 @@ impl<M: MemoryMetrics> Drop for MemoryGuard<M> {
if let GuardState::Limited { permit, quota } =
mem::replace(&mut self.state, GuardState::Unlimited)
{
let bytes = quota.permits_to_bytes(permit.num_permits() as u32);
let bytes = permits_to_bytes(permit.num_permits() as u32);
drop(permit);
quota.update_in_use_metric();
debug!("Released memory: {} bytes", bytes);

View File

@@ -19,7 +19,6 @@
//! share the same allocation logic while using their own metrics.
mod error;
mod granularity;
mod guard;
mod manager;
mod policy;
@@ -28,9 +27,8 @@ mod policy;
mod tests;
pub use error::{Error, Result};
pub use granularity::PermitGranularity;
pub use guard::MemoryGuard;
pub use manager::{MemoryManager, MemoryMetrics};
pub use manager::{MemoryManager, MemoryMetrics, PERMIT_GRANULARITY_BYTES};
pub use policy::{DEFAULT_MEMORY_WAIT_TIMEOUT, OnExhaustedPolicy};
/// No-op metrics implementation for testing.

View File

@@ -17,12 +17,11 @@ use std::sync::Arc;
use snafu::ensure;
use tokio::sync::{Semaphore, TryAcquireError};
use crate::error::{
MemoryAcquireTimeoutSnafu, MemoryLimitExceededSnafu, MemorySemaphoreClosedSnafu, Result,
};
use crate::granularity::PermitGranularity;
use crate::error::{MemoryLimitExceededSnafu, MemorySemaphoreClosedSnafu, Result};
use crate::guard::MemoryGuard;
use crate::policy::OnExhaustedPolicy;
/// Minimum bytes controlled by one semaphore permit.
pub const PERMIT_GRANULARITY_BYTES: u64 = 1 << 20; // 1 MB
/// Trait for recording memory usage metrics.
pub trait MemoryMetrics: Clone + Send + Sync + 'static {
@@ -37,17 +36,10 @@ pub struct MemoryManager<M: MemoryMetrics> {
quota: Option<MemoryQuota<M>>,
}
impl<M: MemoryMetrics + Default> Default for MemoryManager<M> {
fn default() -> Self {
Self::new(0, M::default())
}
}
#[derive(Clone)]
pub(crate) struct MemoryQuota<M: MemoryMetrics> {
pub(crate) semaphore: Arc<Semaphore>,
pub(crate) limit_permits: u32,
pub(crate) granularity: PermitGranularity,
pub(crate) metrics: M,
}
@@ -55,25 +47,19 @@ impl<M: MemoryMetrics> MemoryManager<M> {
/// Creates a new memory manager with the given limit in bytes.
/// `limit_bytes = 0` disables the limit.
pub fn new(limit_bytes: u64, metrics: M) -> Self {
Self::with_granularity(limit_bytes, PermitGranularity::default(), metrics)
}
/// Creates a new memory manager with specified granularity.
pub fn with_granularity(limit_bytes: u64, granularity: PermitGranularity, metrics: M) -> Self {
if limit_bytes == 0 {
metrics.set_limit(0);
return Self { quota: None };
}
let limit_permits = granularity.bytes_to_permits(limit_bytes);
let limit_aligned_bytes = granularity.permits_to_bytes(limit_permits);
let limit_permits = bytes_to_permits(limit_bytes);
let limit_aligned_bytes = permits_to_bytes(limit_permits);
metrics.set_limit(limit_aligned_bytes as i64);
Self {
quota: Some(MemoryQuota {
semaphore: Arc::new(Semaphore::new(limit_permits as usize)),
limit_permits,
granularity,
metrics,
}),
}
@@ -83,7 +69,7 @@ impl<M: MemoryMetrics> MemoryManager<M> {
pub fn limit_bytes(&self) -> u64 {
self.quota
.as_ref()
.map(|quota| quota.permits_to_bytes(quota.limit_permits))
.map(|quota| permits_to_bytes(quota.limit_permits))
.unwrap_or(0)
}
@@ -91,7 +77,7 @@ impl<M: MemoryMetrics> MemoryManager<M> {
pub fn used_bytes(&self) -> u64 {
self.quota
.as_ref()
.map(|quota| quota.permits_to_bytes(quota.used_permits()))
.map(|quota| permits_to_bytes(quota.used_permits()))
.unwrap_or(0)
}
@@ -99,7 +85,7 @@ impl<M: MemoryMetrics> MemoryManager<M> {
pub fn available_bytes(&self) -> u64 {
self.quota
.as_ref()
.map(|quota| quota.permits_to_bytes(quota.available_permits_clamped()))
.map(|quota| permits_to_bytes(quota.available_permits_clamped()))
.unwrap_or(0)
}
@@ -112,13 +98,13 @@ impl<M: MemoryMetrics> MemoryManager<M> {
match &self.quota {
None => Ok(MemoryGuard::unlimited()),
Some(quota) => {
let permits = quota.bytes_to_permits(bytes);
let permits = bytes_to_permits(bytes);
ensure!(
permits <= quota.limit_permits,
MemoryLimitExceededSnafu {
requested_bytes: bytes,
limit_bytes: self.limit_bytes()
limit_bytes: permits_to_bytes(quota.limit_permits),
}
);
@@ -139,7 +125,7 @@ impl<M: MemoryMetrics> MemoryManager<M> {
match &self.quota {
None => Some(MemoryGuard::unlimited()),
Some(quota) => {
let permits = quota.bytes_to_permits(bytes);
let permits = bytes_to_permits(bytes);
match quota.semaphore.clone().try_acquire_many_owned(permits) {
Ok(permit) => {
@@ -154,56 +140,9 @@ impl<M: MemoryMetrics> MemoryManager<M> {
}
}
}
/// Acquires memory based on the given policy.
///
/// - For `OnExhaustedPolicy::Wait`: Waits up to the timeout duration for memory to become available
/// - For `OnExhaustedPolicy::Fail`: Returns immediately if memory is not available
///
/// # Errors
/// - `MemoryLimitExceeded`: Requested bytes exceed the total limit (both policies), or memory is currently exhausted (Fail policy only)
/// - `MemoryAcquireTimeout`: Timeout elapsed while waiting for memory (Wait policy only)
/// - `MemorySemaphoreClosed`: The internal semaphore is unexpectedly closed (rare, indicates system issue)
pub async fn acquire_with_policy(
&self,
bytes: u64,
policy: OnExhaustedPolicy,
) -> Result<MemoryGuard<M>> {
match policy {
OnExhaustedPolicy::Wait { timeout } => {
match tokio::time::timeout(timeout, self.acquire(bytes)).await {
Ok(Ok(guard)) => Ok(guard),
Ok(Err(e)) => Err(e),
Err(_elapsed) => {
// Timeout elapsed while waiting
MemoryAcquireTimeoutSnafu {
requested_bytes: bytes,
waited: timeout,
}
.fail()
}
}
}
OnExhaustedPolicy::Fail => self.try_acquire(bytes).ok_or_else(|| {
MemoryLimitExceededSnafu {
requested_bytes: bytes,
limit_bytes: self.limit_bytes(),
}
.build()
}),
}
}
}
impl<M: MemoryMetrics> MemoryQuota<M> {
pub(crate) fn bytes_to_permits(&self, bytes: u64) -> u32 {
self.granularity.bytes_to_permits(bytes)
}
pub(crate) fn permits_to_bytes(&self, permits: u32) -> u64 {
self.granularity.permits_to_bytes(permits)
}
pub(crate) fn used_permits(&self) -> u32 {
self.limit_permits
.saturating_sub(self.available_permits_clamped())
@@ -216,7 +155,19 @@ impl<M: MemoryMetrics> MemoryQuota<M> {
}
pub(crate) fn update_in_use_metric(&self) {
let bytes = self.permits_to_bytes(self.used_permits());
let bytes = permits_to_bytes(self.used_permits());
self.metrics.set_in_use(bytes as i64);
}
}
pub(crate) fn bytes_to_permits(bytes: u64) -> u32 {
bytes
.saturating_add(PERMIT_GRANULARITY_BYTES - 1)
.saturating_div(PERMIT_GRANULARITY_BYTES)
.min(Semaphore::MAX_PERMITS as u64)
.min(u32::MAX as u64) as u32
}
pub(crate) fn permits_to_bytes(permits: u32) -> u64 {
(permits as u64).saturating_mul(PERMIT_GRANULARITY_BYTES)
}
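Putting the pieces of this file together, a minimal in-crate usage sketch (mirroring the style of tests.rs further down) could look roughly like the following. It assumes NoOpMetrics implements Default, which this diff does not show.

use crate::{MemoryManager, NoOpMetrics, PERMIT_GRANULARITY_BYTES};

// Sketch only: a 4 MB limit, then a 1.5 MB request that rounds up to 2 permits (2 MB).
fn sketch_acquire_and_release() {
    // 4 MB limit = 4 permits at the fixed 1 MB granularity.
    let manager = MemoryManager::new(4 * PERMIT_GRANULARITY_BYTES, NoOpMetrics::default());

    // 1.5 MB rounds up to 2 permits, so 2 MB are granted and accounted as used.
    let guard = manager
        .try_acquire(PERMIT_GRANULARITY_BYTES + 512 * 1024)
        .expect("2 permits fit inside the 4-permit limit");
    assert_eq!(guard.granted_bytes(), 2 * PERMIT_GRANULARITY_BYTES);
    assert_eq!(manager.used_bytes(), 2 * PERMIT_GRANULARITY_BYTES);

    // Dropping the guard returns the permits (see the Drop impl in guard.rs above).
    drop(guard);
    assert_eq!(manager.used_bytes(), 0);
}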

View File

@@ -14,10 +14,7 @@
use tokio::time::{Duration, sleep};
use crate::{MemoryManager, NoOpMetrics, PermitGranularity};
// Helper constant for tests - use default Megabyte granularity
const PERMIT_GRANULARITY_BYTES: u64 = PermitGranularity::Megabyte.bytes();
use crate::{MemoryManager, NoOpMetrics, PERMIT_GRANULARITY_BYTES};
#[test]
fn test_try_acquire_unlimited() {

View File

@@ -12,10 +12,27 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::OnceLock;
use std::time::Duration;
pub const BASE_HEARTBEAT_INTERVAL: Duration = Duration::from_secs(3);
use etcd_client::ConnectOptions;
/// Heartbeat interval (the basic unit of the various time settings).
pub const HEARTBEAT_INTERVAL_MILLIS: u64 = 3000;
/// The frontend also sends heartbeats to Metasrv, sending an empty
/// heartbeat every HEARTBEAT_INTERVAL_MILLIS * 6 milliseconds (i.e. every 6 heartbeat intervals).
pub const FRONTEND_HEARTBEAT_INTERVAL_MILLIS: u64 = HEARTBEAT_INTERVAL_MILLIS * 6;
/// The lease seconds of a region. It's set to 3 heartbeat intervals
/// (HEARTBEAT_INTERVAL_MILLIS × 3), plus some extra buffer (1 second).
pub const REGION_LEASE_SECS: u64 =
Duration::from_millis(HEARTBEAT_INTERVAL_MILLIS * 3).as_secs() + 1;
/// When creating a table or performing region failover, a target node needs to be selected.
/// If the node's lease has expired, the `Selector` will not select it.
pub const DATANODE_LEASE_SECS: u64 = REGION_LEASE_SECS;
pub const FLOWNODE_LEASE_SECS: u64 = DATANODE_LEASE_SECS;
/// The lease seconds of metasrv leader.
pub const META_LEASE_SECS: u64 = 5;
@@ -35,6 +52,14 @@ pub const HEARTBEAT_CHANNEL_KEEP_ALIVE_INTERVAL_SECS: Duration = Duration::from_
/// The keep-alive timeout of the heartbeat channel.
pub const HEARTBEAT_CHANNEL_KEEP_ALIVE_TIMEOUT_SECS: Duration = Duration::from_secs(5);
/// The default options for the etcd client.
pub fn default_etcd_client_options() -> ConnectOptions {
ConnectOptions::new()
.with_keep_alive_while_idle(true)
.with_keep_alive(Duration::from_secs(15), Duration::from_secs(5))
.with_connect_timeout(Duration::from_secs(10))
}
/// The default mailbox round-trip timeout.
pub const MAILBOX_RTT_SECS: u64 = 1;
@@ -43,60 +68,3 @@ pub const TOPIC_STATS_REPORT_INTERVAL_SECS: u64 = 15;
/// The retention seconds of topic stats.
pub const TOPIC_STATS_RETENTION_SECS: u64 = TOPIC_STATS_REPORT_INTERVAL_SECS * 100;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// The distributed time constants.
pub struct DistributedTimeConstants {
pub heartbeat_interval: Duration,
pub frontend_heartbeat_interval: Duration,
pub region_lease: Duration,
pub datanode_lease: Duration,
pub flownode_lease: Duration,
}
/// The frontend heartbeat interval is 6 times the base heartbeat interval.
pub fn frontend_heartbeat_interval(base_heartbeat_interval: Duration) -> Duration {
base_heartbeat_interval * 6
}
impl DistributedTimeConstants {
/// Create a new DistributedTimeConstants from the heartbeat interval.
pub fn from_heartbeat_interval(heartbeat_interval: Duration) -> Self {
let region_lease = heartbeat_interval * 3 + Duration::from_secs(1);
let datanode_lease = region_lease;
let flownode_lease = datanode_lease;
Self {
heartbeat_interval,
frontend_heartbeat_interval: frontend_heartbeat_interval(heartbeat_interval),
region_lease,
datanode_lease,
flownode_lease,
}
}
}
impl Default for DistributedTimeConstants {
fn default() -> Self {
Self::from_heartbeat_interval(BASE_HEARTBEAT_INTERVAL)
}
}
static DEFAULT_DISTRIBUTED_TIME_CONSTANTS: OnceLock<DistributedTimeConstants> = OnceLock::new();
/// Get the default distributed time constants.
pub fn default_distributed_time_constants() -> &'static DistributedTimeConstants {
DEFAULT_DISTRIBUTED_TIME_CONSTANTS.get_or_init(Default::default)
}
/// Initialize the default distributed time constants.
pub fn init_distributed_time_constants(base_heartbeat_interval: Duration) {
let distributed_time_constants =
DistributedTimeConstants::from_heartbeat_interval(base_heartbeat_interval);
DEFAULT_DISTRIBUTED_TIME_CONSTANTS
.set(distributed_time_constants)
.expect("Failed to set default distributed time constants");
common_telemetry::info!(
"Initialized default distributed time constants: {:#?}",
distributed_time_constants
);
}
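With the fixed 3-second base interval above, the derived constants work out to an 18-second frontend heartbeat and a 10-second region/datanode/flownode lease. A standalone check of that arithmetic:

use std::time::Duration;

fn main() {
    const HEARTBEAT_INTERVAL_MILLIS: u64 = 3000;
    // Frontend heartbeat: 6 base intervals = 18 000 ms = 18 s.
    assert_eq!(HEARTBEAT_INTERVAL_MILLIS * 6, 18_000);
    // Region lease: 3 base intervals plus a 1-second buffer = 10 s.
    let region_lease_secs = Duration::from_millis(HEARTBEAT_INTERVAL_MILLIS * 3).as_secs() + 1;
    assert_eq!(region_lease_secs, 10);
}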

View File

@@ -514,22 +514,6 @@ impl Display for GcRegionsReply {
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct EnterStagingRegion {
pub region_id: RegionId,
pub partition_expr: String,
}
impl Display for EnterStagingRegion {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(
f,
"EnterStagingRegion(region_id={}, partition_expr={})",
self.region_id, self.partition_expr
)
}
}
#[derive(Debug, Clone, Serialize, Deserialize, Display, PartialEq)]
pub enum Instruction {
/// Opens regions.
@@ -557,8 +541,6 @@ pub enum Instruction {
GcRegions(GcRegions),
/// Temporarily suspends serving reads or writes
Suspend,
/// Makes regions enter staging state.
EnterStagingRegions(Vec<EnterStagingRegion>),
}
impl Instruction {
@@ -615,13 +597,6 @@ impl Instruction {
_ => None,
}
}
pub fn into_enter_staging_regions(self) -> Option<Vec<EnterStagingRegion>> {
match self {
Self::EnterStagingRegions(enter_staging) => Some(enter_staging),
_ => None,
}
}
}
/// The reply of [UpgradeRegion].
@@ -715,28 +690,6 @@ where
})
}
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
pub struct EnterStagingRegionReply {
pub region_id: RegionId,
/// Returns true if the region is under the new region rule.
pub ready: bool,
/// Indicates whether the region exists.
pub exists: bool,
/// Return error if any during the operation.
pub error: Option<String>,
}
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
pub struct EnterStagingRegionsReply {
pub replies: Vec<EnterStagingRegionReply>,
}
impl EnterStagingRegionsReply {
pub fn new(replies: Vec<EnterStagingRegionReply>) -> Self {
Self { replies }
}
}
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum InstructionReply {
@@ -757,7 +710,6 @@ pub enum InstructionReply {
FlushRegions(FlushRegionReply),
GetFileRefs(GetFileRefsReply),
GcRegions(GcRegionsReply),
EnterStagingRegions(EnterStagingRegionsReply),
}
impl Display for InstructionReply {
@@ -774,13 +726,6 @@ impl Display for InstructionReply {
Self::FlushRegions(reply) => write!(f, "InstructionReply::FlushRegions({})", reply),
Self::GetFileRefs(reply) => write!(f, "InstructionReply::GetFileRefs({})", reply),
Self::GcRegions(reply) => write!(f, "InstructionReply::GcRegion({})", reply),
Self::EnterStagingRegions(reply) => {
write!(
f,
"InstructionReply::EnterStagingRegions({:?})",
reply.replies
)
}
}
}
}
@@ -821,20 +766,13 @@ impl InstructionReply {
_ => panic!("Expected FlushRegions reply"),
}
}
pub fn expect_enter_staging_regions_reply(self) -> Vec<EnterStagingRegionReply> {
match self {
Self::EnterStagingRegions(reply) => reply.replies,
_ => panic!("Expected EnterStagingRegion reply"),
}
}
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use store_api::storage::{FileId, FileRef};
use store_api::storage::FileId;
use super::*;
@@ -1209,14 +1147,12 @@ mod tests {
let mut manifest = FileRefsManifest::default();
let r0 = RegionId::new(1024, 1);
let r1 = RegionId::new(1024, 2);
manifest.file_refs.insert(
r0,
HashSet::from([FileRef::new(r0, FileId::random(), None)]),
);
manifest.file_refs.insert(
r1,
HashSet::from([FileRef::new(r1, FileId::random(), None)]),
);
manifest
.file_refs
.insert(r0, HashSet::from([FileId::random()]));
manifest
.file_refs
.insert(r1, HashSet::from([FileId::random()]));
manifest.manifest_version.insert(r0, 10);
manifest.manifest_version.insert(r1, 20);

View File

@@ -848,7 +848,7 @@ impl PgStore {
.context(CreatePostgresPoolSnafu)?,
};
Self::with_pg_pool(pool, None, table_name, max_txn_ops, false).await
Self::with_pg_pool(pool, None, table_name, max_txn_ops).await
}
/// Create [PgStore] impl of [KvBackendRef] from url (backward compatibility).
@@ -862,7 +862,6 @@ impl PgStore {
schema_name: Option<&str>,
table_name: &str,
max_txn_ops: usize,
auto_create_schema: bool,
) -> Result<KvBackendRef> {
// Ensure the postgres metadata backend is ready to use.
let client = match pool.get().await {
@@ -874,23 +873,9 @@ impl PgStore {
.fail();
}
};
// Automatically create schema if enabled and schema_name is provided.
if auto_create_schema
&& let Some(schema) = schema_name
&& !schema.is_empty()
{
let create_schema_sql = format!("CREATE SCHEMA IF NOT EXISTS \"{}\"", schema);
client
.execute(&create_schema_sql, &[])
.await
.with_context(|_| PostgresExecutionSnafu {
sql: create_schema_sql.clone(),
})?;
}
let template_factory = PgSqlTemplateFactory::new(schema_name, table_name);
let sql_template_set = template_factory.build();
// Do not attempt to create schema implicitly.
client
.execute(&sql_template_set.create_table_statement, &[])
.await
@@ -974,7 +959,7 @@ mod tests {
let Some(pool) = build_pg15_pool().await else {
return;
};
let res = PgStore::with_pg_pool(pool, None, "pg15_public_should_fail", 128, false).await;
let res = PgStore::with_pg_pool(pool, None, "pg15_public_should_fail", 128).await;
assert!(
res.is_err(),
"creating table in public should fail for test_user"
@@ -1229,249 +1214,4 @@ mod tests {
let t = PgSqlTemplateFactory::format_table_ident(Some(""), "test_table");
assert_eq!(t, "\"test_table\"");
}
#[tokio::test]
async fn test_auto_create_schema_enabled() {
common_telemetry::init_default_ut_logging();
maybe_skip_postgres_integration_test!();
let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap();
let mut cfg = Config::new();
cfg.url = Some(endpoints);
let pool = cfg
.create_pool(Some(Runtime::Tokio1), NoTls)
.context(CreatePostgresPoolSnafu)
.unwrap();
let schema_name = "test_auto_create_enabled";
let table_name = "test_table";
// Drop the schema if it exists to start clean
let client = pool.get().await.unwrap();
let _ = client
.execute(
&format!("DROP SCHEMA IF EXISTS \"{}\" CASCADE", schema_name),
&[],
)
.await;
// Create store with auto_create_schema enabled
let _ = PgStore::with_pg_pool(pool.clone(), Some(schema_name), table_name, 128, true)
.await
.unwrap();
// Verify schema was created
let row = client
.query_one(
"SELECT schema_name FROM information_schema.schemata WHERE schema_name = $1",
&[&schema_name],
)
.await
.unwrap();
let created_schema: String = row.get(0);
assert_eq!(created_schema, schema_name);
// Verify table was created in the schema
let row = client
.query_one(
"SELECT table_schema, table_name FROM information_schema.tables WHERE table_schema = $1 AND table_name = $2",
&[&schema_name, &table_name],
)
.await
.unwrap();
let created_table_schema: String = row.get(0);
let created_table_name: String = row.get(1);
assert_eq!(created_table_schema, schema_name);
assert_eq!(created_table_name, table_name);
// Cleanup
let _ = client
.execute(
&format!("DROP SCHEMA IF EXISTS \"{}\" CASCADE", schema_name),
&[],
)
.await;
}
#[tokio::test]
async fn test_auto_create_schema_disabled() {
common_telemetry::init_default_ut_logging();
maybe_skip_postgres_integration_test!();
let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap();
let mut cfg = Config::new();
cfg.url = Some(endpoints);
let pool = cfg
.create_pool(Some(Runtime::Tokio1), NoTls)
.context(CreatePostgresPoolSnafu)
.unwrap();
let schema_name = "test_auto_create_disabled";
let table_name = "test_table";
// Drop the schema if it exists to start clean
let client = pool.get().await.unwrap();
let _ = client
.execute(
&format!("DROP SCHEMA IF EXISTS \"{}\" CASCADE", schema_name),
&[],
)
.await;
// Try to create store with auto_create_schema disabled (should fail)
let result =
PgStore::with_pg_pool(pool.clone(), Some(schema_name), table_name, 128, false).await;
// Verify it failed because schema doesn't exist
assert!(
result.is_err(),
"Expected error when schema doesn't exist and auto_create_schema is disabled"
);
}
#[tokio::test]
async fn test_auto_create_schema_already_exists() {
common_telemetry::init_default_ut_logging();
maybe_skip_postgres_integration_test!();
let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap();
let mut cfg = Config::new();
cfg.url = Some(endpoints);
let pool = cfg
.create_pool(Some(Runtime::Tokio1), NoTls)
.context(CreatePostgresPoolSnafu)
.unwrap();
let schema_name = "test_auto_create_existing";
let table_name = "test_table";
// Manually create the schema first
let client = pool.get().await.unwrap();
let _ = client
.execute(
&format!("DROP SCHEMA IF EXISTS \"{}\" CASCADE", schema_name),
&[],
)
.await;
client
.execute(&format!("CREATE SCHEMA \"{}\"", schema_name), &[])
.await
.unwrap();
// Create store with auto_create_schema enabled (should succeed idempotently)
let _ = PgStore::with_pg_pool(pool.clone(), Some(schema_name), table_name, 128, true)
.await
.unwrap();
// Verify schema still exists
let row = client
.query_one(
"SELECT schema_name FROM information_schema.schemata WHERE schema_name = $1",
&[&schema_name],
)
.await
.unwrap();
let created_schema: String = row.get(0);
assert_eq!(created_schema, schema_name);
// Verify table was created in the schema
let row = client
.query_one(
"SELECT table_schema, table_name FROM information_schema.tables WHERE table_schema = $1 AND table_name = $2",
&[&schema_name, &table_name],
)
.await
.unwrap();
let created_table_schema: String = row.get(0);
let created_table_name: String = row.get(1);
assert_eq!(created_table_schema, schema_name);
assert_eq!(created_table_name, table_name);
// Cleanup
let _ = client
.execute(
&format!("DROP SCHEMA IF EXISTS \"{}\" CASCADE", schema_name),
&[],
)
.await;
}
#[tokio::test]
async fn test_auto_create_schema_no_schema_name() {
common_telemetry::init_default_ut_logging();
maybe_skip_postgres_integration_test!();
let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap();
let mut cfg = Config::new();
cfg.url = Some(endpoints);
let pool = cfg
.create_pool(Some(Runtime::Tokio1), NoTls)
.context(CreatePostgresPoolSnafu)
.unwrap();
let table_name = "test_table_no_schema";
// Create store with auto_create_schema enabled but no schema name (should succeed)
// This should create the table in the default schema (public)
let _ = PgStore::with_pg_pool(pool.clone(), None, table_name, 128, true)
.await
.unwrap();
// Verify table was created in public schema
let client = pool.get().await.unwrap();
let row = client
.query_one(
"SELECT table_schema, table_name FROM information_schema.tables WHERE table_name = $1",
&[&table_name],
)
.await
.unwrap();
let created_table_schema: String = row.get(0);
let created_table_name: String = row.get(1);
assert_eq!(created_table_name, table_name);
// Verify it's in public schema (or whichever is the default)
assert!(created_table_schema == "public" || !created_table_schema.is_empty());
// Cleanup
let _ = client
.execute(&format!("DROP TABLE IF EXISTS \"{}\"", table_name), &[])
.await;
}
#[tokio::test]
async fn test_auto_create_schema_with_empty_schema_name() {
common_telemetry::init_default_ut_logging();
maybe_skip_postgres_integration_test!();
let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap();
let mut cfg = Config::new();
cfg.url = Some(endpoints);
let pool = cfg
.create_pool(Some(Runtime::Tokio1), NoTls)
.context(CreatePostgresPoolSnafu)
.unwrap();
let table_name = "test_table_empty_schema";
// Create store with auto_create_schema enabled but empty schema name (should succeed)
// This should create the table in the default schema (public)
let _ = PgStore::with_pg_pool(pool.clone(), Some(""), table_name, 128, true)
.await
.unwrap();
// Verify table was created in public schema
let client = pool.get().await.unwrap();
let row = client
.query_one(
"SELECT table_schema, table_name FROM information_schema.tables WHERE table_name = $1",
&[&table_name],
)
.await
.unwrap();
let created_table_schema: String = row.get(0);
let created_table_name: String = row.get(1);
assert_eq!(created_table_name, table_name);
// Verify it's in public schema (or whichever is the default)
assert!(created_table_schema == "public" || !created_table_schema.is_empty());
// Cleanup
let _ = client
.execute(&format!("DROP TABLE IF EXISTS \"{}\"", table_name), &[])
.await;
}
}
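Because the store no longer creates the schema implicitly (the auto_create_schema path and its tests are removed above), a deployment that wants a dedicated schema has to create it before calling with_pg_pool. A rough sketch, written as if inside this module so the same imports (Config, Runtime, NoTls, CreatePostgresPoolSnafu, KvBackendRef) are in scope; the schema and table names are placeholders, and error handling is mostly elided:

// Sketch only: prepare the schema yourself, then use the new 4-argument signature.
async fn build_pg_store_with_schema() -> Result<KvBackendRef> {
    let mut cfg = Config::new();
    cfg.url = Some(std::env::var("GT_POSTGRES_ENDPOINTS").unwrap());
    let pool = cfg
        .create_pool(Some(Runtime::Tokio1), NoTls)
        .context(CreatePostgresPoolSnafu)?;

    // The schema must exist up front; PgStore only creates its own table.
    let client = pool.get().await.unwrap();
    client
        .execute("CREATE SCHEMA IF NOT EXISTS \"placeholder_schema\"", &[])
        .await
        .unwrap();

    PgStore::with_pg_pool(pool, Some("placeholder_schema"), "placeholder_table", 128).await
}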

View File

@@ -14,7 +14,7 @@
use common_telemetry::{debug, error, info};
use common_wal::config::kafka::common::{
DEFAULT_BACKOFF_CONFIG, KafkaConnectionConfig, KafkaTopicConfig,
DEFAULT_BACKOFF_CONFIG, DEFAULT_CONNECT_TIMEOUT, KafkaConnectionConfig, KafkaTopicConfig,
};
use rskafka::client::error::Error as RsKafkaError;
use rskafka::client::error::ProtocolError::TopicAlreadyExists;
@@ -211,8 +211,7 @@ pub async fn build_kafka_client(connection: &KafkaConnectionConfig) -> Result<Cl
// Builds a Kafka controller client for creating topics.
let mut builder = ClientBuilder::new(connection.broker_endpoints.clone())
.backoff_config(DEFAULT_BACKOFF_CONFIG)
.connect_timeout(Some(connection.connect_timeout))
.timeout(Some(connection.timeout));
.connect_timeout(Some(DEFAULT_CONNECT_TIMEOUT));
if let Some(sasl) = &connection.sasl {
builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
};

View File

@@ -5,12 +5,10 @@ edition.workspace = true
license.workspace = true
[dependencies]
arrow-schema.workspace = true
common-base.workspace = true
common-decimal.workspace = true
common-error.workspace = true
common-macro.workspace = true
common-telemetry.workspace = true
common-time.workspace = true
datafusion-sql.workspace = true
datatypes.workspace = true

View File

@@ -14,12 +14,11 @@
use std::str::FromStr;
use arrow_schema::extension::ExtensionType;
use common_time::Timestamp;
use common_time::timezone::Timezone;
use datatypes::extension::json::JsonExtensionType;
use datatypes::json::JsonStructureSettings;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
use datatypes::schema::ColumnDefaultConstraint;
use datatypes::types::{JsonFormat, parse_string_to_jsonb, parse_string_to_vector_type_value};
use datatypes::value::{OrderedF32, OrderedF64, Value};
use snafu::{OptionExt, ResultExt, ensure};
@@ -125,14 +124,13 @@ pub(crate) fn sql_number_to_value(data_type: &ConcreteDataType, n: &str) -> Resu
/// If `auto_string_to_numeric` is true, tries to cast the string value to numeric values,
/// and returns an error if the cast fails.
pub fn sql_value_to_value(
column_schema: &ColumnSchema,
column_name: &str,
data_type: &ConcreteDataType,
sql_val: &SqlValue,
timezone: Option<&Timezone>,
unary_op: Option<UnaryOperator>,
auto_string_to_numeric: bool,
) -> Result<Value> {
let column_name = &column_schema.name;
let data_type = &column_schema.data_type;
let mut value = match sql_val {
SqlValue::Number(n, _) => sql_number_to_value(data_type, n)?,
SqlValue::Null => Value::Null,
@@ -148,9 +146,13 @@ pub fn sql_value_to_value(
(*b).into()
}
SqlValue::DoubleQuotedString(s) | SqlValue::SingleQuotedString(s) => {
parse_string_to_value(column_schema, s.clone(), timezone, auto_string_to_numeric)?
}
SqlValue::DoubleQuotedString(s) | SqlValue::SingleQuotedString(s) => parse_string_to_value(
column_name,
s.clone(),
data_type,
timezone,
auto_string_to_numeric,
)?,
SqlValue::HexStringLiteral(s) => {
// Should not directly write binary into json column
ensure!(
@@ -242,12 +244,12 @@ pub fn sql_value_to_value(
}
pub(crate) fn parse_string_to_value(
column_schema: &ColumnSchema,
column_name: &str,
s: String,
data_type: &ConcreteDataType,
timezone: Option<&Timezone>,
auto_string_to_numeric: bool,
) -> Result<Value> {
let data_type = &column_schema.data_type;
if auto_string_to_numeric && let Some(value) = auto_cast_to_numeric(&s, data_type)? {
return Ok(value);
}
@@ -255,7 +257,7 @@ pub(crate) fn parse_string_to_value(
ensure!(
data_type.is_stringifiable(),
ColumnTypeMismatchSnafu {
column_name: column_schema.name.clone(),
column_name,
expect: data_type.clone(),
actual: ConcreteDataType::string_datatype(),
}
@@ -301,21 +303,23 @@ pub(crate) fn parse_string_to_value(
}
}
ConcreteDataType::Binary(_) => Ok(Value::Binary(s.as_bytes().into())),
ConcreteDataType::Json(j) => match &j.format {
JsonFormat::Jsonb => {
let v = parse_string_to_jsonb(&s).context(DatatypeSnafu)?;
Ok(Value::Binary(v.into()))
ConcreteDataType::Json(j) => {
match &j.format {
JsonFormat::Jsonb => {
let v = parse_string_to_jsonb(&s).context(DatatypeSnafu)?;
Ok(Value::Binary(v.into()))
}
JsonFormat::Native(_inner) => {
// Always use the structured version at this level.
let serde_json_value =
serde_json::from_str(&s).context(DeserializeSnafu { json: s })?;
let json_structure_settings = JsonStructureSettings::Structured(None);
json_structure_settings
.encode(serde_json_value)
.context(DatatypeSnafu)
}
}
JsonFormat::Native(_) => {
let extension_type: Option<JsonExtensionType> =
column_schema.extension_type().context(DatatypeSnafu)?;
let json_structure_settings = extension_type
.and_then(|x| x.metadata().json_structure_settings.clone())
.unwrap_or_default();
let v = serde_json::from_str(&s).context(DeserializeSnafu { json: s })?;
json_structure_settings.encode(v).context(DatatypeSnafu)
}
},
}
ConcreteDataType::Vector(d) => {
let v = parse_string_to_vector_type_value(&s, Some(d.dim)).context(DatatypeSnafu)?;
Ok(Value::Binary(v.into()))
@@ -413,265 +417,305 @@ mod test {
use super::*;
macro_rules! call_parse_string_to_value {
($column_name: expr, $input: expr, $data_type: expr) => {
call_parse_string_to_value!($column_name, $input, $data_type, None)
};
($column_name: expr, $input: expr, $data_type: expr, timezone = $timezone: expr) => {
call_parse_string_to_value!($column_name, $input, $data_type, Some($timezone))
};
($column_name: expr, $input: expr, $data_type: expr, $timezone: expr) => {{
let column_schema = ColumnSchema::new($column_name, $data_type, true);
parse_string_to_value(&column_schema, $input, $timezone, true)
}};
}
#[test]
fn test_string_to_value_auto_numeric() -> Result<()> {
fn test_string_to_value_auto_numeric() {
// Test string to boolean with auto cast
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"true".to_string(),
ConcreteDataType::boolean_datatype()
)?;
&ConcreteDataType::boolean_datatype(),
None,
true,
)
.unwrap();
assert_eq!(Value::Boolean(true), result);
// Test invalid string to boolean with auto cast
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"not_a_boolean".to_string(),
ConcreteDataType::boolean_datatype()
&ConcreteDataType::boolean_datatype(),
None,
true,
);
assert!(result.is_err());
// Test string to int8
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"42".to_string(),
ConcreteDataType::int8_datatype()
)?;
&ConcreteDataType::int8_datatype(),
None,
true,
)
.unwrap();
assert_eq!(Value::Int8(42), result);
// Test invalid string to int8 with auto cast
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"not_an_int8".to_string(),
ConcreteDataType::int8_datatype()
&ConcreteDataType::int8_datatype(),
None,
true,
);
assert!(result.is_err());
// Test string to int16
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"1000".to_string(),
ConcreteDataType::int16_datatype()
)?;
&ConcreteDataType::int16_datatype(),
None,
true,
)
.unwrap();
assert_eq!(Value::Int16(1000), result);
// Test invalid string to int16 with auto cast
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"not_an_int16".to_string(),
ConcreteDataType::int16_datatype()
&ConcreteDataType::int16_datatype(),
None,
true,
);
assert!(result.is_err());
// Test string to int32
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"100000".to_string(),
ConcreteDataType::int32_datatype()
)?;
&ConcreteDataType::int32_datatype(),
None,
true,
)
.unwrap();
assert_eq!(Value::Int32(100000), result);
// Test invalid string to int32 with auto cast
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"not_an_int32".to_string(),
ConcreteDataType::int32_datatype()
&ConcreteDataType::int32_datatype(),
None,
true,
);
assert!(result.is_err());
// Test string to int64
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"1000000".to_string(),
ConcreteDataType::int64_datatype()
)?;
&ConcreteDataType::int64_datatype(),
None,
true,
)
.unwrap();
assert_eq!(Value::Int64(1000000), result);
// Test invalid string to int64 with auto cast
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"not_an_int64".to_string(),
ConcreteDataType::int64_datatype()
&ConcreteDataType::int64_datatype(),
None,
true,
);
assert!(result.is_err());
// Test string to uint8
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"200".to_string(),
ConcreteDataType::uint8_datatype()
)?;
&ConcreteDataType::uint8_datatype(),
None,
true,
)
.unwrap();
assert_eq!(Value::UInt8(200), result);
// Test invalid string to uint8 with auto cast
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"not_a_uint8".to_string(),
ConcreteDataType::uint8_datatype()
&ConcreteDataType::uint8_datatype(),
None,
true,
);
assert!(result.is_err());
// Test string to uint16
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"60000".to_string(),
ConcreteDataType::uint16_datatype()
)?;
&ConcreteDataType::uint16_datatype(),
None,
true,
)
.unwrap();
assert_eq!(Value::UInt16(60000), result);
// Test invalid string to uint16 with auto cast
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"not_a_uint16".to_string(),
ConcreteDataType::uint16_datatype()
&ConcreteDataType::uint16_datatype(),
None,
true,
);
assert!(result.is_err());
// Test string to uint32
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"4000000000".to_string(),
ConcreteDataType::uint32_datatype()
)?;
&ConcreteDataType::uint32_datatype(),
None,
true,
)
.unwrap();
assert_eq!(Value::UInt32(4000000000), result);
// Test invalid string to uint32 with auto cast
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"not_a_uint32".to_string(),
ConcreteDataType::uint32_datatype()
&ConcreteDataType::uint32_datatype(),
None,
true,
);
assert!(result.is_err());
// Test string to uint64
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"18446744073709551615".to_string(),
ConcreteDataType::uint64_datatype()
)?;
&ConcreteDataType::uint64_datatype(),
None,
true,
)
.unwrap();
assert_eq!(Value::UInt64(18446744073709551615), result);
// Test invalid string to uint64 with auto cast
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"not_a_uint64".to_string(),
ConcreteDataType::uint64_datatype()
&ConcreteDataType::uint64_datatype(),
None,
true,
);
assert!(result.is_err());
// Test string to float32
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"3.5".to_string(),
ConcreteDataType::float32_datatype()
)?;
&ConcreteDataType::float32_datatype(),
None,
true,
)
.unwrap();
assert_eq!(Value::Float32(OrderedF32::from(3.5)), result);
// Test invalid string to float32 with auto cast
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"not_a_float32".to_string(),
ConcreteDataType::float32_datatype()
&ConcreteDataType::float32_datatype(),
None,
true,
);
assert!(result.is_err());
// Test string to float64
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"3.5".to_string(),
ConcreteDataType::float64_datatype()
)?;
&ConcreteDataType::float64_datatype(),
None,
true,
)
.unwrap();
assert_eq!(Value::Float64(OrderedF64::from(3.5)), result);
// Test invalid string to float64 with auto cast
let result = call_parse_string_to_value!(
let result = parse_string_to_value(
"col",
"not_a_float64".to_string(),
ConcreteDataType::float64_datatype()
&ConcreteDataType::float64_datatype(),
None,
true,
);
assert!(result.is_err());
Ok(())
}
macro_rules! call_sql_value_to_value {
($column_name: expr, $data_type: expr, $sql_value: expr) => {
call_sql_value_to_value!($column_name, $data_type, $sql_value, None, None, false)
};
($column_name: expr, $data_type: expr, $sql_value: expr, timezone = $timezone: expr) => {
call_sql_value_to_value!(
$column_name,
$data_type,
$sql_value,
Some($timezone),
None,
false
)
};
($column_name: expr, $data_type: expr, $sql_value: expr, unary_op = $unary_op: expr) => {
call_sql_value_to_value!(
$column_name,
$data_type,
$sql_value,
None,
Some($unary_op),
false
)
};
($column_name: expr, $data_type: expr, $sql_value: expr, auto_string_to_numeric) => {
call_sql_value_to_value!($column_name, $data_type, $sql_value, None, None, true)
};
($column_name: expr, $data_type: expr, $sql_value: expr, $timezone: expr, $unary_op: expr, $auto_string_to_numeric: expr) => {{
let column_schema = ColumnSchema::new($column_name, $data_type, true);
sql_value_to_value(
&column_schema,
$sql_value,
$timezone,
$unary_op,
$auto_string_to_numeric,
)
}};
}
#[test]
fn test_sql_value_to_value() -> Result<()> {
fn test_sql_value_to_value() {
let sql_val = SqlValue::Null;
assert_eq!(
Value::Null,
call_sql_value_to_value!("a", ConcreteDataType::float64_datatype(), &sql_val)?
sql_value_to_value(
"a",
&ConcreteDataType::float64_datatype(),
&sql_val,
None,
None,
false
)
.unwrap()
);
let sql_val = SqlValue::Boolean(true);
assert_eq!(
Value::Boolean(true),
call_sql_value_to_value!("a", ConcreteDataType::boolean_datatype(), &sql_val)?
sql_value_to_value(
"a",
&ConcreteDataType::boolean_datatype(),
&sql_val,
None,
None,
false
)
.unwrap()
);
let sql_val = SqlValue::Number("3.0".to_string(), false);
assert_eq!(
Value::Float64(OrderedFloat(3.0)),
call_sql_value_to_value!("a", ConcreteDataType::float64_datatype(), &sql_val)?
sql_value_to_value(
"a",
&ConcreteDataType::float64_datatype(),
&sql_val,
None,
None,
false
)
.unwrap()
);
let sql_val = SqlValue::Number("3.0".to_string(), false);
let v = call_sql_value_to_value!("a", ConcreteDataType::boolean_datatype(), &sql_val);
let v = sql_value_to_value(
"a",
&ConcreteDataType::boolean_datatype(),
&sql_val,
None,
None,
false,
);
assert!(v.is_err());
assert!(format!("{v:?}").contains("Failed to parse number '3.0' to boolean column type"));
let sql_val = SqlValue::Boolean(true);
let v = call_sql_value_to_value!("a", ConcreteDataType::float64_datatype(), &sql_val);
let v = sql_value_to_value(
"a",
&ConcreteDataType::float64_datatype(),
&sql_val,
None,
None,
false,
);
assert!(v.is_err());
assert!(
format!("{v:?}").contains(
@@ -681,18 +725,41 @@ mod test {
);
let sql_val = SqlValue::HexStringLiteral("48656c6c6f20776f726c6421".to_string());
let v = call_sql_value_to_value!("a", ConcreteDataType::binary_datatype(), &sql_val)?;
let v = sql_value_to_value(
"a",
&ConcreteDataType::binary_datatype(),
&sql_val,
None,
None,
false,
)
.unwrap();
assert_eq!(Value::Binary(Bytes::from(b"Hello world!".as_slice())), v);
let sql_val = SqlValue::DoubleQuotedString("MorningMyFriends".to_string());
let v = call_sql_value_to_value!("a", ConcreteDataType::binary_datatype(), &sql_val)?;
let v = sql_value_to_value(
"a",
&ConcreteDataType::binary_datatype(),
&sql_val,
None,
None,
false,
)
.unwrap();
assert_eq!(
Value::Binary(Bytes::from(b"MorningMyFriends".as_slice())),
v
);
let sql_val = SqlValue::HexStringLiteral("9AF".to_string());
let v = call_sql_value_to_value!("a", ConcreteDataType::binary_datatype(), &sql_val);
let v = sql_value_to_value(
"a",
&ConcreteDataType::binary_datatype(),
&sql_val,
None,
None,
false,
);
assert!(v.is_err());
assert!(
format!("{v:?}").contains("odd number of digits"),
@@ -700,16 +767,38 @@ mod test {
);
let sql_val = SqlValue::HexStringLiteral("AG".to_string());
let v = call_sql_value_to_value!("a", ConcreteDataType::binary_datatype(), &sql_val);
let v = sql_value_to_value(
"a",
&ConcreteDataType::binary_datatype(),
&sql_val,
None,
None,
false,
);
assert!(v.is_err());
assert!(format!("{v:?}").contains("invalid character"), "v is {v:?}",);
let sql_val = SqlValue::DoubleQuotedString("MorningMyFriends".to_string());
let v = call_sql_value_to_value!("a", ConcreteDataType::json_datatype(), &sql_val);
let v = sql_value_to_value(
"a",
&ConcreteDataType::json_datatype(),
&sql_val,
None,
None,
false,
);
assert!(v.is_err());
let sql_val = SqlValue::DoubleQuotedString(r#"{"a":"b"}"#.to_string());
let v = call_sql_value_to_value!("a", ConcreteDataType::json_datatype(), &sql_val)?;
let v = sql_value_to_value(
"a",
&ConcreteDataType::json_datatype(),
&sql_val,
None,
None,
false,
)
.unwrap();
assert_eq!(
Value::Binary(Bytes::from(
jsonb::parse_value(r#"{"a":"b"}"#.as_bytes())
@@ -719,15 +808,16 @@ mod test {
)),
v
);
Ok(())
}
#[test]
fn test_parse_json_to_jsonb() {
match call_parse_string_to_value!(
match parse_string_to_value(
"json_col",
r#"{"a": "b"}"#.to_string(),
ConcreteDataType::json_datatype()
&ConcreteDataType::json_datatype(),
None,
false,
) {
Ok(Value::Binary(b)) => {
assert_eq!(
@@ -743,10 +833,12 @@ mod test {
}
assert!(
call_parse_string_to_value!(
parse_string_to_value(
"json_col",
r#"Nicola Kovac is the best rifler in the world"#.to_string(),
ConcreteDataType::json_datatype()
&ConcreteDataType::json_datatype(),
None,
false,
)
.is_err()
)
@@ -786,10 +878,13 @@ mod test {
#[test]
fn test_parse_date_literal() {
let value = call_sql_value_to_value!(
let value = sql_value_to_value(
"date",
ConcreteDataType::date_datatype(),
&SqlValue::DoubleQuotedString("2022-02-22".to_string())
&ConcreteDataType::date_datatype(),
&SqlValue::DoubleQuotedString("2022-02-22".to_string()),
None,
None,
false,
)
.unwrap();
assert_eq!(ConcreteDataType::date_datatype(), value.data_type());
@@ -800,11 +895,13 @@ mod test {
}
// with timezone
let value = call_sql_value_to_value!(
let value = sql_value_to_value(
"date",
ConcreteDataType::date_datatype(),
&ConcreteDataType::date_datatype(),
&SqlValue::DoubleQuotedString("2022-02-22".to_string()),
timezone = &Timezone::from_tz_string("+07:00").unwrap()
Some(&Timezone::from_tz_string("+07:00").unwrap()),
None,
false,
)
.unwrap();
assert_eq!(ConcreteDataType::date_datatype(), value.data_type());
@@ -816,12 +913,16 @@ mod test {
}
#[test]
fn test_parse_timestamp_literal() -> Result<()> {
match call_parse_string_to_value!(
fn test_parse_timestamp_literal() {
match parse_string_to_value(
"timestamp_col",
"2022-02-22T00:01:01+08:00".to_string(),
ConcreteDataType::timestamp_millisecond_datatype()
)? {
&ConcreteDataType::timestamp_millisecond_datatype(),
None,
false,
)
.unwrap()
{
Value::Timestamp(ts) => {
assert_eq!(1645459261000, ts.value());
assert_eq!(TimeUnit::Millisecond, ts.unit());
@@ -831,11 +932,15 @@ mod test {
}
}
match call_parse_string_to_value!(
match parse_string_to_value(
"timestamp_col",
"2022-02-22T00:01:01+08:00".to_string(),
ConcreteDataType::timestamp_datatype(TimeUnit::Second)
)? {
&ConcreteDataType::timestamp_datatype(TimeUnit::Second),
None,
false,
)
.unwrap()
{
Value::Timestamp(ts) => {
assert_eq!(1645459261, ts.value());
assert_eq!(TimeUnit::Second, ts.unit());
@@ -845,11 +950,15 @@ mod test {
}
}
match call_parse_string_to_value!(
match parse_string_to_value(
"timestamp_col",
"2022-02-22T00:01:01+08:00".to_string(),
ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond)
)? {
&ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond),
None,
false,
)
.unwrap()
{
Value::Timestamp(ts) => {
assert_eq!(1645459261000000, ts.value());
assert_eq!(TimeUnit::Microsecond, ts.unit());
@@ -859,11 +968,15 @@ mod test {
}
}
match call_parse_string_to_value!(
match parse_string_to_value(
"timestamp_col",
"2022-02-22T00:01:01+08:00".to_string(),
ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond)
)? {
&ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
None,
false,
)
.unwrap()
{
Value::Timestamp(ts) => {
assert_eq!(1645459261000000000, ts.value());
assert_eq!(TimeUnit::Nanosecond, ts.unit());
@@ -874,21 +987,26 @@ mod test {
}
assert!(
call_parse_string_to_value!(
parse_string_to_value(
"timestamp_col",
"2022-02-22T00:01:01+08".to_string(),
ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond)
&ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
None,
false,
)
.is_err()
);
// with timezone
match call_parse_string_to_value!(
match parse_string_to_value(
"timestamp_col",
"2022-02-22T00:01:01".to_string(),
ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
timezone = &Timezone::from_tz_string("Asia/Shanghai").unwrap()
)? {
&ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap()),
false,
)
.unwrap()
{
Value::Timestamp(ts) => {
assert_eq!(1645459261000000000, ts.value());
assert_eq!("2022-02-21 16:01:01+0000", ts.to_iso8601_string());
@@ -898,42 +1016,51 @@ mod test {
unreachable!()
}
}
Ok(())
}
#[test]
fn test_parse_placeholder_value() {
assert!(
call_sql_value_to_value!(
sql_value_to_value(
"test",
ConcreteDataType::string_datatype(),
&SqlValue::Placeholder("default".into())
)
.is_err()
);
assert!(
call_sql_value_to_value!(
"test",
ConcreteDataType::string_datatype(),
&ConcreteDataType::string_datatype(),
&SqlValue::Placeholder("default".into()),
unary_op = UnaryOperator::Minus
None,
None,
false
)
.is_err()
);
assert!(
call_sql_value_to_value!(
sql_value_to_value(
"test",
ConcreteDataType::uint16_datatype(),
&ConcreteDataType::string_datatype(),
&SqlValue::Placeholder("default".into()),
None,
Some(UnaryOperator::Minus),
false
)
.is_err()
);
assert!(
sql_value_to_value(
"test",
&ConcreteDataType::uint16_datatype(),
&SqlValue::Number("3".into(), false),
unary_op = UnaryOperator::Minus
None,
Some(UnaryOperator::Minus),
false
)
.is_err()
);
assert!(
call_sql_value_to_value!(
sql_value_to_value(
"test",
ConcreteDataType::uint16_datatype(),
&SqlValue::Number("3".into(), false)
&ConcreteDataType::uint16_datatype(),
&SqlValue::Number("3".into(), false),
None,
None,
false
)
.is_ok()
);
@@ -943,60 +1070,77 @@ mod test {
fn test_auto_string_to_numeric() {
// Test with auto_string_to_numeric=true
let sql_val = SqlValue::SingleQuotedString("123".to_string());
let v = call_sql_value_to_value!(
let v = sql_value_to_value(
"a",
ConcreteDataType::int32_datatype(),
&ConcreteDataType::int32_datatype(),
&sql_val,
auto_string_to_numeric
None,
None,
true,
)
.unwrap();
assert_eq!(Value::Int32(123), v);
// Test with a float string
let sql_val = SqlValue::SingleQuotedString("3.5".to_string());
let v = call_sql_value_to_value!(
let v = sql_value_to_value(
"a",
ConcreteDataType::float64_datatype(),
&ConcreteDataType::float64_datatype(),
&sql_val,
auto_string_to_numeric
None,
None,
true,
)
.unwrap();
assert_eq!(Value::Float64(OrderedFloat(3.5)), v);
// Test with auto_string_to_numeric=false
let sql_val = SqlValue::SingleQuotedString("123".to_string());
let v = call_sql_value_to_value!("a", ConcreteDataType::int32_datatype(), &sql_val);
let v = sql_value_to_value(
"a",
&ConcreteDataType::int32_datatype(),
&sql_val,
None,
None,
false,
);
assert!(v.is_err());
// Test with an invalid numeric string but auto_string_to_numeric=true
// Should return an error now with the new auto_cast_to_numeric behavior
let sql_val = SqlValue::SingleQuotedString("not_a_number".to_string());
let v = call_sql_value_to_value!(
let v = sql_value_to_value(
"a",
ConcreteDataType::int32_datatype(),
&ConcreteDataType::int32_datatype(),
&sql_val,
auto_string_to_numeric
None,
None,
true,
);
assert!(v.is_err());
// Test with boolean type
let sql_val = SqlValue::SingleQuotedString("true".to_string());
let v = call_sql_value_to_value!(
let v = sql_value_to_value(
"a",
ConcreteDataType::boolean_datatype(),
&ConcreteDataType::boolean_datatype(),
&sql_val,
auto_string_to_numeric
None,
None,
true,
)
.unwrap();
assert_eq!(Value::Boolean(true), v);
// Non-numeric types should still be handled normally
let sql_val = SqlValue::SingleQuotedString("hello".to_string());
let v = call_sql_value_to_value!(
let v = sql_value_to_value(
"a",
ConcreteDataType::string_datatype(),
&ConcreteDataType::string_datatype(),
&sql_val,
auto_string_to_numeric
None,
None,
true,
);
assert!(v.is_ok());
}
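The auto_string_to_numeric flag documented earlier in this file is the only difference between accepting and rejecting a quoted numeric literal. Condensing the tests above into a single contrast (same imports and the new call signature used by those tests):

#[test]
fn sketch_auto_string_to_numeric_contrast() {
    let sql_val = SqlValue::SingleQuotedString("123".to_string());

    // auto_string_to_numeric = true: the quoted string is cast to the Int32 column type.
    let v = sql_value_to_value(
        "a",
        &ConcreteDataType::int32_datatype(),
        &sql_val,
        None,
        None,
        true,
    )
    .unwrap();
    assert_eq!(Value::Int32(123), v);

    // auto_string_to_numeric = false: the same literal is rejected for an Int32 column.
    assert!(
        sql_value_to_value(
            "a",
            &ConcreteDataType::int32_datatype(),
            &sql_val,
            None,
            None,
            false,
        )
        .is_err()
    );
}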

View File

@@ -14,8 +14,8 @@
use common_time::timezone::Timezone;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::ColumnDefaultConstraint;
use datatypes::schema::constraint::{CURRENT_TIMESTAMP, CURRENT_TIMESTAMP_FN};
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
use snafu::ensure;
use sqlparser::ast::ValueWithSpan;
pub use sqlparser::ast::{
@@ -47,12 +47,9 @@ pub fn parse_column_default_constraint(
);
let default_constraint = match &opt.option {
ColumnOption::Default(Expr::Value(v)) => {
let schema = ColumnSchema::new(column_name, data_type.clone(), true);
ColumnDefaultConstraint::Value(sql_value_to_value(
&schema, &v.value, timezone, None, false,
)?)
}
ColumnOption::Default(Expr::Value(v)) => ColumnDefaultConstraint::Value(
sql_value_to_value(column_name, data_type, &v.value, timezone, None, false)?,
),
ColumnOption::Default(Expr::Function(func)) => {
let mut func = format!("{func}").to_lowercase();
// normalize CURRENT_TIMESTAMP to CURRENT_TIMESTAMP()
@@ -83,7 +80,8 @@ pub fn parse_column_default_constraint(
if let Expr::Value(v) = &**expr {
let value = sql_value_to_value(
&ColumnSchema::new(column_name, data_type.clone(), true),
column_name,
data_type,
&v.value,
timezone,
Some(*op),

View File

@@ -71,7 +71,6 @@ pub fn convert_metric_to_write_request(
timestamp,
}],
exemplars: vec![],
histograms: vec![],
}),
MetricType::GAUGE => timeseries.push(TimeSeries {
labels: convert_label(m.get_label(), mf_name, None),
@@ -80,7 +79,6 @@ pub fn convert_metric_to_write_request(
timestamp,
}],
exemplars: vec![],
histograms: vec![],
}),
MetricType::HISTOGRAM => {
let h = m.get_histogram();
@@ -99,7 +97,6 @@ pub fn convert_metric_to_write_request(
timestamp,
}],
exemplars: vec![],
histograms: vec![],
});
if upper_bound.is_sign_positive() && upper_bound.is_infinite() {
inf_seen = true;
@@ -117,7 +114,6 @@ pub fn convert_metric_to_write_request(
timestamp,
}],
exemplars: vec![],
histograms: vec![],
});
}
timeseries.push(TimeSeries {
@@ -131,7 +127,6 @@ pub fn convert_metric_to_write_request(
timestamp,
}],
exemplars: vec![],
histograms: vec![],
});
timeseries.push(TimeSeries {
labels: convert_label(
@@ -144,7 +139,6 @@ pub fn convert_metric_to_write_request(
timestamp,
}],
exemplars: vec![],
histograms: vec![],
});
}
MetricType::SUMMARY => {
@@ -161,7 +155,6 @@ pub fn convert_metric_to_write_request(
timestamp,
}],
exemplars: vec![],
histograms: vec![],
});
}
timeseries.push(TimeSeries {
@@ -175,7 +168,6 @@ pub fn convert_metric_to_write_request(
timestamp,
}],
exemplars: vec![],
histograms: vec![],
});
timeseries.push(TimeSeries {
labels: convert_label(
@@ -188,7 +180,6 @@ pub fn convert_metric_to_write_request(
timestamp,
}],
exemplars: vec![],
histograms: vec![],
});
}
MetricType::UNTYPED => {
@@ -283,7 +274,7 @@ mod test {
assert_eq!(
format!("{:?}", write_quest.timeseries),
r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }]"#
r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }]"#
);
let gauge_opts = Opts::new("test_gauge", "test help")
@@ -297,7 +288,7 @@ mod test {
let write_quest = convert_metric_to_write_request(mf, None, 0);
assert_eq!(
format!("{:?}", write_quest.timeseries),
r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_gauge" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 42.0, timestamp: 0 }], exemplars: [], histograms: [] }]"#
r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_gauge" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 42.0, timestamp: 0 }], exemplars: [] }]"#
);
}
@@ -314,20 +305,20 @@ mod test {
.iter()
.map(|x| format!("{:?}", x))
.collect();
let ans = r#"TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.005" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.01" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.025" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.05" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.1" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.25" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "1" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "2.5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "10" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "+Inf" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_sum" }, Label { name: "a", value: "1" }], samples: [Sample { value: 0.25, timestamp: 0 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_count" }, Label { name: "a", value: "1" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }"#;
let ans = r#"TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.005" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.01" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.025" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.05" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.1" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.25" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "1" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "2.5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "10" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "+Inf" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_sum" }, Label { name: "a", value: "1" }], samples: [Sample { value: 0.25, timestamp: 0 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_count" }, Label { name: "a", value: "1" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }"#;
assert_eq!(write_quest_str.join("\n"), ans);
}
@@ -364,10 +355,10 @@ TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_count" },
.iter()
.map(|x| format!("{:?}", x))
.collect();
let ans = r#"TimeSeries { labels: [Label { name: "__name__", value: "test_summary" }, Label { name: "quantile", value: "50" }], samples: [Sample { value: 3.0, timestamp: 20 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_summary" }, Label { name: "quantile", value: "100" }], samples: [Sample { value: 5.0, timestamp: 20 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_summary_sum" }], samples: [Sample { value: 15.0, timestamp: 20 }], exemplars: [], histograms: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_summary_count" }], samples: [Sample { value: 5.0, timestamp: 20 }], exemplars: [], histograms: [] }"#;
let ans = r#"TimeSeries { labels: [Label { name: "__name__", value: "test_summary" }, Label { name: "quantile", value: "50" }], samples: [Sample { value: 3.0, timestamp: 20 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_summary" }, Label { name: "quantile", value: "100" }], samples: [Sample { value: 5.0, timestamp: 20 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_summary_sum" }], samples: [Sample { value: 15.0, timestamp: 20 }], exemplars: [] }
TimeSeries { labels: [Label { name: "__name__", value: "test_summary_count" }], samples: [Sample { value: 5.0, timestamp: 20 }], exemplars: [] }"#;
assert_eq!(write_quest_str.join("\n"), ans);
}
@@ -394,11 +385,11 @@ TimeSeries { labels: [Label { name: "__name__", value: "test_summary_count" }],
let write_quest2 = convert_metric_to_write_request(mf, Some(&filter), 0);
assert_eq!(
format!("{:?}", write_quest1.timeseries),
r#"[TimeSeries { labels: [Label { name: "__name__", value: "filter_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }, TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 2.0, timestamp: 0 }], exemplars: [], histograms: [] }]"#
r#"[TimeSeries { labels: [Label { name: "__name__", value: "filter_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }, TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 2.0, timestamp: 0 }], exemplars: [] }]"#
);
assert_eq!(
format!("{:?}", write_quest2.timeseries),
r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 2.0, timestamp: 0 }], exemplars: [], histograms: [] }]"#
r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 2.0, timestamp: 0 }], exemplars: [] }]"#
);
}
}

View File

@@ -206,8 +206,6 @@ mod tests {
client_cert_path: None,
client_key_path: None,
}),
connect_timeout: Duration::from_secs(3),
timeout: Duration::from_secs(3),
},
kafka_topic: KafkaTopicConfig {
num_topics: 32,
@@ -241,8 +239,6 @@ mod tests {
client_cert_path: None,
client_key_path: None,
}),
connect_timeout: Duration::from_secs(3),
timeout: Duration::from_secs(3),
},
max_batch_bytes: ReadableSize::mb(1),
consumer_wait_timeout: Duration::from_millis(100),

View File

@@ -36,6 +36,9 @@ pub const DEFAULT_BACKOFF_CONFIG: BackoffConfig = BackoffConfig {
deadline: Some(Duration::from_secs(3)),
};
/// The default connect timeout for kafka client.
pub const DEFAULT_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
/// Default interval for auto WAL pruning.
pub const DEFAULT_AUTO_PRUNE_INTERVAL: Duration = Duration::from_mins(30);
/// Default limit for concurrent auto pruning tasks.
@@ -164,12 +167,6 @@ pub struct KafkaConnectionConfig {
pub sasl: Option<KafkaClientSasl>,
/// Client TLS config
pub tls: Option<KafkaClientTls>,
/// The connect timeout for kafka client.
#[serde(with = "humantime_serde")]
pub connect_timeout: Duration,
/// The timeout for kafka client.
#[serde(with = "humantime_serde")]
pub timeout: Duration,
}
impl Default for KafkaConnectionConfig {
@@ -178,8 +175,6 @@ impl Default for KafkaConnectionConfig {
broker_endpoints: vec![BROKER_ENDPOINT.to_string()],
sasl: None,
tls: None,
connect_timeout: Duration::from_secs(3),
timeout: Duration::from_secs(3),
}
}
}
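
The connect_timeout and timeout fields in the hunks above are Duration values annotated with #[serde(with = "humantime_serde")], which lets the TOML config express them as human-readable strings such as "3s". The snippet below is a standalone sketch, not part of this diff; it assumes serde (with the derive feature), toml, and humantime_serde as dependencies, and the field name merely mirrors the one above.

use std::time::Duration;

use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize)]
struct ConnectionConfig {
    // Accepts strings like "3s", "500ms" or "1h 30m".
    #[serde(with = "humantime_serde")]
    connect_timeout: Duration,
}

fn main() {
    let cfg: ConnectionConfig = toml::from_str(r#"connect_timeout = "3s""#).unwrap();
    assert_eq!(cfg.connect_timeout, Duration::from_secs(3));
    // Serializing produces the human-readable form again: connect_timeout = "3s"
    println!("{}", toml::to_string(&cfg).unwrap());
}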

View File

@@ -24,7 +24,6 @@ use store_api::storage::GcReport;
mod close_region;
mod downgrade_region;
mod enter_staging;
mod file_ref;
mod flush_region;
mod gc_worker;
@@ -33,7 +32,6 @@ mod upgrade_region;
use crate::heartbeat::handler::close_region::CloseRegionsHandler;
use crate::heartbeat::handler::downgrade_region::DowngradeRegionsHandler;
use crate::heartbeat::handler::enter_staging::EnterStagingRegionsHandler;
use crate::heartbeat::handler::file_ref::GetFileRefsHandler;
use crate::heartbeat::handler::flush_region::FlushRegionsHandler;
use crate::heartbeat::handler::gc_worker::GcRegionsHandler;
@@ -125,9 +123,6 @@ impl RegionHeartbeatResponseHandler {
Instruction::GcRegions(_) => Ok(Some(Box::new(GcRegionsHandler.into()))),
Instruction::InvalidateCaches(_) => InvalidHeartbeatResponseSnafu.fail(),
Instruction::Suspend => Ok(None),
Instruction::EnterStagingRegions(_) => {
Ok(Some(Box::new(EnterStagingRegionsHandler.into())))
}
}
}
}
@@ -141,7 +136,6 @@ pub enum InstructionHandlers {
UpgradeRegions(UpgradeRegionsHandler),
GetFileRefs(GetFileRefsHandler),
GcRegions(GcRegionsHandler),
EnterStagingRegions(EnterStagingRegionsHandler),
}
macro_rules! impl_from_handler {
@@ -163,8 +157,7 @@ impl_from_handler!(
DowngradeRegionsHandler => DowngradeRegions,
UpgradeRegionsHandler => UpgradeRegions,
GetFileRefsHandler => GetFileRefs,
GcRegionsHandler => GcRegions,
EnterStagingRegionsHandler => EnterStagingRegions
GcRegionsHandler => GcRegions
);
macro_rules! dispatch_instr {
@@ -209,7 +202,6 @@ dispatch_instr!(
UpgradeRegions => UpgradeRegions,
GetFileRefs => GetFileRefs,
GcRegions => GcRegions,
EnterStagingRegions => EnterStagingRegions
);
#[async_trait]
@@ -262,9 +254,7 @@ mod tests {
use common_meta::heartbeat::mailbox::{
HeartbeatMailbox, IncomingMessage, MailboxRef, MessageMeta,
};
use common_meta::instruction::{
DowngradeRegion, EnterStagingRegion, OpenRegion, UpgradeRegion,
};
use common_meta::instruction::{DowngradeRegion, OpenRegion, UpgradeRegion};
use mito2::config::MitoConfig;
use mito2::engine::MITO_ENGINE_NAME;
use mito2::test_util::{CreateRequestBuilder, TestEnv};
@@ -345,16 +335,6 @@ mod tests {
region_id,
..Default::default()
}]);
assert!(
heartbeat_handler
.is_acceptable(&heartbeat_env.create_handler_ctx((meta.clone(), instruction)))
);
// Enter staging region
let instruction = Instruction::EnterStagingRegions(vec![EnterStagingRegion {
region_id,
partition_expr: "".to_string(),
}]);
assert!(
heartbeat_handler.is_acceptable(&heartbeat_env.create_handler_ctx((meta, instruction)))
);

View File

@@ -1,243 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_meta::instruction::{
EnterStagingRegion, EnterStagingRegionReply, EnterStagingRegionsReply, InstructionReply,
};
use common_telemetry::{error, warn};
use futures::future::join_all;
use store_api::region_request::{EnterStagingRequest, RegionRequest};
use crate::heartbeat::handler::{HandlerContext, InstructionHandler};
#[derive(Debug, Clone, Copy, Default)]
pub struct EnterStagingRegionsHandler;
#[async_trait::async_trait]
impl InstructionHandler for EnterStagingRegionsHandler {
type Instruction = Vec<EnterStagingRegion>;
async fn handle(
&self,
ctx: &HandlerContext,
enter_staging: Self::Instruction,
) -> Option<InstructionReply> {
let futures = enter_staging.into_iter().map(|enter_staging_region| {
Self::handle_enter_staging_region(ctx, enter_staging_region)
});
let results = join_all(futures).await;
Some(InstructionReply::EnterStagingRegions(
EnterStagingRegionsReply::new(results),
))
}
}
impl EnterStagingRegionsHandler {
async fn handle_enter_staging_region(
ctx: &HandlerContext,
EnterStagingRegion {
region_id,
partition_expr,
}: EnterStagingRegion,
) -> EnterStagingRegionReply {
let Some(writable) = ctx.region_server.is_region_leader(region_id) else {
warn!("Region: {} is not found", region_id);
return EnterStagingRegionReply {
region_id,
ready: false,
exists: false,
error: None,
};
};
if !writable {
warn!("Region: {} is not writable", region_id);
return EnterStagingRegionReply {
region_id,
ready: false,
exists: true,
error: Some("Region is not writable".into()),
};
}
match ctx
.region_server
.handle_request(
region_id,
RegionRequest::EnterStaging(EnterStagingRequest { partition_expr }),
)
.await
{
Ok(_) => EnterStagingRegionReply {
region_id,
ready: true,
exists: true,
error: None,
},
Err(err) => {
error!(err; "Failed to enter staging region");
EnterStagingRegionReply {
region_id,
ready: false,
exists: true,
error: Some(format!("{err:?}")),
}
}
}
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_meta::instruction::EnterStagingRegion;
use mito2::config::MitoConfig;
use mito2::engine::MITO_ENGINE_NAME;
use mito2::test_util::{CreateRequestBuilder, TestEnv};
use store_api::path_utils::table_dir;
use store_api::region_engine::RegionRole;
use store_api::region_request::RegionRequest;
use store_api::storage::RegionId;
use crate::heartbeat::handler::enter_staging::EnterStagingRegionsHandler;
use crate::heartbeat::handler::{HandlerContext, InstructionHandler};
use crate::region_server::RegionServer;
use crate::tests::{MockRegionEngine, mock_region_server};
const PARTITION_EXPR: &str = "partition_expr";
#[tokio::test]
async fn test_region_not_exist() {
let mut mock_region_server = mock_region_server();
let (mock_engine, _) = MockRegionEngine::new(MITO_ENGINE_NAME);
mock_region_server.register_engine(mock_engine);
let handler_context = HandlerContext::new_for_test(mock_region_server);
let region_id = RegionId::new(1024, 1);
let replies = EnterStagingRegionsHandler
.handle(
&handler_context,
vec![EnterStagingRegion {
region_id,
partition_expr: "".to_string(),
}],
)
.await
.unwrap();
let replies = replies.expect_enter_staging_regions_reply();
let reply = &replies[0];
assert!(!reply.exists);
assert!(reply.error.is_none());
assert!(!reply.ready);
}
#[tokio::test]
async fn test_region_not_writable() {
let mock_region_server = mock_region_server();
let region_id = RegionId::new(1024, 1);
let (mock_engine, _) =
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
region_engine.mock_role = Some(Some(RegionRole::Follower));
region_engine.handle_request_mock_fn = Some(Box::new(|_, _| Ok(0)));
});
mock_region_server.register_test_region(region_id, mock_engine);
let handler_context = HandlerContext::new_for_test(mock_region_server);
let replies = EnterStagingRegionsHandler
.handle(
&handler_context,
vec![EnterStagingRegion {
region_id,
partition_expr: "".to_string(),
}],
)
.await
.unwrap();
let replies = replies.expect_enter_staging_regions_reply();
let reply = &replies[0];
assert!(reply.exists);
assert!(reply.error.is_some());
assert!(!reply.ready);
}
async fn prepare_region(region_server: &RegionServer) {
let builder = CreateRequestBuilder::new();
let mut create_req = builder.build();
create_req.table_dir = table_dir("test", 1024);
let region_id = RegionId::new(1024, 1);
region_server
.handle_request(region_id, RegionRequest::Create(create_req))
.await
.unwrap();
}
#[tokio::test]
async fn test_enter_staging() {
let mut region_server = mock_region_server();
let region_id = RegionId::new(1024, 1);
let mut engine_env = TestEnv::new().await;
let engine = engine_env.create_engine(MitoConfig::default()).await;
region_server.register_engine(Arc::new(engine.clone()));
prepare_region(&region_server).await;
let handler_context = HandlerContext::new_for_test(region_server);
let replies = EnterStagingRegionsHandler
.handle(
&handler_context,
vec![EnterStagingRegion {
region_id,
partition_expr: PARTITION_EXPR.to_string(),
}],
)
.await
.unwrap();
let replies = replies.expect_enter_staging_regions_reply();
let reply = &replies[0];
assert!(reply.exists);
assert!(reply.error.is_none());
assert!(reply.ready);
// Should be ok to enter staging mode again with the same partition expr
let replies = EnterStagingRegionsHandler
.handle(
&handler_context,
vec![EnterStagingRegion {
region_id,
partition_expr: PARTITION_EXPR.to_string(),
}],
)
.await
.unwrap();
let replies = replies.expect_enter_staging_regions_reply();
let reply = &replies[0];
assert!(reply.exists);
assert!(reply.error.is_none());
assert!(reply.ready);
// Should throw error if try to enter staging mode again with a different partition expr
let replies = EnterStagingRegionsHandler
.handle(
&handler_context,
vec![EnterStagingRegion {
region_id,
partition_expr: "".to_string(),
}],
)
.await
.unwrap();
let replies = replies.expect_enter_staging_regions_reply();
let reply = &replies[0];
assert!(reply.exists);
assert!(reply.error.is_some());
assert!(!reply.ready);
}
}

View File

@@ -19,7 +19,6 @@ use arrow::datatypes::{
Time64NanosecondType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
TimestampNanosecondType, TimestampSecondType,
};
use arrow_array::Array;
use common_time::time::Time;
use common_time::{Duration, Timestamp};
@@ -127,28 +126,3 @@ pub fn duration_array_value(array: &ArrayRef, i: usize) -> Duration {
};
Duration::new(v, time_unit.into())
}
/// Get the string value at index `i` for `Utf8`, `LargeUtf8`, or `Utf8View` arrays.
///
/// Returns `None` when the array type is not a string type or the value is null.
///
/// # Panics
///
/// If index `i` is out of bounds.
pub fn string_array_value_at_index(array: &ArrayRef, i: usize) -> Option<&str> {
match array.data_type() {
DataType::Utf8 => {
let array = array.as_string::<i32>();
array.is_valid(i).then(|| array.value(i))
}
DataType::LargeUtf8 => {
let array = array.as_string::<i64>();
array.is_valid(i).then(|| array.value(i))
}
DataType::Utf8View => {
let array = array.as_string_view();
array.is_valid(i).then(|| array.value(i))
}
_ => None,
}
}

View File

@@ -26,9 +26,9 @@ use std::sync::Arc;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value as Json};
use snafu::{OptionExt, ResultExt, ensure};
use snafu::{ResultExt, ensure};
use crate::error::{self, InvalidJsonSnafu, Result, SerializeSnafu};
use crate::error::{self, Error};
use crate::json::value::{JsonValue, JsonVariant};
use crate::types::json_type::{JsonNativeType, JsonNumberType, JsonObjectType};
use crate::types::{StructField, StructType};
@@ -71,7 +71,7 @@ impl JsonStructureSettings {
pub const RAW_FIELD: &'static str = "_raw";
/// Decode an encoded StructValue back into a serde_json::Value.
pub fn decode(&self, value: Value) -> Result<Json> {
pub fn decode(&self, value: Value) -> Result<Json, Error> {
let context = JsonContext {
key_path: String::new(),
settings: self,
@@ -82,7 +82,7 @@ impl JsonStructureSettings {
/// Decode a StructValue that was encoded with current settings back into a fully structured StructValue.
/// This is useful for reconstructing the original structure from encoded data, especially when
/// unstructured encoding was used for some fields.
pub fn decode_struct(&self, struct_value: StructValue) -> Result<StructValue> {
pub fn decode_struct(&self, struct_value: StructValue) -> Result<StructValue, Error> {
let context = JsonContext {
key_path: String::new(),
settings: self,
@@ -91,11 +91,7 @@ impl JsonStructureSettings {
}
/// Encode a serde_json::Value into a Value::Json using current settings.
pub fn encode(&self, json: Json) -> Result<Value> {
if let Some(json_struct) = self.json_struct() {
return encode_by_struct(json_struct, json);
}
pub fn encode(&self, json: Json) -> Result<Value, Error> {
let context = JsonContext {
key_path: String::new(),
settings: self,
@@ -108,21 +104,13 @@ impl JsonStructureSettings {
&self,
json: Json,
data_type: Option<&JsonNativeType>,
) -> Result<Value> {
) -> Result<Value, Error> {
let context = JsonContext {
key_path: String::new(),
settings: self,
};
encode_json_with_context(json, data_type, &context).map(|v| Value::Json(Box::new(v)))
}
fn json_struct(&self) -> Option<&StructType> {
match &self {
JsonStructureSettings::Structured(fields) => fields.as_ref(),
JsonStructureSettings::PartialUnstructuredByKey { fields, .. } => fields.as_ref(),
_ => None,
}
}
}
impl Default for JsonStructureSettings {
@@ -156,54 +144,12 @@ impl<'a> JsonContext<'a> {
}
}
fn encode_by_struct(json_struct: &StructType, mut json: Json) -> Result<Value> {
let Some(json_object) = json.as_object_mut() else {
return InvalidJsonSnafu {
value: "expect JSON object when struct is provided",
}
.fail();
};
let mut encoded = BTreeMap::new();
fn extract_field(json_object: &mut Map<String, Json>, field: &str) -> Result<Option<Json>> {
let (first, rest) = field.split_once('.').unwrap_or((field, ""));
if rest.is_empty() {
Ok(json_object.remove(first))
} else {
let Some(value) = json_object.get_mut(first) else {
return Ok(None);
};
let json_object = value.as_object_mut().with_context(|| InvalidJsonSnafu {
value: format!(r#"expect "{}" an object"#, first),
})?;
extract_field(json_object, rest)
}
}
let fields = json_struct.fields();
for field in fields.iter() {
let Some(field_value) = extract_field(json_object, field.name())? else {
continue;
};
let field_type: JsonNativeType = field.data_type().into();
let field_value = try_convert_to_expected_type(field_value, &field_type)?;
encoded.insert(field.name().to_string(), field_value);
}
let rest = serde_json::to_string(json_object).context(SerializeSnafu)?;
encoded.insert(JsonStructureSettings::RAW_FIELD.to_string(), rest.into());
let value: JsonValue = encoded.into();
Ok(Value::Json(Box::new(value)))
}
/// Main encoding function with key path tracking
pub fn encode_json_with_context<'a>(
json: Json,
data_type: Option<&JsonNativeType>,
context: &JsonContext<'a>,
) -> Result<JsonValue> {
) -> Result<JsonValue, Error> {
// Check if the entire encoding should be unstructured
if matches!(context.settings, JsonStructureSettings::UnstructuredRaw) {
let json_string = json.to_string();
@@ -269,7 +215,7 @@ fn encode_json_object_with_context<'a>(
mut json_object: Map<String, Json>,
fields: Option<&JsonObjectType>,
context: &JsonContext<'a>,
) -> Result<JsonValue> {
) -> Result<JsonValue, Error> {
let mut object = BTreeMap::new();
// First, process fields from the provided schema in their original order
if let Some(fields) = fields {
@@ -302,7 +248,7 @@ fn encode_json_array_with_context<'a>(
json_array: Vec<Json>,
item_type: Option<&JsonNativeType>,
context: &JsonContext<'a>,
) -> Result<JsonValue> {
) -> Result<JsonValue, Error> {
let json_array_len = json_array.len();
let mut items = Vec::with_capacity(json_array_len);
let mut element_type = item_type.cloned();
@@ -340,7 +286,7 @@ fn encode_json_value_with_context<'a>(
json: Json,
expected_type: Option<&JsonNativeType>,
context: &JsonContext<'a>,
) -> Result<JsonValue> {
) -> Result<JsonValue, Error> {
// Check if current key should be treated as unstructured
if context.is_unstructured_key() {
return Ok(json.to_string().into());
@@ -355,7 +301,7 @@ fn encode_json_value_with_context<'a>(
if let Some(expected) = expected_type
&& let Ok(value) = try_convert_to_expected_type(i, expected)
{
return Ok(value.into());
return Ok(value);
}
Ok(i.into())
} else if let Some(u) = n.as_u64() {
@@ -363,7 +309,7 @@ fn encode_json_value_with_context<'a>(
if let Some(expected) = expected_type
&& let Ok(value) = try_convert_to_expected_type(u, expected)
{
return Ok(value.into());
return Ok(value);
}
if u <= i64::MAX as u64 {
Ok((u as i64).into())
@@ -375,7 +321,7 @@ fn encode_json_value_with_context<'a>(
if let Some(expected) = expected_type
&& let Ok(value) = try_convert_to_expected_type(f, expected)
{
return Ok(value.into());
return Ok(value);
}
// Default to f64 for floating point numbers
@@ -389,7 +335,7 @@ fn encode_json_value_with_context<'a>(
if let Some(expected) = expected_type
&& let Ok(value) = try_convert_to_expected_type(s.as_str(), expected)
{
return Ok(value.into());
return Ok(value);
}
Ok(s.into())
}
@@ -399,7 +345,10 @@ fn encode_json_value_with_context<'a>(
}
/// Main decoding function with key path tracking
pub fn decode_value_with_context(value: Value, context: &JsonContext) -> Result<Json> {
pub fn decode_value_with_context<'a>(
value: Value,
context: &JsonContext<'a>,
) -> Result<Json, Error> {
// Check if the entire decoding should be unstructured
if matches!(context.settings, JsonStructureSettings::UnstructuredRaw) {
return decode_unstructured_value(value);
@@ -421,7 +370,7 @@ pub fn decode_value_with_context(value: Value, context: &JsonContext) -> Result<
fn decode_struct_with_context<'a>(
struct_value: StructValue,
context: &JsonContext<'a>,
) -> Result<Json> {
) -> Result<Json, Error> {
let mut json_object = Map::with_capacity(struct_value.len());
let (items, fields) = struct_value.into_parts();
@@ -436,7 +385,10 @@ fn decode_struct_with_context<'a>(
}
/// Decode a list value to JSON array
fn decode_list_with_context(list_value: ListValue, context: &JsonContext) -> Result<Json> {
fn decode_list_with_context<'a>(
list_value: ListValue,
context: &JsonContext<'a>,
) -> Result<Json, Error> {
let mut json_array = Vec::with_capacity(list_value.len());
let data_items = list_value.take_items();
@@ -451,7 +403,7 @@ fn decode_list_with_context(list_value: ListValue, context: &JsonContext) -> Res
}
/// Decode unstructured value (stored as string)
fn decode_unstructured_value(value: Value) -> Result<Json> {
fn decode_unstructured_value(value: Value) -> Result<Json, Error> {
match value {
// Handle expected format: StructValue with single _raw field
Value::Struct(struct_value) => {
@@ -491,7 +443,7 @@ fn decode_unstructured_value(value: Value) -> Result<Json> {
}
/// Decode primitive value to JSON
fn decode_primitive_value(value: Value) -> Result<Json> {
fn decode_primitive_value(value: Value) -> Result<Json, Error> {
match value {
Value::Null => Ok(Json::Null),
Value::Boolean(b) => Ok(Json::Bool(b)),
@@ -535,7 +487,7 @@ fn decode_primitive_value(value: Value) -> Result<Json> {
fn decode_struct_with_settings<'a>(
struct_value: StructValue,
context: &JsonContext<'a>,
) -> Result<StructValue> {
) -> Result<StructValue, Error> {
// Check if we can return the struct directly (Structured case)
if matches!(context.settings, JsonStructureSettings::Structured(_)) {
return Ok(struct_value);
@@ -615,7 +567,7 @@ fn decode_struct_with_settings<'a>(
fn decode_list_with_settings<'a>(
list_value: ListValue,
context: &JsonContext<'a>,
) -> Result<ListValue> {
) -> Result<ListValue, Error> {
let mut items = Vec::with_capacity(list_value.len());
let (data_items, datatype) = list_value.into_parts();
@@ -640,7 +592,7 @@ fn decode_list_with_settings<'a>(
}
/// Helper function to decode a struct that was encoded with UnstructuredRaw settings
fn decode_unstructured_raw_struct(struct_value: StructValue) -> Result<StructValue> {
fn decode_unstructured_raw_struct(struct_value: StructValue) -> Result<StructValue, Error> {
// For UnstructuredRaw, the struct must have exactly one field named "_raw"
if struct_value.struct_type().fields().len() == 1 {
let field = &struct_value.struct_type().fields()[0];
@@ -684,9 +636,12 @@ fn decode_unstructured_raw_struct(struct_value: StructValue) -> Result<StructVal
}
/// Helper function to try converting a value to an expected type
fn try_convert_to_expected_type<T>(value: T, expected_type: &JsonNativeType) -> Result<JsonVariant>
fn try_convert_to_expected_type<T>(
value: T,
expected_type: &JsonNativeType,
) -> Result<JsonValue, Error>
where
T: Into<JsonVariant>,
T: Into<JsonValue>,
{
let value = value.into();
let cast_error = || {
@@ -695,7 +650,7 @@ where
}
.fail()
};
let actual_type = &value.native_type();
let actual_type = value.json_type().native_type();
match (actual_type, expected_type) {
(x, y) if x == y => Ok(value),
(JsonNativeType::Number(x), JsonNativeType::Number(y)) => match (x, y) {
@@ -736,107 +691,6 @@ mod tests {
use crate::data_type::ConcreteDataType;
use crate::types::ListType;
#[test]
fn test_encode_by_struct() {
let json_struct: StructType = [
StructField::new("s", ConcreteDataType::string_datatype(), true),
StructField::new("foo.i", ConcreteDataType::int64_datatype(), true),
StructField::new("x.y.z", ConcreteDataType::boolean_datatype(), true),
]
.into();
let json = json!({
"s": "hello",
"t": "world",
"foo": {
"i": 1,
"j": 2
},
"x": {
"y": {
"z": true
}
}
});
let value = encode_by_struct(&json_struct, json).unwrap();
assert_eq!(
value.to_string(),
r#"Json({ _raw: {"foo":{"j":2},"t":"world","x":{"y":{}}}, foo.i: 1, s: hello, x.y.z: true })"#
);
let json = json!({
"t": "world",
"foo": {
"i": 1,
"j": 2
},
"x": {
"y": {
"z": true
}
}
});
let value = encode_by_struct(&json_struct, json).unwrap();
assert_eq!(
value.to_string(),
r#"Json({ _raw: {"foo":{"j":2},"t":"world","x":{"y":{}}}, foo.i: 1, x.y.z: true })"#
);
let json = json!({
"s": 1234,
"foo": {
"i": 1,
"j": 2
},
"x": {
"y": {
"z": true
}
}
});
let value = encode_by_struct(&json_struct, json).unwrap();
assert_eq!(
value.to_string(),
r#"Json({ _raw: {"foo":{"j":2},"x":{"y":{}}}, foo.i: 1, s: 1234, x.y.z: true })"#
);
let json = json!({
"s": "hello",
"t": "world",
"foo": {
"i": "bar",
"j": 2
},
"x": {
"y": {
"z": true
}
}
});
let result = encode_by_struct(&json_struct, json);
assert_eq!(
result.unwrap_err().to_string(),
"Cannot cast value bar to Number(I64)"
);
let json = json!({
"s": "hello",
"t": "world",
"foo": {
"i": 1,
"j": 2
},
"x": {
"y": "z"
}
});
let result = encode_by_struct(&json_struct, json);
assert_eq!(
result.unwrap_err().to_string(),
r#"Invalid JSON: expect "y" an object"#
);
}
#[test]
fn test_encode_json_null() {
let json = Json::Null;

View File

@@ -82,18 +82,6 @@ impl From<f64> for JsonNumber {
}
}
impl From<Number> for JsonNumber {
fn from(n: Number) -> Self {
if let Some(i) = n.as_i64() {
i.into()
} else if let Some(i) = n.as_u64() {
i.into()
} else {
n.as_f64().unwrap_or(f64::NAN).into()
}
}
}
impl Display for JsonNumber {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
@@ -121,28 +109,7 @@ pub enum JsonVariant {
}
impl JsonVariant {
pub(crate) fn as_i64(&self) -> Option<i64> {
match self {
JsonVariant::Number(n) => n.as_i64(),
_ => None,
}
}
pub(crate) fn as_u64(&self) -> Option<u64> {
match self {
JsonVariant::Number(n) => n.as_u64(),
_ => None,
}
}
pub(crate) fn as_f64(&self) -> Option<f64> {
match self {
JsonVariant::Number(n) => Some(n.as_f64()),
_ => None,
}
}
pub(crate) fn native_type(&self) -> JsonNativeType {
fn native_type(&self) -> JsonNativeType {
match self {
JsonVariant::Null => JsonNativeType::Null,
JsonVariant::Bool(_) => JsonNativeType::Bool,
@@ -238,32 +205,6 @@ impl<K: Into<String>, V: Into<JsonVariant>, const N: usize> From<[(K, V); N]> fo
}
}
impl From<serde_json::Value> for JsonVariant {
fn from(v: serde_json::Value) -> Self {
fn helper(v: serde_json::Value) -> JsonVariant {
match v {
serde_json::Value::Null => JsonVariant::Null,
serde_json::Value::Bool(b) => b.into(),
serde_json::Value::Number(n) => n.into(),
serde_json::Value::String(s) => s.into(),
serde_json::Value::Array(array) => {
JsonVariant::Array(array.into_iter().map(helper).collect())
}
serde_json::Value::Object(object) => {
JsonVariant::Object(object.into_iter().map(|(k, v)| (k, helper(v))).collect())
}
}
}
helper(v)
}
}
impl From<BTreeMap<String, JsonVariant>> for JsonVariant {
fn from(v: BTreeMap<String, JsonVariant>) -> Self {
Self::Object(v)
}
}
impl Display for JsonVariant {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
@@ -336,11 +277,24 @@ impl JsonValue {
}
pub(crate) fn as_i64(&self) -> Option<i64> {
self.json_variant.as_i64()
match self.json_variant {
JsonVariant::Number(n) => n.as_i64(),
_ => None,
}
}
pub(crate) fn as_u64(&self) -> Option<u64> {
self.json_variant.as_u64()
match self.json_variant {
JsonVariant::Number(n) => n.as_u64(),
_ => None,
}
}
pub(crate) fn as_f64(&self) -> Option<f64> {
match self.json_variant {
JsonVariant::Number(n) => Some(n.as_f64()),
_ => None,
}
}
pub(crate) fn as_f64_lossy(&self) -> Option<f64> {

View File

@@ -122,9 +122,9 @@ pub struct StructField {
}
impl StructField {
pub fn new<T: Into<String>>(name: T, data_type: ConcreteDataType, nullable: bool) -> Self {
pub fn new(name: String, data_type: ConcreteDataType, nullable: bool) -> Self {
StructField {
name: name.into(),
name,
data_type,
nullable,
metadata: BTreeMap::new(),

View File

@@ -15,7 +15,7 @@
//! Frontend client to run flow as batching task which is time-window-aware normal query triggered every tick set by user
use std::collections::HashMap;
use std::sync::{Arc, Mutex, Weak};
use std::sync::{Arc, Weak};
use std::time::SystemTime;
use api::v1::greptime_request::Request;
@@ -38,7 +38,6 @@ use servers::query_handler::grpc::GrpcQueryHandler;
use session::context::{QueryContextBuilder, QueryContextRef};
use session::hints::READ_PREFERENCE_HINT;
use snafu::{OptionExt, ResultExt};
use tokio::sync::SetOnce;
use crate::batching_mode::BatchingModeOptions;
use crate::error::{
@@ -76,19 +75,7 @@ impl<E: ErrorExt + Send + Sync + 'static, T: GrpcQueryHandler<Error = E> + Send
}
}
#[derive(Debug, Clone)]
pub struct HandlerMutable {
handler: Arc<Mutex<Option<Weak<dyn GrpcQueryHandlerWithBoxedError>>>>,
is_initialized: Arc<SetOnce<()>>,
}
impl HandlerMutable {
pub async fn set_handler(&self, handler: Weak<dyn GrpcQueryHandlerWithBoxedError>) {
*self.handler.lock().unwrap() = Some(handler);
// Ignore the error, as we allow the handler to be set multiple times.
let _ = self.is_initialized.set(());
}
}
type HandlerMutable = Arc<std::sync::Mutex<Option<Weak<dyn GrpcQueryHandlerWithBoxedError>>>>;
/// A simple frontend client able to execute sql using grpc protocol
///
@@ -113,11 +100,7 @@ pub enum FrontendClient {
impl FrontendClient {
/// Create a new empty frontend client, with a `HandlerMutable` to set the grpc handler later
pub fn from_empty_grpc_handler(query: QueryOptions) -> (Self, HandlerMutable) {
let is_initialized = Arc::new(SetOnce::new());
let handler = HandlerMutable {
handler: Arc::new(Mutex::new(None)),
is_initialized,
};
let handler = Arc::new(std::sync::Mutex::new(None));
(
Self::Standalone {
database_client: handler.clone(),
@@ -127,13 +110,23 @@ impl FrontendClient {
)
}
/// Waits until the frontend client is initialized.
pub async fn wait_initialized(&self) {
if let FrontendClient::Standalone {
database_client, ..
} = self
{
database_client.is_initialized.wait().await;
/// Check if the frontend client is initialized.
///
/// In distributed mode, it is always initialized.
/// In standalone mode, it checks if the database client is set.
pub fn is_initialized(&self) -> bool {
match self {
FrontendClient::Distributed { .. } => true,
FrontendClient::Standalone {
database_client, ..
} => {
let guard = database_client.lock();
if let Ok(guard) = guard {
guard.is_some()
} else {
false
}
}
}
}
@@ -165,14 +158,8 @@ impl FrontendClient {
grpc_handler: Weak<dyn GrpcQueryHandlerWithBoxedError>,
query: QueryOptions,
) -> Self {
let is_initialized = Arc::new(SetOnce::new_with(Some(())));
let handler = HandlerMutable {
handler: Arc::new(Mutex::new(Some(grpc_handler))),
is_initialized: is_initialized.clone(),
};
Self::Standalone {
database_client: handler,
database_client: Arc::new(std::sync::Mutex::new(Some(grpc_handler))),
query,
}
}
@@ -354,7 +341,6 @@ impl FrontendClient {
{
let database_client = {
database_client
.handler
.lock()
.map_err(|e| {
UnexpectedSnafu {
@@ -432,7 +418,6 @@ impl FrontendClient {
{
let database_client = {
database_client
.handler
.lock()
.map_err(|e| {
UnexpectedSnafu {
@@ -495,73 +480,3 @@ impl std::fmt::Display for PeerDesc {
}
}
}
#[cfg(test)]
mod tests {
use std::time::Duration;
use common_query::Output;
use tokio::time::timeout;
use super::*;
#[derive(Debug)]
struct NoopHandler;
#[async_trait::async_trait]
impl GrpcQueryHandlerWithBoxedError for NoopHandler {
async fn do_query(
&self,
_query: Request,
_ctx: QueryContextRef,
) -> std::result::Result<Output, BoxedError> {
Ok(Output::new_with_affected_rows(0))
}
}
#[tokio::test]
async fn wait_initialized() {
let (client, handler_mut) =
FrontendClient::from_empty_grpc_handler(QueryOptions::default());
assert!(
timeout(Duration::from_millis(50), client.wait_initialized())
.await
.is_err()
);
let handler: Arc<dyn GrpcQueryHandlerWithBoxedError> = Arc::new(NoopHandler);
handler_mut.set_handler(Arc::downgrade(&handler)).await;
timeout(Duration::from_secs(1), client.wait_initialized())
.await
.expect("wait_initialized should complete after handler is set");
timeout(Duration::from_millis(10), client.wait_initialized())
.await
.expect("wait_initialized should be a no-op once initialized");
let handler: Arc<dyn GrpcQueryHandlerWithBoxedError> = Arc::new(NoopHandler);
let client =
FrontendClient::from_grpc_handler(Arc::downgrade(&handler), QueryOptions::default());
assert!(
timeout(Duration::from_millis(10), client.wait_initialized())
.await
.is_ok()
);
let meta_client = Arc::new(MetaClient::default());
let client = FrontendClient::from_meta_client(
meta_client,
None,
QueryOptions::default(),
BatchingModeOptions::default(),
)
.unwrap();
assert!(
timeout(Duration::from_millis(10), client.wait_initialized())
.await
.is_ok()
);
}
}
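
The hunks above contrast two shapes for the standalone frontend handler: a SetOnce-based HandlerMutable with an async wait_initialized(), and a plain Arc<Mutex<Option<Weak<...>>>> slot checked by a non-blocking is_initialized(). A standalone sketch of the simpler slot pattern follows; the handler type is reduced to a String purely for illustration.

use std::sync::{Arc, Mutex, Weak};

// The slot stays empty until a handler is registered.
type HandlerSlot = Arc<Mutex<Option<Weak<String>>>>;

fn is_initialized(slot: &HandlerSlot) -> bool {
    // A poisoned lock is treated as "not initialized" instead of panicking.
    slot.lock().map(|guard| guard.is_some()).unwrap_or(false)
}

fn main() {
    let slot: HandlerSlot = Arc::new(Mutex::new(None));
    assert!(!is_initialized(&slot));

    let handler = Arc::new(String::from("grpc handler"));
    *slot.lock().unwrap() = Some(Arc::downgrade(&handler));
    assert!(is_initialized(&slot));
}

Because only a Weak reference is stored, is_some() only says that a handler was registered at some point, not that it is still alive; code that actually executes queries still has to upgrade the Weak.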

View File

@@ -490,6 +490,7 @@ impl<'a> FlownodeServiceBuilder<'a> {
let config = GrpcServerConfig {
max_recv_message_size: opts.grpc.max_recv_message_size.as_bytes() as usize,
max_send_message_size: opts.grpc.max_send_message_size.as_bytes() as usize,
max_total_message_memory: opts.grpc.max_total_message_memory.as_bytes() as usize,
tls: opts.grpc.tls.clone(),
max_connection_age: opts.grpc.max_connection_age,
};

View File

@@ -32,7 +32,6 @@ common-frontend.workspace = true
common-function.workspace = true
common-grpc.workspace = true
common-macro.workspace = true
common-memory-manager.workspace = true
common-meta.workspace = true
common-options.workspace = true
common-procedure.workspace = true

View File

@@ -357,6 +357,14 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to acquire more permits from limiter"))]
AcquireLimiter {
#[snafu(source)]
error: tokio::sync::AcquireError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Service suspended"))]
Suspended {
#[snafu(implicit)]
@@ -441,6 +449,8 @@ impl ErrorExt for Error {
Error::StatementTimeout { .. } => StatusCode::Cancelled,
Error::AcquireLimiter { .. } => StatusCode::Internal,
Error::Suspended { .. } => StatusCode::Suspended,
}
}

View File

@@ -17,7 +17,6 @@ use std::sync::Arc;
use common_base::readable_size::ReadableSize;
use common_config::config::Configurable;
use common_event_recorder::EventRecorderOptions;
use common_memory_manager::OnExhaustedPolicy;
use common_options::datanode::DatanodeClientOptions;
use common_options::memory::MemoryOptions;
use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, TracingOptions};
@@ -46,12 +45,6 @@ pub struct FrontendOptions {
pub default_timezone: Option<String>,
pub default_column_prefix: Option<String>,
pub heartbeat: HeartbeatOptions,
/// Maximum total memory for all concurrent write request bodies and messages (HTTP, gRPC, Flight).
/// Set to 0 to disable the limit. Default: "0" (unlimited)
pub max_in_flight_write_bytes: ReadableSize,
/// Policy when write bytes quota is exhausted.
/// Options: "wait" (default, 10s), "wait(<duration>)", "fail"
pub write_bytes_exhausted_policy: OnExhaustedPolicy,
pub http: HttpOptions,
pub grpc: GrpcOptions,
/// The internal gRPC options for the frontend service.
@@ -70,6 +63,7 @@ pub struct FrontendOptions {
pub user_provider: Option<String>,
pub tracing: TracingOptions,
pub query: QueryOptions,
pub max_in_flight_write_bytes: Option<ReadableSize>,
pub slow_query: SlowQueryOptions,
pub memory: MemoryOptions,
/// The event recorder options.
@@ -83,8 +77,6 @@ impl Default for FrontendOptions {
default_timezone: None,
default_column_prefix: None,
heartbeat: HeartbeatOptions::frontend_default(),
max_in_flight_write_bytes: ReadableSize(0),
write_bytes_exhausted_policy: OnExhaustedPolicy::default(),
http: HttpOptions::default(),
grpc: GrpcOptions::default(),
internal_grpc: None,
@@ -101,6 +93,7 @@ impl Default for FrontendOptions {
user_provider: None,
tracing: TracingOptions::default(),
query: QueryOptions::default(),
max_in_flight_write_bytes: None,
slow_query: SlowQueryOptions::default(),
memory: MemoryOptions::default(),
event_recorder: EventRecorderOptions::default(),
@@ -164,6 +157,7 @@ mod tests {
use common_error::from_header_to_err_code_msg;
use common_error::status_code::StatusCode;
use common_grpc::channel_manager::ChannelManager;
use common_meta::distributed_time_constants::FRONTEND_HEARTBEAT_INTERVAL_MILLIS;
use common_meta::heartbeat::handler::HandlerGroupExecutor;
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::heartbeat::handler::suspend::SuspendHandler;
@@ -406,10 +400,6 @@ mod tests {
..Default::default()
},
meta_client: Some(meta_client_options.clone()),
heartbeat: HeartbeatOptions {
interval: Duration::from_secs(1),
..Default::default()
},
..Default::default()
};
@@ -419,8 +409,7 @@ mod tests {
let meta_client = create_meta_client(&meta_client_options, server.clone()).await;
let frontend = create_frontend(&options, meta_client).await?;
let frontend_heartbeat_interval = options.heartbeat.interval;
tokio::time::sleep(frontend_heartbeat_interval).await;
tokio::time::sleep(Duration::from_millis(FRONTEND_HEARTBEAT_INTERVAL_MILLIS)).await;
// initial state: not suspend:
assert!(!frontend.instance.is_suspended());
verify_suspend_state_by_http(&frontend, Ok(r#"[{"records":{"schema":{"column_schemas":[{"name":"Int64(1)","data_type":"Int64"}]},"rows":[[1]],"total_rows":1}}]"#)).await;
@@ -437,7 +426,7 @@ mod tests {
// make heartbeat server returned "suspend" instruction,
server.suspend.store(true, Ordering::Relaxed);
tokio::time::sleep(frontend_heartbeat_interval).await;
tokio::time::sleep(Duration::from_millis(FRONTEND_HEARTBEAT_INTERVAL_MILLIS)).await;
// ... then the frontend is suspended:
assert!(frontend.instance.is_suspended());
verify_suspend_state_by_http(
@@ -453,7 +442,7 @@ mod tests {
// make heartbeat server NOT returned "suspend" instruction,
server.suspend.store(false, Ordering::Relaxed);
tokio::time::sleep(frontend_heartbeat_interval).await;
tokio::time::sleep(Duration::from_millis(FRONTEND_HEARTBEAT_INTERVAL_MILLIS)).await;
// ... then frontend's suspend state is cleared:
assert!(!frontend.instance.is_suspended());
verify_suspend_state_by_http(&frontend, Ok(r#"[{"records":{"schema":{"column_schemas":[{"name":"Int64(1)","data_type":"Int64"}]},"rows":[[1]],"total_rows":1}}]"#)).await;

View File

@@ -97,6 +97,7 @@ use crate::error::{
ParseSqlSnafu, PermissionSnafu, PlanStatementSnafu, Result, SqlExecInterceptedSnafu,
StatementTimeoutSnafu, TableOperationSnafu,
};
use crate::limiter::LimiterRef;
use crate::stream_wrapper::CancellableStreamWrapper;
lazy_static! {
@@ -117,6 +118,7 @@ pub struct Instance {
deleter: DeleterRef,
table_metadata_manager: TableMetadataManagerRef,
event_recorder: Option<EventRecorderRef>,
limiter: Option<LimiterRef>,
process_manager: ProcessManagerRef,
slow_query_options: SlowQueryOptions,
suspend: Arc<AtomicBool>,

View File

@@ -49,6 +49,7 @@ use crate::events::EventHandlerImpl;
use crate::frontend::FrontendOptions;
use crate::instance::Instance;
use crate::instance::region_query::FrontendRegionQueryHandler;
use crate::limiter::Limiter;
/// The frontend [`Instance`] builder.
pub struct FrontendBuilder {
@@ -247,6 +248,14 @@ impl FrontendBuilder {
self.options.event_recorder.ttl,
))));
// Create the limiter if the max_in_flight_write_bytes is set.
let limiter = self
.options
.max_in_flight_write_bytes
.map(|max_in_flight_write_bytes| {
Arc::new(Limiter::new(max_in_flight_write_bytes.as_bytes() as usize))
});
Ok(Instance {
catalog_manager: self.catalog_manager,
pipeline_operator,
@@ -257,6 +266,7 @@ impl FrontendBuilder {
deleter,
table_metadata_manager: Arc::new(TableMetadataManager::new(kv_backend)),
event_recorder: Some(event_recorder),
limiter,
process_manager,
otlp_metrics_table_legacy_cache: DashMap::new(),
slow_query_options: self.options.slow_query.clone(),

View File

@@ -71,6 +71,12 @@ impl GrpcQueryHandler for Instance {
.check_permission(ctx.current_user(), PermissionReq::GrpcRequest(&request))
.context(PermissionSnafu)?;
let _guard = if let Some(limiter) = &self.limiter {
Some(limiter.limit_request(&request).await?)
} else {
None
};
let output = match request {
Request::Inserts(requests) => self.handle_inserts(requests, ctx.clone()).await?,
Request::RowInserts(requests) => {

View File

@@ -22,7 +22,7 @@ use common_error::ext::BoxedError;
use common_time::Timestamp;
use common_time::timestamp::TimeUnit;
use servers::error::{
AuthSnafu, CatalogSnafu, Error, TimestampOverflowSnafu, UnexpectedResultSnafu,
AuthSnafu, CatalogSnafu, Error, OtherSnafu, TimestampOverflowSnafu, UnexpectedResultSnafu,
};
use servers::influxdb::InfluxdbRequest;
use servers::interceptor::{LineProtocolInterceptor, LineProtocolInterceptorRef};
@@ -59,6 +59,18 @@ impl InfluxdbLineProtocolHandler for Instance {
.post_lines_conversion(requests, ctx.clone())
.await?;
let _guard = if let Some(limiter) = &self.limiter {
Some(
limiter
.limit_row_inserts(&requests)
.await
.map_err(BoxedError::new)
.context(OtherSnafu)?,
)
} else {
None
};
self.handle_influx_row_inserts(requests, ctx)
.await
.map_err(BoxedError::new)

View File

@@ -23,7 +23,8 @@ use datatypes::timestamp::TimestampNanosecond;
use pipeline::pipeline_operator::PipelineOperator;
use pipeline::{Pipeline, PipelineInfo, PipelineVersion};
use servers::error::{
AuthSnafu, Error as ServerError, ExecuteGrpcRequestSnafu, PipelineSnafu, Result as ServerResult,
AuthSnafu, Error as ServerError, ExecuteGrpcRequestSnafu, OtherSnafu, PipelineSnafu,
Result as ServerResult,
};
use servers::interceptor::{LogIngestInterceptor, LogIngestInterceptorRef};
use servers::query_handler::PipelineHandler;
@@ -123,6 +124,18 @@ impl Instance {
log: RowInsertRequests,
ctx: QueryContextRef,
) -> ServerResult<Output> {
let _guard = if let Some(limiter) = &self.limiter {
Some(
limiter
.limit_row_inserts(&log)
.await
.map_err(BoxedError::new)
.context(OtherSnafu)?,
)
} else {
None
};
self.inserter
.handle_log_inserts(log, ctx, self.statement_executor.as_ref())
.await
@@ -135,6 +148,18 @@ impl Instance {
rows: RowInsertRequests,
ctx: QueryContextRef,
) -> ServerResult<Output> {
let _guard = if let Some(limiter) = &self.limiter {
Some(
limiter
.limit_row_inserts(&rows)
.await
.map_err(BoxedError::new)
.context(OtherSnafu)?,
)
} else {
None
};
self.inserter
.handle_trace_inserts(rows, ctx, self.statement_executor.as_ref())
.await

View File

@@ -16,7 +16,7 @@ use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
use common_error::ext::BoxedError;
use common_telemetry::tracing;
use servers::error::{self as server_error, AuthSnafu, ExecuteGrpcQuerySnafu};
use servers::error::{self as server_error, AuthSnafu, ExecuteGrpcQuerySnafu, OtherSnafu};
use servers::opentsdb::codec::DataPoint;
use servers::opentsdb::data_point_to_grpc_row_insert_requests;
use servers::query_handler::OpentsdbProtocolHandler;
@@ -41,6 +41,18 @@ impl OpentsdbProtocolHandler for Instance {
let (requests, _) = data_point_to_grpc_row_insert_requests(data_points)?;
let _guard = if let Some(limiter) = &self.limiter {
Some(
limiter
.limit_row_inserts(&requests)
.await
.map_err(BoxedError::new)
.context(OtherSnafu)?,
)
} else {
None
};
// OpenTSDB is single value.
let output = self
.handle_row_inserts(requests, ctx, true, true)

View File

@@ -24,7 +24,7 @@ use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
use otel_arrow_rust::proto::opentelemetry::collector::metrics::v1::ExportMetricsServiceRequest;
use pipeline::{GreptimePipelineParams, PipelineWay};
use servers::error::{self, AuthSnafu, Result as ServerResult};
use servers::error::{self, AuthSnafu, OtherSnafu, Result as ServerResult};
use servers::http::prom_store::PHYSICAL_TABLE_PARAM;
use servers::interceptor::{OpenTelemetryProtocolInterceptor, OpenTelemetryProtocolInterceptorRef};
use servers::otlp;
@@ -83,6 +83,18 @@ impl OpenTelemetryProtocolHandler for Instance {
ctx
};
let _guard = if let Some(limiter) = &self.limiter {
Some(
limiter
.limit_row_inserts(&requests)
.await
.map_err(BoxedError::new)
.context(OtherSnafu)?,
)
} else {
None
};
// If the user uses the legacy path, it is by default without metric engine.
if metric_ctx.is_legacy || !metric_ctx.with_metric_engine {
self.handle_row_inserts(requests, ctx, false, false)
@@ -179,6 +191,18 @@ impl OpenTelemetryProtocolHandler for Instance {
)
.await?;
let _guard = if let Some(limiter) = &self.limiter {
Some(
limiter
.limit_ctx_req(&opt_req)
.await
.map_err(BoxedError::new)
.context(OtherSnafu)?,
)
} else {
None
};
let mut outputs = vec![];
for (temp_ctx, requests) in opt_req.as_req_iter(ctx) {

View File

@@ -175,6 +175,18 @@ impl PromStoreProtocolHandler for Instance {
.get::<PromStoreProtocolInterceptorRef<servers::error::Error>>();
interceptor_ref.pre_write(&request, ctx.clone())?;
let _guard = if let Some(limiter) = &self.limiter {
Some(
limiter
.limit_row_inserts(&request)
.await
.map_err(BoxedError::new)
.context(error::OtherSnafu)?,
)
} else {
None
};
let output = if with_metric_engine {
let physical_table = ctx
.extension(PHYSICAL_TABLE_PARAM)

View File

@@ -19,6 +19,7 @@ pub mod events;
pub mod frontend;
pub mod heartbeat;
pub mod instance;
pub(crate) mod limiter;
pub(crate) mod metrics;
pub mod server;
pub mod service_config;

332
src/frontend/src/limiter.rs Normal file
View File

@@ -0,0 +1,332 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use api::v1::column::Values;
use api::v1::greptime_request::Request;
use api::v1::value::ValueData;
use api::v1::{
Decimal128, InsertRequests, IntervalMonthDayNano, JsonValue, RowInsertRequest,
RowInsertRequests, json_value,
};
use pipeline::ContextReq;
use snafu::ResultExt;
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
use crate::error::{AcquireLimiterSnafu, Result};
pub(crate) type LimiterRef = Arc<Limiter>;
/// A frontend request limiter that controls the total size of in-flight write
/// requests.
pub(crate) struct Limiter {
max_in_flight_write_bytes: usize,
byte_counter: Arc<Semaphore>,
}
impl Limiter {
pub fn new(max_in_flight_write_bytes: usize) -> Self {
Self {
byte_counter: Arc::new(Semaphore::new(max_in_flight_write_bytes)),
max_in_flight_write_bytes,
}
}
pub async fn limit_request(&self, request: &Request) -> Result<OwnedSemaphorePermit> {
let size = match request {
Request::Inserts(requests) => self.insert_requests_data_size(requests),
Request::RowInserts(requests) => {
self.rows_insert_requests_data_size(requests.inserts.iter())
}
_ => 0,
};
self.limit_in_flight_write_bytes(size).await
}
pub async fn limit_row_inserts(
&self,
requests: &RowInsertRequests,
) -> Result<OwnedSemaphorePermit> {
let size = self.rows_insert_requests_data_size(requests.inserts.iter());
self.limit_in_flight_write_bytes(size).await
}
pub async fn limit_ctx_req(&self, opt_req: &ContextReq) -> Result<OwnedSemaphorePermit> {
let size = self.rows_insert_requests_data_size(opt_req.ref_all_req());
self.limit_in_flight_write_bytes(size).await
}
/// Waits until `bytes` of in-flight write quota are available and acquires them.
pub async fn limit_in_flight_write_bytes(&self, bytes: usize) -> Result<OwnedSemaphorePermit> {
self.byte_counter
.clone()
.acquire_many_owned(bytes as u32)
.await
.context(AcquireLimiterSnafu)
}
/// Returns the current in-flight write bytes.
#[allow(dead_code)]
pub fn in_flight_write_bytes(&self) -> usize {
self.max_in_flight_write_bytes - self.byte_counter.available_permits()
}
fn insert_requests_data_size(&self, request: &InsertRequests) -> usize {
let mut size: usize = 0;
for insert in &request.inserts {
for column in &insert.columns {
if let Some(values) = &column.values {
size += Self::size_of_column_values(values);
}
}
}
size
}
fn rows_insert_requests_data_size<'a>(
&self,
inserts: impl Iterator<Item = &'a RowInsertRequest>,
) -> usize {
let mut size: usize = 0;
for insert in inserts {
if let Some(rows) = &insert.rows {
for row in &rows.rows {
for value in &row.values {
if let Some(value) = &value.value_data {
size += Self::size_of_value_data(value);
}
}
}
}
}
size
}
fn size_of_column_values(values: &Values) -> usize {
let mut size: usize = 0;
size += values.i8_values.len() * size_of::<i32>();
size += values.i16_values.len() * size_of::<i32>();
size += values.i32_values.len() * size_of::<i32>();
size += values.i64_values.len() * size_of::<i64>();
size += values.u8_values.len() * size_of::<u32>();
size += values.u16_values.len() * size_of::<u32>();
size += values.u32_values.len() * size_of::<u32>();
size += values.u64_values.len() * size_of::<u64>();
size += values.f32_values.len() * size_of::<f32>();
size += values.f64_values.len() * size_of::<f64>();
size += values.bool_values.len() * size_of::<bool>();
size += values
.binary_values
.iter()
.map(|v| v.len() * size_of::<u8>())
.sum::<usize>();
size += values.string_values.iter().map(|v| v.len()).sum::<usize>();
size += values.date_values.len() * size_of::<i32>();
size += values.datetime_values.len() * size_of::<i64>();
size += values.timestamp_second_values.len() * size_of::<i64>();
size += values.timestamp_millisecond_values.len() * size_of::<i64>();
size += values.timestamp_microsecond_values.len() * size_of::<i64>();
size += values.timestamp_nanosecond_values.len() * size_of::<i64>();
size += values.time_second_values.len() * size_of::<i64>();
size += values.time_millisecond_values.len() * size_of::<i64>();
size += values.time_microsecond_values.len() * size_of::<i64>();
size += values.time_nanosecond_values.len() * size_of::<i64>();
size += values.interval_year_month_values.len() * size_of::<i64>();
size += values.interval_day_time_values.len() * size_of::<i64>();
size += values.interval_month_day_nano_values.len() * size_of::<IntervalMonthDayNano>();
size += values.decimal128_values.len() * size_of::<Decimal128>();
size += values
.list_values
.iter()
.map(|v| {
v.items
.iter()
.map(|item| {
item.value_data
.as_ref()
.map(Self::size_of_value_data)
.unwrap_or(0)
})
.sum::<usize>()
})
.sum::<usize>();
size += values
.struct_values
.iter()
.map(|v| {
v.items
.iter()
.map(|item| {
item.value_data
.as_ref()
.map(Self::size_of_value_data)
.unwrap_or(0)
})
.sum::<usize>()
})
.sum::<usize>();
size
}
fn size_of_value_data(value: &ValueData) -> usize {
match value {
ValueData::I8Value(_) => size_of::<i32>(),
ValueData::I16Value(_) => size_of::<i32>(),
ValueData::I32Value(_) => size_of::<i32>(),
ValueData::I64Value(_) => size_of::<i64>(),
ValueData::U8Value(_) => size_of::<u32>(),
ValueData::U16Value(_) => size_of::<u32>(),
ValueData::U32Value(_) => size_of::<u32>(),
ValueData::U64Value(_) => size_of::<u64>(),
ValueData::F32Value(_) => size_of::<f32>(),
ValueData::F64Value(_) => size_of::<f64>(),
ValueData::BoolValue(_) => size_of::<bool>(),
ValueData::BinaryValue(v) => v.len() * size_of::<u8>(),
ValueData::StringValue(v) => v.len(),
ValueData::DateValue(_) => size_of::<i32>(),
ValueData::DatetimeValue(_) => size_of::<i64>(),
ValueData::TimestampSecondValue(_) => size_of::<i64>(),
ValueData::TimestampMillisecondValue(_) => size_of::<i64>(),
ValueData::TimestampMicrosecondValue(_) => size_of::<i64>(),
ValueData::TimestampNanosecondValue(_) => size_of::<i64>(),
ValueData::TimeSecondValue(_) => size_of::<i64>(),
ValueData::TimeMillisecondValue(_) => size_of::<i64>(),
ValueData::TimeMicrosecondValue(_) => size_of::<i64>(),
ValueData::TimeNanosecondValue(_) => size_of::<i64>(),
ValueData::IntervalYearMonthValue(_) => size_of::<i32>(),
ValueData::IntervalDayTimeValue(_) => size_of::<i64>(),
ValueData::IntervalMonthDayNanoValue(_) => size_of::<IntervalMonthDayNano>(),
ValueData::Decimal128Value(_) => size_of::<Decimal128>(),
ValueData::ListValue(list_values) => list_values
.items
.iter()
.map(|item| {
item.value_data
.as_ref()
.map(Self::size_of_value_data)
.unwrap_or(0)
})
.sum(),
ValueData::StructValue(struct_values) => struct_values
.items
.iter()
.map(|item| {
item.value_data
.as_ref()
.map(Self::size_of_value_data)
.unwrap_or(0)
})
.sum(),
ValueData::JsonValue(v) => {
fn calc(v: &JsonValue) -> usize {
let Some(value) = v.value.as_ref() else {
return 0;
};
match value {
json_value::Value::Boolean(_) => size_of::<bool>(),
json_value::Value::Int(_) => size_of::<i64>(),
json_value::Value::Uint(_) => size_of::<u64>(),
json_value::Value::Float(_) => size_of::<f64>(),
json_value::Value::Str(s) => s.len(),
json_value::Value::Array(array) => array.items.iter().map(calc).sum(),
json_value::Value::Object(object) => object
.entries
.iter()
.flat_map(|entry| {
entry.value.as_ref().map(|v| entry.key.len() + calc(v))
})
.sum(),
}
}
calc(v)
}
}
}
}
#[cfg(test)]
mod tests {
use api::v1::column::Values;
use api::v1::greptime_request::Request;
use api::v1::{Column, InsertRequest};
use super::*;
fn generate_request(size: usize) -> Request {
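        // The size estimator above counts each i8 value as 4 bytes (size_of::<i32>()),
        // so size / 4 values yield a request whose estimated size is roughly `size`.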
let i8_values = vec![0; size / 4];
Request::Inserts(InsertRequests {
inserts: vec![InsertRequest {
columns: vec![Column {
values: Some(Values {
i8_values,
..Default::default()
}),
..Default::default()
}],
..Default::default()
}],
})
}
#[tokio::test]
async fn test_limiter() {
let limiter_ref: LimiterRef = Arc::new(Limiter::new(1024));
let tasks_count = 10;
let request_data_size = 100;
let mut handles = vec![];
// Generate multiple requests to test the limiter.
for _ in 0..tasks_count {
let limiter = limiter_ref.clone();
let handle = tokio::spawn(async move {
let result = limiter
.limit_request(&generate_request(request_data_size))
.await;
assert!(result.is_ok());
});
handles.push(handle);
}
// Wait for all threads to complete.
for handle in handles {
handle.await.unwrap();
}
}
#[tokio::test]
async fn test_in_flight_write_bytes() {
let limiter_ref: LimiterRef = Arc::new(Limiter::new(1024));
let req1 = generate_request(100);
let result1 = limiter_ref
.limit_request(&req1)
.await
.expect("failed to acquire permits");
assert_eq!(limiter_ref.in_flight_write_bytes(), 100);
let req2 = generate_request(200);
let result2 = limiter_ref
.limit_request(&req2)
.await
.expect("failed to acquire permits");
assert_eq!(limiter_ref.in_flight_write_bytes(), 300);
drop(result1);
assert_eq!(limiter_ref.in_flight_write_bytes(), 200);
drop(result2);
assert_eq!(limiter_ref.in_flight_write_bytes(), 0);
}
}
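The two tests above pin down the limiter's contract: limit_request charges the estimated request size against a fixed byte budget, in_flight_write_bytes reports how much of that budget is currently held, and dropping the returned permit hands the bytes back. A minimal sketch of that contract on top of tokio's Semaphore follows; ByteLimiter, BytePermit and acquire_bytes are illustrative names, not the crate's actual Limiter implementation.

use std::sync::Arc;
use tokio::sync::{OwnedSemaphorePermit, Semaphore};

/// Illustrative byte-budget limiter: one semaphore permit per byte.
struct ByteLimiter {
    capacity: usize,
    semaphore: Arc<Semaphore>,
}

/// Holding this guard keeps the bytes reserved; dropping it releases them.
struct BytePermit {
    _permit: OwnedSemaphorePermit,
}

impl ByteLimiter {
    fn new(capacity: usize) -> Self {
        Self {
            capacity,
            semaphore: Arc::new(Semaphore::new(capacity)),
        }
    }

    /// Reserves `bytes` from the budget, waiting until enough is free.
    /// The sketch assumes `bytes` never exceeds `capacity`.
    async fn acquire_bytes(&self, bytes: usize) -> Option<BytePermit> {
        let permit = self
            .semaphore
            .clone()
            .acquire_many_owned(bytes as u32)
            .await
            .ok()?;
        Some(BytePermit { _permit: permit })
    }

    /// Bytes currently reserved by in-flight requests.
    fn in_flight_bytes(&self) -> usize {
        self.capacity - self.semaphore.available_permits()
    }
}

With this shape, the drop-based accounting exercised by test_in_flight_write_bytes falls out directly from OwnedSemaphorePermit's Drop impl.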

View File

@@ -40,7 +40,6 @@ use servers::otel_arrow::OtelArrowServiceHandler;
use servers::postgres::PostgresServer;
use servers::query_handler::grpc::ServerGrpcQueryHandlerAdapter;
use servers::query_handler::sql::ServerSqlQueryHandlerAdapter;
use servers::request_memory_limiter::ServerMemoryLimiter;
use servers::server::{Server, ServerHandlers};
use servers::tls::{ReloadableTlsServerConfig, maybe_watch_server_tls_config};
use snafu::ResultExt;
@@ -77,25 +76,15 @@ where
}
}
pub fn grpc_server_builder(
&self,
opts: &GrpcOptions,
request_memory_limiter: ServerMemoryLimiter,
) -> Result<GrpcServerBuilder> {
pub fn grpc_server_builder(&self, opts: &GrpcOptions) -> Result<GrpcServerBuilder> {
let builder = GrpcServerBuilder::new(opts.as_config(), common_runtime::global_runtime())
.with_memory_limiter(request_memory_limiter)
.with_tls_config(opts.tls.clone())
.context(error::InvalidTlsConfigSnafu)?;
Ok(builder)
}
pub fn http_server_builder(
&self,
opts: &FrontendOptions,
request_memory_limiter: ServerMemoryLimiter,
) -> HttpServerBuilder {
pub fn http_server_builder(&self, opts: &FrontendOptions) -> HttpServerBuilder {
let mut builder = HttpServerBuilder::new(opts.http.clone())
.with_memory_limiter(request_memory_limiter)
.with_sql_handler(ServerSqlQueryHandlerAdapter::arc(self.instance.clone()));
let validator = self.plugins.get::<LogValidatorRef>();
@@ -180,12 +169,11 @@ where
meta_client: &Option<MetaClientOptions>,
name: Option<String>,
external: bool,
request_memory_limiter: ServerMemoryLimiter,
) -> Result<GrpcServer> {
let builder = if let Some(builder) = self.grpc_server_builder.take() {
builder
} else {
self.grpc_server_builder(grpc, request_memory_limiter)?
self.grpc_server_builder(grpc)?
};
let user_provider = if external {
@@ -247,16 +235,11 @@ where
Ok(grpc_server)
}
fn build_http_server(
&mut self,
opts: &FrontendOptions,
toml: String,
request_memory_limiter: ServerMemoryLimiter,
) -> Result<HttpServer> {
fn build_http_server(&mut self, opts: &FrontendOptions, toml: String) -> Result<HttpServer> {
let builder = if let Some(builder) = self.http_server_builder.take() {
builder
} else {
self.http_server_builder(opts, request_memory_limiter)
self.http_server_builder(opts)
};
let http_server = builder
@@ -274,12 +257,6 @@ where
let toml = opts.to_toml().context(TomlFormatSnafu)?;
let opts: FrontendOptions = opts.into();
// Create request memory limiter for all server protocols
let request_memory_limiter = ServerMemoryLimiter::new(
opts.max_in_flight_write_bytes.as_bytes(),
opts.write_bytes_exhausted_policy,
);
let handlers = ServerHandlers::default();
let user_provider = self.plugins.get::<UserProviderRef>();
@@ -287,13 +264,7 @@ where
{
// Always init GRPC server
let grpc_addr = parse_addr(&opts.grpc.bind_addr)?;
let grpc_server = self.build_grpc_server(
&opts.grpc,
&opts.meta_client,
None,
true,
request_memory_limiter.clone(),
)?;
let grpc_server = self.build_grpc_server(&opts.grpc, &opts.meta_client, None, true)?;
handlers.insert((Box::new(grpc_server), grpc_addr));
}
@@ -305,7 +276,6 @@ where
&opts.meta_client,
Some("INTERNAL_GRPC_SERVER".to_string()),
false,
request_memory_limiter.clone(),
)?;
handlers.insert((Box::new(grpc_server), grpc_addr));
}
@@ -314,8 +284,7 @@ where
// Always init HTTP server
let http_options = &opts.http;
let http_addr = parse_addr(&http_options.addr)?;
let http_server =
self.build_http_server(&opts, toml, request_memory_limiter.clone())?;
let http_server = self.build_http_server(&opts, toml)?;
handlers.insert((Box::new(http_server), http_addr));
}

View File

@@ -7,9 +7,6 @@ license.workspace = true
[lints]
workspace = true
[features]
vector_index = ["dep:usearch"]
[dependencies]
async-trait.workspace = true
asynchronous-codec = "0.7.0"
@@ -44,7 +41,7 @@ tantivy = { version = "0.24", features = ["zstd-compression"] }
tantivy-jieba = "0.16"
tokio.workspace = true
tokio-util.workspace = true
usearch = { version = "2.21", default-features = false, features = ["fp16lib"], optional = true }
usearch = { version = "2.21", default-features = false, features = ["fp16lib"] }
uuid.workspace = true
[dev-dependencies]

View File

@@ -22,7 +22,6 @@ pub mod external_provider;
pub mod fulltext_index;
pub mod inverted_index;
pub mod target;
#[cfg(feature = "vector_index")]
pub mod vector;
pub type Bytes = Vec<u8>;

View File

@@ -16,7 +16,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use common_wal::config::kafka::DatanodeKafkaConfig;
use common_wal::config::kafka::common::DEFAULT_BACKOFF_CONFIG;
use common_wal::config::kafka::common::{DEFAULT_BACKOFF_CONFIG, DEFAULT_CONNECT_TIMEOUT};
use dashmap::DashMap;
use rskafka::client::ClientBuilder;
use rskafka::client::partition::{Compression, PartitionClient, UnknownTopicHandling};
@@ -79,8 +79,7 @@ impl ClientManager {
// Sets backoff config for the top-level kafka client and all clients constructed by it.
let mut builder = ClientBuilder::new(config.connection.broker_endpoints.clone())
.backoff_config(DEFAULT_BACKOFF_CONFIG)
.connect_timeout(Some(config.connection.connect_timeout))
.timeout(Some(config.connection.timeout));
.connect_timeout(Some(DEFAULT_CONNECT_TIMEOUT));
if let Some(sasl) = &config.connection.sasl {
builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
};

View File

@@ -14,6 +14,7 @@
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use api::v1::meta::cluster_server::ClusterServer;
use api::v1::meta::heartbeat_server::HeartbeatServer;
@@ -59,6 +60,11 @@ use crate::service::admin::admin_axum_router;
use crate::utils::etcd::create_etcd_client_with_tls;
use crate::{Result, error};
/// The default keep-alive interval for gRPC.
const DEFAULT_GRPC_KEEP_ALIVE_INTERVAL: Duration = Duration::from_secs(10);
/// The default keep-alive timeout for gRPC.
const DEFAULT_GRPC_KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(10);
pub struct MetasrvInstance {
metasrv: Arc<Metasrv>,
@@ -249,8 +255,8 @@ pub fn router(metasrv: Arc<Metasrv>) -> Router {
// for admin services
.accept_http1(true)
// For quick network failures detection.
.http2_keepalive_interval(Some(metasrv.options().grpc.http2_keep_alive_interval))
.http2_keepalive_timeout(Some(metasrv.options().grpc.http2_keep_alive_timeout));
.http2_keepalive_interval(Some(DEFAULT_GRPC_KEEP_ALIVE_INTERVAL))
.http2_keepalive_timeout(Some(DEFAULT_GRPC_KEEP_ALIVE_TIMEOUT));
let router = add_compressed_service!(router, HeartbeatServer::from_arc(metasrv.clone()));
let router = add_compressed_service!(router, StoreServer::from_arc(metasrv.clone()));
let router = add_compressed_service!(router, ClusterServer::from_arc(metasrv.clone()));
@@ -267,12 +273,8 @@ pub async fn metasrv_builder(
(Some(kv_backend), _) => (kv_backend, None),
(None, BackendImpl::MemoryStore) => (Arc::new(MemoryKvBackend::new()) as _, None),
(None, BackendImpl::EtcdStore) => {
let etcd_client = create_etcd_client_with_tls(
&opts.store_addrs,
&opts.backend_client,
opts.backend_tls.as_ref(),
)
.await?;
let etcd_client =
create_etcd_client_with_tls(&opts.store_addrs, opts.backend_tls.as_ref()).await?;
let kv_backend = EtcdStore::with_etcd_client(etcd_client.clone(), opts.max_txn_ops);
let election = EtcdElection::with_etcd_client(
&opts.grpc.server_addr,
@@ -339,7 +341,6 @@ pub async fn metasrv_builder(
opts.meta_schema_name.as_deref(),
&opts.meta_table_name,
opts.max_txn_ops,
opts.auto_create_schema,
)
.await
.context(error::KvBackendSnafu)?;

View File

@@ -16,9 +16,13 @@ pub mod lease;
pub mod node_info;
pub mod utils;
use std::time::Duration;
use api::v1::meta::heartbeat_request::NodeWorkloads;
use common_error::ext::BoxedError;
use common_meta::distributed_time_constants::default_distributed_time_constants;
use common_meta::distributed_time_constants::{
DATANODE_LEASE_SECS, FLOWNODE_LEASE_SECS, FRONTEND_HEARTBEAT_INTERVAL_MILLIS,
};
use common_meta::error::Result;
use common_meta::peer::{Peer, PeerDiscovery, PeerResolver};
use common_meta::{DatanodeId, FlownodeId};
@@ -34,7 +38,7 @@ impl PeerDiscovery for MetaPeerClient {
utils::alive_frontends(
&DefaultSystemTimer,
self,
default_distributed_time_constants().frontend_heartbeat_interval,
Duration::from_millis(FRONTEND_HEARTBEAT_INTERVAL_MILLIS),
)
.await
.map_err(BoxedError::new)
@@ -48,7 +52,7 @@ impl PeerDiscovery for MetaPeerClient {
utils::alive_datanodes(
&DefaultSystemTimer,
self,
default_distributed_time_constants().datanode_lease,
Duration::from_secs(DATANODE_LEASE_SECS),
filter,
)
.await
@@ -63,7 +67,7 @@ impl PeerDiscovery for MetaPeerClient {
utils::alive_flownodes(
&DefaultSystemTimer,
self,
default_distributed_time_constants().flownode_lease,
Duration::from_secs(FLOWNODE_LEASE_SECS),
filter,
)
.await

View File

@@ -102,7 +102,7 @@ mod tests {
use api::v1::meta::heartbeat_request::NodeWorkloads;
use api::v1::meta::{DatanodeWorkloads, FlownodeWorkloads};
use common_meta::cluster::{FrontendStatus, NodeInfo, NodeInfoKey, NodeStatus, Role};
use common_meta::distributed_time_constants::default_distributed_time_constants;
use common_meta::distributed_time_constants::FRONTEND_HEARTBEAT_INTERVAL_MILLIS;
use common_meta::kv_backend::ResettableKvBackendRef;
use common_meta::peer::{Peer, PeerDiscovery};
use common_meta::rpc::store::PutRequest;
@@ -473,10 +473,8 @@ mod tests {
let client = create_meta_peer_client();
let in_memory = client.memory_backend();
let frontend_heartbeat_interval =
default_distributed_time_constants().frontend_heartbeat_interval;
let last_activity_ts =
current_time_millis() - frontend_heartbeat_interval.as_millis() as i64 - 1000;
current_time_millis() - FRONTEND_HEARTBEAT_INTERVAL_MILLIS as i64 - 1000;
let active_frontend_node = NodeInfo {
peer: Peer {
id: 0,

View File

@@ -15,6 +15,7 @@
use std::collections::VecDeque;
use std::time::Duration;
use common_meta::distributed_time_constants;
use serde::{Deserialize, Serialize};
const FIRST_HEARTBEAT_ESTIMATE_MILLIS: i64 = 1000;
@@ -78,7 +79,9 @@ impl Default for PhiAccrualFailureDetectorOptions {
Self {
threshold: 8_f32,
min_std_deviation: Duration::from_millis(100),
acceptable_heartbeat_pause: Duration::from_secs(10),
acceptable_heartbeat_pause: Duration::from_secs(
distributed_time_constants::DATANODE_LEASE_SECS,
),
}
}
}

View File

@@ -194,7 +194,7 @@ impl SchedulerCtx for DefaultGcSchedulerCtx {
}
// Send GetFileRefs instructions to each datanode
let mut all_file_refs: HashMap<RegionId, HashSet<_>> = HashMap::new();
let mut all_file_refs: HashMap<RegionId, HashSet<FileId>> = HashMap::new();
let mut all_manifest_versions = HashMap::new();
for (peer, regions) in datanode2query_regions {

View File

@@ -53,7 +53,6 @@ pub fn new_empty_report_with(region_ids: impl IntoIterator<Item = RegionId>) ->
}
GcReport {
deleted_files,
deleted_indexes: HashMap::new(),
need_retry_regions: HashSet::new(),
}
}

View File

@@ -454,11 +454,7 @@ async fn test_region_gc_concurrency_with_retryable_errors() {
(
region_id,
// mock the actual gc report with deleted files when succeeded(even no files to delete)
GcReport::new(
HashMap::from([(region_id, vec![])]),
Default::default(),
HashSet::new(),
),
GcReport::new(HashMap::from([(region_id, vec![])]), HashSet::new()),
)
})
.collect();

View File

@@ -20,7 +20,7 @@ use common_meta::datanode::RegionManifestInfo;
use common_meta::peer::Peer;
use common_telemetry::init_default_ut_logging;
use store_api::region_engine::RegionRole;
use store_api::storage::{FileId, FileRef, FileRefsManifest, GcReport, RegionId};
use store_api::storage::{FileId, FileRefsManifest, GcReport, RegionId};
use crate::gc::mock::{
MockSchedulerCtx, TEST_REGION_SIZE_200MB, mock_region_stat, new_empty_report_with,
@@ -60,10 +60,7 @@ async fn test_gc_regions_failure_handling() {
let file_refs = FileRefsManifest {
manifest_version: HashMap::from([(region_id, 1)]),
file_refs: HashMap::from([(
region_id,
HashSet::from([FileRef::new(region_id, FileId::random(), None)]),
)]),
file_refs: HashMap::from([(region_id, HashSet::from([FileId::random()]))]),
};
let ctx = Arc::new(

View File

@@ -356,7 +356,8 @@ impl BatchGcProcedure {
}
// Send GetFileRefs instructions to each datanode
let mut all_file_refs: HashMap<RegionId, HashSet<_>> = HashMap::new();
let mut all_file_refs: HashMap<RegionId, HashSet<store_api::storage::FileId>> =
HashMap::new();
let mut all_manifest_versions = HashMap::new();
for (peer, regions) in datanode2query_regions {

View File

@@ -134,7 +134,7 @@ mod test {
use std::sync::Arc;
use common_meta::datanode::{RegionManifestInfo, RegionStat, Stat};
use common_meta::distributed_time_constants::default_distributed_time_constants;
use common_meta::distributed_time_constants;
use common_meta::key::TableMetadataManager;
use common_meta::key::table_route::TableRouteValue;
use common_meta::key::test_utils::new_test_table_info;
@@ -236,7 +236,7 @@ mod test {
let opening_region_keeper = Arc::new(MemoryRegionKeeper::default());
let handler = RegionLeaseHandler::new(
default_distributed_time_constants().region_lease.as_secs(),
distributed_time_constants::REGION_LEASE_SECS,
table_metadata_manager.clone(),
opening_region_keeper.clone(),
None,
@@ -266,7 +266,7 @@ mod test {
assert_eq!(
acc.region_lease.as_ref().unwrap().lease_seconds,
default_distributed_time_constants().region_lease.as_secs()
distributed_time_constants::REGION_LEASE_SECS
);
assert_region_lease(
@@ -300,7 +300,7 @@ mod test {
assert_eq!(
acc.region_lease.as_ref().unwrap().lease_seconds,
default_distributed_time_constants().region_lease.as_secs()
distributed_time_constants::REGION_LEASE_SECS
);
assert_region_lease(
@@ -379,7 +379,7 @@ mod test {
});
let handler = RegionLeaseHandler::new(
default_distributed_time_constants().region_lease.as_secs(),
distributed_time_constants::REGION_LEASE_SECS,
table_metadata_manager.clone(),
Default::default(),
None,
@@ -461,7 +461,7 @@ mod test {
..Default::default()
});
let handler = RegionLeaseHandler::new(
default_distributed_time_constants().region_lease.as_secs(),
distributed_time_constants::REGION_LEASE_SECS,
table_metadata_manager.clone(),
Default::default(),
None,

View File

@@ -27,7 +27,7 @@ use common_event_recorder::EventRecorderOptions;
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
use common_meta::cache_invalidator::CacheInvalidatorRef;
use common_meta::ddl_manager::DdlManagerRef;
use common_meta::distributed_time_constants::{self, default_distributed_time_constants};
use common_meta::distributed_time_constants;
use common_meta::key::TableMetadataManagerRef;
use common_meta::key::runtime_switch::RuntimeSwitchManagerRef;
use common_meta::kv_backend::{KvBackendRef, ResettableKvBackend, ResettableKvBackendRef};
@@ -121,27 +121,6 @@ impl Default for StatsPersistenceOptions {
}
}
#[derive(Clone, PartialEq, Serialize, Deserialize, Debug)]
#[serde(default)]
pub struct BackendClientOptions {
#[serde(with = "humantime_serde")]
pub keep_alive_timeout: Duration,
#[serde(with = "humantime_serde")]
pub keep_alive_interval: Duration,
#[serde(with = "humantime_serde")]
pub connect_timeout: Duration,
}
impl Default for BackendClientOptions {
fn default() -> Self {
Self {
keep_alive_interval: Duration::from_secs(10),
keep_alive_timeout: Duration::from_secs(3),
connect_timeout: Duration::from_secs(3),
}
}
}
#[derive(Clone, PartialEq, Serialize, Deserialize)]
#[serde(default)]
pub struct MetasrvOptions {
@@ -157,20 +136,12 @@ pub struct MetasrvOptions {
/// Only applicable when using PostgreSQL or MySQL as the metadata store
#[serde(default)]
pub backend_tls: Option<TlsOption>,
/// The backend client options.
/// Currently, only applicable when using etcd as the metadata store.
#[serde(default)]
pub backend_client: BackendClientOptions,
/// The type of selector.
pub selector: SelectorType,
/// Whether to use the memory store.
pub use_memory_store: bool,
/// Whether to enable region failover.
pub enable_region_failover: bool,
/// The base heartbeat interval.
///
/// This value is used to calculate the distributed time constants for components.
/// e.g., the region lease time is `heartbeat_interval * 3 + Duration::from_secs(1)`.
#[serde(with = "humantime_serde")]
pub heartbeat_interval: Duration,
/// The delay before starting region failure detection.
/// This delay helps prevent Metasrv from triggering unnecessary region failovers before all Datanodes are fully started.
/// Especially useful when the cluster is not deployed with GreptimeDB Operator and maintenance mode is not enabled.
@@ -231,9 +202,6 @@ pub struct MetasrvOptions {
#[cfg(feature = "pg_kvbackend")]
/// Optional PostgreSQL schema for metadata table (defaults to current search_path if empty).
pub meta_schema_name: Option<String>,
#[cfg(feature = "pg_kvbackend")]
/// Automatically create PostgreSQL schema if it doesn't exist (default: true).
pub auto_create_schema: bool,
#[serde(with = "humantime_serde")]
pub node_max_idle_time: Duration,
/// The event recorder options.
@@ -251,6 +219,7 @@ impl fmt::Debug for MetasrvOptions {
.field("store_addrs", &self.sanitize_store_addrs())
.field("backend_tls", &self.backend_tls)
.field("selector", &self.selector)
.field("use_memory_store", &self.use_memory_store)
.field("enable_region_failover", &self.enable_region_failover)
.field(
"allow_region_failover_on_local_wal",
@@ -271,9 +240,7 @@ impl fmt::Debug for MetasrvOptions {
.field("tracing", &self.tracing)
.field("backend", &self.backend)
.field("event_recorder", &self.event_recorder)
.field("stats_persistence", &self.stats_persistence)
.field("heartbeat_interval", &self.heartbeat_interval)
.field("backend_client", &self.backend_client);
.field("stats_persistence", &self.stats_persistence);
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
debug_struct.field("meta_table_name", &self.meta_table_name);
@@ -301,8 +268,8 @@ impl Default for MetasrvOptions {
store_addrs: vec!["127.0.0.1:2379".to_string()],
backend_tls: None,
selector: SelectorType::default(),
use_memory_store: false,
enable_region_failover: false,
heartbeat_interval: distributed_time_constants::BASE_HEARTBEAT_INTERVAL,
region_failure_detector_initialization_delay: Duration::from_secs(10 * 60),
allow_region_failover_on_local_wal: false,
grpc: GrpcOptions {
@@ -336,13 +303,10 @@ impl Default for MetasrvOptions {
meta_election_lock_id: common_meta::kv_backend::DEFAULT_META_ELECTION_LOCK_ID,
#[cfg(feature = "pg_kvbackend")]
meta_schema_name: None,
#[cfg(feature = "pg_kvbackend")]
auto_create_schema: true,
node_max_idle_time: Duration::from_secs(24 * 60 * 60),
event_recorder: EventRecorderOptions::default(),
stats_persistence: StatsPersistenceOptions::default(),
gc: GcSchedulerOptions::default(),
backend_client: BackendClientOptions::default(),
}
}
}
@@ -783,7 +747,7 @@ impl Metasrv {
&DefaultSystemTimer,
self.meta_peer_client.as_ref(),
peer_id,
default_distributed_time_constants().datanode_lease,
Duration::from_secs(distributed_time_constants::DATANODE_LEASE_SECS),
)
.await
}

View File

@@ -29,7 +29,7 @@ use common_meta::ddl::{
DdlContext, NoopRegionFailureDetectorControl, RegionFailureDetectorControllerRef,
};
use common_meta::ddl_manager::{DdlManager, DdlManagerConfiguratorRef};
use common_meta::distributed_time_constants::default_distributed_time_constants;
use common_meta::distributed_time_constants::{self};
use common_meta::key::TableMetadataManager;
use common_meta::key::flow::FlowMetadataManager;
use common_meta::key::flow::flow_state::FlowStateManager;
@@ -513,7 +513,7 @@ impl MetasrvBuilder {
Some(handler_group_builder) => handler_group_builder,
None => {
let region_lease_handler = RegionLeaseHandler::new(
default_distributed_time_constants().region_lease.as_secs(),
distributed_time_constants::REGION_LEASE_SECS,
table_metadata_manager.clone(),
memory_region_keeper.clone(),
customized_region_lease_renewer,

View File

@@ -921,7 +921,7 @@ mod tests {
use std::assert_matches::assert_matches;
use std::sync::Arc;
use common_meta::distributed_time_constants::default_distributed_time_constants;
use common_meta::distributed_time_constants::REGION_LEASE_SECS;
use common_meta::instruction::Instruction;
use common_meta::key::test_utils::new_test_table_info;
use common_meta::rpc::router::{Region, RegionRoute};
@@ -1192,10 +1192,8 @@ mod tests {
.run_once()
.await;
let region_lease = default_distributed_time_constants().region_lease.as_secs();
// Ensure it didn't run into the slow path.
assert!(timer.elapsed().as_secs() < region_lease / 2);
assert!(timer.elapsed().as_secs() < REGION_LEASE_SECS / 2);
runner.suite.verify_table_metadata().await;
}
@@ -1541,9 +1539,8 @@ mod tests {
.run_once()
.await;
let region_lease = default_distributed_time_constants().region_lease.as_secs();
// Ensure it didn't run into the slow path.
assert!(timer.elapsed().as_secs() < region_lease);
assert!(timer.elapsed().as_secs() < REGION_LEASE_SECS);
runner.suite.verify_table_metadata().await;
}
}

View File

@@ -13,10 +13,11 @@
// limitations under the License.
use std::any::Any;
use std::time::Duration;
use api::v1::meta::MailboxMessage;
use common_meta::RegionIdent;
use common_meta::distributed_time_constants::default_distributed_time_constants;
use common_meta::distributed_time_constants::REGION_LEASE_SECS;
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
use common_procedure::{Context as ProcedureContext, Status};
use common_telemetry::{info, warn};
@@ -29,6 +30,9 @@ use crate::procedure::region_migration::migration_end::RegionMigrationEnd;
use crate::procedure::region_migration::{Context, State};
use crate::service::mailbox::Channel;
/// Uses lease time of a region as the timeout of closing a downgraded region.
const CLOSE_DOWNGRADED_REGION_TIMEOUT: Duration = Duration::from_secs(REGION_LEASE_SECS);
#[derive(Debug, Serialize, Deserialize)]
pub struct CloseDowngradedRegion;
@@ -108,7 +112,7 @@ impl CloseDowngradedRegion {
let ch = Channel::Datanode(downgrade_leader_datanode.id);
let receiver = ctx
.mailbox
.send(&ch, msg, default_distributed_time_constants().region_lease)
.send(&ch, msg, CLOSE_DOWNGRADED_REGION_TIMEOUT)
.await?;
match receiver.await {

View File

@@ -17,7 +17,7 @@ use std::time::Duration;
use api::v1::meta::MailboxMessage;
use common_error::ext::BoxedError;
use common_meta::distributed_time_constants::default_distributed_time_constants;
use common_meta::distributed_time_constants::REGION_LEASE_SECS;
use common_meta::instruction::{
DowngradeRegion, DowngradeRegionReply, DowngradeRegionsReply, Instruction, InstructionReply,
};
@@ -64,7 +64,7 @@ impl State for DowngradeLeaderRegion {
let now = Instant::now();
// Ensures the `leader_region_lease_deadline` must exist after recovering.
ctx.volatile_ctx
.set_leader_region_lease_deadline(default_distributed_time_constants().region_lease);
.set_leader_region_lease_deadline(Duration::from_secs(REGION_LEASE_SECS));
match self.downgrade_region_with_retry(ctx).await {
Ok(_) => {
@@ -277,14 +277,14 @@ impl DowngradeLeaderRegion {
if let Some(last_connection_at) = last_connection_at {
let now = current_time_millis();
let elapsed = now - last_connection_at;
let region_lease = default_distributed_time_constants().region_lease;
let region_lease = Duration::from_secs(REGION_LEASE_SECS);
// It's safe to update the region leader lease deadline here because:
// 1. The old region leader has already been marked as downgraded in metadata,
// which means any attempts to renew its lease will be rejected.
// 2. The pusher disconnect time record only gets removed when the datanode (from_peer)
// establishes a new heartbeat connection stream.
if elapsed >= (region_lease.as_secs() * 1000) as i64 {
if elapsed >= (REGION_LEASE_SECS * 1000) as i64 {
ctx.volatile_ctx.reset_leader_region_lease_deadline();
info!(
"Datanode {}({}) has been disconnected for longer than the region lease period ({:?}), reset leader region lease deadline to None, region: {:?}",
@@ -697,8 +697,7 @@ mod tests {
let procedure_ctx = new_procedure_context();
let (next, _) = state.next(&mut ctx, &procedure_ctx).await.unwrap();
let elapsed = timer.elapsed().as_secs();
let region_lease = default_distributed_time_constants().region_lease.as_secs();
assert!(elapsed < region_lease / 2);
assert!(elapsed < REGION_LEASE_SECS / 2);
assert_eq!(
ctx.volatile_ctx
.leader_region_last_entry_ids

View File

@@ -14,10 +14,11 @@
use std::any::Any;
use std::ops::Div;
use std::time::Duration;
use api::v1::meta::MailboxMessage;
use common_meta::RegionIdent;
use common_meta::distributed_time_constants::default_distributed_time_constants;
use common_meta::distributed_time_constants::REGION_LEASE_SECS;
use common_meta::instruction::{Instruction, InstructionReply, OpenRegion, SimpleReply};
use common_meta::key::datanode_table::RegionInfo;
use common_procedure::{Context as ProcedureContext, Status};
@@ -32,6 +33,9 @@ use crate::procedure::region_migration::flush_leader_region::PreFlushRegion;
use crate::procedure::region_migration::{Context, State};
use crate::service::mailbox::Channel;
/// Uses lease time of a region as the timeout of opening a candidate region.
const OPEN_CANDIDATE_REGION_TIMEOUT: Duration = Duration::from_secs(REGION_LEASE_SECS);
#[derive(Debug, Serialize, Deserialize)]
pub struct OpenCandidateRegion;
@@ -153,9 +157,7 @@ impl OpenCandidateRegion {
.context(error::ExceededDeadlineSnafu {
operation: "Open candidate region",
})?;
let operation_timeout = operation_timeout
.div(2)
.max(default_distributed_time_constants().region_lease);
let operation_timeout = operation_timeout.div(2).max(OPEN_CANDIDATE_REGION_TIMEOUT);
let ch = Channel::Datanode(candidate.id);
let now = Instant::now();
let receiver = ctx.mailbox.send(&ch, msg, operation_timeout).await?;

View File

@@ -12,14 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub(crate) mod enter_staging_region;
pub(crate) mod repartition_start;
pub(crate) mod update_metadata;
pub(crate) mod utils;
use std::any::Any;
use std::fmt::Debug;
use std::time::Duration;
use common_error::ext::BoxedError;
use common_meta::DatanodeId;
@@ -37,7 +34,6 @@ use uuid::Uuid;
use crate::error::{self, Result};
use crate::procedure::repartition::plan::RegionDescriptor;
use crate::service::mailbox::MailboxRef;
pub type GroupId = Uuid;
@@ -49,10 +45,6 @@ pub struct Context {
pub cache_invalidator: CacheInvalidatorRef,
pub table_metadata_manager: TableMetadataManagerRef,
pub mailbox: MailboxRef,
pub server_addr: String,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
@@ -192,13 +184,6 @@ impl Context {
.await
.context(error::TableMetadataManagerSnafu)
}
/// Returns the next operation timeout.
///
/// If the next operation timeout is not set, it will return `None`.
pub fn next_operation_timeout(&self) -> Option<Duration> {
Some(Duration::from_secs(10))
}
}
/// Returns the region routes of the given table route value.

View File

@@ -1,717 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::collections::HashMap;
use std::time::{Duration, Instant};
use api::v1::meta::MailboxMessage;
use common_meta::instruction::{
EnterStagingRegionReply, EnterStagingRegionsReply, Instruction, InstructionReply,
};
use common_meta::peer::Peer;
use common_procedure::{Context as ProcedureContext, Status};
use common_telemetry::info;
use futures::future::join_all;
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt, ensure};
use crate::error::{self, Error, Result};
use crate::handler::HeartbeatMailbox;
use crate::procedure::repartition::group::utils::{
HandleMultipleResult, group_region_routes_by_peer, handle_multiple_results,
};
use crate::procedure::repartition::group::{Context, GroupPrepareResult, State};
use crate::procedure::repartition::plan::RegionDescriptor;
use crate::service::mailbox::{Channel, MailboxRef};
#[derive(Debug, Serialize, Deserialize)]
pub struct EnterStagingRegion;
#[async_trait::async_trait]
#[typetag::serde]
impl State for EnterStagingRegion {
async fn next(
&mut self,
ctx: &mut Context,
_procedure_ctx: &ProcedureContext,
) -> Result<(Box<dyn State>, Status)> {
self.enter_staging_regions(ctx).await?;
Ok(Self::next_state())
}
fn as_any(&self) -> &dyn Any {
self
}
}
impl EnterStagingRegion {
#[allow(dead_code)]
fn next_state() -> (Box<dyn State>, Status) {
// TODO(weny): change it later.
(Box::new(EnterStagingRegion), Status::executing(true))
}
fn build_enter_staging_instructions(
prepare_result: &GroupPrepareResult,
targets: &[RegionDescriptor],
) -> Result<HashMap<Peer, Instruction>> {
let target_partition_expr_by_region = targets
.iter()
.map(|target| {
Ok((
target.region_id,
target
.partition_expr
.as_json_str()
.context(error::SerializePartitionExprSnafu)?,
))
})
.collect::<Result<HashMap<_, _>>>()?;
// Safety: `leader_peer` is set for all region routes, checked in `repartition_start`.
let target_region_routes_by_peer =
group_region_routes_by_peer(&prepare_result.target_routes);
let mut instructions = HashMap::with_capacity(target_region_routes_by_peer.len());
for (peer, region_ids) in target_region_routes_by_peer {
let enter_staging_regions = region_ids
.into_iter()
.map(|region_id| common_meta::instruction::EnterStagingRegion {
region_id,
// Safety: the target_routes is constructed from the targets, so the region_id is always present in the map.
partition_expr: target_partition_expr_by_region[&region_id].clone(),
})
.collect();
instructions.insert(
peer.clone(),
Instruction::EnterStagingRegions(enter_staging_regions),
);
}
Ok(instructions)
}
#[allow(dead_code)]
async fn enter_staging_regions(&self, ctx: &mut Context) -> Result<()> {
let table_id = ctx.persistent_ctx.table_id;
let group_id = ctx.persistent_ctx.group_id;
// Safety: the group prepare result is set in the RepartitionStart state.
let prepare_result = ctx.persistent_ctx.group_prepare_result.as_ref().unwrap();
let targets = &ctx.persistent_ctx.targets;
let instructions = Self::build_enter_staging_instructions(prepare_result, targets)?;
let operation_timeout =
ctx.next_operation_timeout()
.context(error::ExceededDeadlineSnafu {
operation: "Enter staging regions",
})?;
let (peers, tasks): (Vec<_>, Vec<_>) = instructions
.iter()
.map(|(peer, instruction)| {
(
peer,
Self::enter_staging_region(
&ctx.mailbox,
&ctx.server_addr,
peer,
instruction,
operation_timeout,
),
)
})
.unzip();
info!(
"Sent enter staging regions instructions to peers: {:?} for repartition table {}, group id {}",
peers, table_id, group_id
);
let format_err_msg = |idx: usize, error: &Error| {
let peer = peers[idx];
format!(
"Failed to enter staging regions on datanode {:?}, error: {:?}",
peer, error
)
};
// Waits for all tasks to complete.
let results = join_all(tasks).await;
let result = handle_multiple_results(&results);
match result {
HandleMultipleResult::AllSuccessful => Ok(()),
HandleMultipleResult::AllRetryable(retryable_errors) => error::RetryLaterSnafu {
reason: format!(
"All retryable errors during entering staging regions for repartition table {}, group id {}: {:?}",
table_id, group_id,
retryable_errors
.iter()
.map(|(idx, error)| format_err_msg(*idx, error))
.collect::<Vec<_>>()
.join(",")
),
}
.fail(),
HandleMultipleResult::AllNonRetryable(non_retryable_errors) => error::UnexpectedSnafu {
violated: format!(
"All non retryable errors during entering staging regions for repartition table {}, group id {}: {:?}",
table_id, group_id,
non_retryable_errors
.iter()
.map(|(idx, error)| format_err_msg(*idx, error))
.collect::<Vec<_>>()
.join(",")
),
}
.fail(),
HandleMultipleResult::PartialRetryable {
retryable_errors,
non_retryable_errors,
} => error::UnexpectedSnafu {
violated: format!(
"Partial retryable errors during entering staging regions for repartition table {}, group id {}: {:?}, non retryable errors: {:?}",
table_id, group_id,
retryable_errors
.iter()
.map(|(idx, error)| format_err_msg(*idx, error))
.collect::<Vec<_>>()
.join(","),
non_retryable_errors
.iter()
.map(|(idx, error)| format_err_msg(*idx, error))
.collect::<Vec<_>>()
.join(","),
),
}
.fail(),
}
}
/// Enter staging region on a datanode.
///
/// Retry:
/// - Pusher is not found.
/// - Mailbox timeout.
///
/// Abort(non-retry):
/// - Unexpected instruction reply.
/// - Exceeded deadline of enter staging regions instruction.
/// - Target region doesn't exist on the datanode.
async fn enter_staging_region(
mailbox: &MailboxRef,
server_addr: &str,
peer: &Peer,
instruction: &Instruction,
timeout: Duration,
) -> Result<()> {
let ch = Channel::Datanode(peer.id);
let message = MailboxMessage::json_message(
&format!("Enter staging regions: {:?}", instruction),
&format!("Metasrv@{}", server_addr),
&format!("Datanode-{}@{}", peer.id, peer.addr),
common_time::util::current_time_millis(),
&instruction,
)
.with_context(|_| error::SerializeToJsonSnafu {
input: instruction.to_string(),
})?;
let now = Instant::now();
let receiver = mailbox.send(&ch, message, timeout).await;
let receiver = match receiver {
Ok(receiver) => receiver,
Err(error::Error::PusherNotFound { .. }) => error::RetryLaterSnafu {
reason: format!(
"Pusher not found for enter staging regions on datanode {:?}, elapsed: {:?}",
peer,
now.elapsed()
),
}
.fail()?,
Err(err) => {
return Err(err);
}
};
match receiver.await {
Ok(msg) => {
let reply = HeartbeatMailbox::json_reply(&msg)?;
info!(
"Received enter staging regions reply: {:?}, elapsed: {:?}",
reply,
now.elapsed()
);
let InstructionReply::EnterStagingRegions(EnterStagingRegionsReply { replies }) =
reply
else {
return error::UnexpectedInstructionReplySnafu {
mailbox_message: msg.to_string(),
reason: "expect enter staging regions reply",
}
.fail();
};
for reply in replies {
Self::handle_enter_staging_region_reply(&reply, &now, peer)?;
}
Ok(())
}
Err(error::Error::MailboxTimeout { .. }) => {
let reason = format!(
"Mailbox received timeout for enter staging regions on datanode {:?}, elapsed: {:?}",
peer,
now.elapsed()
);
error::RetryLaterSnafu { reason }.fail()
}
Err(err) => Err(err),
}
}
fn handle_enter_staging_region_reply(
EnterStagingRegionReply {
region_id,
ready,
exists,
error,
}: &EnterStagingRegionReply,
now: &Instant,
peer: &Peer,
) -> Result<()> {
ensure!(
exists,
error::UnexpectedSnafu {
violated: format!(
"Region {} doesn't exist on datanode {:?}, elapsed: {:?}",
region_id,
peer,
now.elapsed()
)
}
);
if error.is_some() {
return error::RetryLaterSnafu {
reason: format!(
"Failed to enter staging region {} on datanode {:?}, error: {:?}, elapsed: {:?}",
region_id, peer, error, now.elapsed()
),
}
.fail();
}
ensure!(
ready,
error::RetryLaterSnafu {
reason: format!(
"Region {} is still entering staging state on datanode {:?}, elapsed: {:?}",
region_id,
peer,
now.elapsed()
),
}
);
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use std::time::Duration;
use common_meta::instruction::Instruction;
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute};
use store_api::storage::RegionId;
use crate::error::{self, Error};
use crate::procedure::repartition::group::GroupPrepareResult;
use crate::procedure::repartition::group::enter_staging_region::EnterStagingRegion;
use crate::procedure::repartition::plan::RegionDescriptor;
use crate::procedure::repartition::test_util::{
TestingEnv, new_persistent_context, range_expr,
};
use crate::procedure::test_util::{
new_close_region_reply, new_enter_staging_region_reply, send_mock_reply,
};
use crate::service::mailbox::Channel;
#[test]
fn test_build_enter_staging_instructions() {
let table_id = 1024;
let prepare_result = GroupPrepareResult {
source_routes: vec![RegionRoute {
region: Region {
id: RegionId::new(table_id, 1),
..Default::default()
},
leader_peer: Some(Peer::empty(1)),
..Default::default()
}],
target_routes: vec![
RegionRoute {
region: Region {
id: RegionId::new(table_id, 1),
..Default::default()
},
leader_peer: Some(Peer::empty(1)),
..Default::default()
},
RegionRoute {
region: Region {
id: RegionId::new(table_id, 2),
..Default::default()
},
leader_peer: Some(Peer::empty(2)),
..Default::default()
},
],
central_region: RegionId::new(table_id, 1),
central_region_datanode_id: 1,
};
let targets = test_targets();
let instructions =
EnterStagingRegion::build_enter_staging_instructions(&prepare_result, &targets)
.unwrap();
assert_eq!(instructions.len(), 2);
let instruction_1 = instructions
.get(&Peer::empty(1))
.unwrap()
.clone()
.into_enter_staging_regions()
.unwrap();
assert_eq!(
instruction_1,
vec![common_meta::instruction::EnterStagingRegion {
region_id: RegionId::new(table_id, 1),
partition_expr: range_expr("x", 0, 10).as_json_str().unwrap(),
}]
);
let instruction_2 = instructions
.get(&Peer::empty(2))
.unwrap()
.clone()
.into_enter_staging_regions()
.unwrap();
assert_eq!(
instruction_2,
vec![common_meta::instruction::EnterStagingRegion {
region_id: RegionId::new(table_id, 2),
partition_expr: range_expr("x", 10, 20).as_json_str().unwrap(),
}]
);
}
#[tokio::test]
async fn test_datanode_is_unreachable() {
let env = TestingEnv::new();
let server_addr = "localhost";
let peer = Peer::empty(1);
let instruction =
Instruction::EnterStagingRegions(vec![common_meta::instruction::EnterStagingRegion {
region_id: RegionId::new(1024, 1),
partition_expr: range_expr("x", 0, 10).as_json_str().unwrap(),
}]);
let timeout = Duration::from_secs(10);
let err = EnterStagingRegion::enter_staging_region(
env.mailbox_ctx.mailbox(),
server_addr,
&peer,
&instruction,
timeout,
)
.await
.unwrap_err();
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
}
#[tokio::test]
async fn test_enter_staging_region_exceeded_deadline() {
let mut env = TestingEnv::new();
let (tx, rx) = tokio::sync::mpsc::channel(1);
env.mailbox_ctx
.insert_heartbeat_response_receiver(Channel::Datanode(1), tx)
.await;
let server_addr = "localhost";
let peer = Peer::empty(1);
let instruction =
Instruction::EnterStagingRegions(vec![common_meta::instruction::EnterStagingRegion {
region_id: RegionId::new(1024, 1),
partition_expr: range_expr("x", 0, 10).as_json_str().unwrap(),
}]);
let timeout = Duration::from_secs(10);
// Sends a timeout error.
send_mock_reply(env.mailbox_ctx.mailbox().clone(), rx, |id| {
Err(error::MailboxTimeoutSnafu { id }.build())
});
let err = EnterStagingRegion::enter_staging_region(
env.mailbox_ctx.mailbox(),
server_addr,
&peer,
&instruction,
timeout,
)
.await
.unwrap_err();
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
}
#[tokio::test]
async fn test_unexpected_instruction_reply() {
let mut env = TestingEnv::new();
let (tx, rx) = tokio::sync::mpsc::channel(1);
let server_addr = "localhost";
let peer = Peer::empty(1);
let instruction =
Instruction::EnterStagingRegions(vec![common_meta::instruction::EnterStagingRegion {
region_id: RegionId::new(1024, 1),
partition_expr: range_expr("x", 0, 10).as_json_str().unwrap(),
}]);
let timeout = Duration::from_secs(10);
env.mailbox_ctx
.insert_heartbeat_response_receiver(Channel::Datanode(1), tx)
.await;
// Sends an incorrect reply.
send_mock_reply(env.mailbox_ctx.mailbox().clone(), rx, |id| {
Ok(new_close_region_reply(id))
});
let err = EnterStagingRegion::enter_staging_region(
env.mailbox_ctx.mailbox(),
server_addr,
&peer,
&instruction,
timeout,
)
.await
.unwrap_err();
assert_matches!(err, Error::UnexpectedInstructionReply { .. });
assert!(!err.is_retryable());
}
#[tokio::test]
async fn test_enter_staging_region_failed_to_enter_staging_state() {
let mut env = TestingEnv::new();
let (tx, rx) = tokio::sync::mpsc::channel(1);
env.mailbox_ctx
.insert_heartbeat_response_receiver(Channel::Datanode(1), tx)
.await;
let server_addr = "localhost";
let peer = Peer::empty(1);
let instruction =
Instruction::EnterStagingRegions(vec![common_meta::instruction::EnterStagingRegion {
region_id: RegionId::new(1024, 1),
partition_expr: range_expr("x", 0, 10).as_json_str().unwrap(),
}]);
let timeout = Duration::from_secs(10);
// Sends a failed reply.
send_mock_reply(env.mailbox_ctx.mailbox().clone(), rx, |id| {
Ok(new_enter_staging_region_reply(
id,
RegionId::new(1024, 1),
false,
true,
Some("test mocked".to_string()),
))
});
let err = EnterStagingRegion::enter_staging_region(
env.mailbox_ctx.mailbox(),
server_addr,
&peer,
&instruction,
timeout,
)
.await
.unwrap_err();
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
let (tx, rx) = tokio::sync::mpsc::channel(1);
env.mailbox_ctx
.insert_heartbeat_response_receiver(Channel::Datanode(1), tx)
.await;
// Region doesn't exist on the datanode.
send_mock_reply(env.mailbox_ctx.mailbox().clone(), rx, |id| {
Ok(new_enter_staging_region_reply(
id,
RegionId::new(1024, 1),
false,
false,
None,
))
});
let err = EnterStagingRegion::enter_staging_region(
env.mailbox_ctx.mailbox(),
server_addr,
&peer,
&instruction,
timeout,
)
.await
.unwrap_err();
assert_matches!(err, Error::Unexpected { .. });
assert!(!err.is_retryable());
}
fn test_prepare_result(table_id: u32) -> GroupPrepareResult {
GroupPrepareResult {
source_routes: vec![],
target_routes: vec![
RegionRoute {
region: Region {
id: RegionId::new(table_id, 1),
..Default::default()
},
leader_peer: Some(Peer::empty(1)),
..Default::default()
},
RegionRoute {
region: Region {
id: RegionId::new(table_id, 2),
..Default::default()
},
leader_peer: Some(Peer::empty(2)),
..Default::default()
},
],
central_region: RegionId::new(table_id, 1),
central_region_datanode_id: 1,
}
}
fn test_targets() -> Vec<RegionDescriptor> {
vec![
RegionDescriptor {
region_id: RegionId::new(1024, 1),
partition_expr: range_expr("x", 0, 10),
},
RegionDescriptor {
region_id: RegionId::new(1024, 2),
partition_expr: range_expr("x", 10, 20),
},
]
}
#[tokio::test]
async fn test_enter_staging_regions_all_successful() {
let mut env = TestingEnv::new();
let table_id = 1024;
let targets = test_targets();
let mut persistent_context = new_persistent_context(table_id, vec![], targets);
persistent_context.group_prepare_result = Some(test_prepare_result(table_id));
let (tx, rx) = tokio::sync::mpsc::channel(1);
env.mailbox_ctx
.insert_heartbeat_response_receiver(Channel::Datanode(1), tx)
.await;
send_mock_reply(env.mailbox_ctx.mailbox().clone(), rx, |id| {
Ok(new_enter_staging_region_reply(
id,
RegionId::new(1024, 1),
true,
true,
None,
))
});
let (tx, rx) = tokio::sync::mpsc::channel(1);
env.mailbox_ctx
.insert_heartbeat_response_receiver(Channel::Datanode(2), tx)
.await;
send_mock_reply(env.mailbox_ctx.mailbox().clone(), rx, |id| {
Ok(new_enter_staging_region_reply(
id,
RegionId::new(1024, 2),
true,
true,
None,
))
});
let mut ctx = env.create_context(persistent_context);
EnterStagingRegion
.enter_staging_regions(&mut ctx)
.await
.unwrap();
}
#[tokio::test]
async fn test_enter_staging_region_retryable() {
let env = TestingEnv::new();
let table_id = 1024;
let targets = test_targets();
let mut persistent_context = new_persistent_context(table_id, vec![], targets);
persistent_context.group_prepare_result = Some(test_prepare_result(table_id));
let mut ctx = env.create_context(persistent_context);
let err = EnterStagingRegion
.enter_staging_regions(&mut ctx)
.await
.unwrap_err();
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
}
#[tokio::test]
async fn test_enter_staging_regions_non_retryable() {
let mut env = TestingEnv::new();
let table_id = 1024;
let targets = test_targets();
let mut persistent_context = new_persistent_context(table_id, vec![], targets);
persistent_context.group_prepare_result = Some(test_prepare_result(table_id));
let (tx, rx) = tokio::sync::mpsc::channel(1);
env.mailbox_ctx
.insert_heartbeat_response_receiver(Channel::Datanode(1), tx)
.await;
// Sends an incorrect reply.
send_mock_reply(env.mailbox_ctx.mailbox().clone(), rx, |id| {
Ok(new_close_region_reply(id))
});
let mut ctx = env.create_context(persistent_context.clone());
// Datanode 1 returns unexpected reply.
// Datanode 2 is unreachable.
let err = EnterStagingRegion
.enter_staging_regions(&mut ctx)
.await
.unwrap_err();
assert_matches!(err, Error::Unexpected { .. });
assert!(!err.is_retryable());
let (tx, rx) = tokio::sync::mpsc::channel(1);
env.mailbox_ctx
.insert_heartbeat_response_receiver(Channel::Datanode(2), tx)
.await;
// Sends an incorrect reply.
send_mock_reply(env.mailbox_ctx.mailbox().clone(), rx, |id| {
Ok(new_close_region_reply(id))
});
let mut ctx = env.create_context(persistent_context);
// Datanode 1 returns unexpected reply.
// Datanode 2 returns unexpected reply.
let err = EnterStagingRegion
.enter_staging_regions(&mut ctx)
.await
.unwrap_err();
assert_matches!(err, Error::Unexpected { .. });
assert!(!err.is_retryable());
}
}

View File

@@ -97,17 +97,6 @@ impl RepartitionStart {
.map(|r| (*r).clone())
})
.collect::<Result<Vec<_>>>()?;
for target_region_route in &target_region_routes {
ensure!(
target_region_route.leader_peer.is_some(),
error::UnexpectedSnafu {
violated: format!(
"Leader peer is not set for region: {}",
target_region_route.region.id
),
}
);
}
let central_region = sources[0].region_id;
let central_region_datanode_id = source_region_routes[0]
.leader_peer

View File

@@ -1,88 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use common_meta::peer::Peer;
use common_meta::rpc::router::RegionRoute;
use store_api::storage::RegionId;
use crate::error::{Error, Result};
/// Groups the region routes by the leader peer.
///
/// # Panics
///
/// Panics if the leader peer is not set for any of the region routes.
pub(crate) fn group_region_routes_by_peer(
region_routes: &[RegionRoute],
) -> HashMap<&Peer, Vec<RegionId>> {
let mut map: HashMap<&Peer, Vec<RegionId>> = HashMap::new();
for region_route in region_routes {
map.entry(region_route.leader_peer.as_ref().unwrap())
.or_default()
.push(region_route.region.id);
}
map
}
/// Returns `true` if all results are successful.
fn all_successful(results: &[Result<()>]) -> bool {
results.iter().all(Result::is_ok)
}
pub enum HandleMultipleResult<'a> {
AllSuccessful,
AllRetryable(Vec<(usize, &'a Error)>),
PartialRetryable {
retryable_errors: Vec<(usize, &'a Error)>,
non_retryable_errors: Vec<(usize, &'a Error)>,
},
AllNonRetryable(Vec<(usize, &'a Error)>),
}
/// Evaluates results from multiple operations and categorizes errors by retryability.
///
/// If all operations succeed, returns `AllSuccessful`.
/// If all errors are retryable, returns `AllRetryable`.
/// If all errors are non-retryable, returns `AllNonRetryable`.
/// Otherwise, returns `PartialRetryable` with separate collections for retryable and non-retryable errors.
pub(crate) fn handle_multiple_results<'a>(results: &'a [Result<()>]) -> HandleMultipleResult<'a> {
if all_successful(results) {
return HandleMultipleResult::AllSuccessful;
}
let mut retryable_errors = Vec::new();
let mut non_retryable_errors = Vec::new();
for (index, result) in results.iter().enumerate() {
if let Err(error) = result {
if error.is_retryable() {
retryable_errors.push((index, error));
} else {
non_retryable_errors.push((index, error));
}
}
}
match (retryable_errors.is_empty(), non_retryable_errors.is_empty()) {
(true, false) => HandleMultipleResult::AllNonRetryable(non_retryable_errors),
(false, true) => HandleMultipleResult::AllRetryable(retryable_errors),
(false, false) => HandleMultipleResult::PartialRetryable {
retryable_errors,
non_retryable_errors,
},
// Should not happen, but include for completeness
(true, true) => HandleMultipleResult::AllSuccessful,
}
}

View File

@@ -32,7 +32,6 @@ use crate::procedure::test_util::MailboxContext;
pub struct TestingEnv {
pub table_metadata_manager: TableMetadataManagerRef,
pub mailbox_ctx: MailboxContext,
pub server_addr: String,
}
impl Default for TestingEnv {
@@ -52,11 +51,10 @@ impl TestingEnv {
Self {
table_metadata_manager,
mailbox_ctx,
server_addr: "localhost".to_string(),
}
}
pub fn create_context(&self, persistent_context: PersistentContext) -> Context {
pub fn create_context(self, persistent_context: PersistentContext) -> Context {
let cache_invalidator = Arc::new(MetasrvCacheInvalidator::new(
self.mailbox_ctx.mailbox().clone(),
MetasrvInfo {
@@ -68,8 +66,6 @@ impl TestingEnv {
persistent_ctx: persistent_context,
table_metadata_manager: self.table_metadata_manager.clone(),
cache_invalidator,
mailbox: self.mailbox_ctx.mailbox().clone(),
server_addr: self.server_addr.clone(),
}
}
}

View File

@@ -17,8 +17,8 @@ use std::collections::HashMap;
use api::v1::meta::mailbox_message::Payload;
use api::v1::meta::{HeartbeatResponse, MailboxMessage};
use common_meta::instruction::{
DowngradeRegionReply, DowngradeRegionsReply, EnterStagingRegionReply, EnterStagingRegionsReply,
FlushRegionReply, InstructionReply, SimpleReply, UpgradeRegionReply, UpgradeRegionsReply,
DowngradeRegionReply, DowngradeRegionsReply, FlushRegionReply, InstructionReply, SimpleReply,
UpgradeRegionReply, UpgradeRegionsReply,
};
use common_meta::key::TableMetadataManagerRef;
use common_meta::key::table_route::TableRouteValue;
@@ -198,7 +198,7 @@ pub fn new_downgrade_region_reply(
}
}
/// Generates a [InstructionReply::UpgradeRegions] reply.
/// Generates a [InstructionReply::UpgradeRegion] reply.
pub fn new_upgrade_region_reply(
id: u64,
ready: bool,
@@ -225,34 +225,6 @@ pub fn new_upgrade_region_reply(
}
}
/// Generates a [InstructionReply::EnterStagingRegions] reply.
pub fn new_enter_staging_region_reply(
id: u64,
region_id: RegionId,
ready: bool,
exists: bool,
error: Option<String>,
) -> MailboxMessage {
MailboxMessage {
id,
subject: "mock".to_string(),
from: "datanode".to_string(),
to: "meta".to_string(),
timestamp_millis: current_time_millis(),
payload: Some(Payload::Json(
serde_json::to_string(&InstructionReply::EnterStagingRegions(
EnterStagingRegionsReply::new(vec![EnterStagingRegionReply {
region_id,
ready,
exists,
error,
}]),
))
.unwrap(),
)),
}
}
/// Mock the test data for WAL pruning.
pub async fn new_wal_prune_metadata(
table_metadata_manager: TableMetadataManagerRef,

View File

@@ -99,7 +99,6 @@ impl heartbeat_server::Heartbeat for Metasrv {
error!("Client disconnected: broken pipe");
break;
}
error!(err; "Sending heartbeat response error");
if tx.send(Err(err)).await.is_err() {
info!("ReceiverStream was dropped; shutting down");

View File

@@ -12,18 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_meta::distributed_time_constants::default_etcd_client_options;
use common_meta::kv_backend::etcd::create_etcd_tls_options;
use etcd_client::{Client, ConnectOptions};
use etcd_client::Client;
use servers::tls::{TlsMode, TlsOption};
use snafu::ResultExt;
use crate::error::{self, BuildTlsOptionsSnafu, Result};
use crate::metasrv::BackendClientOptions;
/// Creates an etcd client with TLS configuration.
pub async fn create_etcd_client_with_tls(
store_addrs: &[String],
client_options: &BackendClientOptions,
tls_config: Option<&TlsOption>,
) -> Result<Client> {
let etcd_endpoints = store_addrs
@@ -32,12 +31,7 @@ pub async fn create_etcd_client_with_tls(
.filter(|x| !x.is_empty())
.collect::<Vec<_>>();
let mut connect_options = ConnectOptions::new()
.with_keep_alive_while_idle(true)
.with_keep_alive(
client_options.keep_alive_interval,
client_options.keep_alive_timeout,
);
let mut connect_options = default_etcd_client_options();
if let Some(tls_config) = tls_config
&& let Some(tls_options) = create_etcd_tls_options(&convert_tls_option(tls_config))
.context(BuildTlsOptionsSnafu)?
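For context, the BackendClientOptions removed in this change defaulted to a 10s keep-alive interval, a 3s keep-alive timeout and a 3s connect timeout. A hedged sketch of the equivalent etcd-client ConnectOptions, which default_etcd_client_options() presumably centralizes now, looks like the following; connect_options is an illustrative name only.

use std::time::Duration;
use etcd_client::ConnectOptions;

/// Illustrative stand-in for the removed per-option wiring.
fn connect_options() -> ConnectOptions {
    ConnectOptions::new()
        .with_keep_alive_while_idle(true)
        .with_keep_alive(Duration::from_secs(10), Duration::from_secs(3))
        .with_connect_timeout(Duration::from_secs(3))
}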

View File

@@ -48,7 +48,7 @@ impl IndexValueCodec {
) -> Result<()> {
ensure!(!value.is_null(), IndexEncodeNullSnafu);
if field.encode_data_type().is_string() {
if field.data_type().is_string() {
let value = value
.try_into_string()
.context(FieldTypeMismatchSnafu)?

View File

@@ -57,18 +57,13 @@ impl SortField {
&self.data_type
}
/// Returns the physical data type to encode of the field.
///
/// For example, a dictionary field will be encoded as its value type.
pub fn encode_data_type(&self) -> &ConcreteDataType {
match &self.data_type {
ConcreteDataType::Dictionary(dict_type) => dict_type.value_type(),
_ => &self.data_type,
}
}
pub fn estimated_size(&self) -> usize {
Self::estimated_size_by_type(self.encode_data_type())
match &self.data_type {
ConcreteDataType::Dictionary(dict_type) => {
Self::estimated_size_by_type(dict_type.value_type())
}
data_type => Self::estimated_size_by_type(data_type),
}
}
fn estimated_size_by_type(data_type: &ConcreteDataType) -> usize {
@@ -103,7 +98,12 @@ impl SortField {
serializer: &mut Serializer<&mut Vec<u8>>,
value: &ValueRef,
) -> Result<()> {
Self::serialize_by_type(self.encode_data_type(), serializer, value)
match self.data_type() {
ConcreteDataType::Dictionary(dict_type) => {
Self::serialize_by_type(dict_type.value_type(), serializer, value)
}
data_type => Self::serialize_by_type(data_type, serializer, value),
}
}
fn serialize_by_type(
@@ -194,7 +194,12 @@ impl SortField {
/// Deserialize a value from the deserializer.
pub fn deserialize<B: Buf>(&self, deserializer: &mut Deserializer<B>) -> Result<Value> {
Self::deserialize_by_type(self.encode_data_type(), deserializer)
match &self.data_type {
ConcreteDataType::Dictionary(dict_type) => {
Self::deserialize_by_type(dict_type.value_type(), deserializer)
}
data_type => Self::deserialize_by_type(data_type, deserializer),
}
}
fn deserialize_by_type<B: Buf>(
@@ -296,7 +301,12 @@ impl SortField {
return Ok(1);
}
Self::skip_deserialize_by_type(self.encode_data_type(), bytes, deserializer)
match &self.data_type {
ConcreteDataType::Dictionary(dict_type) => {
Self::skip_deserialize_by_type(dict_type.value_type(), bytes, deserializer)
}
data_type => Self::skip_deserialize_by_type(data_type, bytes, deserializer),
}
}
fn skip_deserialize_by_type(

View File

@@ -727,7 +727,7 @@ impl fmt::Display for FileType {
impl FileType {
/// Parses the file type from string.
pub(crate) fn parse(s: &str) -> Option<FileType> {
fn parse(s: &str) -> Option<FileType> {
match s {
"parquet" => Some(FileType::Parquet),
"puffin" => Some(FileType::Puffin(0)),

View File

@@ -25,7 +25,7 @@ use tokio::sync::mpsc;
use crate::compaction::compactor::{CompactionRegion, Compactor};
use crate::compaction::memory_manager::{CompactionMemoryGuard, CompactionMemoryManager};
use crate::compaction::picker::{CompactionTask, PickerOutput};
use crate::error::{CompactRegionSnafu, CompactionMemoryExhaustedSnafu};
use crate::error::{CompactRegionSnafu, CompactionMemoryExhaustedSnafu, MemoryAcquireFailedSnafu};
use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
use crate::metrics::{COMPACTION_FAILURE_COUNT, COMPACTION_MEMORY_WAIT, COMPACTION_STAGE_ELAPSED};
use crate::region::RegionRoleState;
@@ -95,16 +95,80 @@ impl CompactionTaskImpl {
async fn acquire_memory_with_policy(&self) -> error::Result<CompactionMemoryGuard> {
let region_id = self.compaction_region.region_id;
let requested_bytes = self.estimated_memory_bytes;
let policy = self.memory_policy;
let limit_bytes = self.memory_manager.limit_bytes();
let _timer = COMPACTION_MEMORY_WAIT.start_timer();
self.memory_manager
.acquire_with_policy(requested_bytes, policy)
.await
.context(CompactionMemoryExhaustedSnafu {
if limit_bytes > 0 && requested_bytes > limit_bytes {
warn!(
"Compaction for region {} requires {} bytes but limit is {} bytes; cannot satisfy request",
region_id, requested_bytes, limit_bytes
);
return Err(CompactionMemoryExhaustedSnafu {
region_id,
policy: format!("{policy:?}"),
})
required_bytes: requested_bytes,
limit_bytes,
policy: "exceed_limit".to_string(),
}
.build());
}
match self.memory_policy {
OnExhaustedPolicy::Wait {
timeout: wait_timeout,
} => {
let timer = COMPACTION_MEMORY_WAIT.start_timer();
match tokio::time::timeout(
wait_timeout,
self.memory_manager.acquire(requested_bytes),
)
.await
{
Ok(Ok(guard)) => {
timer.observe_duration();
Ok(guard)
}
Ok(Err(e)) => {
timer.observe_duration();
Err(e).with_context(|_| MemoryAcquireFailedSnafu {
region_id,
policy: format!("wait_timeout({}ms)", wait_timeout.as_millis()),
})
}
Err(_) => {
timer.observe_duration();
warn!(
"Compaction for region {} waited {:?} for {} bytes but timed out",
region_id, wait_timeout, requested_bytes
);
CompactionMemoryExhaustedSnafu {
region_id,
required_bytes: requested_bytes,
limit_bytes,
policy: format!("wait_timeout({}ms)", wait_timeout.as_millis()),
}
.fail()
}
}
}
OnExhaustedPolicy::Fail => {
// Try to acquire, fail immediately if not available
self.memory_manager
.try_acquire(requested_bytes)
.ok_or_else(|| {
warn!(
"Compaction memory exhausted for region {} (policy=fail, need {} bytes, limit {} bytes)",
region_id, requested_bytes, limit_bytes
);
CompactionMemoryExhaustedSnafu {
region_id,
required_bytes: requested_bytes,
limit_bytes,
policy: "fail".to_string(),
}
.build()
})
}
}
}
/// Remove expired ssts files, update manifest immediately
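The new acquisition path above branches on OnExhaustedPolicy, whose definition is not part of this diff. Judging from the match arms, its shape is presumably along these lines (a sketch, not the crate's actual definition):

/// Assumed shape of the compaction memory policy matched above.
enum OnExhaustedPolicy {
    /// Wait up to `timeout` for memory to be released, then give up.
    Wait { timeout: std::time::Duration },
    /// Fail immediately when the requested bytes cannot be reserved right away.
    Fail,
}

Under either policy, a request larger than the configured limit is rejected up front, since no amount of waiting could satisfy it.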

Some files were not shown because too many files have changed in this diff.