mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-22 22:20:02 +00:00
Compare commits
145 Commits
feat/serie
...
flow/choos
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e2c7dfcaba | ||
|
|
79f584316e | ||
|
|
6ab0f0cc5c | ||
|
|
8685ceb232 | ||
|
|
b442414422 | ||
|
|
51f2cb1053 | ||
|
|
fbf50c594e | ||
|
|
5739302845 | ||
|
|
148d96fc38 | ||
|
|
e787007eb5 | ||
|
|
60acf28f3c | ||
|
|
06126147d2 | ||
|
|
cce1285b16 | ||
|
|
4b5ab75312 | ||
|
|
56f31d5933 | ||
|
|
df31f0b9ec | ||
|
|
07e84a28a3 | ||
|
|
f298a110f9 | ||
|
|
6a5936468e | ||
|
|
49a936e2e1 | ||
|
|
41a706c7cd | ||
|
|
d6e98206b6 | ||
|
|
7b4df6343f | ||
|
|
bb4890cff8 | ||
|
|
b0ad3f0bb4 | ||
|
|
8726bf9f7a | ||
|
|
44e75b142d | ||
|
|
a706edbb73 | ||
|
|
0bf07d7f91 | ||
|
|
b8f9915d47 | ||
|
|
6166f2072e | ||
|
|
8338aa14d3 | ||
|
|
a18dc632c8 | ||
|
|
a9f486e493 | ||
|
|
06e8d46ba9 | ||
|
|
89661c0626 | ||
|
|
a3ae2d7b52 | ||
|
|
789f585a7f | ||
|
|
133f404547 | ||
|
|
bdd44fd7ec | ||
|
|
13ac4d5048 | ||
|
|
c6448a6ccc | ||
|
|
86aae6733d | ||
|
|
ed1ce8438f | ||
|
|
4b921b8425 | ||
|
|
1a517ec8ac | ||
|
|
21044c7339 | ||
|
|
8e1ec2a201 | ||
|
|
5ed0a095b6 | ||
|
|
3c943be189 | ||
|
|
eeba466717 | ||
|
|
2ff54486d3 | ||
|
|
66e2242e46 | ||
|
|
489b16ae30 | ||
|
|
85d564b0fb | ||
|
|
d5026f3491 | ||
|
|
e30753fc31 | ||
|
|
b476584f56 | ||
|
|
ff3a46b1d0 | ||
|
|
a533ac2555 | ||
|
|
cc5629b4a1 | ||
|
|
f3d000f6ec | ||
|
|
9557b76224 | ||
|
|
a0900f5b90 | ||
|
|
45a05fb08c | ||
|
|
71db79c8d6 | ||
|
|
79ed7bbc44 | ||
|
|
02e9a66d7a | ||
|
|
55cadcd2c0 | ||
|
|
8c4796734a | ||
|
|
919956999b | ||
|
|
7e5f6cbeae | ||
|
|
5c07f0dec7 | ||
|
|
9fb0487e67 | ||
|
|
6e407ae4b9 | ||
|
|
bcefc6b83f | ||
|
|
0f77135ef9 | ||
|
|
0a4594c9e2 | ||
|
|
d9437c6da7 | ||
|
|
35f4fa3c3e | ||
|
|
60e4607b64 | ||
|
|
3b8c6d5ce3 | ||
|
|
7a8e1bc3f9 | ||
|
|
ee07b9bfa8 | ||
|
|
90ffaa8a62 | ||
|
|
56f319a707 | ||
|
|
9df493988b | ||
|
|
ad1b77ab04 | ||
|
|
e817a65d75 | ||
|
|
41814bb49f | ||
|
|
1e394af583 | ||
|
|
a9065f5319 | ||
|
|
b8c6f1c8ed | ||
|
|
115e5a03a8 | ||
|
|
a5c443f734 | ||
|
|
5287b87925 | ||
|
|
4d38d8aa1e | ||
|
|
cc1b297831 | ||
|
|
e4556ce12b | ||
|
|
0f252c4d24 | ||
|
|
c58217ccec | ||
|
|
d27b9fc3a1 | ||
|
|
fdab5d198e | ||
|
|
7274ceba30 | ||
|
|
55c9a0de42 | ||
|
|
0fb9e1995e | ||
|
|
799c7cbfa9 | ||
|
|
dcf1a486f6 | ||
|
|
6700c0762d | ||
|
|
032df4c533 | ||
|
|
7b13376239 | ||
|
|
2189631efd | ||
|
|
96fbce1797 | ||
|
|
8d485e9be0 | ||
|
|
6a50d71920 | ||
|
|
747b71bf74 | ||
|
|
c522893552 | ||
|
|
7ddd7a9888 | ||
|
|
e3675494b4 | ||
|
|
7cd6b0f04b | ||
|
|
be837ddc24 | ||
|
|
5b0c75c85f | ||
|
|
5a36fa5e18 | ||
|
|
84e2bc52c2 | ||
|
|
71255b3cbd | ||
|
|
382eacdc13 | ||
|
|
74d8fd00a4 | ||
|
|
dce5e35d7c | ||
|
|
54ef29f394 | ||
|
|
e052c65a58 | ||
|
|
e23979df9f | ||
|
|
4b82ec7409 | ||
|
|
08d0f31865 | ||
|
|
dda7496265 | ||
|
|
df362be012 | ||
|
|
2ebe005e3c | ||
|
|
746b4e2369 | ||
|
|
6c66ec3ffc | ||
|
|
95d0c650ec | ||
|
|
311727939d | ||
|
|
7e3cad8a55 | ||
|
|
72625958bf | ||
|
|
7ea04817bd | ||
|
|
c26e165887 | ||
|
|
7335293983 |
@@ -1,15 +0,0 @@
|
||||
# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
|
||||
language: "en-US"
|
||||
early_access: false
|
||||
reviews:
|
||||
profile: "chill"
|
||||
request_changes_workflow: false
|
||||
high_level_summary: true
|
||||
poem: true
|
||||
review_status: true
|
||||
collapse_walkthrough: false
|
||||
auto_review:
|
||||
enabled: false
|
||||
drafts: false
|
||||
chat:
|
||||
auto_reply: true
|
||||
@@ -2,13 +2,14 @@ meta:
|
||||
configData: |-
|
||||
[runtime]
|
||||
global_rt_size = 4
|
||||
|
||||
|
||||
[wal]
|
||||
provider = "kafka"
|
||||
broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
|
||||
num_topics = 3
|
||||
auto_prune_interval = "30s"
|
||||
trigger_flush_threshold = 100
|
||||
|
||||
|
||||
[datanode]
|
||||
[datanode.client]
|
||||
timeout = "120s"
|
||||
@@ -22,6 +23,7 @@ datanode:
|
||||
provider = "kafka"
|
||||
broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
|
||||
linger = "2ms"
|
||||
overwrite_entry_start_id = true
|
||||
frontend:
|
||||
configData: |-
|
||||
[runtime]
|
||||
|
||||
26
.github/scripts/create-version.sh
vendored
26
.github/scripts/create-version.sh
vendored
@@ -10,22 +10,22 @@ set -e
|
||||
function create_version() {
|
||||
# Read from envrionment variables.
|
||||
if [ -z "$GITHUB_EVENT_NAME" ]; then
|
||||
echo "GITHUB_EVENT_NAME is empty"
|
||||
echo "GITHUB_EVENT_NAME is empty" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$NEXT_RELEASE_VERSION" ]; then
|
||||
echo "NEXT_RELEASE_VERSION is empty"
|
||||
exit 1
|
||||
echo "NEXT_RELEASE_VERSION is empty, use version from Cargo.toml" >&2
|
||||
export NEXT_RELEASE_VERSION=$(grep '^version = ' Cargo.toml | cut -d '"' -f 2 | head -n 1)
|
||||
fi
|
||||
|
||||
if [ -z "$NIGHTLY_RELEASE_PREFIX" ]; then
|
||||
echo "NIGHTLY_RELEASE_PREFIX is empty"
|
||||
echo "NIGHTLY_RELEASE_PREFIX is empty" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Reuse $NEXT_RELEASE_VERSION to identify whether it's a nightly build.
|
||||
# It will be like 'nigtly-20230808-7d0d8dc6'.
|
||||
# It will be like 'nightly-20230808-7d0d8dc6'.
|
||||
if [ "$NEXT_RELEASE_VERSION" = nightly ]; then
|
||||
echo "$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")-$(git rev-parse --short HEAD)"
|
||||
exit 0
|
||||
@@ -35,7 +35,7 @@ function create_version() {
|
||||
# It will be like 'dev-2023080819-f0e7216c'.
|
||||
if [ "$NEXT_RELEASE_VERSION" = dev ]; then
|
||||
if [ -z "$COMMIT_SHA" ]; then
|
||||
echo "COMMIT_SHA is empty in dev build"
|
||||
echo "COMMIT_SHA is empty in dev build" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "dev-$(date "+%Y%m%d-%s")-$(echo "$COMMIT_SHA" | cut -c1-8)"
|
||||
@@ -45,7 +45,7 @@ function create_version() {
|
||||
# Note: Only output 'version=xxx' to stdout when everything is ok, so that it can be used in GitHub Actions Outputs.
|
||||
if [ "$GITHUB_EVENT_NAME" = push ]; then
|
||||
if [ -z "$GITHUB_REF_NAME" ]; then
|
||||
echo "GITHUB_REF_NAME is empty in push event"
|
||||
echo "GITHUB_REF_NAME is empty in push event" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "$GITHUB_REF_NAME"
|
||||
@@ -54,15 +54,15 @@ function create_version() {
|
||||
elif [ "$GITHUB_EVENT_NAME" = schedule ]; then
|
||||
echo "$NEXT_RELEASE_VERSION-$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")"
|
||||
else
|
||||
echo "Unsupported GITHUB_EVENT_NAME: $GITHUB_EVENT_NAME"
|
||||
echo "Unsupported GITHUB_EVENT_NAME: $GITHUB_EVENT_NAME" >&2
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# You can run as following examples:
|
||||
# GITHUB_EVENT_NAME=push NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly GITHUB_REF_NAME=v0.3.0 ./create-version.sh
|
||||
# GITHUB_EVENT_NAME=workflow_dispatch NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
|
||||
# GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
|
||||
# GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=nightly NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
|
||||
# GITHUB_EVENT_NAME=workflow_dispatch COMMIT_SHA=f0e7216c4bb6acce9b29a21ec2d683be2e3f984a NEXT_RELEASE_VERSION=dev NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
|
||||
# GITHUB_EVENT_NAME=push NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly GITHUB_REF_NAME=v0.3.0 ./create-version.sh
|
||||
# GITHUB_EVENT_NAME=workflow_dispatch NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
|
||||
# GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
|
||||
# GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=nightly NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
|
||||
# GITHUB_EVENT_NAME=workflow_dispatch COMMIT_SHA=f0e7216c4bb6acce9b29a21ec2d683be2e3f984a NEXT_RELEASE_VERSION=dev NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
|
||||
create_version
|
||||
|
||||
37
.github/scripts/update-dev-builder-version.sh
vendored
Executable file
37
.github/scripts/update-dev-builder-version.sh
vendored
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash
|
||||
|
||||
DEV_BUILDER_IMAGE_TAG=$1
|
||||
|
||||
update_dev_builder_version() {
|
||||
if [ -z "$DEV_BUILDER_IMAGE_TAG" ]; then
|
||||
echo "Error: Should specify the dev-builder image tag"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Configure Git configs.
|
||||
git config --global user.email greptimedb-ci@greptime.com
|
||||
git config --global user.name greptimedb-ci
|
||||
|
||||
# Checkout a new branch.
|
||||
BRANCH_NAME="ci/update-dev-builder-$(date +%Y%m%d%H%M%S)"
|
||||
git checkout -b $BRANCH_NAME
|
||||
|
||||
# Update the dev-builder image tag in the Makefile.
|
||||
gsed -i "s/DEV_BUILDER_IMAGE_TAG ?=.*/DEV_BUILDER_IMAGE_TAG ?= ${DEV_BUILDER_IMAGE_TAG}/g" Makefile
|
||||
|
||||
# Commit the changes.
|
||||
git add Makefile
|
||||
git commit -m "ci: update dev-builder image tag"
|
||||
git push origin $BRANCH_NAME
|
||||
|
||||
# Create a Pull Request.
|
||||
gh pr create \
|
||||
--title "ci: update dev-builder image tag" \
|
||||
--body "This PR updates the dev-builder image tag" \
|
||||
--base main \
|
||||
--head $BRANCH_NAME \
|
||||
--reviewer zyy17 \
|
||||
--reviewer daviderli614
|
||||
}
|
||||
|
||||
update_dev_builder_version
|
||||
19
.github/workflows/develop.yml
vendored
19
.github/workflows/develop.yml
vendored
@@ -22,6 +22,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
check-typos-and-docs:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
name: Check typos and docs
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
@@ -36,6 +37,7 @@ jobs:
|
||||
|| (echo "'config/config.md' is not up-to-date, please run 'make config-docs'." && exit 1)
|
||||
|
||||
license-header-check:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
runs-on: ubuntu-latest
|
||||
name: Check License Header
|
||||
steps:
|
||||
@@ -45,6 +47,7 @@ jobs:
|
||||
- uses: korandoru/hawkeye@v5
|
||||
|
||||
check:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
name: Check
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
@@ -71,6 +74,7 @@ jobs:
|
||||
run: cargo check --locked --workspace --all-targets
|
||||
|
||||
toml:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
name: Toml Check
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 60
|
||||
@@ -85,6 +89,7 @@ jobs:
|
||||
run: taplo format --check
|
||||
|
||||
build:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
name: Build GreptimeDB binaries
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
@@ -127,6 +132,7 @@ jobs:
|
||||
version: current
|
||||
|
||||
fuzztest:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
name: Fuzz Test
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
@@ -183,6 +189,7 @@ jobs:
|
||||
max-total-time: 120
|
||||
|
||||
unstable-fuzztest:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
name: Unstable Fuzz Test
|
||||
needs: build-greptime-ci
|
||||
runs-on: ubuntu-latest
|
||||
@@ -244,6 +251,7 @@ jobs:
|
||||
retention-days: 3
|
||||
|
||||
build-greptime-ci:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
name: Build GreptimeDB binary (profile-CI)
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
@@ -285,6 +293,7 @@ jobs:
|
||||
version: current
|
||||
|
||||
distributed-fuzztest:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
name: Fuzz Test (Distributed, ${{ matrix.mode.name }}, ${{ matrix.target }})
|
||||
runs-on: ubuntu-latest
|
||||
needs: build-greptime-ci
|
||||
@@ -416,6 +425,7 @@ jobs:
|
||||
docker system prune -f
|
||||
|
||||
distributed-fuzztest-with-chaos:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
name: Fuzz Test with Chaos (Distributed, ${{ matrix.mode.name }}, ${{ matrix.target }})
|
||||
runs-on: ubuntu-latest
|
||||
needs: build-greptime-ci
|
||||
@@ -563,6 +573,7 @@ jobs:
|
||||
docker system prune -f
|
||||
|
||||
sqlness:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
name: Sqlness Test (${{ matrix.mode.name }})
|
||||
needs: build
|
||||
runs-on: ${{ matrix.os }}
|
||||
@@ -609,6 +620,7 @@ jobs:
|
||||
retention-days: 3
|
||||
|
||||
fmt:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
name: Rustfmt
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 60
|
||||
@@ -626,6 +638,7 @@ jobs:
|
||||
run: make fmt-check
|
||||
|
||||
clippy:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
name: Clippy
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 60
|
||||
@@ -651,6 +664,7 @@ jobs:
|
||||
run: make clippy
|
||||
|
||||
conflict-check:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
name: Check for conflict
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
@@ -661,7 +675,7 @@ jobs:
|
||||
uses: olivernybroe/action-conflict-finder@v4.0
|
||||
|
||||
test:
|
||||
if: github.event_name != 'merge_group'
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' && github.event_name != 'merge_group' }}
|
||||
runs-on: ubuntu-22.04-arm
|
||||
timeout-minutes: 60
|
||||
needs: [conflict-check, clippy, fmt]
|
||||
@@ -713,7 +727,7 @@ jobs:
|
||||
UNITTEST_LOG_DIR: "__unittest_logs"
|
||||
|
||||
coverage:
|
||||
if: github.event_name == 'merge_group'
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' && github.event_name == 'merge_group' }}
|
||||
runs-on: ubuntu-22.04-8-cores
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
@@ -773,6 +787,7 @@ jobs:
|
||||
verbose: true
|
||||
|
||||
# compat:
|
||||
# if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
# name: Compatibility Test
|
||||
# needs: build
|
||||
# runs-on: ubuntu-22.04
|
||||
|
||||
30
.github/workflows/grafana.yml
vendored
30
.github/workflows/grafana.yml
vendored
@@ -21,32 +21,6 @@ jobs:
|
||||
run: sudo apt-get install -y jq
|
||||
|
||||
# Make the check.sh script executable
|
||||
- name: Make check.sh executable
|
||||
run: chmod +x grafana/check.sh
|
||||
|
||||
# Run the check.sh script
|
||||
- name: Run check.sh
|
||||
run: ./grafana/check.sh
|
||||
|
||||
# Only run summary.sh for pull_request events (not for merge queues or final pushes)
|
||||
- name: Check if this is a pull request
|
||||
id: check-pr
|
||||
- name: Check grafana dashboards
|
||||
run: |
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
echo "is_pull_request=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "is_pull_request=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
# Make the summary.sh script executable
|
||||
- name: Make summary.sh executable
|
||||
if: steps.check-pr.outputs.is_pull_request == 'true'
|
||||
run: chmod +x grafana/summary.sh
|
||||
|
||||
# Run the summary.sh script and add its output to the GitHub Job Summary
|
||||
- name: Run summary.sh and add to Job Summary
|
||||
if: steps.check-pr.outputs.is_pull_request == 'true'
|
||||
run: |
|
||||
SUMMARY=$(./grafana/summary.sh)
|
||||
echo "### Summary of Grafana Panels" >> $GITHUB_STEP_SUMMARY
|
||||
echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY
|
||||
make check-dashboards
|
||||
|
||||
6
.github/workflows/nightly-ci.yml
vendored
6
.github/workflows/nightly-ci.yml
vendored
@@ -117,16 +117,16 @@ jobs:
|
||||
name: Run clean build on Linux
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
timeout-minutes: 60
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
- uses: cachix/install-nix-action@v27
|
||||
- uses: cachix/install-nix-action@v31
|
||||
with:
|
||||
nix_path: nixpkgs=channel:nixos-24.11
|
||||
- run: nix develop --command cargo build
|
||||
- run: nix develop --command cargo build --bin greptime
|
||||
|
||||
check-status:
|
||||
name: Check status
|
||||
|
||||
@@ -24,11 +24,19 @@ on:
|
||||
description: Release dev-builder-android image
|
||||
required: false
|
||||
default: false
|
||||
update_dev_builder_image_tag:
|
||||
type: boolean
|
||||
description: Update the DEV_BUILDER_IMAGE_TAG in Makefile and create a PR
|
||||
required: false
|
||||
default: false
|
||||
|
||||
jobs:
|
||||
release-dev-builder-images:
|
||||
name: Release dev builder images
|
||||
if: ${{ inputs.release_dev_builder_ubuntu_image || inputs.release_dev_builder_centos_image || inputs.release_dev_builder_android_image }} # Only manually trigger this job.
|
||||
# The jobs are triggered by the following events:
|
||||
# 1. Manually triggered workflow_dispatch event
|
||||
# 2. Push event when the PR that modifies the `rust-toolchain.toml` or `docker/dev-builder/**` is merged to main
|
||||
if: ${{ github.event_name == 'push' || inputs.release_dev_builder_ubuntu_image || inputs.release_dev_builder_centos_image || inputs.release_dev_builder_android_image }}
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
version: ${{ steps.set-version.outputs.version }}
|
||||
@@ -57,9 +65,9 @@ jobs:
|
||||
version: ${{ env.VERSION }}
|
||||
dockerhub-image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerhub-image-registry-token: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
build-dev-builder-ubuntu: ${{ inputs.release_dev_builder_ubuntu_image }}
|
||||
build-dev-builder-centos: ${{ inputs.release_dev_builder_centos_image }}
|
||||
build-dev-builder-android: ${{ inputs.release_dev_builder_android_image }}
|
||||
build-dev-builder-ubuntu: ${{ inputs.release_dev_builder_ubuntu_image || github.event_name == 'push' }}
|
||||
build-dev-builder-centos: ${{ inputs.release_dev_builder_centos_image || github.event_name == 'push' }}
|
||||
build-dev-builder-android: ${{ inputs.release_dev_builder_android_image || github.event_name == 'push' }}
|
||||
|
||||
release-dev-builder-images-ecr:
|
||||
name: Release dev builder images to AWS ECR
|
||||
@@ -85,7 +93,7 @@ jobs:
|
||||
|
||||
- name: Push dev-builder-ubuntu image
|
||||
shell: bash
|
||||
if: ${{ inputs.release_dev_builder_ubuntu_image }}
|
||||
if: ${{ inputs.release_dev_builder_ubuntu_image || github.event_name == 'push' }}
|
||||
env:
|
||||
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
|
||||
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
|
||||
@@ -106,7 +114,7 @@ jobs:
|
||||
|
||||
- name: Push dev-builder-centos image
|
||||
shell: bash
|
||||
if: ${{ inputs.release_dev_builder_centos_image }}
|
||||
if: ${{ inputs.release_dev_builder_centos_image || github.event_name == 'push' }}
|
||||
env:
|
||||
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
|
||||
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
|
||||
@@ -127,7 +135,7 @@ jobs:
|
||||
|
||||
- name: Push dev-builder-android image
|
||||
shell: bash
|
||||
if: ${{ inputs.release_dev_builder_android_image }}
|
||||
if: ${{ inputs.release_dev_builder_android_image || github.event_name == 'push' }}
|
||||
env:
|
||||
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
|
||||
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
|
||||
@@ -162,7 +170,7 @@ jobs:
|
||||
|
||||
- name: Push dev-builder-ubuntu image
|
||||
shell: bash
|
||||
if: ${{ inputs.release_dev_builder_ubuntu_image }}
|
||||
if: ${{ inputs.release_dev_builder_ubuntu_image || github.event_name == 'push' }}
|
||||
env:
|
||||
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
|
||||
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
|
||||
@@ -176,7 +184,7 @@ jobs:
|
||||
|
||||
- name: Push dev-builder-centos image
|
||||
shell: bash
|
||||
if: ${{ inputs.release_dev_builder_centos_image }}
|
||||
if: ${{ inputs.release_dev_builder_centos_image || github.event_name == 'push' }}
|
||||
env:
|
||||
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
|
||||
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
|
||||
@@ -190,7 +198,7 @@ jobs:
|
||||
|
||||
- name: Push dev-builder-android image
|
||||
shell: bash
|
||||
if: ${{ inputs.release_dev_builder_android_image }}
|
||||
if: ${{ inputs.release_dev_builder_android_image || github.event_name == 'push' }}
|
||||
env:
|
||||
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
|
||||
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
|
||||
@@ -201,3 +209,24 @@ jobs:
|
||||
quay.io/skopeo/stable:latest \
|
||||
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION \
|
||||
docker://$ACR_IMAGE_REGISTRY/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION
|
||||
|
||||
update-dev-builder-image-tag:
|
||||
name: Update dev-builder image tag
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
if: ${{ github.event_name == 'push' || inputs.update_dev_builder_image_tag }}
|
||||
needs: [
|
||||
release-dev-builder-images
|
||||
]
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Update dev-builder image tag
|
||||
shell: bash
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
./.github/scripts/update-dev-builder-version.sh ${{ needs.release-dev-builder-images.outputs.version }}
|
||||
|
||||
30
.github/workflows/release.yml
vendored
30
.github/workflows/release.yml
vendored
@@ -90,8 +90,6 @@ env:
|
||||
|
||||
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
|
||||
NIGHTLY_RELEASE_PREFIX: nightly
|
||||
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
|
||||
NEXT_RELEASE_VERSION: v0.14.0
|
||||
|
||||
jobs:
|
||||
allocate-runners:
|
||||
@@ -135,7 +133,6 @@ jobs:
|
||||
env:
|
||||
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||
GITHUB_REF_NAME: ${{ github.ref_name }}
|
||||
NEXT_RELEASE_VERSION: ${{ env.NEXT_RELEASE_VERSION }}
|
||||
NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }}
|
||||
|
||||
- name: Allocate linux-amd64 runner
|
||||
@@ -317,7 +314,7 @@ jobs:
|
||||
image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
version: ${{ needs.allocate-runners.outputs.version }}
|
||||
push-latest-tag: true
|
||||
push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
|
||||
|
||||
- name: Set build image result
|
||||
id: set-build-image-result
|
||||
@@ -364,7 +361,7 @@ jobs:
|
||||
dev-mode: false
|
||||
upload-to-s3: true
|
||||
update-version-info: true
|
||||
push-latest-tag: true
|
||||
push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
|
||||
|
||||
publish-github-release:
|
||||
name: Create GitHub release and upload artifacts
|
||||
@@ -467,6 +464,29 @@ jobs:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
DOCS_REPO_TOKEN: ${{ secrets.DOCS_REPO_TOKEN }}
|
||||
|
||||
bump-website-version:
|
||||
name: Bump website version
|
||||
if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }}
|
||||
needs: [allocate-runners]
|
||||
runs-on: ubuntu-latest
|
||||
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
|
||||
permissions:
|
||||
issues: write # Allows the action to create issues for cyborg.
|
||||
contents: write # Allows the action to create a release.
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
- uses: ./.github/actions/setup-cyborg
|
||||
- name: Bump website version
|
||||
working-directory: cyborg
|
||||
run: pnpm tsx bin/bump-website-version.ts
|
||||
env:
|
||||
VERSION: ${{ needs.allocate-runners.outputs.version }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
WEBSITE_REPO_TOKEN: ${{ secrets.WEBSITE_REPO_TOKEN }}
|
||||
|
||||
notification:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' && (github.event_name == 'push' || github.event_name == 'schedule') && always() }}
|
||||
name: Send notification to Greptime team
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -28,6 +28,7 @@ debug/
|
||||
# Logs
|
||||
**/__unittest_logs
|
||||
logs/
|
||||
!grafana/dashboards/logs/
|
||||
|
||||
# cpython's generated python byte code
|
||||
**/__pycache__/
|
||||
|
||||
2323
Cargo.lock
generated
2323
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
59
Cargo.toml
59
Cargo.toml
@@ -68,16 +68,16 @@ members = [
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "0.14.0"
|
||||
version = "0.15.0"
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
[workspace.lints]
|
||||
clippy.print_stdout = "warn"
|
||||
clippy.print_stderr = "warn"
|
||||
clippy.dbg_macro = "warn"
|
||||
clippy.implicit_clone = "warn"
|
||||
clippy.readonly_write_lock = "allow"
|
||||
clippy.result_large_err = "allow"
|
||||
clippy.large_enum_variant = "allow"
|
||||
clippy.doc_overindented_list_items = "allow"
|
||||
rust.unknown_lints = "deny"
|
||||
rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
|
||||
|
||||
@@ -90,11 +90,11 @@ rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
|
||||
# See for more detaiils: https://github.com/rust-lang/cargo/issues/11329
|
||||
ahash = { version = "0.8", features = ["compile-time-rng"] }
|
||||
aquamarine = "0.6"
|
||||
arrow = { version = "53.0.0", features = ["prettyprint"] }
|
||||
arrow-array = { version = "53.0.0", default-features = false, features = ["chrono-tz"] }
|
||||
arrow-flight = "53.0"
|
||||
arrow-ipc = { version = "53.0.0", default-features = false, features = ["lz4", "zstd"] }
|
||||
arrow-schema = { version = "53.0", features = ["serde"] }
|
||||
arrow = { version = "54.2", features = ["prettyprint"] }
|
||||
arrow-array = { version = "54.2", default-features = false, features = ["chrono-tz"] }
|
||||
arrow-flight = "54.2"
|
||||
arrow-ipc = { version = "54.2", default-features = false, features = ["lz4", "zstd"] }
|
||||
arrow-schema = { version = "54.2", features = ["serde"] }
|
||||
async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
# Remember to update axum-extra, axum-macros when updating axum
|
||||
@@ -113,15 +113,15 @@ clap = { version = "4.4", features = ["derive"] }
|
||||
config = "0.13.0"
|
||||
crossbeam-utils = "0.8"
|
||||
dashmap = "6.1"
|
||||
datafusion = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
|
||||
datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
|
||||
datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
|
||||
datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
|
||||
datafusion-optimizer = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
|
||||
datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
|
||||
datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
|
||||
datafusion-sql = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
|
||||
datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
|
||||
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
|
||||
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
|
||||
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
|
||||
datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
|
||||
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
|
||||
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
|
||||
datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
|
||||
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
|
||||
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
|
||||
deadpool = "0.12"
|
||||
deadpool-postgres = "0.14"
|
||||
derive_builder = "0.20"
|
||||
@@ -130,7 +130,7 @@ etcd-client = "0.14"
|
||||
fst = "0.4.7"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "dd4a1996982534636734674db66e44464b0c0d83" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "17a3550751c8b1e02ec16be40101d5f24dc255c3" }
|
||||
hex = "0.4"
|
||||
http = "1"
|
||||
humantime = "2.1"
|
||||
@@ -148,6 +148,7 @@ moka = "0.12"
|
||||
nalgebra = "0.33"
|
||||
notify = "8.0"
|
||||
num_cpus = "1.16"
|
||||
object_store_opendal = "0.50"
|
||||
once_cell = "1.18"
|
||||
opentelemetry-proto = { version = "0.27", features = [
|
||||
"gen-tonic",
|
||||
@@ -157,12 +158,12 @@ opentelemetry-proto = { version = "0.27", features = [
|
||||
"logs",
|
||||
] }
|
||||
parking_lot = "0.12"
|
||||
parquet = { version = "53.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
|
||||
parquet = { version = "54.2", default-features = false, features = ["arrow", "async", "object_store"] }
|
||||
paste = "1.0"
|
||||
pin-project = "1.0"
|
||||
prometheus = { version = "0.13.3", features = ["process"] }
|
||||
promql-parser = { version = "0.5", features = ["ser"] }
|
||||
prost = "0.13"
|
||||
promql-parser = { version = "0.5.1", features = ["ser"] }
|
||||
prost = { version = "0.13", features = ["no-recursion-limit"] }
|
||||
raft-engine = { version = "0.4.1", default-features = false }
|
||||
rand = "0.9"
|
||||
ratelimit = "0.10"
|
||||
@@ -191,19 +192,18 @@ simd-json = "0.15"
|
||||
similar-asserts = "1.6.0"
|
||||
smallvec = { version = "1", features = ["serde"] }
|
||||
snafu = "0.8"
|
||||
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "0cf6c04490d59435ee965edd2078e8855bd8471e", features = [
|
||||
"visitor",
|
||||
"serde",
|
||||
] } # branch = "v0.54.x"
|
||||
sqlx = { version = "0.8", features = [
|
||||
"runtime-tokio-rustls",
|
||||
"mysql",
|
||||
"postgres",
|
||||
"chrono",
|
||||
] }
|
||||
sysinfo = "0.33"
|
||||
# on branch v0.52.x
|
||||
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "71dd86058d2af97b9925093d40c4e03360403170", features = [
|
||||
"visitor",
|
||||
"serde",
|
||||
] } # on branch v0.44.x
|
||||
strum = { version = "0.27", features = ["derive"] }
|
||||
sysinfo = "0.33"
|
||||
tempfile = "3"
|
||||
tokio = { version = "1.40", features = ["full"] }
|
||||
tokio-postgres = "0.7"
|
||||
@@ -270,6 +270,9 @@ metric-engine = { path = "src/metric-engine" }
|
||||
mito2 = { path = "src/mito2" }
|
||||
object-store = { path = "src/object-store" }
|
||||
operator = { path = "src/operator" }
|
||||
otel-arrow-rust = { git = "https://github.com/open-telemetry/otel-arrow", rev = "5d551412d2a12e689cde4d84c14ef29e36784e51", features = [
|
||||
"server",
|
||||
] }
|
||||
partition = { path = "src/partition" }
|
||||
pipeline = { path = "src/pipeline" }
|
||||
plugins = { path = "src/plugins" }
|
||||
|
||||
17
Makefile
17
Makefile
@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
|
||||
IMAGE_REGISTRY ?= docker.io
|
||||
IMAGE_NAMESPACE ?= greptime
|
||||
IMAGE_TAG ?= latest
|
||||
DEV_BUILDER_IMAGE_TAG ?= 2024-12-25-a71b93dd-20250305072908
|
||||
DEV_BUILDER_IMAGE_TAG ?= 2025-04-15-1a517ec8-20250428023155
|
||||
BUILDX_MULTI_PLATFORM_BUILD ?= false
|
||||
BUILDX_BUILDER_NAME ?= gtbuilder
|
||||
BASE_IMAGE ?= ubuntu
|
||||
@@ -32,6 +32,10 @@ ifneq ($(strip $(BUILD_JOBS)),)
|
||||
NEXTEST_OPTS += --build-jobs=${BUILD_JOBS}
|
||||
endif
|
||||
|
||||
ifneq ($(strip $(BUILD_JOBS)),)
|
||||
SQLNESS_OPTS += --jobs ${BUILD_JOBS}
|
||||
endif
|
||||
|
||||
ifneq ($(strip $(CARGO_PROFILE)),)
|
||||
CARGO_BUILD_OPTS += --profile ${CARGO_PROFILE}
|
||||
endif
|
||||
@@ -193,6 +197,7 @@ fix-clippy: ## Fix clippy violations.
|
||||
fmt-check: ## Check code format.
|
||||
cargo fmt --all -- --check
|
||||
python3 scripts/check-snafu.py
|
||||
python3 scripts/check-super-imports.py
|
||||
|
||||
.PHONY: start-etcd
|
||||
start-etcd: ## Start single node etcd for testing purpose.
|
||||
@@ -217,6 +222,16 @@ start-cluster: ## Start the greptimedb cluster with etcd by using docker compose
|
||||
stop-cluster: ## Stop the greptimedb cluster that created by docker compose.
|
||||
docker compose -f ./docker/docker-compose/cluster-with-etcd.yaml stop
|
||||
|
||||
##@ Grafana
|
||||
|
||||
.PHONY: check-dashboards
|
||||
check-dashboards: ## Check the Grafana dashboards.
|
||||
@./grafana/scripts/check.sh
|
||||
|
||||
.PHONY: dashboards
|
||||
dashboards: ## Generate the Grafana dashboards for standalone mode and intermediate dashboards.
|
||||
@./grafana/scripts/gen-dashboards.sh
|
||||
|
||||
##@ Docs
|
||||
config-docs: ## Generate configuration documentation from toml files.
|
||||
docker run --rm \
|
||||
|
||||
199
README.md
199
README.md
@@ -6,7 +6,9 @@
|
||||
</picture>
|
||||
</p>
|
||||
|
||||
<h2 align="center">Unified & Cost-Effective Observerability Database for Metrics, Logs, and Events</h2>
|
||||
<h2 align="center">Real-Time & Cloud-Native Observability Database<br/>for metrics, logs, and traces</h2>
|
||||
|
||||
> Delivers sub-second querying at PB scale and exceptional cost efficiency from edge to cloud.
|
||||
|
||||
<div align="center">
|
||||
<h3 align="center">
|
||||
@@ -49,74 +51,77 @@
|
||||
</div>
|
||||
|
||||
- [Introduction](#introduction)
|
||||
- [**Features: Why GreptimeDB**](#why-greptimedb)
|
||||
- [Architecture](https://docs.greptime.com/contributor-guide/overview/#architecture)
|
||||
- [Try it for free](#try-greptimedb)
|
||||
- [⭐ Key Features](#features)
|
||||
- [Quick Comparison](#quick-comparison)
|
||||
- [Architecture](#architecture)
|
||||
- [Try GreptimeDB](#try-greptimedb)
|
||||
- [Getting Started](#getting-started)
|
||||
- [Project Status](#project-status)
|
||||
- [Join the community](#community)
|
||||
- [Contributing](#contributing)
|
||||
- [Build From Source](#build-from-source)
|
||||
- [Tools & Extensions](#tools--extensions)
|
||||
- [Project Status](#project-status)
|
||||
- [Community](#community)
|
||||
- [License](#license)
|
||||
- [Commercial Support](#commercial-support)
|
||||
- [Contributing](#contributing)
|
||||
- [Acknowledgement](#acknowledgement)
|
||||
|
||||
## Introduction
|
||||
|
||||
**GreptimeDB** is an open-source unified & cost-effective observerability database for **Metrics**, **Logs**, and **Events** (also **Traces** in plan). You can gain real-time insights from Edge to Cloud at Any Scale.
|
||||
**GreptimeDB** is an open-source, cloud-native database purpose-built for the unified collection and analysis of observability data (metrics, logs, and traces). Whether you’re operating on the edge, in the cloud, or across hybrid environments, GreptimeDB empowers real-time insights at massive scale — all in one system.
|
||||
|
||||
## News
|
||||
## Features
|
||||
|
||||
**[GreptimeDB tops JSONBench's billion-record cold run test!](https://greptime.com/blogs/2025-03-18-jsonbench-greptimedb-performance)**
|
||||
| Feature | Description |
|
||||
| --------- | ----------- |
|
||||
| [Unified Observability Data](https://docs.greptime.com/user-guide/concepts/why-greptimedb) | Store metrics, logs, and traces as timestamped, contextual wide events. Query via [SQL](https://docs.greptime.com/user-guide/query-data/sql), [PromQL](https://docs.greptime.com/user-guide/query-data/promql), and [streaming](https://docs.greptime.com/user-guide/flow-computation/overview). |
|
||||
| [High Performance & Cost Effective](https://docs.greptime.com/user-guide/manage-data/data-index) | Written in Rust, with a distributed query engine, [rich indexing](https://docs.greptime.com/user-guide/manage-data/data-index), and optimized columnar storage, delivering sub-second responses at PB scale. |
|
||||
| [Cloud-Native Architecture](https://docs.greptime.com/user-guide/concepts/architecture) | Designed for [Kubernetes](https://docs.greptime.com/user-guide/deployments/deploy-on-kubernetes/greptimedb-operator-management), with compute/storage separation, native object storage (AWS S3, Azure Blob, etc.) and seamless cross-cloud access. |
|
||||
| [Developer-Friendly](https://docs.greptime.com/user-guide/protocols/overview) | Access via SQL/PromQL interfaces, REST API, MySQL/PostgreSQL protocols, and popular ingestion [protocols](https://docs.greptime.com/user-guide/protocols/overview). |
|
||||
| [Flexible Deployment](https://docs.greptime.com/user-guide/deployments/overview) | Deploy anywhere: edge (including ARM/[Android](https://docs.greptime.com/user-guide/deployments/run-on-android)) or cloud, with unified APIs and efficient data sync. |
|
||||
|
||||
## Why GreptimeDB
|
||||
Learn more in [Why GreptimeDB](https://docs.greptime.com/user-guide/concepts/why-greptimedb) and [Observability 2.0 and the Database for It](https://greptime.com/blogs/2025-04-25-greptimedb-observability2-new-database).
|
||||
|
||||
Our core developers have been building observerability data platforms for years. Based on our best practices, GreptimeDB was born to give you:
|
||||
## Quick Comparison
|
||||
|
||||
* **Unified Processing of Metrics, Logs, and Events**
|
||||
| Feature | GreptimeDB | Traditional TSDB | Log Stores |
|
||||
|----------------------------------|-----------------------|--------------------|-----------------|
|
||||
| Data Types | Metrics, Logs, Traces | Metrics only | Logs only |
|
||||
| Query Language | SQL, PromQL, Streaming| Custom/PromQL | Custom/DSL |
|
||||
| Deployment | Edge + Cloud | Cloud/On-prem | Mostly central |
|
||||
| Indexing & Performance | PB-Scale, Sub-second | Varies | Varies |
|
||||
| Integration | REST, SQL, Common protocols | Varies | Varies |
|
||||
|
||||
GreptimeDB unifies observerability data processing by treating all data - whether metrics, logs, or events - as timestamped events with context. Users can analyze this data using either [SQL](https://docs.greptime.com/user-guide/query-data/sql) or [PromQL](https://docs.greptime.com/user-guide/query-data/promql) and leverage stream processing ([Flow](https://docs.greptime.com/user-guide/flow-computation/overview)) to enable continuous aggregation. [Read more](https://docs.greptime.com/user-guide/concepts/data-model).
|
||||
**Performance:**
|
||||
* [GreptimeDB tops JSONBench's billion-record cold run test!](https://greptime.com/blogs/2025-03-18-jsonbench-greptimedb-performance)
|
||||
* [TSBS Benchmark](https://github.com/GreptimeTeam/greptimedb/tree/main/docs/benchmarks/tsbs)
|
||||
|
||||
* **Cloud-native Distributed Database**
|
||||
Read [more benchmark reports](https://docs.greptime.com/user-guide/concepts/features-that-you-concern#how-is-greptimedbs-performance-compared-to-other-solutions).
|
||||
|
||||
Built for [Kubernetes](https://docs.greptime.com/user-guide/deployments/deploy-on-kubernetes/greptimedb-operator-management). GreptimeDB achieves seamless scalability with its [cloud-native architecture](https://docs.greptime.com/user-guide/concepts/architecture) of separated compute and storage, built on object storage (AWS S3, Azure Blob Storage, etc.) while enabling cross-cloud deployment through a unified data access layer.
|
||||
## Architecture
|
||||
|
||||
* **Performance and Cost-effective**
|
||||
|
||||
Written in pure Rust for superior performance and reliability. GreptimeDB features a distributed query engine with intelligent indexing to handle high cardinality data efficiently. Its optimized columnar storage achieves 50x cost efficiency on cloud object storage through advanced compression. [Benchmark reports](https://www.greptime.com/blogs/2024-09-09-report-summary).
|
||||
|
||||
* **Cloud-Edge Collaboration**
|
||||
|
||||
GreptimeDB seamlessly operates across cloud and edge (ARM/Android/Linux), providing consistent APIs and control plane for unified data management and efficient synchronization. [Learn how to run on Android](https://docs.greptime.com/user-guide/deployments/run-on-android/).
|
||||
|
||||
* **Multi-protocol Ingestion, SQL & PromQL Ready**
|
||||
|
||||
Widely adopted database protocols and APIs, including MySQL, PostgreSQL, InfluxDB, OpenTelemetry, Loki and Prometheus, etc. Effortless Adoption & Seamless Migration. [Supported Protocols Overview](https://docs.greptime.com/user-guide/protocols/overview).
|
||||
|
||||
For more detailed info please read [Why GreptimeDB](https://docs.greptime.com/user-guide/concepts/why-greptimedb).
|
||||
* Read the [architecture](https://docs.greptime.com/contributor-guide/overview/#architecture) document.
|
||||
* [DeepWiki](https://deepwiki.com/GreptimeTeam/greptimedb/1-overview) provides an in-depth look at GreptimeDB:
|
||||
<img alt="GreptimeDB System Overview" src="docs/architecture.png">
|
||||
|
||||
## Try GreptimeDB
|
||||
|
||||
### 1. [Live Demo](https://greptime.com/playground)
|
||||
|
||||
Try out the features of GreptimeDB right from your browser.
|
||||
Experience GreptimeDB directly in your browser.
|
||||
|
||||
### 2. [GreptimeCloud](https://console.greptime.cloud/)
|
||||
|
||||
Start instantly with a free cluster.
|
||||
|
||||
### 3. Docker Image
|
||||
|
||||
To install GreptimeDB locally, the recommended way is via Docker:
|
||||
### 3. Docker (Local Quickstart)
|
||||
|
||||
```shell
|
||||
docker pull greptime/greptimedb
|
||||
```
|
||||
|
||||
Start a GreptimeDB container with:
|
||||
|
||||
```shell
|
||||
docker run -p 127.0.0.1:4000-4003:4000-4003 \
|
||||
-v "$(pwd)/greptimedb:./greptimedb_data" \
|
||||
-v "$(pwd)/greptimedb:/greptimedb_data" \
|
||||
--name greptime --rm \
|
||||
greptime/greptimedb:latest standalone start \
|
||||
--http-addr 0.0.0.0:4000 \
|
||||
@@ -124,112 +129,90 @@ docker run -p 127.0.0.1:4000-4003:4000-4003 \
|
||||
--mysql-addr 0.0.0.0:4002 \
|
||||
--postgres-addr 0.0.0.0:4003
|
||||
```
|
||||
Dashboard: [http://localhost:4000/dashboard](http://localhost:4000/dashboard)
|
||||
[Full Install Guide](https://docs.greptime.com/getting-started/installation/overview)
|
||||
|
||||
Access the dashboard via `http://localhost:4000/dashboard`.
|
||||
|
||||
Read more about [Installation](https://docs.greptime.com/getting-started/installation/overview) on docs.
|
||||
**Troubleshooting:**
|
||||
* Cannot connect to the database? Ensure that ports `4000`, `4001`, `4002`, and `4003` are not blocked by a firewall or used by other services.
|
||||
* Failed to start? Check the container logs with `docker logs greptime` for further details.
|
||||
|
||||
## Getting Started
|
||||
|
||||
* [Quickstart](https://docs.greptime.com/getting-started/quick-start)
|
||||
* [User Guide](https://docs.greptime.com/user-guide/overview)
|
||||
* [Demos](https://github.com/GreptimeTeam/demo-scene)
|
||||
* [FAQ](https://docs.greptime.com/faq-and-others/faq)
|
||||
- [Quickstart](https://docs.greptime.com/getting-started/quick-start)
|
||||
- [User Guide](https://docs.greptime.com/user-guide/overview)
|
||||
- [Demo Scenes](https://github.com/GreptimeTeam/demo-scene)
|
||||
- [FAQ](https://docs.greptime.com/faq-and-others/faq)
|
||||
|
||||
## Build
|
||||
|
||||
Check the prerequisite:
|
||||
## Build From Source
|
||||
|
||||
**Prerequisites:**
|
||||
* [Rust toolchain](https://www.rust-lang.org/tools/install) (nightly)
|
||||
* [Protobuf compiler](https://grpc.io/docs/protoc-installation/) (>= 3.15)
|
||||
* C/C++ building essentials, including `gcc`/`g++`/`autoconf` and glibc library (eg. `libc6-dev` on Ubuntu and `glibc-devel` on Fedora)
|
||||
* Python toolchain (optional): Required only if using some test scripts.
|
||||
|
||||
Build GreptimeDB binary:
|
||||
|
||||
```shell
|
||||
**Build and Run:**
|
||||
```bash
|
||||
make
|
||||
```
|
||||
|
||||
Run a standalone server:
|
||||
|
||||
```shell
|
||||
cargo run -- standalone start
|
||||
```
|
||||
|
||||
## Tools & Extensions
|
||||
|
||||
### Kubernetes
|
||||
|
||||
- [GreptimeDB Operator](https://github.com/GrepTimeTeam/greptimedb-operator)
|
||||
|
||||
### Dashboard
|
||||
|
||||
- [The dashboard UI for GreptimeDB](https://github.com/GreptimeTeam/dashboard)
|
||||
|
||||
### SDK
|
||||
|
||||
- [GreptimeDB Go Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-go)
|
||||
- [GreptimeDB Java Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-java)
|
||||
- [GreptimeDB C++ Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-cpp)
|
||||
- [GreptimeDB Erlang Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-erl)
|
||||
- [GreptimeDB Rust Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-rust)
|
||||
- [GreptimeDB JavaScript Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-js)
|
||||
|
||||
### Grafana Dashboard
|
||||
|
||||
Our official Grafana dashboard for monitoring GreptimeDB is available at [grafana](grafana/README.md) directory.
|
||||
- **Kubernetes:** [GreptimeDB Operator](https://github.com/GrepTimeTeam/greptimedb-operator)
|
||||
- **Helm Charts:** [Greptime Helm Charts](https://github.com/GreptimeTeam/helm-charts)
|
||||
- **Dashboard:** [Web UI](https://github.com/GreptimeTeam/dashboard)
|
||||
- **SDKs/Ingester:** [Go](https://github.com/GreptimeTeam/greptimedb-ingester-go), [Java](https://github.com/GreptimeTeam/greptimedb-ingester-java), [C++](https://github.com/GreptimeTeam/greptimedb-ingester-cpp), [Erlang](https://github.com/GreptimeTeam/greptimedb-ingester-erl), [Rust](https://github.com/GreptimeTeam/greptimedb-ingester-rust), [JS](https://github.com/GreptimeTeam/greptimedb-ingester-js)
|
||||
- **Grafana**: [Official Dashboard](https://github.com/GreptimeTeam/greptimedb/blob/main/grafana/README.md)
|
||||
|
||||
## Project Status
|
||||
|
||||
GreptimeDB is currently in Beta. We are targeting GA (General Availability) with v1.0 release by Early 2025.
|
||||
> **Status:** Beta.
|
||||
> **GA (v1.0):** Targeted for mid 2025.
|
||||
|
||||
While in Beta, GreptimeDB is already:
|
||||
|
||||
* Being used in production by early adopters
|
||||
* Actively maintained with regular releases, [about version number](https://docs.greptime.com/nightly/reference/about-greptimedb-version)
|
||||
* Suitable for testing and evaluation
|
||||
- Being used in production by early adopters
|
||||
- Stable, actively maintained, with regular releases ([version info](https://docs.greptime.com/nightly/reference/about-greptimedb-version))
|
||||
- Suitable for evaluation and pilot deployments
|
||||
|
||||
For production use, we recommend using the latest stable release.
|
||||
[](https://www.star-history.com/#GreptimeTeam/GreptimeDB&Date)
|
||||
|
||||
If you find this project useful, a ⭐ would mean a lot to us!
|
||||
<img alt="Known Users" src="https://greptime.com/logo/img/users.png"/>
|
||||
|
||||
## Community
|
||||
|
||||
Our core team is thrilled to see you participate in any ways you like. When you are stuck, try to
|
||||
ask for help by filling an issue with a detailed description of what you were trying to do
|
||||
and what went wrong. If you have any questions or if you would like to get involved in our
|
||||
community, please check out:
|
||||
We invite you to engage and contribute!
|
||||
|
||||
- GreptimeDB Community on [Slack](https://greptime.com/slack)
|
||||
- GreptimeDB [GitHub Discussions forum](https://github.com/GreptimeTeam/greptimedb/discussions)
|
||||
- Greptime official [website](https://greptime.com)
|
||||
|
||||
In addition, you may:
|
||||
|
||||
- View our official [Blog](https://greptime.com/blogs/)
|
||||
- Connect us with [Linkedin](https://www.linkedin.com/company/greptime/)
|
||||
- Follow us on [Twitter](https://twitter.com/greptime)
|
||||
|
||||
## Commercial Support
|
||||
|
||||
If you are running GreptimeDB OSS in your organization, we offer additional
|
||||
enterprise add-ons, installation services, training, and consulting. [Contact
|
||||
us](https://greptime.com/contactus) and we will reach out to you with more
|
||||
detail of our commercial license.
|
||||
- [Slack](https://greptime.com/slack)
|
||||
- [Discussions](https://github.com/GreptimeTeam/greptimedb/discussions)
|
||||
- [Official Website](https://greptime.com/)
|
||||
- [Blog](https://greptime.com/blogs/)
|
||||
- [LinkedIn](https://www.linkedin.com/company/greptime/)
|
||||
- [Twitter](https://twitter.com/greptime)
|
||||
|
||||
## License
|
||||
|
||||
GreptimeDB uses the [Apache License 2.0](https://apache.org/licenses/LICENSE-2.0.txt) to strike a balance between
|
||||
open contributions and allowing you to use the software however you want.
|
||||
GreptimeDB is licensed under the [Apache License 2.0](https://apache.org/licenses/LICENSE-2.0.txt).
|
||||
|
||||
## Commercial Support
|
||||
|
||||
Running GreptimeDB in your organization?
|
||||
We offer enterprise add-ons, services, training, and consulting.
|
||||
[Contact us](https://greptime.com/contactus) for details.
|
||||
|
||||
## Contributing
|
||||
|
||||
Please refer to [contribution guidelines](CONTRIBUTING.md) and [internal concepts docs](https://docs.greptime.com/contributor-guide/overview.html) for more information.
|
||||
- Read our [Contribution Guidelines](https://github.com/GreptimeTeam/greptimedb/blob/main/CONTRIBUTING.md).
|
||||
- Explore [Internal Concepts](https://docs.greptime.com/contributor-guide/overview.html) and [DeepWiki](https://deepwiki.com/GreptimeTeam/greptimedb).
|
||||
- Pick up a [good first issue](https://github.com/GreptimeTeam/greptimedb/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) and join the #contributors [Slack](https://greptime.com/slack) channel.
|
||||
|
||||
## Acknowledgement
|
||||
|
||||
Special thanks to all the contributors who have propelled GreptimeDB forward. For a complete list of contributors, please refer to [AUTHOR.md](AUTHOR.md).
|
||||
Special thanks to all contributors! See [AUTHORS.md](https://github.com/GreptimeTeam/greptimedb/blob/main/AUTHOR.md).
|
||||
|
||||
- GreptimeDB uses [Apache Arrow™](https://arrow.apache.org/) as the memory model and [Apache Parquet™](https://parquet.apache.org/) as the persistent file format.
|
||||
- GreptimeDB's query engine is powered by [Apache Arrow DataFusion™](https://arrow.apache.org/datafusion/).
|
||||
- [Apache OpenDAL™](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer.
|
||||
- GreptimeDB's meta service is based on [etcd](https://etcd.io/).
|
||||
- Uses [Apache Arrow™](https://arrow.apache.org/) (memory model)
|
||||
- [Apache Parquet™](https://parquet.apache.org/) (file storage)
|
||||
- [Apache Arrow DataFusion™](https://arrow.apache.org/datafusion/) (query engine)
|
||||
- [Apache OpenDAL™](https://opendal.apache.org/) (data access abstraction)
|
||||
- [etcd](https://etcd.io/) (meta service)
|
||||
|
||||
@@ -96,6 +96,8 @@
|
||||
| `procedure.max_running_procedures` | Integer | `128` | Max running procedures.<br/>The maximum number of procedures that can be running at the same time.<br/>If the number of running procedures exceeds this limit, the procedure will be rejected. |
|
||||
| `flow` | -- | -- | flow engine options. |
|
||||
| `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. |
|
||||
| `query` | -- | -- | The query engine options. |
|
||||
| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
|
||||
| `storage` | -- | -- | The data storage options. |
|
||||
| `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
|
||||
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
|
||||
@@ -270,6 +272,8 @@
|
||||
| `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. |
|
||||
| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. |
|
||||
| `meta_client.metadata_cache_tti` | String | `5m` | -- |
|
||||
| `query` | -- | -- | The query engine options. |
|
||||
| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
|
||||
| `datanode` | -- | -- | Datanode options. |
|
||||
| `datanode.client` | -- | -- | Datanode client options. |
|
||||
| `datanode.client.connect_timeout` | String | `10s` | -- |
|
||||
@@ -315,6 +319,7 @@
|
||||
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
|
||||
| `use_memory_store` | Bool | `false` | Store data in memory. |
|
||||
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
|
||||
| `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.<br/>**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
|
||||
| `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
|
||||
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
|
||||
| `runtime` | -- | -- | The runtime options. |
|
||||
@@ -339,6 +344,9 @@
|
||||
| `wal.provider` | String | `raft_engine` | -- |
|
||||
| `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. |
|
||||
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
|
||||
| `wal.auto_prune_interval` | String | `0s` | Interval of automatically WAL pruning.<br/>Set to `0s` to disable automatically WAL pruning which delete unused remote WAL entries periodically. |
|
||||
| `wal.trigger_flush_threshold` | Integer | `0` | The threshold to trigger a flush operation of a region in automatically WAL pruning.<br/>Metasrv will send a flush request to flush the region when:<br/>`trigger_flush_threshold` + `prunable_entry_id` < `max_prunable_entry_id`<br/>where:<br/>- `prunable_entry_id` is the maximum entry id that can be pruned of the region.<br/>- `max_prunable_entry_id` is the maximum prunable entry id among all regions in the same topic.<br/>Set to `0` to disable the flush operation. |
|
||||
| `wal.auto_prune_parallelism` | Integer | `10` | Concurrent task limit for automatically WAL pruning. |
|
||||
| `wal.num_topics` | Integer | `64` | Number of topics. |
|
||||
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) |
|
||||
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>Only accepts strings that match the following regular expression pattern:<br/>[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*<br/>i.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1. |
|
||||
@@ -429,6 +437,8 @@
|
||||
| `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
|
||||
| `query` | -- | -- | The query engine options. |
|
||||
| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
|
||||
| `storage` | -- | -- | The data storage options. |
|
||||
| `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
|
||||
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
|
||||
|
||||
@@ -243,6 +243,12 @@ overwrite_entry_start_id = false
|
||||
# credential = "base64-credential"
|
||||
# endpoint = "https://storage.googleapis.com"
|
||||
|
||||
## The query engine options.
|
||||
[query]
|
||||
## Parallelism of the query engine.
|
||||
## Default to 0, which means the number of CPU cores.
|
||||
parallelism = 0
|
||||
|
||||
## The data storage options.
|
||||
[storage]
|
||||
## The working home directory.
|
||||
|
||||
@@ -179,6 +179,12 @@ metadata_cache_ttl = "10m"
|
||||
# TTI of the metadata cache.
|
||||
metadata_cache_tti = "5m"
|
||||
|
||||
## The query engine options.
|
||||
[query]
|
||||
## Parallelism of the query engine.
|
||||
## Default to 0, which means the number of CPU cores.
|
||||
parallelism = 0
|
||||
|
||||
## Datanode options.
|
||||
[datanode]
|
||||
## Datanode client options.
|
||||
|
||||
@@ -50,6 +50,10 @@ use_memory_store = false
|
||||
## - Using shared storage (e.g., s3).
|
||||
enable_region_failover = false
|
||||
|
||||
## Whether to allow region failover on local WAL.
|
||||
## **This option is not recommended to be set to true, because it may lead to data loss during failover.**
|
||||
allow_region_failover_on_local_wal = false
|
||||
|
||||
## Max allowed idle time before removing node info from metasrv memory.
|
||||
node_max_idle_time = "24hours"
|
||||
|
||||
@@ -130,6 +134,22 @@ broker_endpoints = ["127.0.0.1:9092"]
|
||||
## Otherwise, use topics named `topic_name_prefix_[0..num_topics)`
|
||||
auto_create_topics = true
|
||||
|
||||
## Interval of automatically WAL pruning.
|
||||
## Set to `0s` to disable automatically WAL pruning which delete unused remote WAL entries periodically.
|
||||
auto_prune_interval = "0s"
|
||||
|
||||
## The threshold to trigger a flush operation of a region in automatically WAL pruning.
|
||||
## Metasrv will send a flush request to flush the region when:
|
||||
## `trigger_flush_threshold` + `prunable_entry_id` < `max_prunable_entry_id`
|
||||
## where:
|
||||
## - `prunable_entry_id` is the maximum entry id that can be pruned of the region.
|
||||
## - `max_prunable_entry_id` is the maximum prunable entry id among all regions in the same topic.
|
||||
## Set to `0` to disable the flush operation.
|
||||
trigger_flush_threshold = 0
|
||||
|
||||
## Concurrent task limit for automatically WAL pruning.
|
||||
auto_prune_parallelism = 10
|
||||
|
||||
## Number of topics.
|
||||
num_topics = 64
|
||||
|
||||
|
||||
@@ -334,6 +334,12 @@ max_running_procedures = 128
|
||||
# credential = "base64-credential"
|
||||
# endpoint = "https://storage.googleapis.com"
|
||||
|
||||
## The query engine options.
|
||||
[query]
|
||||
## Parallelism of the query engine.
|
||||
## Default to 0, which means the number of CPU cores.
|
||||
parallelism = 0
|
||||
|
||||
## The data storage options.
|
||||
[storage]
|
||||
## The working home directory.
|
||||
|
||||
57
cyborg/bin/bump-website-version.ts
Normal file
57
cyborg/bin/bump-website-version.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright 2023 Greptime Team
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import * as core from "@actions/core";
|
||||
import {obtainClient} from "@/common";
|
||||
|
||||
async function triggerWorkflow(workflowId: string, version: string) {
|
||||
const websiteClient = obtainClient("WEBSITE_REPO_TOKEN")
|
||||
try {
|
||||
await websiteClient.rest.actions.createWorkflowDispatch({
|
||||
owner: "GreptimeTeam",
|
||||
repo: "website",
|
||||
workflow_id: workflowId,
|
||||
ref: "main",
|
||||
inputs: {
|
||||
version,
|
||||
},
|
||||
});
|
||||
console.log(`Successfully triggered ${workflowId} workflow with version ${version}`);
|
||||
} catch (error) {
|
||||
core.setFailed(`Failed to trigger workflow: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
const version = process.env.VERSION;
|
||||
if (!version) {
|
||||
core.setFailed("VERSION environment variable is required");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Remove 'v' prefix if exists
|
||||
const cleanVersion = version.startsWith('v') ? version.slice(1) : version;
|
||||
|
||||
if (cleanVersion.includes('nightly')) {
|
||||
console.log('Nightly version detected, skipping workflow trigger.');
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
try {
|
||||
triggerWorkflow('bump-patch-version.yml', cleanVersion);
|
||||
} catch (error) {
|
||||
core.setFailed(`Error processing version: ${error.message}`);
|
||||
process.exit(1);
|
||||
}
|
||||
BIN
docs/architecture.png
Normal file
BIN
docs/architecture.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 173 KiB |
@@ -1,6 +1,6 @@
|
||||
# Profile memory usage of GreptimeDB
|
||||
|
||||
This crate provides an easy approach to dump memory profiling info.
|
||||
This crate provides an easy approach to dump memory profiling info. A set of ready to use scripts is provided in [docs/how-to/memory-profile-scripts](docs/how-to/memory-profile-scripts).
|
||||
|
||||
## Prerequisites
|
||||
### jemalloc
|
||||
|
||||
52
docs/how-to/memory-profile-scripts/scripts/README.md
Normal file
52
docs/how-to/memory-profile-scripts/scripts/README.md
Normal file
@@ -0,0 +1,52 @@
|
||||
# Memory Analysis Process
|
||||
This section will guide you through the process of analyzing memory usage for greptimedb.
|
||||
|
||||
1. Get the `jeprof` tool script, see the next section("Getting the `jeprof` tool") for details.
|
||||
|
||||
2. After starting `greptimedb`(with env var `MALLOC_CONF=prof:true`), execute the `dump.sh` script with the PID of the `greptimedb` process as an argument. This continuously monitors memory usage and captures profiles when exceeding thresholds (e.g. +20MB within 10 minutes). Outputs `greptime-{timestamp}.gprof` files.
|
||||
|
||||
3. With 2-3 gprof files, run `gen_flamegraph.sh` in the same environment to generate flame graphs showing memory allocation call stacks.
|
||||
|
||||
4. **NOTE:** The `gen_flamegraph.sh` script requires `jeprof` and optionally `flamegraph.pl` to be in the current directory. If needed to gen flamegraph now, run the `get_flamegraph_tool.sh` script, which downloads the flame graph generation tool `flamegraph.pl` to the current directory.
|
||||
The usage of `gen_flamegraph.sh` is:
|
||||
|
||||
`Usage: ./gen_flamegraph.sh <binary_path> <gprof_directory>`
|
||||
where `<binary_path>` is the path to the greptimedb binary, `<gprof_directory>` is the directory containing the gprof files(the directory `dump.sh` is dumping profiles to).
|
||||
Example call: `./gen_flamegraph.sh ./greptime .`
|
||||
|
||||
Generating the flame graph might take a few minutes. The generated flame graphs are located in the `<gprof_directory>/flamegraphs` directory. Or if no `flamegraph.pl` is found, it will only contain `.collapse` files which is also fine.
|
||||
5. You can send the generated flame graphs(the entire folder of `<gprof_directory>/flamegraphs`) to developers for further analysis.
|
||||
|
||||
|
||||
## Getting the `jeprof` tool
|
||||
there are three ways to get `jeprof`, list in here from simple to complex, using any one of those methods is ok, as long as it's the same environment as the `greptimedb` will be running on:
|
||||
1. If you are compiling greptimedb from source, then `jeprof` is already produced during compilation. After running `cargo build`, execute `find_compiled_jeprof.sh`. This will copy `jeprof` to the current directory.
|
||||
2. Or, if you have the Rust toolchain installed locally, simply follow these commands:
|
||||
```bash
|
||||
cargo new get_jeprof
|
||||
cd get_jeprof
|
||||
```
|
||||
Then add this line to `Cargo.toml`:
|
||||
```toml
|
||||
[dependencies]
|
||||
tikv-jemalloc-ctl = { version = "0.6", features = ["use_std", "stats"] }
|
||||
```
|
||||
then run:
|
||||
```bash
|
||||
cargo build
|
||||
```
|
||||
after that the `jeprof` tool is produced. Now run `find_compiled_jeprof.sh` in current directory, it will copy the `jeprof` tool to the current directory.
|
||||
|
||||
3. compile jemalloc from source
|
||||
you can first clone this repo, and checkout to this commit:
|
||||
```bash
|
||||
git clone https://github.com/tikv/jemalloc.git
|
||||
cd jemalloc
|
||||
git checkout e13ca993e8ccb9ba9847cc330696e02839f328f7
|
||||
```
|
||||
then run:
|
||||
```bash
|
||||
./configure
|
||||
make
|
||||
```
|
||||
and `jeprof` is in `.bin/` directory. Copy it to the current directory.
|
||||
78
docs/how-to/memory-profile-scripts/scripts/dump.sh
Executable file
78
docs/how-to/memory-profile-scripts/scripts/dump.sh
Executable file
@@ -0,0 +1,78 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Monitors greptime process memory usage every 10 minutes
|
||||
# Triggers memory profile capture via `curl -X POST localhost:4000/debug/prof/mem > greptime-{timestamp}.gprof`
|
||||
# when memory increases by more than 20MB since last check
|
||||
# Generated profiles can be analyzed using flame graphs as described in `how-to-profile-memory.md`
|
||||
# (jeprof is compiled with the database - see documentation)
|
||||
# Alternative: Share binaries + profiles for analysis (Docker images preferred)
|
||||
|
||||
# Threshold in Kilobytes (20 MB)
|
||||
threshold_kb=$((20 * 1024))
|
||||
sleep_interval=$((10 * 60))
|
||||
|
||||
# Variable to store the last measured memory usage in KB
|
||||
last_mem_kb=0
|
||||
|
||||
echo "Starting memory monitoring for 'greptime' process..."
|
||||
|
||||
while true; do
|
||||
|
||||
# Check if PID is provided as an argument
|
||||
if [ -z "$1" ]; then
|
||||
echo "$(date): PID must be provided as a command-line argument."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
pid="$1"
|
||||
|
||||
# Validate that the PID is a number
|
||||
if ! [[ "$pid" =~ ^[0-9]+$ ]]; then
|
||||
echo "$(date): Invalid PID: '$pid'. PID must be a number."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Get the current Resident Set Size (RSS) in Kilobytes
|
||||
current_mem_kb=$(ps -o rss= -p "$pid")
|
||||
|
||||
# Check if ps command was successful and returned a number
|
||||
if ! [[ "$current_mem_kb" =~ ^[0-9]+$ ]]; then
|
||||
echo "$(date): Failed to get memory usage for PID $pid. Skipping check."
|
||||
# Keep last_mem_kb to avoid false positives if the process briefly becomes unreadable.
|
||||
continue
|
||||
fi
|
||||
|
||||
echo "$(date): Current memory usage for PID $pid: ${current_mem_kb} KB"
|
||||
|
||||
# Compare with the last measurement
|
||||
# if it's the first run, also do a baseline dump just to make sure we can dump
|
||||
|
||||
diff_kb=$((current_mem_kb - last_mem_kb))
|
||||
echo "$(date): Memory usage change since last check: ${diff_kb} KB"
|
||||
|
||||
if [ "$diff_kb" -gt "$threshold_kb" ]; then
|
||||
echo "$(date): Memory increase (${diff_kb} KB) exceeded threshold (${threshold_kb} KB). Dumping profile..."
|
||||
timestamp=$(date +%Y%m%d%H%M%S)
|
||||
profile_file="greptime-${timestamp}.gprof"
|
||||
# Execute curl and capture output to file
|
||||
if curl -sf -X POST localhost:4000/debug/prof/mem > "$profile_file"; then
|
||||
echo "$(date): Memory profile saved to $profile_file"
|
||||
else
|
||||
echo "$(date): Failed to dump memory profile (curl exit code: $?)."
|
||||
# Remove the potentially empty/failed profile file
|
||||
rm -f "$profile_file"
|
||||
fi
|
||||
else
|
||||
echo "$(date): Memory increase (${diff_kb} KB) is within the threshold (${threshold_kb} KB)."
|
||||
fi
|
||||
|
||||
|
||||
# Update the last memory usage
|
||||
last_mem_kb=$current_mem_kb
|
||||
|
||||
# Wait for 5 minutes
|
||||
echo "$(date): Sleeping for $sleep_interval seconds..."
|
||||
sleep $sleep_interval
|
||||
done
|
||||
|
||||
echo "Memory monitoring script stopped." # This line might not be reached in normal operation
|
||||
15
docs/how-to/memory-profile-scripts/scripts/find_compiled_jeprof.sh
Executable file
15
docs/how-to/memory-profile-scripts/scripts/find_compiled_jeprof.sh
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Locates compiled jeprof binary (memory analysis tool) after cargo build
|
||||
# Copies it to current directory from target/ build directories
|
||||
|
||||
JPROF_PATH=$(find . -name 'jeprof' -print -quit)
|
||||
if [ -n "$JPROF_PATH" ]; then
|
||||
echo "Found jeprof at $JPROF_PATH"
|
||||
cp "$JPROF_PATH" .
|
||||
chmod +x jeprof
|
||||
echo "Copied jeprof to current directory and made it executable."
|
||||
else
|
||||
echo "jeprof not found"
|
||||
exit 1
|
||||
fi
|
||||
89
docs/how-to/memory-profile-scripts/scripts/gen_flamegraph.sh
Executable file
89
docs/how-to/memory-profile-scripts/scripts/gen_flamegraph.sh
Executable file
@@ -0,0 +1,89 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Generate flame graphs from a series of `.gprof` files
|
||||
# First argument: Path to the binary executable
|
||||
# Second argument: Path to directory containing gprof files
|
||||
# Requires `jeprof` and `flamegraph.pl` in current directory
|
||||
# What this script essentially does is:
|
||||
# ./jeprof <binary> <gprof> --collapse | ./flamegraph.pl > <output>
|
||||
# For differential analysis between consecutive profiles:
|
||||
# ./jeprof <binary> --base <gprof1> <gprof2> --collapse | ./flamegraph.pl > <output_diff>
|
||||
|
||||
set -e # Exit immediately if a command exits with a non-zero status.
|
||||
|
||||
# Check for required tools
|
||||
if [ ! -f "./jeprof" ]; then
|
||||
echo "Error: jeprof not found in the current directory."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "./flamegraph.pl" ]; then
|
||||
echo "Error: flamegraph.pl not found in the current directory."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check arguments
|
||||
if [ "$#" -ne 2 ]; then
|
||||
echo "Usage: $0 <binary_path> <gprof_directory>"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
BINARY_PATH=$1
|
||||
GPROF_DIR=$2
|
||||
OUTPUT_DIR="${GPROF_DIR}/flamegraphs" # Store outputs in a subdirectory
|
||||
|
||||
if [ ! -f "$BINARY_PATH" ]; then
|
||||
echo "Error: Binary file not found at $BINARY_PATH"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -d "$GPROF_DIR" ]; then
|
||||
echo "Error: gprof directory not found at $GPROF_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
echo "Generating flamegraphs in $OUTPUT_DIR"
|
||||
|
||||
# Find and sort gprof files
|
||||
# Use find + sort -V for natural sort of version numbers if present in filenames
|
||||
# Use null-terminated strings for safety with find/xargs/sort
|
||||
mapfile -d $'\0' gprof_files < <(find "$GPROF_DIR" -maxdepth 1 -name '*.gprof' -print0 | sort -zV)
|
||||
|
||||
if [ ${#gprof_files[@]} -eq 0 ]; then
|
||||
echo "No .gprof files found in $GPROF_DIR"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
prev_gprof=""
|
||||
|
||||
# Generate flamegraphs
|
||||
for gprof_file in "${gprof_files[@]}"; do
|
||||
# Skip empty entries if any
|
||||
if [ -z "$gprof_file" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
filename=$(basename "$gprof_file" .gprof)
|
||||
output_collapse="${OUTPUT_DIR}/${filename}.collapse"
|
||||
output_svg="${OUTPUT_DIR}/${filename}.svg"
|
||||
echo "Generating collapse file for $gprof_file -> $output_collapse"
|
||||
./jeprof "$BINARY_PATH" "$gprof_file" --collapse > "$output_collapse"
|
||||
echo "Generating flamegraph for $gprof_file -> $output_svg"
|
||||
./flamegraph.pl "$output_collapse" > "$output_svg" || true
|
||||
|
||||
# Generate diff flamegraph if not the first file
|
||||
if [ -n "$prev_gprof" ]; then
|
||||
prev_filename=$(basename "$prev_gprof" .gprof)
|
||||
diff_output_collapse="${OUTPUT_DIR}/${prev_filename}_vs_${filename}_diff.collapse"
|
||||
diff_output_svg="${OUTPUT_DIR}/${prev_filename}_vs_${filename}_diff.svg"
|
||||
echo "Generating diff collapse file for $prev_gprof vs $gprof_file -> $diff_output_collapse"
|
||||
./jeprof "$BINARY_PATH" --base "$prev_gprof" "$gprof_file" --collapse > "$diff_output_collapse"
|
||||
echo "Generating diff flamegraph for $prev_gprof vs $gprof_file -> $diff_output_svg"
|
||||
./flamegraph.pl "$diff_output_collapse" > "$diff_output_svg" || true
|
||||
fi
|
||||
|
||||
prev_gprof="$gprof_file"
|
||||
done
|
||||
|
||||
echo "Flamegraph generation complete."
|
||||
44
docs/how-to/memory-profile-scripts/scripts/gen_from_collapse.sh
Executable file
44
docs/how-to/memory-profile-scripts/scripts/gen_from_collapse.sh
Executable file
@@ -0,0 +1,44 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Generate flame graphs from .collapse files
|
||||
# Argument: Path to directory containing collapse files
|
||||
# Requires `flamegraph.pl` in current directory
|
||||
|
||||
# Check if flamegraph.pl exists
|
||||
if [ ! -f "./flamegraph.pl" ]; then
|
||||
echo "Error: flamegraph.pl not found in the current directory."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check if directory argument is provided
|
||||
if [ -z "$1" ]; then
|
||||
echo "Usage: $0 <collapse_directory>"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
COLLAPSE_DIR=$1
|
||||
|
||||
# Check if the provided argument is a directory
|
||||
if [ ! -d "$COLLAPSE_DIR" ]; then
|
||||
echo "Error: '$COLLAPSE_DIR' is not a valid directory."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Generating flame graphs from collapse files in '$COLLAPSE_DIR'..."
|
||||
|
||||
# Find and process each .collapse file
|
||||
find "$COLLAPSE_DIR" -maxdepth 1 -name "*.collapse" -print0 | while IFS= read -r -d $'\0' collapse_file; do
|
||||
if [ -f "$collapse_file" ]; then
|
||||
# Construct the output SVG filename
|
||||
svg_file="${collapse_file%.collapse}.svg"
|
||||
echo "Generating $svg_file from $collapse_file..."
|
||||
./flamegraph.pl "$collapse_file" > "$svg_file"
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Error generating flame graph for $collapse_file"
|
||||
else
|
||||
echo "Successfully generated $svg_file"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
echo "Flame graph generation complete."
|
||||
6
docs/how-to/memory-profile-scripts/scripts/get_flamegraph_tool.sh
Executable file
6
docs/how-to/memory-profile-scripts/scripts/get_flamegraph_tool.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Download flamegraph.pl to current directory - this is the flame graph generation tool script
|
||||
|
||||
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
|
||||
chmod +x ./flamegraph.pl
|
||||
18
flake.lock
generated
18
flake.lock
generated
@@ -8,11 +8,11 @@
|
||||
"rust-analyzer-src": "rust-analyzer-src"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1737613896,
|
||||
"narHash": "sha256-ldqXIglq74C7yKMFUzrS9xMT/EVs26vZpOD68Sh7OcU=",
|
||||
"lastModified": 1745735608,
|
||||
"narHash": "sha256-L0jzm815XBFfF2wCFmR+M1CF+beIEFj6SxlqVKF59Ec=",
|
||||
"owner": "nix-community",
|
||||
"repo": "fenix",
|
||||
"rev": "303a062fdd8e89f233db05868468975d17855d80",
|
||||
"rev": "c39a78eba6ed2a022cc3218db90d485077101496",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -41,11 +41,11 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1737569578,
|
||||
"narHash": "sha256-6qY0pk2QmUtBT9Mywdvif0i/CLVgpCjMUn6g9vB+f3M=",
|
||||
"lastModified": 1745487689,
|
||||
"narHash": "sha256-FQoi3R0NjQeBAsEOo49b5tbDPcJSMWc3QhhaIi9eddw=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "47addd76727f42d351590c905d9d1905ca895b82",
|
||||
"rev": "5630cf13cceac06cefe9fc607e8dfa8fb342dde3",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -65,11 +65,11 @@
|
||||
"rust-analyzer-src": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"lastModified": 1737581772,
|
||||
"narHash": "sha256-t1P2Pe3FAX9TlJsCZbmJ3wn+C4qr6aSMypAOu8WNsN0=",
|
||||
"lastModified": 1745694049,
|
||||
"narHash": "sha256-fxvRYH/tS7hGQeg9zCVh5RBcSWT+JGJet7RA8Ss+rC0=",
|
||||
"owner": "rust-lang",
|
||||
"repo": "rust-analyzer",
|
||||
"rev": "582af7ee9c8d84f5d534272fc7de9f292bd849be",
|
||||
"rev": "d8887c0758bbd2d5f752d5bd405d4491e90e7ed6",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
lib = nixpkgs.lib;
|
||||
rustToolchain = fenix.packages.${system}.fromToolchainName {
|
||||
name = (lib.importTOML ./rust-toolchain.toml).toolchain.channel;
|
||||
sha256 = "sha256-f/CVA1EC61EWbh0SjaRNhLL0Ypx2ObupbzigZp8NmL4=";
|
||||
sha256 = "sha256-arzEYlWLGGYeOhECHpBxQd2joZ4rPKV3qLNnZ+eql6A=";
|
||||
};
|
||||
in
|
||||
{
|
||||
|
||||
@@ -1,61 +1,122 @@
|
||||
Grafana dashboard for GreptimeDB
|
||||
--------------------------------
|
||||
# Grafana dashboards for GreptimeDB
|
||||
|
||||
GreptimeDB's official Grafana dashboard.
|
||||
## Overview
|
||||
|
||||
Status notify: we are still working on this config. It's expected to change frequently in the recent days. Please feel free to submit your feedback and/or contribution to this dashboard 🤗
|
||||
This repository contains Grafana dashboards for visualizing metrics and logs of GreptimeDB instances running in either cluster or standalone mode. **The Grafana version should be greater than 9.0**.
|
||||
|
||||
If you use Helm [chart](https://github.com/GreptimeTeam/helm-charts) to deploy GreptimeDB cluster, you can enable self-monitoring by setting the following values in your Helm chart:
|
||||
We highly recommend using the self-monitoring feature provided by [GreptimeDB Operator](https://github.com/GrepTimeTeam/greptimedb-operator) to automatically collect metrics and logs from your GreptimeDB instances and store them in a dedicated GreptimeDB instance.
|
||||
|
||||
- **Metrics Dashboards**
|
||||
|
||||
- `dashboards/metrics/cluster/dashboard.json`: The Grafana dashboard for the GreptimeDB cluster. Read the [dashboard.md](./dashboards/metrics/cluster/dashboard.md) for more details.
|
||||
|
||||
- `dashboards/metrics/standalone/dashboard.json`: The Grafana dashboard for the standalone GreptimeDB instance. **It's generated from the `cluster/dashboard.json` by removing the instance filter through the `make dashboards` command**. Read the [dashboard.md](./dashboards/metrics/standalone/dashboard.md) for more details.
|
||||
|
||||
- **Logs Dashboard**
|
||||
|
||||
The `dashboards/logs/dashboard.json` provides a comprehensive Grafana dashboard for visualizing GreptimeDB logs. To utilize this dashboard effectively, you need to collect logs in JSON format from your GreptimeDB instances and store them in a dedicated GreptimeDB instance.
|
||||
|
||||
For proper integration, the logs table must adhere to the following schema design with the table name `_gt_logs`:
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS `_gt_logs` (
|
||||
`pod_ip` STRING NULL,
|
||||
`namespace` STRING NULL,
|
||||
`cluster` STRING NULL,
|
||||
`file` STRING NULL,
|
||||
`module_path` STRING NULL,
|
||||
`level` STRING NULL,
|
||||
`target` STRING NULL,
|
||||
`role` STRING NULL,
|
||||
`pod` STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'),
|
||||
`message` STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false'),
|
||||
`err` STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false'),
|
||||
`timestamp` TIMESTAMP(9) NOT NULL,
|
||||
TIME INDEX (`timestamp`),
|
||||
PRIMARY KEY (`level`, `target`, `role`)
|
||||
)
|
||||
ENGINE=mito
|
||||
WITH (
|
||||
append_mode = 'true'
|
||||
)
|
||||
```
|
||||
|
||||
## Development
|
||||
|
||||
As GreptimeDB evolves rapidly, metrics may change over time. We welcome your feedback and contributions to improve these dashboards 🤗
|
||||
|
||||
To modify the metrics dashboards, simply edit the `dashboards/metrics/cluster/dashboard.json` file and run the `make dashboards` command. This will automatically generate the updated `dashboards/metrics/standalone/dashboard.json` and other related files.
|
||||
|
||||
For easier dashboard maintenance, we utilize the [`dac`](https://github.com/zyy17/dac) tool to generate human-readable intermediate dashboards and documentation:
|
||||
|
||||
- `dashboards/metrics/cluster/dashboard.yaml`: The intermediate dashboard file for the GreptimeDB cluster.
|
||||
- `dashboards/metrics/standalone/dashboard.yaml`: The intermediate dashboard file for standalone GreptimeDB instances.
|
||||
|
||||
## Data Sources
|
||||
|
||||
The following data sources are used to fetch metrics and logs:
|
||||
|
||||
- **`${metrics}`**: Prometheus data source for providing the GreptimeDB metrics.
|
||||
- **`${logs}`**: MySQL data source for providing the GreptimeDB logs.
|
||||
- **`${information_schema}`**: MySQL data source for providing the information schema of the current instance and used for the `overview` panel. It is the MySQL port of the current monitored instance.
|
||||
|
||||
## Instance Filters
|
||||
|
||||
To deploy the dashboards for multiple scenarios (K8s, bare metal, etc.), we prefer to use the `instance` label when filtering instances.
|
||||
|
||||
Additionally, we recommend including the `pod` label in the legend to make it easier to identify each instance, even though this field will be empty in bare metal scenarios.
|
||||
|
||||
For example, the following query is recommended:
|
||||
|
||||
```promql
|
||||
sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)
|
||||
```
|
||||
|
||||
And the legend will be like: `[{{instance}}]-[{{ pod }}]`.
|
||||
|
||||
## Deployment
|
||||
|
||||
### (Recommended) Helm Chart
|
||||
|
||||
If you use the [Helm Chart](https://github.com/GreptimeTeam/helm-charts) to deploy a GreptimeDB cluster, you can enable self-monitoring by setting the following values in your Helm chart:
|
||||
|
||||
- `monitoring.enabled=true`: Deploys a standalone GreptimeDB instance dedicated to monitoring the cluster;
|
||||
- `grafana.enabled=true`: Deploys Grafana and automatically imports the monitoring dashboard;
|
||||
|
||||
The standalone GreptimeDB instance will collect metrics from your cluster and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/nightly/user-guide/deployments/deploy-on-kubernetes/getting-started).
|
||||
The standalone GreptimeDB instance will collect metrics from your cluster, and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/nightly/user-guide/deployments/deploy-on-kubernetes/getting-started).
|
||||
|
||||
# How to use
|
||||
### Self-host Prometheus and import dashboards manually
|
||||
|
||||
## `greptimedb.json`
|
||||
1. **Configure Prometheus to scrape the cluster**
|
||||
|
||||
Open Grafana Dashboard page, choose `New` -> `Import`. And upload `greptimedb.json` file.
|
||||
The following is an example configuration(**Please modify it according to your actual situation**):
|
||||
|
||||
## `greptimedb-cluster.json`
|
||||
```yml
|
||||
# example config
|
||||
# only to indicate how to assign labels to each target
|
||||
# modify yours accordingly
|
||||
scrape_configs:
|
||||
- job_name: metasrv
|
||||
static_configs:
|
||||
- targets: ['<metasrv-ip>:<port>']
|
||||
|
||||
This cluster dashboard provides a comprehensive view of incoming requests, response statuses, and internal activities such as flush and compaction, with a layered structure from frontend to datanode. Designed with a focus on alert functionality, its primary aim is to highlight any anomalies in metrics, allowing users to quickly pinpoint the cause of errors.
|
||||
- job_name: datanode
|
||||
static_configs:
|
||||
- targets: ['<datanode0-ip>:<port>', '<datanode1-ip>:<port>', '<datanode2-ip>:<port>']
|
||||
|
||||
We use Prometheus to scrape off metrics from nodes in GreptimeDB cluster, Grafana to visualize the diagram. Any compatible stack should work too.
|
||||
- job_name: frontend
|
||||
static_configs:
|
||||
- targets: ['<frontend-ip>:<port>']
|
||||
```
|
||||
|
||||
__Note__: This dashboard is still in an early stage of development. Any issue or advice on improvement is welcomed.
|
||||
2. **Configure the data sources in Grafana**
|
||||
|
||||
### Configuration
|
||||
You need to add two data sources in Grafana:
|
||||
|
||||
Please ensure the following configuration before importing the dashboard into Grafana.
|
||||
- Prometheus: It is the Prometheus instance that scrapes the GreptimeDB metrics.
|
||||
- Information Schema: It is the MySQL port of the current monitored instance. The dashboard will use this datasource to show the information schema of the current instance.
|
||||
|
||||
__1. Prometheus scrape config__
|
||||
3. **Import the dashboards based on your deployment scenario**
|
||||
|
||||
Configure Prometheus to scrape the cluster.
|
||||
|
||||
```yml
|
||||
# example config
|
||||
# only to indicate how to assign labels to each target
|
||||
# modify yours accordingly
|
||||
scrape_configs:
|
||||
- job_name: metasrv
|
||||
static_configs:
|
||||
- targets: ['<metasrv-ip>:<port>']
|
||||
|
||||
- job_name: datanode
|
||||
static_configs:
|
||||
- targets: ['<datanode0-ip>:<port>', '<datanode1-ip>:<port>', '<datanode2-ip>:<port>']
|
||||
|
||||
- job_name: frontend
|
||||
static_configs:
|
||||
- targets: ['<frontend-ip>:<port>']
|
||||
```
|
||||
|
||||
__2. Grafana config__
|
||||
|
||||
Create a Prometheus data source in Grafana before using this dashboard. We use `datasource` as a variable in Grafana dashboard so that multiple environments are supported.
|
||||
|
||||
### Usage
|
||||
|
||||
Use `datasource` or `instance` on the upper-left corner to filter data from certain node.
|
||||
- **Cluster**: Import the `dashboards/metrics/cluster/dashboard.json` dashboard.
|
||||
- **Standalone**: Import the `dashboards/metrics/standalone/dashboard.json` dashboard.
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
BASEDIR=$(dirname "$0")
|
||||
|
||||
# Use jq to check for panels with empty or missing descriptions
|
||||
invalid_panels=$(cat $BASEDIR/greptimedb-cluster.json | jq -r '
|
||||
.panels[]
|
||||
| select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))
|
||||
')
|
||||
|
||||
# Check if any invalid panels were found
|
||||
if [[ -n "$invalid_panels" ]]; then
|
||||
echo "Error: The following panels have empty or missing descriptions:"
|
||||
echo "$invalid_panels"
|
||||
exit 1
|
||||
else
|
||||
echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
|
||||
exit 0
|
||||
fi
|
||||
292
grafana/dashboards/logs/dashboard.json
Normal file
292
grafana/dashboards/logs/dashboard.json
Normal file
@@ -0,0 +1,292 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": 12,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"default": false,
|
||||
"type": "mysql",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 20,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"dedupStrategy": "none",
|
||||
"enableInfiniteScrolling": true,
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": false,
|
||||
"showCommonLabels": false,
|
||||
"showLabels": false,
|
||||
"showTime": true,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLogMessage": false
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"dataset": "greptime_private",
|
||||
"datasource": {
|
||||
"type": "mysql",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"format": "table",
|
||||
"rawQuery": true,
|
||||
"rawSql": "SELECT `timestamp`, CONCAT('[', `level`, ']', ' ', '<', `target`, '>', ' ', `message`),\n `role`,\n `pod`,\n `pod_ip`,\n `namespace`,\n `cluster`,\n `err`,\n `file`,\n `module_path`\nFROM\n `_gt_logs`\nWHERE\n (\n \"$level\" = \"'all'\"\n OR `level` IN ($level)\n ) \n AND (\n \"$role\" = \"'all'\"\n OR `role` IN ($role)\n )\n AND (\n \"$pod\" = \"\"\n OR `pod` = '$pod'\n )\n AND (\n \"$target\" = \"\"\n OR `target` = '$target'\n )\n AND (\n \"$search\" = \"\"\n OR matches_term(`message`, '$search')\n )\n AND (\n \"$exclude\" = \"\"\n OR NOT matches_term(`message`, '$exclude')\n )\n AND $__timeFilter(`timestamp`)\nORDER BY `timestamp` DESC\nLIMIT $limit;\n",
|
||||
"refId": "A",
|
||||
"sql": {
|
||||
"columns": [
|
||||
{
|
||||
"parameters": [],
|
||||
"type": "function"
|
||||
}
|
||||
],
|
||||
"groupBy": [
|
||||
{
|
||||
"property": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "groupBy"
|
||||
}
|
||||
],
|
||||
"limit": 50
|
||||
}
|
||||
}
|
||||
],
|
||||
"title": "Logs",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"preload": false,
|
||||
"refresh": "",
|
||||
"schemaVersion": 41,
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "logs",
|
||||
"value": "P98F38F12DB221A8C"
|
||||
},
|
||||
"includeAll": false,
|
||||
"name": "datasource",
|
||||
"options": [],
|
||||
"query": "mysql",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": "'all'",
|
||||
"current": {
|
||||
"text": [
|
||||
"$__all"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "level",
|
||||
"multi": true,
|
||||
"name": "level",
|
||||
"options": [
|
||||
{
|
||||
"selected": false,
|
||||
"text": "INFO",
|
||||
"value": "INFO"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "ERROR",
|
||||
"value": "ERROR"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "WARN",
|
||||
"value": "WARN"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "DEBUG",
|
||||
"value": "DEBUG"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "TRACE",
|
||||
"value": "TRACE"
|
||||
}
|
||||
],
|
||||
"query": "INFO,ERROR,WARN,DEBUG,TRACE",
|
||||
"type": "custom"
|
||||
},
|
||||
{
|
||||
"allValue": "'all'",
|
||||
"current": {
|
||||
"text": [
|
||||
"$__all"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "role",
|
||||
"multi": true,
|
||||
"name": "role",
|
||||
"options": [
|
||||
{
|
||||
"selected": false,
|
||||
"text": "datanode",
|
||||
"value": "datanode"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "frontend",
|
||||
"value": "frontend"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "meta",
|
||||
"value": "meta"
|
||||
}
|
||||
],
|
||||
"query": "datanode,frontend,meta",
|
||||
"type": "custom"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"label": "pod",
|
||||
"name": "pod",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"type": "textbox"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"label": "target",
|
||||
"name": "target",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"type": "textbox"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"label": "search",
|
||||
"name": "search",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"type": "textbox"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"label": "exclude",
|
||||
"name": "exclude",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"type": "textbox"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"text": "2000",
|
||||
"value": "2000"
|
||||
},
|
||||
"includeAll": false,
|
||||
"label": "limit",
|
||||
"name": "limit",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "2000",
|
||||
"value": "2000"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "5000",
|
||||
"value": "5000"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "8000",
|
||||
"value": "8000"
|
||||
}
|
||||
],
|
||||
"query": "2000,5000,8000",
|
||||
"type": "custom"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "browser",
|
||||
"title": "GreptimeDB Logs",
|
||||
"uid": "edx5veo4rd3wge2",
|
||||
"version": 1
|
||||
}
|
||||
7193
grafana/dashboards/metrics/cluster/dashboard.json
Normal file
7193
grafana/dashboards/metrics/cluster/dashboard.json
Normal file
File diff suppressed because it is too large
Load Diff
97
grafana/dashboards/metrics/cluster/dashboard.md
Normal file
97
grafana/dashboards/metrics/cluster/dashboard.md
Normal file
@@ -0,0 +1,97 @@
|
||||
# Overview
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Uptime | `time() - process_start_time_seconds` | `stat` | The start time of GreptimeDB. | `prometheus` | `s` | `__auto` |
|
||||
| Version | `SELECT pkg_version FROM information_schema.build_info` | `stat` | GreptimeDB version. | `mysql` | -- | -- |
|
||||
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))` | `stat` | Total ingestion rate. | `prometheus` | `rowsps` | `__auto` |
|
||||
| Total Storage Size | `select SUM(disk_size) from information_schema.region_statistics;` | `stat` | Total number of data file size. | `mysql` | `decbytes` | -- |
|
||||
| Total Rows | `select SUM(region_rows) from information_schema.region_statistics;` | `stat` | Total number of data rows in the cluster. Calculated by sum of rows from each region. | `mysql` | `sishort` | -- |
|
||||
| Deployment | `SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';`<br/>`SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';`<br/>`SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';`<br/>`SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';` | `stat` | The deployment topology of GreptimeDB. | `mysql` | -- | -- |
|
||||
| Database Resources | `SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')`<br/>`SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'`<br/>`SELECT COUNT(region_id) as regions FROM information_schema.region_peers`<br/>`SELECT COUNT(*) as flows FROM information_schema.flows` | `stat` | The number of the key resources in GreptimeDB. | `mysql` | -- | -- |
|
||||
| Data Size | `SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;`<br/>`SELECT SUM(index_size) as index FROM information_schema.region_statistics;`<br/>`SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;` | `stat` | The data size of wal/index/manifest in the GreptimeDB. | `mysql` | `decbytes` | -- |
|
||||
# Ingestion
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `ingestion` |
|
||||
| Ingestion Rate by Type | `sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))`<br/>`sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `http-logs` |
|
||||
# Queries
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Total Query Rate | `sum (rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_http_promql_elapsed_counte{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Total rate of query API calls by protocol. This metric is collected from frontends.<br/><br/>Here we listed 3 main protocols:<br/>- MySQL<br/>- Postgres<br/>- Prometheus API<br/><br/>Note that there are some other minor query APIs like /sql are not included | `prometheus` | `reqps` | `mysql` |
|
||||
# Resources
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Datanode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{instance}}]-[{{ pod }}]` |
|
||||
| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Frontend Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
|
||||
| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
|
||||
| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Flownode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
# Frontend Requests
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| HTTP QPS per Instance | `sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{instance=~"$frontend",path!~"/health\|/metrics"}[$__rate_interval]))` | `timeseries` | HTTP QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]` |
|
||||
| HTTP P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{instance=~"$frontend",path!~"/health\|/metrics"}[$__rate_interval])))` | `timeseries` | HTTP P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
|
||||
| gRPC QPS per Instance | `sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | gRPC QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]` |
|
||||
| gRPC P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | gRPC P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
|
||||
| MySQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | MySQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
|
||||
| MySQL P99 per Instance | `histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | MySQL P99 per Instance. | `prometheus` | `s` | `[{{ instance }}]-[{{ pod }}]-p99` |
|
||||
| PostgreSQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | PostgreSQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
|
||||
| PostgreSQL P99 per Instance | `histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | PostgreSQL P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
|
||||
# Frontend to Datanode
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Ingest Rows per Instance | `sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Ingestion rate by row as in each frontend | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
|
||||
| Region Call QPS per Instance | `sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Region Call QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
|
||||
| Region Call P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | Region Call P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
|
||||
# Mito Engine
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Request OPS per Instance | `sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Request QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
|
||||
| Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
|
||||
| Write Buffer per Instance | `greptime_mito_write_buffer_bytes{instance=~"$datanode"}` | `timeseries` | Write Buffer per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]` |
|
||||
| Write Rows per Instance | `sum by (instance, pod) (rate(greptime_mito_write_rows_total{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Ingestion size by row counts. | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
|
||||
| Flush OPS per Instance | `sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Flush QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{reason}}]` |
|
||||
| Write Stall per Instance | `sum by(instance, pod) (greptime_mito_write_stall_total{instance=~"$datanode"})` | `timeseries` | Write Stall per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
|
||||
| Read Stage OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{instance=~"$datanode", stage="total"}[$__rate_interval]))` | `timeseries` | Read Stage OPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]` |
|
||||
| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
|
||||
| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
|
||||
| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
|
||||
| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
|
||||
| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
|
||||
| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
|
||||
| Cached Bytes per Instance | `greptime_mito_cache_bytes{instance=~"$datanode"}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
|
||||
| Inflight Compaction | `greptime_mito_inflight_compaction_count` | `timeseries` | Ongoing compaction task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
|
||||
| WAL sync duration seconds | `histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))` | `timeseries` | Raft engine (local disk) log store sync latency, p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
|
||||
| Log Store op duration seconds | `histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))` | `timeseries` | Write-ahead log operations latency at p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99` |
|
||||
| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
|
||||
# OpenDAL
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
|
||||
| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
|
||||
| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode",operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode", error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
|
||||
# Metasrv
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `state-timeline` | Counter of region migration by source and destination | `prometheus` | `none` | `from-datanode-{{datanode_id}}` |
|
||||
| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `prometheus` | `none` | `__auto` |
|
||||
| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `__auto` |
|
||||
# Flownode
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | `prometheus` | -- | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
|
||||
| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-p95` |
|
||||
| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
|
||||
| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}]` |
|
||||
| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |
|
||||
769
grafana/dashboards/metrics/cluster/dashboard.yaml
Normal file
769
grafana/dashboards/metrics/cluster/dashboard.yaml
Normal file
@@ -0,0 +1,769 @@
|
||||
groups:
|
||||
- title: Overview
|
||||
panels:
|
||||
- title: Uptime
|
||||
type: stat
|
||||
description: The start time of GreptimeDB.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: time() - process_start_time_seconds
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Version
|
||||
type: stat
|
||||
description: GreptimeDB version.
|
||||
queries:
|
||||
- expr: SELECT pkg_version FROM information_schema.build_info
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- title: Total Ingestion Rate
|
||||
type: stat
|
||||
description: Total ingestion rate.
|
||||
unit: rowsps
|
||||
queries:
|
||||
- expr: sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Total Storage Size
|
||||
type: stat
|
||||
description: Total number of data file size.
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: select SUM(disk_size) from information_schema.region_statistics;
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- title: Total Rows
|
||||
type: stat
|
||||
description: Total number of data rows in the cluster. Calculated by sum of rows from each region.
|
||||
unit: sishort
|
||||
queries:
|
||||
- expr: select SUM(region_rows) from information_schema.region_statistics;
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- title: Deployment
|
||||
type: stat
|
||||
description: The deployment topology of GreptimeDB.
|
||||
queries:
|
||||
- expr: SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- title: Database Resources
|
||||
type: stat
|
||||
description: The number of the key resources in GreptimeDB.
|
||||
queries:
|
||||
- expr: SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT COUNT(region_id) as regions FROM information_schema.region_peers
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT COUNT(*) as flows FROM information_schema.flows
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- title: Data Size
|
||||
type: stat
|
||||
description: The data size of wal/index/manifest in the GreptimeDB.
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT SUM(index_size) as index FROM information_schema.region_statistics;
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- title: Ingestion
|
||||
panels:
|
||||
- title: Total Ingestion Rate
|
||||
type: timeseries
|
||||
description: |
|
||||
Total ingestion rate.
|
||||
|
||||
Here we listed 3 primary protocols:
|
||||
|
||||
- Prometheus remote write
|
||||
- Greptime's gRPC API (when using our ingest SDK)
|
||||
- Log ingestion http API
|
||||
unit: rowsps
|
||||
queries:
|
||||
- expr: sum(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: ingestion
|
||||
- title: Ingestion Rate by Type
|
||||
type: timeseries
|
||||
description: |
|
||||
Total ingestion rate.
|
||||
|
||||
Here we listed 3 primary protocols:
|
||||
|
||||
- Prometheus remote write
|
||||
- Greptime's gRPC API (when using our ingest SDK)
|
||||
- Log ingestion http API
|
||||
unit: rowsps
|
||||
queries:
|
||||
- expr: sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: http-logs
|
||||
- expr: sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: prometheus-remote-write
|
||||
- title: Queries
|
||||
panels:
|
||||
- title: Total Query Rate
|
||||
type: timeseries
|
||||
description: |-
|
||||
Total rate of query API calls by protocol. This metric is collected from frontends.
|
||||
|
||||
Here we listed 3 main protocols:
|
||||
- MySQL
|
||||
- Postgres
|
||||
- Prometheus API
|
||||
|
||||
Note that there are some other minor query APIs like /sql are not included
|
||||
unit: reqps
|
||||
queries:
|
||||
- expr: sum (rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: mysql
|
||||
- expr: sum (rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: pg
|
||||
- expr: sum (rate(greptime_servers_http_promql_elapsed_counte{instance=~"$frontend"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: promql
|
||||
- title: Resources
|
||||
panels:
|
||||
- title: Datanode Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{ pod }}]'
|
||||
- title: Datanode CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
unit: none
|
||||
queries:
|
||||
- expr: sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- title: Frontend Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- title: Frontend CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
unit: none
|
||||
queries:
|
||||
- expr: sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
|
||||
- title: Metasrv Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
|
||||
- title: Metasrv CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
unit: none
|
||||
queries:
|
||||
- expr: sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- title: Flownode Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- title: Flownode CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
unit: none
|
||||
queries:
|
||||
- expr: sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- title: Frontend Requests
|
||||
panels:
|
||||
- title: HTTP QPS per Instance
|
||||
type: timeseries
|
||||
description: HTTP QPS per Instance.
|
||||
unit: reqps
|
||||
queries:
|
||||
- expr: sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{instance=~"$frontend",path!~"/health|/metrics"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]'
|
||||
- title: HTTP P99 per Instance
|
||||
type: timeseries
|
||||
description: HTTP P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{instance=~"$frontend",path!~"/health|/metrics"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
|
||||
- title: gRPC QPS per Instance
|
||||
type: timeseries
|
||||
description: gRPC QPS per Instance.
|
||||
unit: reqps
|
||||
queries:
|
||||
- expr: sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]'
|
||||
- title: gRPC P99 per Instance
|
||||
type: timeseries
|
||||
description: gRPC P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
|
||||
- title: MySQL QPS per Instance
|
||||
type: timeseries
|
||||
description: MySQL QPS per Instance.
|
||||
unit: reqps
|
||||
queries:
|
||||
- expr: sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: MySQL P99 per Instance
|
||||
type: timeseries
|
||||
description: MySQL P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]-p99'
|
||||
- title: PostgreSQL QPS per Instance
|
||||
type: timeseries
|
||||
description: PostgreSQL QPS per Instance.
|
||||
unit: reqps
|
||||
queries:
|
||||
- expr: sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: PostgreSQL P99 per Instance
|
||||
type: timeseries
|
||||
description: PostgreSQL P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
|
||||
- title: Frontend to Datanode
|
||||
panels:
|
||||
- title: Ingest Rows per Instance
|
||||
type: timeseries
|
||||
description: Ingestion rate by row as in each frontend
|
||||
unit: rowsps
|
||||
queries:
|
||||
- expr: sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: Region Call QPS per Instance
|
||||
type: timeseries
|
||||
description: Region Call QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{instance=~"$frontend"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
|
||||
- title: Region Call P99 per Instance
|
||||
type: timeseries
|
||||
description: Region Call P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{instance=~"$frontend"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
|
||||
- title: Mito Engine
|
||||
panels:
|
||||
- title: Request OPS per Instance
|
||||
type: timeseries
|
||||
description: Request QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{instance=~"$datanode"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
|
||||
- title: Request P99 per Instance
|
||||
type: timeseries
|
||||
description: Request P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
|
||||
- title: Write Buffer per Instance
|
||||
type: timeseries
|
||||
description: Write Buffer per Instance.
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: greptime_mito_write_buffer_bytes{instance=~"$datanode"}
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: Write Rows per Instance
|
||||
type: timeseries
|
||||
description: Ingestion size by row counts.
|
||||
unit: rowsps
|
||||
queries:
|
||||
- expr: sum by (instance, pod) (rate(greptime_mito_write_rows_total{instance=~"$datanode"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: Flush OPS per Instance
|
||||
type: timeseries
|
||||
description: Flush QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{instance=~"$datanode"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{reason}}]'
|
||||
- title: Write Stall per Instance
|
||||
type: timeseries
|
||||
description: Write Stall per Instance.
|
||||
queries:
|
||||
- expr: sum by(instance, pod) (greptime_mito_write_stall_total{instance=~"$datanode"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: Read Stage OPS per Instance
|
||||
type: timeseries
|
||||
description: Read Stage OPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{instance=~"$datanode", stage="total"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: Read Stage P99 per Instance
|
||||
type: timeseries
|
||||
description: Read Stage P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
|
||||
- title: Write Stage P99 per Instance
|
||||
type: timeseries
|
||||
description: Write Stage P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
|
||||
- title: Compaction OPS per Instance
|
||||
type: timeseries
|
||||
description: Compaction OPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{pod}}]'
|
||||
- title: Compaction P99 per Instance by Stage
|
||||
type: timeseries
|
||||
description: Compaction latency by stage
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
|
||||
- title: Compaction P99 per Instance
|
||||
type: timeseries
|
||||
description: Compaction P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction'
|
||||
- title: WAL write size
|
||||
type: timeseries
|
||||
description: Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate.
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p95'
|
||||
- expr: histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p99'
|
||||
- expr: sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-throughput'
|
||||
- title: Cached Bytes per Instance
|
||||
type: timeseries
|
||||
description: Cached Bytes per Instance.
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: greptime_mito_cache_bytes{instance=~"$datanode"}
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
|
||||
- title: Inflight Compaction
|
||||
type: timeseries
|
||||
description: Ongoing compaction task count
|
||||
unit: none
|
||||
queries:
|
||||
- expr: greptime_mito_inflight_compaction_count
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: WAL sync duration seconds
|
||||
type: timeseries
|
||||
description: Raft engine (local disk) log store sync latency, p99
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
|
||||
- title: Log Store op duration seconds
|
||||
type: timeseries
|
||||
description: Write-ahead log operations latency at p99
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99'
|
||||
- title: Inflight Flush
|
||||
type: timeseries
|
||||
description: Ongoing flush task count
|
||||
unit: none
|
||||
queries:
|
||||
- expr: greptime_mito_inflight_flush_count
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: OpenDAL
|
||||
panels:
|
||||
- title: QPS per Instance
|
||||
type: timeseries
|
||||
description: QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
- title: Read QPS per Instance
|
||||
type: timeseries
|
||||
description: Read QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
|
||||
- title: Read P99 per Instance
|
||||
type: timeseries
|
||||
description: Read P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
|
||||
- title: Write QPS per Instance
|
||||
type: timeseries
|
||||
description: Write QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
|
||||
- title: Write P99 per Instance
|
||||
type: timeseries
|
||||
description: Write P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
|
||||
- title: List QPS per Instance
|
||||
type: timeseries
|
||||
description: List QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="list"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
|
||||
- title: List P99 per Instance
|
||||
type: timeseries
|
||||
description: List P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="list"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
|
||||
- title: Other Requests per Instance
|
||||
type: timeseries
|
||||
description: Other Requests per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode",operation!~"read|write|list|stat"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
- title: Other Request P99 per Instance
|
||||
type: timeseries
|
||||
description: Other Request P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read|write|list"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
- title: Opendal traffic
|
||||
type: timeseries
|
||||
description: Total traffic as in bytes by instance and operation
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
- title: OpenDAL errors per Instance
|
||||
type: timeseries
|
||||
description: OpenDAL error counts per Instance.
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode", error!="NotFound"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
|
||||
- title: Metasrv
|
||||
panels:
|
||||
- title: Region migration datanode
|
||||
type: state-timeline
|
||||
description: Counter of region migration by source and destination
|
||||
unit: none
|
||||
queries:
|
||||
- expr: greptime_meta_region_migration_stat{datanode_type="src"}
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: from-datanode-{{datanode_id}}
|
||||
- expr: greptime_meta_region_migration_stat{datanode_type="desc"}
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: to-datanode-{{datanode_id}}
|
||||
- title: Region migration error
|
||||
type: timeseries
|
||||
description: Counter of region migration error
|
||||
unit: none
|
||||
queries:
|
||||
- expr: greptime_meta_region_migration_error
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Datanode load
|
||||
type: timeseries
|
||||
description: Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads.
|
||||
unit: none
|
||||
queries:
|
||||
- expr: greptime_datanode_load
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Flownode
|
||||
panels:
|
||||
- title: Flow Ingest / Output Rate
|
||||
type: timeseries
|
||||
description: Flow Ingest / Output Rate.
|
||||
queries:
|
||||
- expr: sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{pod}}]-[{{instance}}]-[{{direction}}]'
|
||||
- title: Flow Ingest Latency
|
||||
type: timeseries
|
||||
description: Flow Ingest Latency.
|
||||
queries:
|
||||
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-p95'
|
||||
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
|
||||
- title: Flow Operation Latency
|
||||
type: timeseries
|
||||
description: Flow Operation Latency.
|
||||
queries:
|
||||
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p95'
|
||||
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p99'
|
||||
- title: Flow Buffer Size per Instance
|
||||
type: timeseries
|
||||
description: Flow Buffer Size per Instance.
|
||||
queries:
|
||||
- expr: greptime_flow_input_buf_size
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}]'
|
||||
- title: Flow Processing Error per Instance
|
||||
type: timeseries
|
||||
description: Flow Processing Error per Instance.
|
||||
queries:
|
||||
- expr: sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'
|
||||
7193
grafana/dashboards/metrics/standalone/dashboard.json
Normal file
7193
grafana/dashboards/metrics/standalone/dashboard.json
Normal file
File diff suppressed because it is too large
Load Diff
97
grafana/dashboards/metrics/standalone/dashboard.md
Normal file
97
grafana/dashboards/metrics/standalone/dashboard.md
Normal file
@@ -0,0 +1,97 @@
|
||||
# Overview
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Uptime | `time() - process_start_time_seconds` | `stat` | The start time of GreptimeDB. | `prometheus` | `s` | `__auto` |
|
||||
| Version | `SELECT pkg_version FROM information_schema.build_info` | `stat` | GreptimeDB version. | `mysql` | -- | -- |
|
||||
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))` | `stat` | Total ingestion rate. | `prometheus` | `rowsps` | `__auto` |
|
||||
| Total Storage Size | `select SUM(disk_size) from information_schema.region_statistics;` | `stat` | Total number of data file size. | `mysql` | `decbytes` | -- |
|
||||
| Total Rows | `select SUM(region_rows) from information_schema.region_statistics;` | `stat` | Total number of data rows in the cluster. Calculated by sum of rows from each region. | `mysql` | `sishort` | -- |
|
||||
| Deployment | `SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';`<br/>`SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';`<br/>`SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';`<br/>`SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';` | `stat` | The deployment topology of GreptimeDB. | `mysql` | -- | -- |
|
||||
| Database Resources | `SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')`<br/>`SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'`<br/>`SELECT COUNT(region_id) as regions FROM information_schema.region_peers`<br/>`SELECT COUNT(*) as flows FROM information_schema.flows` | `stat` | The number of the key resources in GreptimeDB. | `mysql` | -- | -- |
|
||||
| Data Size | `SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;`<br/>`SELECT SUM(index_size) as index FROM information_schema.region_statistics;`<br/>`SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;` | `stat` | The data size of wal/index/manifest in the GreptimeDB. | `mysql` | `decbytes` | -- |
|
||||
# Ingestion
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `ingestion` |
|
||||
| Ingestion Rate by Type | `sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))`<br/>`sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `http-logs` |
|
||||
# Queries
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Total Query Rate | `sum (rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_http_promql_elapsed_counte{}[$__rate_interval]))` | `timeseries` | Total rate of query API calls by protocol. This metric is collected from frontends.<br/><br/>Here we listed 3 main protocols:<br/>- MySQL<br/>- Postgres<br/>- Prometheus API<br/><br/>Note that there are some other minor query APIs like /sql are not included | `prometheus` | `reqps` | `mysql` |
|
||||
# Resources
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Datanode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{instance}}]-[{{ pod }}]` |
|
||||
| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Frontend Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
|
||||
| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
|
||||
| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Flownode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
# Frontend Requests
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| HTTP QPS per Instance | `sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{path!~"/health\|/metrics"}[$__rate_interval]))` | `timeseries` | HTTP QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]` |
|
||||
| HTTP P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{path!~"/health\|/metrics"}[$__rate_interval])))` | `timeseries` | HTTP P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
|
||||
| gRPC QPS per Instance | `sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{}[$__rate_interval]))` | `timeseries` | gRPC QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]` |
|
||||
| gRPC P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | gRPC P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
|
||||
| MySQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))` | `timeseries` | MySQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
|
||||
| MySQL P99 per Instance | `histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | MySQL P99 per Instance. | `prometheus` | `s` | `[{{ instance }}]-[{{ pod }}]-p99` |
|
||||
| PostgreSQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))` | `timeseries` | PostgreSQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
|
||||
| PostgreSQL P99 per Instance | `histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | PostgreSQL P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
|
||||
# Frontend to Datanode
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Ingest Rows per Instance | `sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))` | `timeseries` | Ingestion rate by row as in each frontend | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
|
||||
| Region Call QPS per Instance | `sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{}[$__rate_interval]))` | `timeseries` | Region Call QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
|
||||
| Region Call P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{}[$__rate_interval])))` | `timeseries` | Region Call P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
|
||||
# Mito Engine
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Request OPS per Instance | `sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{}[$__rate_interval]))` | `timeseries` | Request QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
|
||||
| Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
|
||||
| Write Buffer per Instance | `greptime_mito_write_buffer_bytes{}` | `timeseries` | Write Buffer per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]` |
|
||||
| Write Rows per Instance | `sum by (instance, pod) (rate(greptime_mito_write_rows_total{}[$__rate_interval]))` | `timeseries` | Ingestion size by row counts. | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
|
||||
| Flush OPS per Instance | `sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{}[$__rate_interval]))` | `timeseries` | Flush QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{reason}}]` |
|
||||
| Write Stall per Instance | `sum by(instance, pod) (greptime_mito_write_stall_total{})` | `timeseries` | Write Stall per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
|
||||
| Read Stage OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{ stage="total"}[$__rate_interval]))` | `timeseries` | Read Stage OPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]` |
|
||||
| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
|
||||
| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
|
||||
| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
|
||||
| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
|
||||
| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
|
||||
| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
|
||||
| Cached Bytes per Instance | `greptime_mito_cache_bytes{}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
|
||||
| Inflight Compaction | `greptime_mito_inflight_compaction_count` | `timeseries` | Ongoing compaction task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
|
||||
| WAL sync duration seconds | `histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))` | `timeseries` | Raft engine (local disk) log store sync latency, p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
|
||||
| Log Store op duration seconds | `histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))` | `timeseries` | Write-ahead log operations latency at p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99` |
|
||||
| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
|
||||
# OpenDAL
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
|
||||
| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
|
||||
| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{ error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
|
||||
# Metasrv
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `state-timeline` | Counter of region migration by source and destination | `prometheus` | `none` | `from-datanode-{{datanode_id}}` |
|
||||
| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `prometheus` | `none` | `__auto` |
|
||||
| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `__auto` |
|
||||
# Flownode
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | `prometheus` | -- | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
|
||||
| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-p95` |
|
||||
| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
|
||||
| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}]` |
|
||||
| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |
|
||||
769
grafana/dashboards/metrics/standalone/dashboard.yaml
Normal file
769
grafana/dashboards/metrics/standalone/dashboard.yaml
Normal file
@@ -0,0 +1,769 @@
|
||||
groups:
|
||||
- title: Overview
|
||||
panels:
|
||||
- title: Uptime
|
||||
type: stat
|
||||
description: The start time of GreptimeDB.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: time() - process_start_time_seconds
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Version
|
||||
type: stat
|
||||
description: GreptimeDB version.
|
||||
queries:
|
||||
- expr: SELECT pkg_version FROM information_schema.build_info
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- title: Total Ingestion Rate
|
||||
type: stat
|
||||
description: Total ingestion rate.
|
||||
unit: rowsps
|
||||
queries:
|
||||
- expr: sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Total Storage Size
|
||||
type: stat
|
||||
description: Total number of data file size.
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: select SUM(disk_size) from information_schema.region_statistics;
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- title: Total Rows
|
||||
type: stat
|
||||
description: Total number of data rows in the cluster. Calculated by sum of rows from each region.
|
||||
unit: sishort
|
||||
queries:
|
||||
- expr: select SUM(region_rows) from information_schema.region_statistics;
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- title: Deployment
|
||||
type: stat
|
||||
description: The deployment topology of GreptimeDB.
|
||||
queries:
|
||||
- expr: SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- title: Database Resources
|
||||
type: stat
|
||||
description: The number of the key resources in GreptimeDB.
|
||||
queries:
|
||||
- expr: SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT COUNT(region_id) as regions FROM information_schema.region_peers
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT COUNT(*) as flows FROM information_schema.flows
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- title: Data Size
|
||||
type: stat
|
||||
description: The data size of wal/index/manifest in the GreptimeDB.
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT SUM(index_size) as index FROM information_schema.region_statistics;
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- expr: SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;
|
||||
datasource:
|
||||
type: mysql
|
||||
uid: ${information_schema}
|
||||
- title: Ingestion
|
||||
panels:
|
||||
- title: Total Ingestion Rate
|
||||
type: timeseries
|
||||
description: |
|
||||
Total ingestion rate.
|
||||
|
||||
Here we listed 3 primary protocols:
|
||||
|
||||
- Prometheus remote write
|
||||
- Greptime's gRPC API (when using our ingest SDK)
|
||||
- Log ingestion http API
|
||||
unit: rowsps
|
||||
queries:
|
||||
- expr: sum(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: ingestion
|
||||
- title: Ingestion Rate by Type
|
||||
type: timeseries
|
||||
description: |
|
||||
Total ingestion rate.
|
||||
|
||||
Here we listed 3 primary protocols:
|
||||
|
||||
- Prometheus remote write
|
||||
- Greptime's gRPC API (when using our ingest SDK)
|
||||
- Log ingestion http API
|
||||
unit: rowsps
|
||||
queries:
|
||||
- expr: sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: http-logs
|
||||
- expr: sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: prometheus-remote-write
|
||||
- title: Queries
|
||||
panels:
|
||||
- title: Total Query Rate
|
||||
type: timeseries
|
||||
description: |-
|
||||
Total rate of query API calls by protocol. This metric is collected from frontends.
|
||||
|
||||
Here we listed 3 main protocols:
|
||||
- MySQL
|
||||
- Postgres
|
||||
- Prometheus API
|
||||
|
||||
Note that there are some other minor query APIs like /sql are not included
|
||||
unit: reqps
|
||||
queries:
|
||||
- expr: sum (rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: mysql
|
||||
- expr: sum (rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: pg
|
||||
- expr: sum (rate(greptime_servers_http_promql_elapsed_counte{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: promql
|
||||
- title: Resources
|
||||
panels:
|
||||
- title: Datanode Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{ pod }}]'
|
||||
- title: Datanode CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
unit: none
|
||||
queries:
|
||||
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- title: Frontend Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- title: Frontend CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
unit: none
|
||||
queries:
|
||||
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
|
||||
- title: Metasrv Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
|
||||
- title: Metasrv CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
unit: none
|
||||
queries:
|
||||
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- title: Flownode Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- title: Flownode CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
unit: none
|
||||
queries:
|
||||
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- title: Frontend Requests
|
||||
panels:
|
||||
- title: HTTP QPS per Instance
|
||||
type: timeseries
|
||||
description: HTTP QPS per Instance.
|
||||
unit: reqps
|
||||
queries:
|
||||
- expr: sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{path!~"/health|/metrics"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]'
|
||||
- title: HTTP P99 per Instance
|
||||
type: timeseries
|
||||
description: HTTP P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{path!~"/health|/metrics"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
|
||||
- title: gRPC QPS per Instance
|
||||
type: timeseries
|
||||
description: gRPC QPS per Instance.
|
||||
unit: reqps
|
||||
queries:
|
||||
- expr: sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]'
|
||||
- title: gRPC P99 per Instance
|
||||
type: timeseries
|
||||
description: gRPC P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
|
||||
- title: MySQL QPS per Instance
|
||||
type: timeseries
|
||||
description: MySQL QPS per Instance.
|
||||
unit: reqps
|
||||
queries:
|
||||
- expr: sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: MySQL P99 per Instance
|
||||
type: timeseries
|
||||
description: MySQL P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]-p99'
|
||||
- title: PostgreSQL QPS per Instance
|
||||
type: timeseries
|
||||
description: PostgreSQL QPS per Instance.
|
||||
unit: reqps
|
||||
queries:
|
||||
- expr: sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: PostgreSQL P99 per Instance
|
||||
type: timeseries
|
||||
description: PostgreSQL P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
|
||||
- title: Frontend to Datanode
|
||||
panels:
|
||||
- title: Ingest Rows per Instance
|
||||
type: timeseries
|
||||
description: Ingestion rate by row as in each frontend
|
||||
unit: rowsps
|
||||
queries:
|
||||
- expr: sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: Region Call QPS per Instance
|
||||
type: timeseries
|
||||
description: Region Call QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
|
||||
- title: Region Call P99 per Instance
|
||||
type: timeseries
|
||||
description: Region Call P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
|
||||
- title: Mito Engine
|
||||
panels:
|
||||
- title: Request OPS per Instance
|
||||
type: timeseries
|
||||
description: Request QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
|
||||
- title: Request P99 per Instance
|
||||
type: timeseries
|
||||
description: Request P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
|
||||
- title: Write Buffer per Instance
|
||||
type: timeseries
|
||||
description: Write Buffer per Instance.
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: greptime_mito_write_buffer_bytes{}
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: Write Rows per Instance
|
||||
type: timeseries
|
||||
description: Ingestion size by row counts.
|
||||
unit: rowsps
|
||||
queries:
|
||||
- expr: sum by (instance, pod) (rate(greptime_mito_write_rows_total{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: Flush OPS per Instance
|
||||
type: timeseries
|
||||
description: Flush QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{reason}}]'
|
||||
- title: Write Stall per Instance
|
||||
type: timeseries
|
||||
description: Write Stall per Instance.
|
||||
queries:
|
||||
- expr: sum by(instance, pod) (greptime_mito_write_stall_total{})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: Read Stage OPS per Instance
|
||||
type: timeseries
|
||||
description: Read Stage OPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{ stage="total"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: Read Stage P99 per Instance
|
||||
type: timeseries
|
||||
description: Read Stage P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
|
||||
- title: Write Stage P99 per Instance
|
||||
type: timeseries
|
||||
description: Write Stage P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
|
||||
- title: Compaction OPS per Instance
|
||||
type: timeseries
|
||||
description: Compaction OPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{pod}}]'
|
||||
- title: Compaction P99 per Instance by Stage
|
||||
type: timeseries
|
||||
description: Compaction latency by stage
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
|
||||
- title: Compaction P99 per Instance
|
||||
type: timeseries
|
||||
description: Compaction P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction'
|
||||
- title: WAL write size
|
||||
type: timeseries
|
||||
description: Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate.
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p95'
|
||||
- expr: histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p99'
|
||||
- expr: sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-throughput'
|
||||
- title: Cached Bytes per Instance
|
||||
type: timeseries
|
||||
description: Cached Bytes per Instance.
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: greptime_mito_cache_bytes{}
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
|
||||
- title: Inflight Compaction
|
||||
type: timeseries
|
||||
description: Ongoing compaction task count
|
||||
unit: none
|
||||
queries:
|
||||
- expr: greptime_mito_inflight_compaction_count
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: WAL sync duration seconds
|
||||
type: timeseries
|
||||
description: Raft engine (local disk) log store sync latency, p99
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
|
||||
- title: Log Store op duration seconds
|
||||
type: timeseries
|
||||
description: Write-ahead log operations latency at p99
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99'
|
||||
- title: Inflight Flush
|
||||
type: timeseries
|
||||
description: Ongoing flush task count
|
||||
unit: none
|
||||
queries:
|
||||
- expr: greptime_mito_inflight_flush_count
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: OpenDAL
|
||||
panels:
|
||||
- title: QPS per Instance
|
||||
type: timeseries
|
||||
description: QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
- title: Read QPS per Instance
|
||||
type: timeseries
|
||||
description: Read QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
|
||||
- title: Read P99 per Instance
|
||||
type: timeseries
|
||||
description: Read P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
|
||||
- title: Write QPS per Instance
|
||||
type: timeseries
|
||||
description: Write QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
|
||||
- title: Write P99 per Instance
|
||||
type: timeseries
|
||||
description: Write P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
|
||||
- title: List QPS per Instance
|
||||
type: timeseries
|
||||
description: List QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="list"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
|
||||
- title: List P99 per Instance
|
||||
type: timeseries
|
||||
description: List P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="list"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
|
||||
- title: Other Requests per Instance
|
||||
type: timeseries
|
||||
description: Other Requests per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{operation!~"read|write|list|stat"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
- title: Other Request P99 per Instance
|
||||
type: timeseries
|
||||
description: Other Request P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read|write|list"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
- title: Opendal traffic
|
||||
type: timeseries
|
||||
description: Total traffic as in bytes by instance and operation
|
||||
unit: decbytes
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
- title: OpenDAL errors per Instance
|
||||
type: timeseries
|
||||
description: OpenDAL error counts per Instance.
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{ error!="NotFound"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
|
||||
- title: Metasrv
|
||||
panels:
|
||||
- title: Region migration datanode
|
||||
type: state-timeline
|
||||
description: Counter of region migration by source and destination
|
||||
unit: none
|
||||
queries:
|
||||
- expr: greptime_meta_region_migration_stat{datanode_type="src"}
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: from-datanode-{{datanode_id}}
|
||||
- expr: greptime_meta_region_migration_stat{datanode_type="desc"}
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: to-datanode-{{datanode_id}}
|
||||
- title: Region migration error
|
||||
type: timeseries
|
||||
description: Counter of region migration error
|
||||
unit: none
|
||||
queries:
|
||||
- expr: greptime_meta_region_migration_error
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Datanode load
|
||||
type: timeseries
|
||||
description: Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads.
|
||||
unit: none
|
||||
queries:
|
||||
- expr: greptime_datanode_load
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Flownode
|
||||
panels:
|
||||
- title: Flow Ingest / Output Rate
|
||||
type: timeseries
|
||||
description: Flow Ingest / Output Rate.
|
||||
queries:
|
||||
- expr: sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{pod}}]-[{{instance}}]-[{{direction}}]'
|
||||
- title: Flow Ingest Latency
|
||||
type: timeseries
|
||||
description: Flow Ingest Latency.
|
||||
queries:
|
||||
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-p95'
|
||||
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
|
||||
- title: Flow Operation Latency
|
||||
type: timeseries
|
||||
description: Flow Operation Latency.
|
||||
queries:
|
||||
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p95'
|
||||
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p99'
|
||||
- title: Flow Buffer Size per Instance
|
||||
type: timeseries
|
||||
description: Flow Buffer Size per Instance.
|
||||
queries:
|
||||
- expr: greptime_flow_input_buf_size
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}]'
|
||||
- title: Flow Processing Error per Instance
|
||||
type: timeseries
|
||||
description: Flow Processing Error per Instance.
|
||||
queries:
|
||||
- expr: sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
54
grafana/scripts/check.sh
Executable file
54
grafana/scripts/check.sh
Executable file
@@ -0,0 +1,54 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
DASHBOARD_DIR=${1:-grafana/dashboards/metrics}
|
||||
|
||||
check_dashboard_description() {
|
||||
for dashboard in $(find $DASHBOARD_DIR -name "*.json"); do
|
||||
echo "Checking $dashboard description"
|
||||
|
||||
# Use jq to check for panels with empty or missing descriptions
|
||||
invalid_panels=$(cat $dashboard | jq -r '
|
||||
.panels[]
|
||||
| select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))')
|
||||
|
||||
# Check if any invalid panels were found
|
||||
if [[ -n "$invalid_panels" ]]; then
|
||||
echo "Error: The following panels have empty or missing descriptions:"
|
||||
echo "$invalid_panels"
|
||||
exit 1
|
||||
else
|
||||
echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
check_dashboards_generation() {
|
||||
./grafana/scripts/gen-dashboards.sh
|
||||
|
||||
if [[ -n "$(git diff --name-only grafana/dashboards/metrics)" ]]; then
|
||||
echo "Error: The dashboards are not generated correctly. You should execute the `make dashboards` command."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_datasource() {
|
||||
for dashboard in $(find $DASHBOARD_DIR -name "*.json"); do
|
||||
echo "Checking $dashboard datasource"
|
||||
jq -r '.panels[] | select(.type != "row") | .targets[] | [.datasource.type, .datasource.uid] | @tsv' $dashboard | while read -r type uid; do
|
||||
# if the datasource is prometheus, check if the uid is ${metrics}
|
||||
if [[ "$type" == "prometheus" && "$uid" != "\${metrics}" ]]; then
|
||||
echo "Error: The datasource uid of $dashboard is not valid. It should be \${metrics}, got $uid"
|
||||
exit 1
|
||||
fi
|
||||
# if the datasource is mysql, check if the uid is ${information_schema}
|
||||
if [[ "$type" == "mysql" && "$uid" != "\${information_schema}" ]]; then
|
||||
echo "Error: The datasource uid of $dashboard is not valid. It should be \${information_schema}, got $uid"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
done
|
||||
}
|
||||
|
||||
check_dashboards_generation
|
||||
check_dashboard_description
|
||||
check_datasource
|
||||
25
grafana/scripts/gen-dashboards.sh
Executable file
25
grafana/scripts/gen-dashboards.sh
Executable file
@@ -0,0 +1,25 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
CLUSTER_DASHBOARD_DIR=${1:-grafana/dashboards/metrics/cluster}
|
||||
STANDALONE_DASHBOARD_DIR=${2:-grafana/dashboards/metrics/standalone}
|
||||
DAC_IMAGE=ghcr.io/zyy17/dac:20250423-522bd35
|
||||
|
||||
remove_instance_filters() {
|
||||
# Remove the instance filters for the standalone dashboards.
|
||||
sed 's/instance=~\\"$datanode\\",//; s/instance=~\\"$datanode\\"//; s/instance=~\\"$frontend\\",//; s/instance=~\\"$frontend\\"//; s/instance=~\\"$metasrv\\",//; s/instance=~\\"$metasrv\\"//; s/instance=~\\"$flownode\\",//; s/instance=~\\"$flownode\\"//;' $CLUSTER_DASHBOARD_DIR/dashboard.json > $STANDALONE_DASHBOARD_DIR/dashboard.json
|
||||
}
|
||||
|
||||
generate_intermediate_dashboards_and_docs() {
|
||||
docker run -v ${PWD}:/greptimedb --rm ${DAC_IMAGE} \
|
||||
-i /greptimedb/$CLUSTER_DASHBOARD_DIR/dashboard.json \
|
||||
-o /greptimedb/$CLUSTER_DASHBOARD_DIR/dashboard.yaml \
|
||||
-m /greptimedb/$CLUSTER_DASHBOARD_DIR/dashboard.md
|
||||
|
||||
docker run -v ${PWD}:/greptimedb --rm ${DAC_IMAGE} \
|
||||
-i /greptimedb/$STANDALONE_DASHBOARD_DIR/dashboard.json \
|
||||
-o /greptimedb/$STANDALONE_DASHBOARD_DIR/dashboard.yaml \
|
||||
-m /greptimedb/$STANDALONE_DASHBOARD_DIR/dashboard.md
|
||||
}
|
||||
|
||||
remove_instance_filters
|
||||
generate_intermediate_dashboards_and_docs
|
||||
@@ -1,11 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
BASEDIR=$(dirname "$0")
|
||||
echo '| Title | Description | Expressions |
|
||||
|---|---|---|'
|
||||
|
||||
cat $BASEDIR/greptimedb-cluster.json | jq -r '
|
||||
.panels |
|
||||
map(select(.type == "stat" or .type == "timeseries")) |
|
||||
.[] | "| \(.title) | \(.description | gsub("\n"; "<br>")) | \(.targets | map(.expr // .rawSql | "`\(.|gsub("\n"; "<br>"))`") | join("<br>")) |"
|
||||
'
|
||||
@@ -1,2 +1,2 @@
|
||||
[toolchain]
|
||||
channel = "nightly-2024-12-25"
|
||||
channel = "nightly-2025-04-15"
|
||||
|
||||
74
scripts/check-super-imports.py
Normal file
74
scripts/check-super-imports.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# Copyright 2023 Greptime Team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import re
|
||||
from multiprocessing import Pool
|
||||
|
||||
|
||||
def find_rust_files(directory):
|
||||
rust_files = []
|
||||
for root, _, files in os.walk(directory):
|
||||
# Skip files with "test" in the path
|
||||
if "test" in root.lower():
|
||||
continue
|
||||
|
||||
for file in files:
|
||||
# Skip files with "test" in the filename
|
||||
if "test" in file.lower():
|
||||
continue
|
||||
|
||||
if file.endswith(".rs"):
|
||||
rust_files.append(os.path.join(root, file))
|
||||
return rust_files
|
||||
|
||||
|
||||
def check_file_for_super_import(file_path):
|
||||
with open(file_path, "r") as file:
|
||||
lines = file.readlines()
|
||||
|
||||
violations = []
|
||||
for line_number, line in enumerate(lines, 1):
|
||||
# Check for "use super::" without leading tab
|
||||
if line.startswith("use super::"):
|
||||
violations.append((line_number, line.strip()))
|
||||
|
||||
if violations:
|
||||
return file_path, violations
|
||||
return None
|
||||
|
||||
|
||||
def main():
|
||||
rust_files = find_rust_files(".")
|
||||
|
||||
with Pool() as pool:
|
||||
results = pool.map(check_file_for_super_import, rust_files)
|
||||
|
||||
# Filter out None results
|
||||
violations = [result for result in results if result]
|
||||
|
||||
if violations:
|
||||
print("Found 'use super::' without leading tab in the following files:")
|
||||
counter = 1
|
||||
for file_path, file_violations in violations:
|
||||
for line_number, line in file_violations:
|
||||
print(f"{counter:>5} {file_path}:{line_number} - {line}")
|
||||
counter += 1
|
||||
raise SystemExit(1)
|
||||
else:
|
||||
print("No 'use super::' without leading tab found. All files are compliant.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -514,6 +514,7 @@ fn query_request_type(request: &QueryRequest) -> &'static str {
|
||||
Some(Query::Sql(_)) => "query.sql",
|
||||
Some(Query::LogicalPlan(_)) => "query.logical_plan",
|
||||
Some(Query::PromRangeQuery(_)) => "query.prom_range",
|
||||
Some(Query::InsertIntoPlan(_)) => "query.insert_into_plan",
|
||||
None => "query.empty",
|
||||
}
|
||||
}
|
||||
@@ -1049,7 +1050,7 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
|
||||
Value::Int64(v) => Some(ValueData::I64Value(v)),
|
||||
Value::Float32(v) => Some(ValueData::F32Value(*v)),
|
||||
Value::Float64(v) => Some(ValueData::F64Value(*v)),
|
||||
Value::String(v) => Some(ValueData::StringValue(v.as_utf8().to_string())),
|
||||
Value::String(v) => Some(ValueData::StringValue(v.into_string())),
|
||||
Value::Binary(v) => Some(ValueData::BinaryValue(v.to_vec())),
|
||||
Value::Date(v) => Some(ValueData::DateValue(v.val())),
|
||||
Value::Timestamp(v) => Some(match v.unit() {
|
||||
|
||||
@@ -36,7 +36,7 @@ pub fn userinfo_by_name(username: Option<String>) -> UserInfoRef {
|
||||
}
|
||||
|
||||
pub fn user_provider_from_option(opt: &String) -> Result<UserProviderRef> {
|
||||
let (name, content) = opt.split_once(':').context(InvalidConfigSnafu {
|
||||
let (name, content) = opt.split_once(':').with_context(|| InvalidConfigSnafu {
|
||||
value: opt.to_string(),
|
||||
msg: "UserProviderOption must be in format `<option>:<value>`",
|
||||
})?;
|
||||
@@ -57,6 +57,24 @@ pub fn user_provider_from_option(opt: &String) -> Result<UserProviderRef> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn static_user_provider_from_option(opt: &String) -> Result<StaticUserProvider> {
|
||||
let (name, content) = opt.split_once(':').with_context(|| InvalidConfigSnafu {
|
||||
value: opt.to_string(),
|
||||
msg: "UserProviderOption must be in format `<option>:<value>`",
|
||||
})?;
|
||||
match name {
|
||||
STATIC_USER_PROVIDER => {
|
||||
let provider = StaticUserProvider::new(content)?;
|
||||
Ok(provider)
|
||||
}
|
||||
_ => InvalidConfigSnafu {
|
||||
value: name.to_string(),
|
||||
msg: format!("Invalid UserProviderOption, expect only {STATIC_USER_PROVIDER}"),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
type Username<'a> = &'a str;
|
||||
type HostOrIp<'a> = &'a str;
|
||||
|
||||
|
||||
@@ -38,6 +38,14 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to convert to utf8"))]
|
||||
FromUtf8 {
|
||||
#[snafu(source)]
|
||||
error: std::string::FromUtf8Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Authentication source failure"))]
|
||||
AuthBackend {
|
||||
#[snafu(implicit)]
|
||||
@@ -85,7 +93,7 @@ impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
Error::InvalidConfig { .. } => StatusCode::InvalidArguments,
|
||||
Error::IllegalParam { .. } => StatusCode::InvalidArguments,
|
||||
Error::IllegalParam { .. } | Error::FromUtf8 { .. } => StatusCode::InvalidArguments,
|
||||
Error::FileWatch { .. } => StatusCode::InvalidArguments,
|
||||
Error::InternalState { .. } => StatusCode::Unexpected,
|
||||
Error::Io { .. } => StatusCode::StorageUnavailable,
|
||||
|
||||
@@ -22,10 +22,12 @@ mod user_provider;
|
||||
pub mod tests;
|
||||
|
||||
pub use common::{
|
||||
auth_mysql, user_provider_from_option, userinfo_by_name, HashedPassword, Identity, Password,
|
||||
auth_mysql, static_user_provider_from_option, user_provider_from_option, userinfo_by_name,
|
||||
HashedPassword, Identity, Password,
|
||||
};
|
||||
pub use permission::{PermissionChecker, PermissionReq, PermissionResp};
|
||||
pub use user_info::UserInfo;
|
||||
pub use user_provider::static_user_provider::StaticUserProvider;
|
||||
pub use user_provider::UserProvider;
|
||||
|
||||
/// pub type alias
|
||||
|
||||
@@ -15,15 +15,15 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use snafu::OptionExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::error::{InvalidConfigSnafu, Result};
|
||||
use crate::error::{FromUtf8Snafu, InvalidConfigSnafu, Result};
|
||||
use crate::user_provider::{authenticate_with_credential, load_credential_from_file};
|
||||
use crate::{Identity, Password, UserInfoRef, UserProvider};
|
||||
|
||||
pub(crate) const STATIC_USER_PROVIDER: &str = "static_user_provider";
|
||||
|
||||
pub(crate) struct StaticUserProvider {
|
||||
pub struct StaticUserProvider {
|
||||
users: HashMap<String, Vec<u8>>,
|
||||
}
|
||||
|
||||
@@ -60,6 +60,18 @@ impl StaticUserProvider {
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a random username/password pair
|
||||
/// This is useful for invoking from other components in the cluster
|
||||
pub fn get_one_user_pwd(&self) -> Result<(String, String)> {
|
||||
let kv = self.users.iter().next().context(InvalidConfigSnafu {
|
||||
value: "",
|
||||
msg: "Expect at least one pair of username and password",
|
||||
})?;
|
||||
let username = kv.0;
|
||||
let pwd = String::from_utf8(kv.1.clone()).context(FromUtf8Snafu)?;
|
||||
Ok((username.clone(), pwd))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
||||
@@ -49,7 +49,6 @@ pub use table_names::*;
|
||||
use views::InformationSchemaViews;
|
||||
|
||||
use self::columns::InformationSchemaColumns;
|
||||
use super::{SystemSchemaProviderInner, SystemTable, SystemTableRef};
|
||||
use crate::error::{Error, Result};
|
||||
use crate::system_schema::information_schema::cluster_info::InformationSchemaClusterInfo;
|
||||
use crate::system_schema::information_schema::flows::InformationSchemaFlows;
|
||||
@@ -63,7 +62,9 @@ use crate::system_schema::information_schema::table_constraints::InformationSche
|
||||
use crate::system_schema::information_schema::tables::InformationSchemaTables;
|
||||
use crate::system_schema::memory_table::MemoryTable;
|
||||
pub(crate) use crate::system_schema::predicate::Predicates;
|
||||
use crate::system_schema::SystemSchemaProvider;
|
||||
use crate::system_schema::{
|
||||
SystemSchemaProvider, SystemSchemaProviderInner, SystemTable, SystemTableRef,
|
||||
};
|
||||
use crate::CatalogManager;
|
||||
|
||||
lazy_static! {
|
||||
|
||||
@@ -36,9 +36,8 @@ use datatypes::vectors::{
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::CLUSTER_INFO;
|
||||
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates, CLUSTER_INFO};
|
||||
use crate::system_schema::utils;
|
||||
use crate::CatalogManager;
|
||||
|
||||
|
||||
@@ -38,11 +38,11 @@ use snafu::{OptionExt, ResultExt};
|
||||
use sql::statements;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::{InformationTable, COLUMNS};
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::Predicates;
|
||||
use crate::system_schema::information_schema::{InformationTable, COLUMNS};
|
||||
use crate::CatalogManager;
|
||||
|
||||
#[derive(Debug)]
|
||||
|
||||
@@ -18,7 +18,7 @@ use common_catalog::consts::{METRIC_ENGINE, MITO_ENGINE};
|
||||
use datatypes::schema::{Schema, SchemaRef};
|
||||
use datatypes::vectors::{Int64Vector, StringVector, VectorRef};
|
||||
|
||||
use super::table_names::*;
|
||||
use crate::system_schema::information_schema::table_names::*;
|
||||
use crate::system_schema::utils::tables::{
|
||||
bigint_column, string_column, string_columns, timestamp_micro_column,
|
||||
};
|
||||
|
||||
@@ -24,18 +24,17 @@ use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatch
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, MutableVector, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::schema::{ColumnSchema, FulltextBackend, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, UInt32VectorBuilder};
|
||||
use futures_util::TryStreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::KEY_COLUMN_USAGE;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates, KEY_COLUMN_USAGE};
|
||||
use crate::CatalogManager;
|
||||
|
||||
pub const CONSTRAINT_SCHEMA: &str = "constraint_schema";
|
||||
@@ -48,20 +47,38 @@ pub const TABLE_SCHEMA: &str = "table_schema";
|
||||
pub const TABLE_NAME: &str = "table_name";
|
||||
pub const COLUMN_NAME: &str = "column_name";
|
||||
pub const ORDINAL_POSITION: &str = "ordinal_position";
|
||||
/// The type of the index.
|
||||
pub const GREPTIME_INDEX_TYPE: &str = "greptime_index_type";
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
/// Primary key constraint name
|
||||
pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY";
|
||||
/// Time index constraint name
|
||||
pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX";
|
||||
pub(crate) const CONSTRAINT_NAME_TIME_INDEX: &str = "TIME INDEX";
|
||||
|
||||
/// Primary key constraint name
|
||||
pub(crate) const CONSTRAINT_NAME_PRI: &str = "PRIMARY";
|
||||
/// Primary key index type
|
||||
pub(crate) const INDEX_TYPE_PRI: &str = "greptime-primary-key-v1";
|
||||
|
||||
/// Inverted index constraint name
|
||||
pub(crate) const INVERTED_INDEX_CONSTRAINT_NAME: &str = "INVERTED INDEX";
|
||||
pub(crate) const CONSTRAINT_NAME_INVERTED_INDEX: &str = "INVERTED INDEX";
|
||||
/// Inverted index type
|
||||
pub(crate) const INDEX_TYPE_INVERTED_INDEX: &str = "greptime-inverted-index-v1";
|
||||
|
||||
/// Fulltext index constraint name
|
||||
pub(crate) const FULLTEXT_INDEX_CONSTRAINT_NAME: &str = "FULLTEXT INDEX";
|
||||
pub(crate) const CONSTRAINT_NAME_FULLTEXT_INDEX: &str = "FULLTEXT INDEX";
|
||||
/// Fulltext index v1 type
|
||||
pub(crate) const INDEX_TYPE_FULLTEXT_TANTIVY: &str = "greptime-fulltext-index-v1";
|
||||
/// Fulltext index bloom type
|
||||
pub(crate) const INDEX_TYPE_FULLTEXT_BLOOM: &str = "greptime-fulltext-index-bloom";
|
||||
|
||||
/// Skipping index constraint name
|
||||
pub(crate) const SKIPPING_INDEX_CONSTRAINT_NAME: &str = "SKIPPING INDEX";
|
||||
pub(crate) const CONSTRAINT_NAME_SKIPPING_INDEX: &str = "SKIPPING INDEX";
|
||||
/// Skipping index type
|
||||
pub(crate) const INDEX_TYPE_SKIPPING_INDEX: &str = "greptime-bloom-filter-v1";
|
||||
|
||||
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
|
||||
///
|
||||
/// Provides an extra column `greptime_index_type` for the index type of the key column.
|
||||
#[derive(Debug)]
|
||||
pub(super) struct InformationSchemaKeyColumnUsage {
|
||||
schema: SchemaRef,
|
||||
@@ -121,6 +138,11 @@ impl InformationSchemaKeyColumnUsage {
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
GREPTIME_INDEX_TYPE,
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
]))
|
||||
}
|
||||
|
||||
@@ -185,6 +207,7 @@ struct InformationSchemaKeyColumnUsageBuilder {
|
||||
column_name: StringVectorBuilder,
|
||||
ordinal_position: UInt32VectorBuilder,
|
||||
position_in_unique_constraint: UInt32VectorBuilder,
|
||||
greptime_index_type: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaKeyColumnUsageBuilder {
|
||||
@@ -207,6 +230,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
column_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
ordinal_position: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
position_in_unique_constraint: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
greptime_index_type: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -230,34 +254,47 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
|
||||
for (idx, column) in schema.column_schemas().iter().enumerate() {
|
||||
let mut constraints = vec![];
|
||||
let mut greptime_index_type = vec![];
|
||||
if column.is_time_index() {
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
TIME_INDEX_CONSTRAINT_NAME,
|
||||
CONSTRAINT_NAME_TIME_INDEX,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
table_name,
|
||||
&column.name,
|
||||
1, //always 1 for time index
|
||||
"",
|
||||
);
|
||||
}
|
||||
// TODO(dimbtp): foreign key constraint not supported yet
|
||||
if keys.contains(&idx) {
|
||||
constraints.push(PRI_CONSTRAINT_NAME);
|
||||
constraints.push(CONSTRAINT_NAME_PRI);
|
||||
greptime_index_type.push(INDEX_TYPE_PRI);
|
||||
}
|
||||
if column.is_inverted_indexed() {
|
||||
constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);
|
||||
constraints.push(CONSTRAINT_NAME_INVERTED_INDEX);
|
||||
greptime_index_type.push(INDEX_TYPE_INVERTED_INDEX);
|
||||
}
|
||||
if column.is_fulltext_indexed() {
|
||||
constraints.push(FULLTEXT_INDEX_CONSTRAINT_NAME);
|
||||
if let Ok(Some(options)) = column.fulltext_options() {
|
||||
if options.enable {
|
||||
constraints.push(CONSTRAINT_NAME_FULLTEXT_INDEX);
|
||||
let index_type = match options.backend {
|
||||
FulltextBackend::Bloom => INDEX_TYPE_FULLTEXT_BLOOM,
|
||||
FulltextBackend::Tantivy => INDEX_TYPE_FULLTEXT_TANTIVY,
|
||||
};
|
||||
greptime_index_type.push(index_type);
|
||||
}
|
||||
}
|
||||
if column.is_skipping_indexed() {
|
||||
constraints.push(SKIPPING_INDEX_CONSTRAINT_NAME);
|
||||
constraints.push(CONSTRAINT_NAME_SKIPPING_INDEX);
|
||||
greptime_index_type.push(INDEX_TYPE_SKIPPING_INDEX);
|
||||
}
|
||||
|
||||
if !constraints.is_empty() {
|
||||
let aggregated_constraints = constraints.join(", ");
|
||||
let aggregated_index_types = greptime_index_type.join(", ");
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
@@ -267,6 +304,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
table_name,
|
||||
&column.name,
|
||||
idx as u32 + 1,
|
||||
&aggregated_index_types,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -289,6 +327,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
ordinal_position: u32,
|
||||
index_types: &str,
|
||||
) {
|
||||
let row = [
|
||||
(CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
|
||||
@@ -298,6 +337,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
(TABLE_NAME, &Value::from(table_name)),
|
||||
(COLUMN_NAME, &Value::from(column_name)),
|
||||
(ORDINAL_POSITION, &Value::from(ordinal_position)),
|
||||
(GREPTIME_INDEX_TYPE, &Value::from(index_types)),
|
||||
];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
@@ -314,6 +354,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
self.column_name.push(Some(column_name));
|
||||
self.ordinal_position.push(Some(ordinal_position));
|
||||
self.position_in_unique_constraint.push(None);
|
||||
self.greptime_index_type.push(Some(index_types));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
@@ -337,6 +378,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
null_string_vector.clone(),
|
||||
null_string_vector.clone(),
|
||||
null_string_vector,
|
||||
Arc::new(self.greptime_index_type.finish()),
|
||||
];
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
|
||||
@@ -39,13 +39,12 @@ use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
use table::metadata::{TableInfo, TableType};
|
||||
|
||||
use super::PARTITIONS;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, PartitionManagerNotFoundSnafu,
|
||||
Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::kvbackend::KvBackendCatalogManager;
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates, PARTITIONS};
|
||||
use crate::CatalogManager;
|
||||
|
||||
const TABLE_CATALOG: &str = "table_catalog";
|
||||
|
||||
@@ -33,9 +33,8 @@ use datatypes::vectors::{StringVectorBuilder, TimestampMillisecondVectorBuilder}
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::PROCEDURE_INFO;
|
||||
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates, PROCEDURE_INFO};
|
||||
use crate::system_schema::utils;
|
||||
use crate::CatalogManager;
|
||||
|
||||
|
||||
@@ -35,13 +35,12 @@ use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{RegionId, ScanRequest, TableId};
|
||||
use table::metadata::TableType;
|
||||
|
||||
use super::REGION_PEERS;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, FindRegionRoutesSnafu, InternalSnafu, Result,
|
||||
UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::kvbackend::KvBackendCatalogManager;
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates, REGION_PEERS};
|
||||
use crate::CatalogManager;
|
||||
|
||||
pub const TABLE_CATALOG: &str = "table_catalog";
|
||||
|
||||
@@ -30,9 +30,9 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, UInt64VectorB
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::{InformationTable, REGION_STATISTICS};
|
||||
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
|
||||
use crate::information_schema::Predicates;
|
||||
use crate::system_schema::information_schema::{InformationTable, REGION_STATISTICS};
|
||||
use crate::system_schema::utils;
|
||||
use crate::CatalogManager;
|
||||
|
||||
|
||||
@@ -35,8 +35,8 @@ use itertools::Itertools;
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::{InformationTable, RUNTIME_METRICS};
|
||||
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
|
||||
use crate::system_schema::information_schema::{InformationTable, RUNTIME_METRICS};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(super) struct InformationSchemaMetrics {
|
||||
|
||||
@@ -31,12 +31,11 @@ use datatypes::vectors::StringVectorBuilder;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::SCHEMATA;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, TableMetadataManagerSnafu,
|
||||
UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates, SCHEMATA};
|
||||
use crate::system_schema::utils;
|
||||
use crate::CatalogManager;
|
||||
|
||||
|
||||
@@ -32,14 +32,14 @@ use futures::TryStreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::{InformationTable, TABLE_CONSTRAINTS};
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::key_column_usage::{
|
||||
PRI_CONSTRAINT_NAME, TIME_INDEX_CONSTRAINT_NAME,
|
||||
CONSTRAINT_NAME_PRI, CONSTRAINT_NAME_TIME_INDEX,
|
||||
};
|
||||
use crate::information_schema::Predicates;
|
||||
use crate::system_schema::information_schema::{InformationTable, TABLE_CONSTRAINTS};
|
||||
use crate::CatalogManager;
|
||||
|
||||
/// The `TABLE_CONSTRAINTS` table describes which tables have constraints.
|
||||
@@ -188,7 +188,7 @@ impl InformationSchemaTableConstraintsBuilder {
|
||||
self.add_table_constraint(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
TIME_INDEX_CONSTRAINT_NAME,
|
||||
CONSTRAINT_NAME_TIME_INDEX,
|
||||
&schema_name,
|
||||
&table.table_info().name,
|
||||
TIME_INDEX_CONSTRAINT_TYPE,
|
||||
@@ -199,7 +199,7 @@ impl InformationSchemaTableConstraintsBuilder {
|
||||
self.add_table_constraint(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
PRI_CONSTRAINT_NAME,
|
||||
CONSTRAINT_NAME_PRI,
|
||||
&schema_name,
|
||||
&table.table_info().name,
|
||||
PRI_KEY_CONSTRAINT_TYPE,
|
||||
|
||||
@@ -38,11 +38,10 @@ use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{RegionId, ScanRequest, TableId};
|
||||
use table::metadata::{TableInfo, TableType};
|
||||
|
||||
use super::TABLES;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates, TABLES};
|
||||
use crate::system_schema::utils;
|
||||
use crate::CatalogManager;
|
||||
|
||||
|
||||
@@ -32,13 +32,12 @@ use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
use table::metadata::TableType;
|
||||
|
||||
use super::VIEWS;
|
||||
use crate::error::{
|
||||
CastManagerSnafu, CreateRecordBatchSnafu, GetViewCacheSnafu, InternalSnafu, Result,
|
||||
UpgradeWeakCatalogManagerRefSnafu, ViewInfoNotFoundSnafu,
|
||||
};
|
||||
use crate::kvbackend::KvBackendCatalogManager;
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates, VIEWS};
|
||||
use crate::CatalogManager;
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
|
||||
@@ -29,8 +29,8 @@ use datatypes::vectors::VectorRef;
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::SystemTable;
|
||||
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
|
||||
use crate::system_schema::SystemTable;
|
||||
|
||||
/// A memory table with specified schema and columns.
|
||||
#[derive(Debug)]
|
||||
|
||||
@@ -34,9 +34,9 @@ use table::TableRef;
|
||||
pub use table_names::*;
|
||||
|
||||
use self::pg_namespace::oid_map::{PGNamespaceOidMap, PGNamespaceOidMapRef};
|
||||
use super::memory_table::MemoryTable;
|
||||
use super::utils::tables::u32_column;
|
||||
use super::{SystemSchemaProvider, SystemSchemaProviderInner, SystemTableRef};
|
||||
use crate::system_schema::memory_table::MemoryTable;
|
||||
use crate::system_schema::utils::tables::u32_column;
|
||||
use crate::system_schema::{SystemSchemaProvider, SystemSchemaProviderInner, SystemTableRef};
|
||||
use crate::CatalogManager;
|
||||
|
||||
lazy_static! {
|
||||
|
||||
@@ -17,9 +17,9 @@ use std::sync::Arc;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::vectors::{Int16Vector, StringVector, UInt32Vector, VectorRef};
|
||||
|
||||
use super::oid_column;
|
||||
use super::table_names::PG_TYPE;
|
||||
use crate::memory_table_cols;
|
||||
use crate::system_schema::pg_catalog::oid_column;
|
||||
use crate::system_schema::pg_catalog::table_names::PG_TYPE;
|
||||
use crate::system_schema::utils::tables::{i16_column, string_column};
|
||||
|
||||
fn pg_type_schema_columns() -> (Vec<ColumnSchema>, Vec<VectorRef>) {
|
||||
|
||||
@@ -32,12 +32,12 @@ use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::ScanRequest;
|
||||
use table::metadata::TableType;
|
||||
|
||||
use super::pg_namespace::oid_map::PGNamespaceOidMapRef;
|
||||
use super::{query_ctx, OID_COLUMN_NAME, PG_CLASS};
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::Predicates;
|
||||
use crate::system_schema::pg_catalog::pg_namespace::oid_map::PGNamespaceOidMapRef;
|
||||
use crate::system_schema::pg_catalog::{query_ctx, OID_COLUMN_NAME, PG_CLASS};
|
||||
use crate::system_schema::utils::tables::{string_column, u32_column};
|
||||
use crate::system_schema::SystemTable;
|
||||
use crate::CatalogManager;
|
||||
|
||||
@@ -29,12 +29,12 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, VectorRef};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::ScanRequest;
|
||||
|
||||
use super::pg_namespace::oid_map::PGNamespaceOidMapRef;
|
||||
use super::{query_ctx, OID_COLUMN_NAME, PG_DATABASE};
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::Predicates;
|
||||
use crate::system_schema::pg_catalog::pg_namespace::oid_map::PGNamespaceOidMapRef;
|
||||
use crate::system_schema::pg_catalog::{query_ctx, OID_COLUMN_NAME, PG_DATABASE};
|
||||
use crate::system_schema::utils::tables::{string_column, u32_column};
|
||||
use crate::system_schema::SystemTable;
|
||||
use crate::CatalogManager;
|
||||
|
||||
@@ -35,11 +35,13 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, VectorRef};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::ScanRequest;
|
||||
|
||||
use super::{query_ctx, PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE};
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::Predicates;
|
||||
use crate::system_schema::pg_catalog::{
|
||||
query_ctx, PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE,
|
||||
};
|
||||
use crate::system_schema::utils::tables::{string_column, u32_column};
|
||||
use crate::system_schema::SystemTable;
|
||||
use crate::CatalogManager;
|
||||
|
||||
@@ -84,12 +84,6 @@ mod tests {
|
||||
let key1 = "3178510";
|
||||
let key2 = "4215648";
|
||||
|
||||
// have collision
|
||||
assert_eq!(
|
||||
oid_map.hasher.hash_one(key1) as u32,
|
||||
oid_map.hasher.hash_one(key2) as u32
|
||||
);
|
||||
|
||||
// insert them into oid_map
|
||||
let oid1 = oid_map.get_oid(key1);
|
||||
let oid2 = oid_map.get_oid(key2);
|
||||
|
||||
@@ -437,10 +437,7 @@ mod tests {
|
||||
}
|
||||
|
||||
fn column(name: &str) -> Expr {
|
||||
Expr::Column(Column {
|
||||
relation: None,
|
||||
name: name.to_string(),
|
||||
})
|
||||
Expr::Column(Column::from_name(name))
|
||||
}
|
||||
|
||||
fn string_literal(v: &str) -> Expr {
|
||||
|
||||
@@ -27,7 +27,7 @@ use session::context::QueryContextRef;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use table::metadata::TableType;
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
mod dummy_catalog;
|
||||
pub mod dummy_catalog;
|
||||
use dummy_catalog::DummyCatalogList;
|
||||
use table::TableRef;
|
||||
|
||||
|
||||
@@ -51,7 +51,6 @@ opendal = { version = "0.51.1", features = [
|
||||
query.workspace = true
|
||||
rand.workspace = true
|
||||
reqwest.workspace = true
|
||||
rustyline = "10.1"
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
servers.workspace = true
|
||||
|
||||
@@ -1,154 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::error::{Error, InvalidReplCommandSnafu, Result};
|
||||
|
||||
/// Represents the parsed command from the user (which may be over many lines)
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) enum ReplCommand {
|
||||
Help,
|
||||
UseDatabase { db_name: String },
|
||||
Sql { sql: String },
|
||||
Exit,
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for ReplCommand {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(input: &str) -> Result<Self> {
|
||||
let input = input.trim();
|
||||
if input.is_empty() {
|
||||
return InvalidReplCommandSnafu {
|
||||
reason: "No command specified".to_string(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
// If line ends with ';', it must be treated as a complete input.
|
||||
// However, the opposite is not true.
|
||||
let input_is_completed = input.ends_with(';');
|
||||
|
||||
let input = input.strip_suffix(';').map(|x| x.trim()).unwrap_or(input);
|
||||
let lowercase = input.to_lowercase();
|
||||
match lowercase.as_str() {
|
||||
"help" => Ok(Self::Help),
|
||||
"exit" | "quit" => Ok(Self::Exit),
|
||||
_ => match input.split_once(' ') {
|
||||
Some((maybe_use, database)) if maybe_use.to_lowercase() == "use" => {
|
||||
Ok(Self::UseDatabase {
|
||||
db_name: database.trim().to_string(),
|
||||
})
|
||||
}
|
||||
// Any valid SQL must contains at least one whitespace.
|
||||
Some(_) if input_is_completed => Ok(Self::Sql {
|
||||
sql: input.to_string(),
|
||||
}),
|
||||
_ => InvalidReplCommandSnafu {
|
||||
reason: format!("unknown command '{input}', maybe input is not completed"),
|
||||
}
|
||||
.fail(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ReplCommand {
|
||||
pub fn help() -> &'static str {
|
||||
r#"
|
||||
Available commands (case insensitive):
|
||||
- 'help': print this help
|
||||
- 'exit' or 'quit': exit the REPL
|
||||
- 'use <your database name>': switch to another database/schema context
|
||||
- Other typed in text will be treated as SQL.
|
||||
You can enter new line while typing, just remember to end it with ';'.
|
||||
"#
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::error::Error::InvalidReplCommand;
|
||||
|
||||
#[test]
|
||||
fn test_from_str() {
|
||||
fn test_ok(s: &str, expected: ReplCommand) {
|
||||
let actual: ReplCommand = s.try_into().unwrap();
|
||||
assert_eq!(expected, actual, "'{}'", s);
|
||||
}
|
||||
|
||||
fn test_err(s: &str) {
|
||||
let result: Result<ReplCommand> = s.try_into();
|
||||
assert!(matches!(result, Err(InvalidReplCommand { .. })))
|
||||
}
|
||||
|
||||
test_err("");
|
||||
test_err(" ");
|
||||
test_err("\t");
|
||||
|
||||
test_ok("help", ReplCommand::Help);
|
||||
test_ok("help", ReplCommand::Help);
|
||||
test_ok(" help", ReplCommand::Help);
|
||||
test_ok(" help ", ReplCommand::Help);
|
||||
test_ok(" HELP ", ReplCommand::Help);
|
||||
test_ok(" Help; ", ReplCommand::Help);
|
||||
test_ok(" help ; ", ReplCommand::Help);
|
||||
|
||||
test_ok("exit", ReplCommand::Exit);
|
||||
test_ok("exit;", ReplCommand::Exit);
|
||||
test_ok("exit ;", ReplCommand::Exit);
|
||||
test_ok("EXIT", ReplCommand::Exit);
|
||||
|
||||
test_ok("quit", ReplCommand::Exit);
|
||||
test_ok("quit;", ReplCommand::Exit);
|
||||
test_ok("quit ;", ReplCommand::Exit);
|
||||
test_ok("QUIT", ReplCommand::Exit);
|
||||
|
||||
test_ok(
|
||||
"use Foo",
|
||||
ReplCommand::UseDatabase {
|
||||
db_name: "Foo".to_string(),
|
||||
},
|
||||
);
|
||||
test_ok(
|
||||
" use Foo ; ",
|
||||
ReplCommand::UseDatabase {
|
||||
db_name: "Foo".to_string(),
|
||||
},
|
||||
);
|
||||
// ensure that database name is case sensitive
|
||||
test_ok(
|
||||
" use FOO ; ",
|
||||
ReplCommand::UseDatabase {
|
||||
db_name: "FOO".to_string(),
|
||||
},
|
||||
);
|
||||
|
||||
// ensure that we aren't messing with capitalization
|
||||
test_ok(
|
||||
"SELECT * from foo;",
|
||||
ReplCommand::Sql {
|
||||
sql: "SELECT * from foo".to_string(),
|
||||
},
|
||||
);
|
||||
// Input line (that don't belong to any other cases above) must ends with ';' to make it a valid SQL.
|
||||
test_err("insert blah");
|
||||
test_ok(
|
||||
"insert blah;",
|
||||
ReplCommand::Sql {
|
||||
sql: "insert blah".to_string(),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -17,7 +17,6 @@ use std::any::Any;
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_macro::stack_trace_debug;
|
||||
use rustyline::error::ReadlineError;
|
||||
use snafu::{Location, Snafu};
|
||||
|
||||
#[derive(Snafu)]
|
||||
@@ -102,55 +101,6 @@ pub enum Error {
|
||||
error: reqwest::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid REPL command: {reason}"))]
|
||||
InvalidReplCommand { reason: String },
|
||||
|
||||
#[snafu(display("Cannot create REPL"))]
|
||||
ReplCreation {
|
||||
#[snafu(source)]
|
||||
error: ReadlineError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Error reading command"))]
|
||||
Readline {
|
||||
#[snafu(source)]
|
||||
error: ReadlineError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to request database, sql: {sql}"))]
|
||||
RequestDatabase {
|
||||
sql: String,
|
||||
#[snafu(source)]
|
||||
source: client::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to collect RecordBatches"))]
|
||||
CollectRecordBatches {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_recordbatch::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to pretty print Recordbatches"))]
|
||||
PrettyPrintRecordBatches {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_recordbatch::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to start Meta client"))]
|
||||
StartMetaClient {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: meta_client::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse SQL: {}", sql))]
|
||||
ParseSql {
|
||||
sql: String,
|
||||
@@ -166,13 +116,6 @@ pub enum Error {
|
||||
source: query::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to encode logical plan in substrait"))]
|
||||
SubstraitEncodeLogicalPlan {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: substrait::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to load layered config"))]
|
||||
LoadLayeredConfig {
|
||||
#[snafu(source(from(common_config::error::Error, Box::new)))]
|
||||
@@ -308,7 +251,6 @@ impl ErrorExt for Error {
|
||||
Error::MissingConfig { .. }
|
||||
| Error::LoadLayeredConfig { .. }
|
||||
| Error::IllegalConfig { .. }
|
||||
| Error::InvalidReplCommand { .. }
|
||||
| Error::InitTimezone { .. }
|
||||
| Error::ConnectEtcd { .. }
|
||||
| Error::CreateDir { .. }
|
||||
@@ -318,17 +260,10 @@ impl ErrorExt for Error {
|
||||
Error::StartProcedureManager { source, .. }
|
||||
| Error::StopProcedureManager { source, .. } => source.status_code(),
|
||||
Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
|
||||
Error::ReplCreation { .. } | Error::Readline { .. } | Error::HttpQuerySql { .. } => {
|
||||
StatusCode::Internal
|
||||
}
|
||||
Error::RequestDatabase { source, .. } => source.status_code(),
|
||||
Error::CollectRecordBatches { source, .. }
|
||||
| Error::PrettyPrintRecordBatches { source, .. } => source.status_code(),
|
||||
Error::StartMetaClient { source, .. } => source.status_code(),
|
||||
Error::HttpQuerySql { .. } => StatusCode::Internal,
|
||||
Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => {
|
||||
source.status_code()
|
||||
}
|
||||
Error::SubstraitEncodeLogicalPlan { source, .. } => source.status_code(),
|
||||
|
||||
Error::SerdeJson { .. }
|
||||
| Error::FileIo { .. }
|
||||
|
||||
@@ -1,112 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::borrow::Cow;
|
||||
|
||||
use rustyline::completion::Completer;
|
||||
use rustyline::highlight::{Highlighter, MatchingBracketHighlighter};
|
||||
use rustyline::hint::{Hinter, HistoryHinter};
|
||||
use rustyline::validate::{ValidationContext, ValidationResult, Validator};
|
||||
|
||||
use crate::cmd::ReplCommand;
|
||||
|
||||
pub(crate) struct RustylineHelper {
|
||||
hinter: HistoryHinter,
|
||||
highlighter: MatchingBracketHighlighter,
|
||||
}
|
||||
|
||||
impl Default for RustylineHelper {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
hinter: HistoryHinter {},
|
||||
highlighter: MatchingBracketHighlighter::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl rustyline::Helper for RustylineHelper {}
|
||||
|
||||
impl Validator for RustylineHelper {
|
||||
fn validate(&self, ctx: &mut ValidationContext<'_>) -> rustyline::Result<ValidationResult> {
|
||||
let input = ctx.input();
|
||||
match ReplCommand::try_from(input) {
|
||||
Ok(_) => Ok(ValidationResult::Valid(None)),
|
||||
Err(e) => {
|
||||
if input.trim_end().ends_with(';') {
|
||||
// If line ends with ';', it HAS to be a valid command.
|
||||
Ok(ValidationResult::Invalid(Some(e.to_string())))
|
||||
} else {
|
||||
Ok(ValidationResult::Incomplete)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Hinter for RustylineHelper {
|
||||
type Hint = String;
|
||||
|
||||
fn hint(&self, line: &str, pos: usize, ctx: &rustyline::Context<'_>) -> Option<Self::Hint> {
|
||||
self.hinter.hint(line, pos, ctx)
|
||||
}
|
||||
}
|
||||
|
||||
impl Highlighter for RustylineHelper {
|
||||
fn highlight<'l>(&self, line: &'l str, pos: usize) -> Cow<'l, str> {
|
||||
self.highlighter.highlight(line, pos)
|
||||
}
|
||||
|
||||
fn highlight_prompt<'b, 's: 'b, 'p: 'b>(
|
||||
&'s self,
|
||||
prompt: &'p str,
|
||||
default: bool,
|
||||
) -> Cow<'b, str> {
|
||||
self.highlighter.highlight_prompt(prompt, default)
|
||||
}
|
||||
|
||||
fn highlight_hint<'h>(&self, hint: &'h str) -> Cow<'h, str> {
|
||||
use nu_ansi_term::Style;
|
||||
Cow::Owned(Style::new().dimmed().paint(hint).to_string())
|
||||
}
|
||||
|
||||
fn highlight_candidate<'c>(
|
||||
&self,
|
||||
candidate: &'c str,
|
||||
completion: rustyline::CompletionType,
|
||||
) -> Cow<'c, str> {
|
||||
self.highlighter.highlight_candidate(candidate, completion)
|
||||
}
|
||||
|
||||
fn highlight_char(&self, line: &str, pos: usize) -> bool {
|
||||
self.highlighter.highlight_char(line, pos)
|
||||
}
|
||||
}
|
||||
|
||||
impl Completer for RustylineHelper {
|
||||
type Candidate = String;
|
||||
|
||||
fn complete(
|
||||
&self,
|
||||
line: &str,
|
||||
pos: usize,
|
||||
ctx: &rustyline::Context<'_>,
|
||||
) -> rustyline::Result<(usize, Vec<Self::Candidate>)> {
|
||||
// If there is a hint, use that as the auto-complete when user hits `tab`
|
||||
if let Some(hint) = self.hinter.hint(line, pos, ctx) {
|
||||
Ok((pos, vec![hint]))
|
||||
} else {
|
||||
Ok((0, vec![]))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -13,25 +13,16 @@
|
||||
// limitations under the License.
|
||||
|
||||
mod bench;
|
||||
pub mod error;
|
||||
// Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373
|
||||
#[allow(unused)]
|
||||
mod cmd;
|
||||
mod export;
|
||||
mod helper;
|
||||
|
||||
// Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373
|
||||
mod database;
|
||||
pub mod error;
|
||||
mod export;
|
||||
mod import;
|
||||
#[allow(unused)]
|
||||
mod repl;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use clap::Parser;
|
||||
use common_error::ext::BoxedError;
|
||||
pub use database::DatabaseClient;
|
||||
use error::Result;
|
||||
pub use repl::Repl;
|
||||
|
||||
pub use crate::bench::BenchTableMetadataCommand;
|
||||
pub use crate::export::ExportCommand;
|
||||
|
||||
@@ -1,299 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use cache::{
|
||||
build_fundamental_cache_registry, with_default_composite_cache_registry, TABLE_CACHE_NAME,
|
||||
TABLE_ROUTE_CACHE_NAME,
|
||||
};
|
||||
use catalog::information_extension::DistributedInformationExtension;
|
||||
use catalog::kvbackend::{
|
||||
CachedKvBackend, CachedKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend,
|
||||
};
|
||||
use client::{Client, Database, OutputData, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_base::Plugins;
|
||||
use common_config::Mode;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use common_telemetry::debug;
|
||||
use either::Either;
|
||||
use meta_client::client::{ClusterKvBackend, MetaClientBuilder};
|
||||
use query::datafusion::DatafusionQueryEngine;
|
||||
use query::parser::QueryLanguageParser;
|
||||
use query::query_engine::{DefaultSerializer, QueryEngineState};
|
||||
use query::QueryEngine;
|
||||
use rustyline::error::ReadlineError;
|
||||
use rustyline::Editor;
|
||||
use session::context::QueryContext;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
|
||||
|
||||
use crate::cmd::ReplCommand;
|
||||
use crate::error::{
|
||||
CollectRecordBatchesSnafu, ParseSqlSnafu, PlanStatementSnafu, PrettyPrintRecordBatchesSnafu,
|
||||
ReadlineSnafu, ReplCreationSnafu, RequestDatabaseSnafu, Result, StartMetaClientSnafu,
|
||||
SubstraitEncodeLogicalPlanSnafu,
|
||||
};
|
||||
use crate::helper::RustylineHelper;
|
||||
use crate::{error, AttachCommand};
|
||||
|
||||
/// Captures the state of the repl, gathers commands and executes them one by one
|
||||
pub struct Repl {
|
||||
/// Rustyline editor for interacting with user on command line
|
||||
rl: Editor<RustylineHelper>,
|
||||
|
||||
/// Current prompt
|
||||
prompt: String,
|
||||
|
||||
/// Client for interacting with GreptimeDB
|
||||
database: Database,
|
||||
|
||||
query_engine: Option<DatafusionQueryEngine>,
|
||||
}
|
||||
|
||||
#[allow(clippy::print_stdout)]
|
||||
impl Repl {
|
||||
fn print_help(&self) {
|
||||
println!("{}", ReplCommand::help())
|
||||
}
|
||||
|
||||
pub(crate) async fn try_new(cmd: &AttachCommand) -> Result<Self> {
|
||||
let mut rl = Editor::new().context(ReplCreationSnafu)?;
|
||||
|
||||
if !cmd.disable_helper {
|
||||
rl.set_helper(Some(RustylineHelper::default()));
|
||||
|
||||
let history_file = history_file();
|
||||
if let Err(e) = rl.load_history(&history_file) {
|
||||
debug!(
|
||||
"failed to load history file on {}, error: {e}",
|
||||
history_file.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let client = Client::with_urls([&cmd.grpc_addr]);
|
||||
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
|
||||
|
||||
let query_engine = if let Some(meta_addr) = &cmd.meta_addr {
|
||||
create_query_engine(meta_addr).await.map(Some)?
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
rl,
|
||||
prompt: "> ".to_string(),
|
||||
database,
|
||||
query_engine,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse the next command
|
||||
fn next_command(&mut self) -> Result<ReplCommand> {
|
||||
match self.rl.readline(&self.prompt) {
|
||||
Ok(ref line) => {
|
||||
let request = line.trim();
|
||||
|
||||
let _ = self.rl.add_history_entry(request.to_string());
|
||||
|
||||
request.try_into()
|
||||
}
|
||||
Err(ReadlineError::Eof) | Err(ReadlineError::Interrupted) => Ok(ReplCommand::Exit),
|
||||
// Some sort of real underlying error
|
||||
Err(e) => Err(e).context(ReadlineSnafu),
|
||||
}
|
||||
}
|
||||
|
||||
/// Read Evaluate Print Loop (interactive command line) for GreptimeDB
|
||||
///
|
||||
/// Inspired / based on repl.rs from InfluxDB IOX
|
||||
pub(crate) async fn run(&mut self) -> Result<()> {
|
||||
println!("Ready for commands. (Hint: try 'help')");
|
||||
|
||||
loop {
|
||||
match self.next_command()? {
|
||||
ReplCommand::Help => {
|
||||
self.print_help();
|
||||
}
|
||||
ReplCommand::UseDatabase { db_name } => {
|
||||
if self.execute_sql(format!("USE {db_name}")).await {
|
||||
println!("Using {db_name}");
|
||||
self.database.set_schema(&db_name);
|
||||
self.prompt = format!("[{db_name}] > ");
|
||||
}
|
||||
}
|
||||
ReplCommand::Sql { sql } => {
|
||||
let _ = self.execute_sql(sql).await;
|
||||
}
|
||||
ReplCommand::Exit => {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn execute_sql(&self, sql: String) -> bool {
|
||||
self.do_execute_sql(sql)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
let status_code = e.status_code();
|
||||
let root_cause = e.output_msg();
|
||||
println!("Error: {}({status_code}), {root_cause}", status_code as u32)
|
||||
})
|
||||
.is_ok()
|
||||
}
|
||||
|
||||
async fn do_execute_sql(&self, sql: String) -> Result<()> {
|
||||
let start = Instant::now();
|
||||
|
||||
let output = if let Some(query_engine) = &self.query_engine {
|
||||
let query_ctx = Arc::new(QueryContext::with(
|
||||
self.database.catalog(),
|
||||
self.database.schema(),
|
||||
));
|
||||
|
||||
let stmt = QueryLanguageParser::parse_sql(&sql, &query_ctx)
|
||||
.with_context(|_| ParseSqlSnafu { sql: sql.clone() })?;
|
||||
|
||||
let plan = query_engine
|
||||
.planner()
|
||||
.plan(&stmt, query_ctx.clone())
|
||||
.await
|
||||
.context(PlanStatementSnafu)?;
|
||||
|
||||
let plan = query_engine
|
||||
.optimize(&query_engine.engine_context(query_ctx), &plan)
|
||||
.context(PlanStatementSnafu)?;
|
||||
|
||||
let plan = DFLogicalSubstraitConvertor {}
|
||||
.encode(&plan, DefaultSerializer)
|
||||
.context(SubstraitEncodeLogicalPlanSnafu)?;
|
||||
|
||||
self.database.logical_plan(plan.to_vec()).await
|
||||
} else {
|
||||
self.database.sql(&sql).await
|
||||
}
|
||||
.context(RequestDatabaseSnafu { sql: &sql })?;
|
||||
|
||||
let either = match output.data {
|
||||
OutputData::Stream(s) => {
|
||||
let x = RecordBatches::try_collect(s)
|
||||
.await
|
||||
.context(CollectRecordBatchesSnafu)?;
|
||||
Either::Left(x)
|
||||
}
|
||||
OutputData::RecordBatches(x) => Either::Left(x),
|
||||
OutputData::AffectedRows(rows) => Either::Right(rows),
|
||||
};
|
||||
|
||||
let end = Instant::now();
|
||||
|
||||
match either {
|
||||
Either::Left(recordbatches) => {
|
||||
let total_rows: usize = recordbatches.iter().map(|x| x.num_rows()).sum();
|
||||
if total_rows > 0 {
|
||||
println!(
|
||||
"{}",
|
||||
recordbatches
|
||||
.pretty_print()
|
||||
.context(PrettyPrintRecordBatchesSnafu)?
|
||||
);
|
||||
}
|
||||
println!("Total Rows: {total_rows}")
|
||||
}
|
||||
Either::Right(rows) => println!("Affected Rows: {rows}"),
|
||||
};
|
||||
|
||||
println!("Cost {} ms", (end - start).as_millis());
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Repl {
|
||||
fn drop(&mut self) {
|
||||
if self.rl.helper().is_some() {
|
||||
let history_file = history_file();
|
||||
if let Err(e) = self.rl.save_history(&history_file) {
|
||||
debug!(
|
||||
"failed to save history file on {}, error: {e}",
|
||||
history_file.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the location of the history file (defaults to $HOME/".greptimedb_cli_history")
|
||||
fn history_file() -> PathBuf {
|
||||
let mut buf = match std::env::var("HOME") {
|
||||
Ok(home) => PathBuf::from(home),
|
||||
Err(_) => PathBuf::new(),
|
||||
};
|
||||
buf.push(".greptimedb_cli_history");
|
||||
buf
|
||||
}
|
||||
|
||||
async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
|
||||
let mut meta_client = MetaClientBuilder::default().enable_store().build();
|
||||
meta_client
|
||||
.start([meta_addr])
|
||||
.await
|
||||
.context(StartMetaClientSnafu)?;
|
||||
let meta_client = Arc::new(meta_client);
|
||||
|
||||
let cached_meta_backend = Arc::new(
|
||||
CachedKvBackendBuilder::new(Arc::new(MetaKvBackend::new(meta_client.clone()))).build(),
|
||||
);
|
||||
let layered_cache_builder = LayeredCacheRegistryBuilder::default().add_cache_registry(
|
||||
CacheRegistryBuilder::default()
|
||||
.add_cache(cached_meta_backend.clone())
|
||||
.build(),
|
||||
);
|
||||
let fundamental_cache_registry =
|
||||
build_fundamental_cache_registry(Arc::new(MetaKvBackend::new(meta_client.clone())));
|
||||
let layered_cache_registry = Arc::new(
|
||||
with_default_composite_cache_registry(
|
||||
layered_cache_builder.add_cache_registry(fundamental_cache_registry),
|
||||
)
|
||||
.context(error::BuildCacheRegistrySnafu)?
|
||||
.build(),
|
||||
);
|
||||
|
||||
let information_extension = Arc::new(DistributedInformationExtension::new(meta_client.clone()));
|
||||
let catalog_manager = KvBackendCatalogManager::new(
|
||||
information_extension,
|
||||
cached_meta_backend.clone(),
|
||||
layered_cache_registry,
|
||||
None,
|
||||
);
|
||||
let plugins: Plugins = Default::default();
|
||||
let state = Arc::new(QueryEngineState::new(
|
||||
catalog_manager,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
plugins.clone(),
|
||||
));
|
||||
|
||||
Ok(DatafusionQueryEngine::new(state, plugins))
|
||||
}
|
||||
@@ -16,6 +16,7 @@ arc-swap = "1.6"
|
||||
arrow-flight.workspace = true
|
||||
async-stream.workspace = true
|
||||
async-trait.workspace = true
|
||||
base64.workspace = true
|
||||
common-catalog.workspace = true
|
||||
common-error.workspace = true
|
||||
common-grpc.workspace = true
|
||||
@@ -25,6 +26,7 @@ common-query.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
enum_dispatch = "0.3"
|
||||
futures.workspace = true
|
||||
futures-util.workspace = true
|
||||
lazy_static.workspace = true
|
||||
moka = { workspace = true, features = ["future"] }
|
||||
|
||||
@@ -12,36 +12,49 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::pin::Pin;
|
||||
use std::str::FromStr;
|
||||
|
||||
use api::v1::auth_header::AuthScheme;
|
||||
use api::v1::ddl_request::Expr as DdlExpr;
|
||||
use api::v1::greptime_database_client::GreptimeDatabaseClient;
|
||||
use api::v1::greptime_request::Request;
|
||||
use api::v1::query_request::Query;
|
||||
use api::v1::{
|
||||
AlterTableExpr, AuthHeader, CreateTableExpr, DdlRequest, GreptimeRequest, InsertRequests,
|
||||
QueryRequest, RequestHeader,
|
||||
AlterTableExpr, AuthHeader, Basic, CreateTableExpr, DdlRequest, GreptimeRequest,
|
||||
InsertRequests, QueryRequest, RequestHeader,
|
||||
};
|
||||
use arrow_flight::Ticket;
|
||||
use arrow_flight::{FlightData, Ticket};
|
||||
use async_stream::stream;
|
||||
use base64::prelude::BASE64_STANDARD;
|
||||
use base64::Engine;
|
||||
use common_catalog::build_db_string;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_grpc::flight::do_put::DoPutResponse;
|
||||
use common_grpc::flight::{FlightDecoder, FlightMessage};
|
||||
use common_query::Output;
|
||||
use common_recordbatch::error::ExternalSnafu;
|
||||
use common_recordbatch::RecordBatchStreamWrapper;
|
||||
use common_telemetry::error;
|
||||
use common_telemetry::tracing_context::W3cTrace;
|
||||
use futures_util::StreamExt;
|
||||
use common_telemetry::{error, warn};
|
||||
use futures::future;
|
||||
use futures_util::{Stream, StreamExt, TryStreamExt};
|
||||
use prost::Message;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use tonic::metadata::AsciiMetadataKey;
|
||||
use tonic::metadata::{AsciiMetadataKey, AsciiMetadataValue, MetadataMap, MetadataValue};
|
||||
use tonic::transport::Channel;
|
||||
|
||||
use crate::error::{
|
||||
ConvertFlightDataSnafu, Error, FlightGetSnafu, IllegalFlightMessagesSnafu, InvalidAsciiSnafu,
|
||||
ServerSnafu,
|
||||
ConvertFlightDataSnafu, Error, FlightGetSnafu, IllegalFlightMessagesSnafu,
|
||||
InvalidTonicMetadataValueSnafu, ServerSnafu,
|
||||
};
|
||||
use crate::{from_grpc_response, Client, Result};
|
||||
|
||||
type FlightDataStream = Pin<Box<dyn Stream<Item = FlightData> + Send>>;
|
||||
|
||||
type DoPutResponseStream = Pin<Box<dyn Stream<Item = Result<DoPutResponse>>>>;
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct Database {
|
||||
// The "catalog" and "schema" to be used in processing the requests at the server side.
|
||||
@@ -108,16 +121,24 @@ impl Database {
|
||||
self.catalog = catalog.into();
|
||||
}
|
||||
|
||||
pub fn catalog(&self) -> &String {
|
||||
&self.catalog
|
||||
fn catalog_or_default(&self) -> &str {
|
||||
if self.catalog.is_empty() {
|
||||
DEFAULT_CATALOG_NAME
|
||||
} else {
|
||||
&self.catalog
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_schema(&mut self, schema: impl Into<String>) {
|
||||
self.schema = schema.into();
|
||||
}
|
||||
|
||||
pub fn schema(&self) -> &String {
|
||||
&self.schema
|
||||
fn schema_or_default(&self) -> &str {
|
||||
if self.schema.is_empty() {
|
||||
DEFAULT_SCHEMA_NAME
|
||||
} else {
|
||||
&self.schema
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_timezone(&mut self, timezone: impl Into<String>) {
|
||||
@@ -144,33 +165,64 @@ impl Database {
|
||||
|
||||
let mut request = tonic::Request::new(request);
|
||||
let metadata = request.metadata_mut();
|
||||
for (key, value) in hints {
|
||||
let key = AsciiMetadataKey::from_bytes(format!("x-greptime-hint-{}", key).as_bytes())
|
||||
.map_err(|_| {
|
||||
InvalidAsciiSnafu {
|
||||
value: key.to_string(),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
let value = value.parse().map_err(|_| {
|
||||
InvalidAsciiSnafu {
|
||||
value: value.to_string(),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
metadata.insert(key, value);
|
||||
}
|
||||
Self::put_hints(metadata, hints)?;
|
||||
|
||||
let response = client.handle(request).await?.into_inner();
|
||||
from_grpc_response(response)
|
||||
}
|
||||
|
||||
async fn handle(&self, request: Request) -> Result<u32> {
|
||||
fn put_hints(metadata: &mut MetadataMap, hints: &[(&str, &str)]) -> Result<()> {
|
||||
let Some(value) = hints
|
||||
.iter()
|
||||
.map(|(k, v)| format!("{}={}", k, v))
|
||||
.reduce(|a, b| format!("{},{}", a, b))
|
||||
else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let key = AsciiMetadataKey::from_static("x-greptime-hints");
|
||||
let value = AsciiMetadataValue::from_str(&value).context(InvalidTonicMetadataValueSnafu)?;
|
||||
metadata.insert(key, value);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn handle(&self, request: Request) -> Result<u32> {
|
||||
let mut client = make_database_client(&self.client)?.inner;
|
||||
let request = self.to_rpc_request(request);
|
||||
let response = client.handle(request).await?.into_inner();
|
||||
from_grpc_response(response)
|
||||
}
|
||||
|
||||
/// Retry if connection fails, max_retries is the max number of retries, so the total wait time
|
||||
/// is `max_retries * GRPC_CONN_TIMEOUT`
|
||||
pub async fn handle_with_retry(&self, request: Request, max_retries: u32) -> Result<u32> {
|
||||
let mut client = make_database_client(&self.client)?.inner;
|
||||
let mut retries = 0;
|
||||
let request = self.to_rpc_request(request);
|
||||
loop {
|
||||
let raw_response = client.handle(request.clone()).await;
|
||||
match (raw_response, retries < max_retries) {
|
||||
(Ok(resp), _) => return from_grpc_response(resp.into_inner()),
|
||||
(Err(err), true) => {
|
||||
// determine if the error is retryable
|
||||
if is_grpc_retryable(&err) {
|
||||
// retry
|
||||
retries += 1;
|
||||
warn!("Retrying {} times with error = {:?}", retries, err);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
(Err(err), false) => {
|
||||
error!(
|
||||
"Failed to send request to grpc handle after {} retries, error = {:?}",
|
||||
retries, err
|
||||
);
|
||||
return Err(err.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_rpc_request(&self, request: Request) -> GreptimeRequest {
|
||||
GreptimeRequest {
|
||||
@@ -191,39 +243,49 @@ impl Database {
|
||||
where
|
||||
S: AsRef<str>,
|
||||
{
|
||||
self.do_get(Request::Query(QueryRequest {
|
||||
self.sql_with_hint(sql, &[]).await
|
||||
}
|
||||
|
||||
pub async fn sql_with_hint<S>(&self, sql: S, hints: &[(&str, &str)]) -> Result<Output>
|
||||
where
|
||||
S: AsRef<str>,
|
||||
{
|
||||
let request = Request::Query(QueryRequest {
|
||||
query: Some(Query::Sql(sql.as_ref().to_string())),
|
||||
}))
|
||||
.await
|
||||
});
|
||||
self.do_get(request, hints).await
|
||||
}
|
||||
|
||||
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<Output> {
|
||||
self.do_get(Request::Query(QueryRequest {
|
||||
let request = Request::Query(QueryRequest {
|
||||
query: Some(Query::LogicalPlan(logical_plan)),
|
||||
}))
|
||||
.await
|
||||
});
|
||||
self.do_get(request, &[]).await
|
||||
}
|
||||
|
||||
pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> {
|
||||
self.do_get(Request::Ddl(DdlRequest {
|
||||
let request = Request::Ddl(DdlRequest {
|
||||
expr: Some(DdlExpr::CreateTable(expr)),
|
||||
}))
|
||||
.await
|
||||
});
|
||||
self.do_get(request, &[]).await
|
||||
}
|
||||
|
||||
pub async fn alter(&self, expr: AlterTableExpr) -> Result<Output> {
|
||||
self.do_get(Request::Ddl(DdlRequest {
|
||||
let request = Request::Ddl(DdlRequest {
|
||||
expr: Some(DdlExpr::AlterTable(expr)),
|
||||
}))
|
||||
.await
|
||||
});
|
||||
self.do_get(request, &[]).await
|
||||
}
|
||||
|
||||
async fn do_get(&self, request: Request) -> Result<Output> {
|
||||
async fn do_get(&self, request: Request, hints: &[(&str, &str)]) -> Result<Output> {
|
||||
let request = self.to_rpc_request(request);
|
||||
let request = Ticket {
|
||||
ticket: request.encode_to_vec().into(),
|
||||
};
|
||||
|
||||
let mut request = tonic::Request::new(request);
|
||||
Self::put_hints(request.metadata_mut(), hints)?;
|
||||
|
||||
let mut client = self.client.make_flight_client()?;
|
||||
|
||||
let response = client.mut_inner().do_get(request).await.or_else(|e| {
|
||||
@@ -310,6 +372,46 @@ impl Database {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Ingest a stream of [RecordBatch]es that belong to a table, using Arrow Flight's "`DoPut`"
|
||||
/// method. The return value is also a stream, produces [DoPutResponse]s.
|
||||
pub async fn do_put(&self, stream: FlightDataStream) -> Result<DoPutResponseStream> {
|
||||
let mut request = tonic::Request::new(stream);
|
||||
|
||||
if let Some(AuthHeader {
|
||||
auth_scheme: Some(AuthScheme::Basic(Basic { username, password })),
|
||||
}) = &self.ctx.auth_header
|
||||
{
|
||||
let encoded = BASE64_STANDARD.encode(format!("{username}:{password}"));
|
||||
let value =
|
||||
MetadataValue::from_str(&encoded).context(InvalidTonicMetadataValueSnafu)?;
|
||||
request.metadata_mut().insert("x-greptime-auth", value);
|
||||
}
|
||||
|
||||
let db_to_put = if !self.dbname.is_empty() {
|
||||
&self.dbname
|
||||
} else {
|
||||
&build_db_string(self.catalog_or_default(), self.schema_or_default())
|
||||
};
|
||||
request.metadata_mut().insert(
|
||||
"x-greptime-db-name",
|
||||
MetadataValue::from_str(db_to_put).context(InvalidTonicMetadataValueSnafu)?,
|
||||
);
|
||||
|
||||
let mut client = self.client.make_flight_client()?;
|
||||
let response = client.mut_inner().do_put(request).await?;
|
||||
let response = response
|
||||
.into_inner()
|
||||
.map_err(Into::into)
|
||||
.and_then(|x| future::ready(DoPutResponse::try_from(x).context(ConvertFlightDataSnafu)))
|
||||
.boxed();
|
||||
Ok(response)
|
||||
}
|
||||
}
|
||||
|
||||
/// by grpc standard, only `Unavailable` is retryable, see: https://github.com/grpc/grpc/blob/master/doc/statuscodes.md#status-codes-and-their-use-in-grpc
|
||||
pub fn is_grpc_retryable(err: &tonic::Status) -> bool {
|
||||
matches!(err.code(), tonic::Code::Unavailable)
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone)]
|
||||
|
||||
@@ -15,10 +15,11 @@
|
||||
use std::any::Any;
|
||||
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_error::status_code::{convert_tonic_code_to_status_code, StatusCode};
|
||||
use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG};
|
||||
use common_macro::stack_trace_debug;
|
||||
use snafu::{location, Location, Snafu};
|
||||
use tonic::metadata::errors::InvalidMetadataValue;
|
||||
use tonic::{Code, Status};
|
||||
|
||||
#[derive(Snafu)]
|
||||
@@ -109,9 +110,10 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse ascii string: {}", value))]
|
||||
InvalidAscii {
|
||||
value: String,
|
||||
#[snafu(display("Invalid Tonic metadata value"))]
|
||||
InvalidTonicMetadataValue {
|
||||
#[snafu(source)]
|
||||
error: InvalidMetadataValue,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
@@ -134,8 +136,7 @@ impl ErrorExt for Error {
|
||||
| Error::ConvertFlightData { source, .. }
|
||||
| Error::CreateTlsChannel { source, .. } => source.status_code(),
|
||||
Error::IllegalGrpcClientState { .. } => StatusCode::Unexpected,
|
||||
|
||||
Error::InvalidAscii { .. } => StatusCode::InvalidArguments,
|
||||
Error::InvalidTonicMetadataValue { .. } => StatusCode::InvalidArguments,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -152,15 +153,15 @@ impl From<Status> for Error {
|
||||
.and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
|
||||
}
|
||||
|
||||
let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE)
|
||||
.and_then(|s| {
|
||||
if let Ok(code) = s.parse::<u32>() {
|
||||
StatusCode::from_u32(code)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.unwrap_or(StatusCode::Unknown);
|
||||
let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE).and_then(|s| {
|
||||
if let Ok(code) = s.parse::<u32>() {
|
||||
StatusCode::from_u32(code)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
let tonic_code = e.code();
|
||||
let code = code.unwrap_or_else(|| convert_tonic_code_to_status_code(tonic_code));
|
||||
|
||||
let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG)
|
||||
.unwrap_or_else(|| e.message().to_string());
|
||||
@@ -187,9 +188,6 @@ impl Error {
|
||||
} | Self::RegionServer {
|
||||
code: Code::Unavailable,
|
||||
..
|
||||
} | Self::RegionServer {
|
||||
code: Code::Unknown,
|
||||
..
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
mod client;
|
||||
pub mod client_manager;
|
||||
mod database;
|
||||
pub mod database;
|
||||
pub mod error;
|
||||
pub mod flow;
|
||||
pub mod load_balance;
|
||||
|
||||
@@ -201,12 +201,11 @@ impl RegionRequester {
|
||||
.await
|
||||
.map_err(|e| {
|
||||
let code = e.code();
|
||||
let err: error::Error = e.into();
|
||||
// Uses `Error::RegionServer` instead of `Error::Server`
|
||||
error::Error::RegionServer {
|
||||
addr,
|
||||
code,
|
||||
source: BoxedError::new(err),
|
||||
source: BoxedError::new(error::Error::from(e)),
|
||||
location: location!(),
|
||||
}
|
||||
})?
|
||||
|
||||
@@ -68,7 +68,6 @@ query.workspace = true
|
||||
rand.workspace = true
|
||||
regex.workspace = true
|
||||
reqwest.workspace = true
|
||||
rustyline = "10.1"
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
servers.workspace = true
|
||||
|
||||
@@ -15,9 +15,11 @@
|
||||
#![doc = include_str!("../../../../README.md")]
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use cmd::datanode::builder::InstanceBuilder;
|
||||
use cmd::error::{InitTlsProviderSnafu, Result};
|
||||
use cmd::options::GlobalOptions;
|
||||
use cmd::{cli, datanode, flownode, frontend, metasrv, standalone, App};
|
||||
use common_base::Plugins;
|
||||
use common_version::version;
|
||||
use servers::install_ring_crypto_provider;
|
||||
|
||||
@@ -102,10 +104,10 @@ async fn main_body() -> Result<()> {
|
||||
async fn start(cli: Command) -> Result<()> {
|
||||
match cli.subcmd {
|
||||
SubCommand::Datanode(cmd) => {
|
||||
cmd.build(cmd.load_options(&cli.global_options)?)
|
||||
.await?
|
||||
.run()
|
||||
.await
|
||||
let opts = cmd.load_options(&cli.global_options)?;
|
||||
let plugins = Plugins::new();
|
||||
let builder = InstanceBuilder::try_new_with_init(opts, plugins).await?;
|
||||
cmd.build_with(builder).await?.run().await
|
||||
}
|
||||
SubCommand::Flownode(cmd) => {
|
||||
cmd.build(cmd.load_options(&cli.global_options)?)
|
||||
|
||||
@@ -58,7 +58,7 @@ impl App for Instance {
|
||||
false
|
||||
}
|
||||
|
||||
async fn stop(&self) -> Result<()> {
|
||||
async fn stop(&mut self) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,33 +12,27 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
pub mod builder;
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use cache::build_datanode_cache_registry;
|
||||
use catalog::kvbackend::MetaKvBackend;
|
||||
use clap::Parser;
|
||||
use common_base::Plugins;
|
||||
use common_config::Configurable;
|
||||
use common_meta::cache::LayeredCacheRegistryBuilder;
|
||||
use common_telemetry::logging::TracingOptions;
|
||||
use common_telemetry::{info, warn};
|
||||
use common_version::{short_version, version};
|
||||
use common_wal::config::DatanodeWalConfig;
|
||||
use datanode::datanode::{Datanode, DatanodeBuilder};
|
||||
use datanode::service::DatanodeServiceBuilder;
|
||||
use meta_client::{MetaClientOptions, MetaClientType};
|
||||
use servers::Mode;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use datanode::datanode::Datanode;
|
||||
use meta_client::MetaClientOptions;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::datanode::builder::InstanceBuilder;
|
||||
use crate::error::{
|
||||
LoadLayeredConfigSnafu, MetaClientInitSnafu, MissingConfigSnafu, Result, ShutdownDatanodeSnafu,
|
||||
StartDatanodeSnafu,
|
||||
LoadLayeredConfigSnafu, MissingConfigSnafu, Result, ShutdownDatanodeSnafu, StartDatanodeSnafu,
|
||||
};
|
||||
use crate::options::{GlobalOptions, GreptimeOptions};
|
||||
use crate::{log_versions, App};
|
||||
use crate::App;
|
||||
|
||||
pub const APP_NAME: &str = "greptime-datanode";
|
||||
|
||||
@@ -83,7 +77,7 @@ impl App for Instance {
|
||||
self.datanode.start().await.context(StartDatanodeSnafu)
|
||||
}
|
||||
|
||||
async fn stop(&self) -> Result<()> {
|
||||
async fn stop(&mut self) -> Result<()> {
|
||||
self.datanode
|
||||
.shutdown()
|
||||
.await
|
||||
@@ -98,8 +92,8 @@ pub struct Command {
|
||||
}
|
||||
|
||||
impl Command {
|
||||
pub async fn build(&self, opts: DatanodeOptions) -> Result<Instance> {
|
||||
self.subcmd.build(opts).await
|
||||
pub async fn build_with(&self, builder: InstanceBuilder) -> Result<Instance> {
|
||||
self.subcmd.build_with(builder).await
|
||||
}
|
||||
|
||||
pub fn load_options(&self, global_options: &GlobalOptions) -> Result<DatanodeOptions> {
|
||||
@@ -115,9 +109,12 @@ enum SubCommand {
|
||||
}
|
||||
|
||||
impl SubCommand {
|
||||
async fn build(&self, opts: DatanodeOptions) -> Result<Instance> {
|
||||
async fn build_with(&self, builder: InstanceBuilder) -> Result<Instance> {
|
||||
match self {
|
||||
SubCommand::Start(cmd) => cmd.build(opts).await,
|
||||
SubCommand::Start(cmd) => {
|
||||
info!("Building datanode with {:#?}", cmd);
|
||||
builder.build().await
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -263,74 +260,6 @@ impl StartCommand {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn build(&self, opts: DatanodeOptions) -> Result<Instance> {
|
||||
common_runtime::init_global_runtimes(&opts.runtime);
|
||||
|
||||
let guard = common_telemetry::init_global_logging(
|
||||
APP_NAME,
|
||||
&opts.component.logging,
|
||||
&opts.component.tracing,
|
||||
opts.component.node_id.map(|x| x.to_string()),
|
||||
);
|
||||
log_versions(version(), short_version(), APP_NAME);
|
||||
|
||||
info!("Datanode start command: {:#?}", self);
|
||||
info!("Datanode options: {:#?}", opts);
|
||||
|
||||
let plugin_opts = opts.plugins;
|
||||
let mut opts = opts.component;
|
||||
opts.grpc.detect_server_addr();
|
||||
let mut plugins = Plugins::new();
|
||||
plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &opts)
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
|
||||
let member_id = opts
|
||||
.node_id
|
||||
.context(MissingConfigSnafu { msg: "'node_id'" })?;
|
||||
|
||||
let meta_config = opts.meta_client.as_ref().context(MissingConfigSnafu {
|
||||
msg: "'meta_client_options'",
|
||||
})?;
|
||||
|
||||
let meta_client = meta_client::create_meta_client(
|
||||
MetaClientType::Datanode { member_id },
|
||||
meta_config,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.context(MetaClientInitSnafu)?;
|
||||
|
||||
let meta_backend = Arc::new(MetaKvBackend {
|
||||
client: meta_client.clone(),
|
||||
});
|
||||
|
||||
// Builds cache registry for datanode.
|
||||
let layered_cache_registry = Arc::new(
|
||||
LayeredCacheRegistryBuilder::default()
|
||||
.add_cache_registry(build_datanode_cache_registry(meta_backend.clone()))
|
||||
.build(),
|
||||
);
|
||||
|
||||
let mut datanode = DatanodeBuilder::new(opts.clone(), plugins, Mode::Distributed)
|
||||
.with_meta_client(meta_client)
|
||||
.with_kv_backend(meta_backend)
|
||||
.with_cache_registry(layered_cache_registry)
|
||||
.build()
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
|
||||
let services = DatanodeServiceBuilder::new(&opts)
|
||||
.with_default_grpc_server(&datanode.region_server())
|
||||
.enable_http_service()
|
||||
.build()
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
datanode.setup_services(services);
|
||||
|
||||
Ok(Instance::new(datanode, guard))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -352,7 +281,6 @@ mod tests {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let mut file = create_named_temp_file();
|
||||
let toml_str = r#"
|
||||
mode = "distributed"
|
||||
enable_memory_catalog = false
|
||||
node_id = 42
|
||||
|
||||
@@ -379,7 +307,6 @@ mod tests {
|
||||
fn test_read_from_config_file() {
|
||||
let mut file = create_named_temp_file();
|
||||
let toml_str = r#"
|
||||
mode = "distributed"
|
||||
enable_memory_catalog = false
|
||||
node_id = 42
|
||||
|
||||
@@ -545,7 +472,6 @@ mod tests {
|
||||
fn test_config_precedence_order() {
|
||||
let mut file = create_named_temp_file();
|
||||
let toml_str = r#"
|
||||
mode = "distributed"
|
||||
enable_memory_catalog = false
|
||||
node_id = 42
|
||||
rpc_addr = "127.0.0.1:3001"
|
||||
|
||||
137
src/cmd/src/datanode/builder.rs
Normal file
137
src/cmd/src/datanode/builder.rs
Normal file
@@ -0,0 +1,137 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use cache::build_datanode_cache_registry;
|
||||
use catalog::kvbackend::MetaKvBackend;
|
||||
use common_base::Plugins;
|
||||
use common_meta::cache::LayeredCacheRegistryBuilder;
|
||||
use common_telemetry::info;
|
||||
use common_version::{short_version, version};
|
||||
use datanode::datanode::DatanodeBuilder;
|
||||
use datanode::service::DatanodeServiceBuilder;
|
||||
use meta_client::MetaClientType;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::datanode::{DatanodeOptions, Instance, APP_NAME};
|
||||
use crate::error::{MetaClientInitSnafu, MissingConfigSnafu, Result, StartDatanodeSnafu};
|
||||
use crate::log_versions;
|
||||
|
||||
/// Builder for Datanode instance.
|
||||
pub struct InstanceBuilder {
|
||||
guard: Vec<WorkerGuard>,
|
||||
opts: DatanodeOptions,
|
||||
datanode_builder: DatanodeBuilder,
|
||||
}
|
||||
|
||||
impl InstanceBuilder {
|
||||
/// Try to create a new [InstanceBuilder], and do some initialization work like allocating
|
||||
/// runtime resources, setting up global logging and plugins, etc.
|
||||
pub async fn try_new_with_init(
|
||||
mut opts: DatanodeOptions,
|
||||
mut plugins: Plugins,
|
||||
) -> Result<Self> {
|
||||
let guard = Self::init(&mut opts, &mut plugins).await?;
|
||||
|
||||
let datanode_builder = Self::datanode_builder(&opts, plugins).await?;
|
||||
|
||||
Ok(Self {
|
||||
guard,
|
||||
opts,
|
||||
datanode_builder,
|
||||
})
|
||||
}
|
||||
|
||||
async fn init(opts: &mut DatanodeOptions, plugins: &mut Plugins) -> Result<Vec<WorkerGuard>> {
|
||||
common_runtime::init_global_runtimes(&opts.runtime);
|
||||
|
||||
let dn_opts = &mut opts.component;
|
||||
let guard = common_telemetry::init_global_logging(
|
||||
APP_NAME,
|
||||
&dn_opts.logging,
|
||||
&dn_opts.tracing,
|
||||
dn_opts.node_id.map(|x| x.to_string()),
|
||||
);
|
||||
|
||||
log_versions(version(), short_version(), APP_NAME);
|
||||
|
||||
plugins::setup_datanode_plugins(plugins, &opts.plugins, dn_opts)
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
|
||||
dn_opts.grpc.detect_server_addr();
|
||||
|
||||
info!("Initialized Datanode instance with {:#?}", opts);
|
||||
Ok(guard)
|
||||
}
|
||||
|
||||
async fn datanode_builder(opts: &DatanodeOptions, plugins: Plugins) -> Result<DatanodeBuilder> {
|
||||
let dn_opts = &opts.component;
|
||||
|
||||
let member_id = dn_opts
|
||||
.node_id
|
||||
.context(MissingConfigSnafu { msg: "'node_id'" })?;
|
||||
let meta_client_options = dn_opts.meta_client.as_ref().context(MissingConfigSnafu {
|
||||
msg: "meta client options",
|
||||
})?;
|
||||
let client = meta_client::create_meta_client(
|
||||
MetaClientType::Datanode { member_id },
|
||||
meta_client_options,
|
||||
Some(&plugins),
|
||||
)
|
||||
.await
|
||||
.context(MetaClientInitSnafu)?;
|
||||
|
||||
let backend = Arc::new(MetaKvBackend {
|
||||
client: client.clone(),
|
||||
});
|
||||
let mut builder = DatanodeBuilder::new(dn_opts.clone(), plugins.clone(), backend.clone());
|
||||
|
||||
let registry = Arc::new(
|
||||
LayeredCacheRegistryBuilder::default()
|
||||
.add_cache_registry(build_datanode_cache_registry(backend))
|
||||
.build(),
|
||||
);
|
||||
builder
|
||||
.with_cache_registry(registry)
|
||||
.with_meta_client(client.clone());
|
||||
Ok(builder)
|
||||
}
|
||||
|
||||
/// Get the mutable builder for Datanode, in case you want to change some fields before the
|
||||
/// final construction.
|
||||
pub fn mut_datanode_builder(&mut self) -> &mut DatanodeBuilder {
|
||||
&mut self.datanode_builder
|
||||
}
|
||||
|
||||
/// Try to build the Datanode instance.
|
||||
pub async fn build(self) -> Result<Instance> {
|
||||
let mut datanode = self
|
||||
.datanode_builder
|
||||
.build()
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
|
||||
let services = DatanodeServiceBuilder::new(&self.opts.component)
|
||||
.with_default_grpc_server(&datanode.region_server())
|
||||
.enable_http_service()
|
||||
.build()
|
||||
.context(StartDatanodeSnafu)?;
|
||||
datanode.setup_services(services);
|
||||
|
||||
Ok(Instance::new(datanode, self.guard))
|
||||
}
|
||||
}
|
||||
@@ -17,7 +17,6 @@ use std::any::Any;
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_macro::stack_trace_debug;
|
||||
use rustyline::error::ReadlineError;
|
||||
use snafu::{Location, Snafu};
|
||||
|
||||
#[derive(Snafu)]
|
||||
@@ -178,55 +177,6 @@ pub enum Error {
|
||||
source: meta_srv::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid REPL command: {reason}"))]
|
||||
InvalidReplCommand { reason: String },
|
||||
|
||||
#[snafu(display("Cannot create REPL"))]
|
||||
ReplCreation {
|
||||
#[snafu(source)]
|
||||
error: ReadlineError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Error reading command"))]
|
||||
Readline {
|
||||
#[snafu(source)]
|
||||
error: ReadlineError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to request database, sql: {sql}"))]
|
||||
RequestDatabase {
|
||||
sql: String,
|
||||
#[snafu(source)]
|
||||
source: client::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to collect RecordBatches"))]
|
||||
CollectRecordBatches {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_recordbatch::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to pretty print Recordbatches"))]
|
||||
PrettyPrintRecordBatches {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_recordbatch::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to start Meta client"))]
|
||||
StartMetaClient {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: meta_client::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse SQL: {}", sql))]
|
||||
ParseSql {
|
||||
sql: String,
|
||||
@@ -242,13 +192,6 @@ pub enum Error {
|
||||
source: query::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to encode logical plan in substrait"))]
|
||||
SubstraitEncodeLogicalPlan {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: substrait::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to load layered config"))]
|
||||
LoadLayeredConfig {
|
||||
#[snafu(source(from(common_config::error::Error, Box::new)))]
|
||||
@@ -385,7 +328,6 @@ impl ErrorExt for Error {
|
||||
Error::MissingConfig { .. }
|
||||
| Error::LoadLayeredConfig { .. }
|
||||
| Error::IllegalConfig { .. }
|
||||
| Error::InvalidReplCommand { .. }
|
||||
| Error::InitTimezone { .. }
|
||||
| Error::ConnectEtcd { .. }
|
||||
| Error::CreateDir { .. }
|
||||
@@ -395,17 +337,10 @@ impl ErrorExt for Error {
|
||||
| Error::StopProcedureManager { source, .. } => source.status_code(),
|
||||
Error::BuildWalOptionsAllocator { source, .. }
|
||||
| Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
|
||||
Error::ReplCreation { .. } | Error::Readline { .. } | Error::HttpQuerySql { .. } => {
|
||||
StatusCode::Internal
|
||||
}
|
||||
Error::RequestDatabase { source, .. } => source.status_code(),
|
||||
Error::CollectRecordBatches { source, .. }
|
||||
| Error::PrettyPrintRecordBatches { source, .. } => source.status_code(),
|
||||
Error::StartMetaClient { source, .. } => source.status_code(),
|
||||
Error::HttpQuerySql { .. } => StatusCode::Internal,
|
||||
Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => {
|
||||
source.status_code()
|
||||
}
|
||||
Error::SubstraitEncodeLogicalPlan { source, .. } => source.status_code(),
|
||||
|
||||
Error::SerdeJson { .. }
|
||||
| Error::FileIo { .. }
|
||||
|
||||
@@ -32,7 +32,10 @@ use common_meta::key::TableMetadataManager;
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::logging::TracingOptions;
|
||||
use common_version::{short_version, version};
|
||||
use flow::{FlownodeBuilder, FlownodeInstance, FrontendInvoker};
|
||||
use flow::{
|
||||
get_flow_auth_options, FlownodeBuilder, FlownodeInstance, FlownodeServiceBuilder,
|
||||
FrontendClient, FrontendInvoker,
|
||||
};
|
||||
use meta_client::{MetaClientOptions, MetaClientType};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
@@ -80,10 +83,14 @@ impl App for Instance {
|
||||
}
|
||||
|
||||
async fn start(&mut self) -> Result<()> {
|
||||
plugins::start_flownode_plugins(self.flownode.flow_engine().plugins().clone())
|
||||
.await
|
||||
.context(StartFlownodeSnafu)?;
|
||||
|
||||
self.flownode.start().await.context(StartFlownodeSnafu)
|
||||
}
|
||||
|
||||
async fn stop(&self) -> Result<()> {
|
||||
async fn stop(&mut self) -> Result<()> {
|
||||
self.flownode
|
||||
.shutdown()
|
||||
.await
|
||||
@@ -149,6 +156,9 @@ struct StartCommand {
|
||||
/// HTTP request timeout in seconds.
|
||||
#[clap(long)]
|
||||
http_timeout: Option<u64>,
|
||||
/// User Provider cfg, for auth, currently only support static user provider
|
||||
#[clap(long)]
|
||||
user_provider: Option<String>,
|
||||
}
|
||||
|
||||
impl StartCommand {
|
||||
@@ -212,6 +222,10 @@ impl StartCommand {
|
||||
opts.http.timeout = Duration::from_secs(http_timeout);
|
||||
}
|
||||
|
||||
if let Some(user_provider) = &self.user_provider {
|
||||
opts.user_provider = Some(user_provider.clone());
|
||||
}
|
||||
|
||||
ensure!(
|
||||
opts.node_id.is_some(),
|
||||
MissingConfigSnafu {
|
||||
@@ -236,9 +250,15 @@ impl StartCommand {
|
||||
info!("Flownode start command: {:#?}", self);
|
||||
info!("Flownode options: {:#?}", opts);
|
||||
|
||||
let plugin_opts = opts.plugins;
|
||||
let mut opts = opts.component;
|
||||
opts.grpc.detect_server_addr();
|
||||
|
||||
let mut plugins = Plugins::new();
|
||||
plugins::setup_flownode_plugins(&mut plugins, &plugin_opts, &opts)
|
||||
.await
|
||||
.context(StartFlownodeSnafu)?;
|
||||
|
||||
let member_id = opts
|
||||
.node_id
|
||||
.context(MissingConfigSnafu { msg: "'node_id'" })?;
|
||||
@@ -313,16 +333,27 @@ impl StartCommand {
|
||||
);
|
||||
|
||||
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
|
||||
let flow_auth_header = get_flow_auth_options(&opts).context(StartFlownodeSnafu)?;
|
||||
let frontend_client =
|
||||
FrontendClient::from_meta_client(meta_client.clone(), flow_auth_header);
|
||||
let flownode_builder = FlownodeBuilder::new(
|
||||
opts,
|
||||
Plugins::new(),
|
||||
opts.clone(),
|
||||
plugins,
|
||||
table_metadata_manager,
|
||||
catalog_manager.clone(),
|
||||
flow_metadata_manager,
|
||||
Arc::new(frontend_client),
|
||||
)
|
||||
.with_heartbeat_task(heartbeat_task);
|
||||
|
||||
let flownode = flownode_builder.build().await.context(StartFlownodeSnafu)?;
|
||||
let mut flownode = flownode_builder.build().await.context(StartFlownodeSnafu)?;
|
||||
let services = FlownodeServiceBuilder::new(&opts)
|
||||
.with_grpc_server(flownode.flownode_server().clone())
|
||||
.enable_http_service()
|
||||
.build()
|
||||
.context(StartFlownodeSnafu)?;
|
||||
flownode.setup_services(services);
|
||||
let flownode = flownode;
|
||||
|
||||
// flownode's frontend to datanode need not timeout.
|
||||
// Some queries are expected to take long time.
|
||||
@@ -333,7 +364,7 @@ impl StartCommand {
|
||||
let client = Arc::new(NodeClients::new(channel_config));
|
||||
|
||||
let invoker = FrontendInvoker::build_from(
|
||||
flownode.flow_worker_manager().clone(),
|
||||
flownode.flow_engine().streaming_engine(),
|
||||
catalog_manager.clone(),
|
||||
cached_meta_backend.clone(),
|
||||
layered_cache_registry.clone(),
|
||||
@@ -343,7 +374,9 @@ impl StartCommand {
|
||||
.await
|
||||
.context(StartFlownodeSnafu)?;
|
||||
flownode
|
||||
.flow_worker_manager()
|
||||
.flow_engine()
|
||||
.streaming_engine()
|
||||
// TODO(discord9): refactor and avoid circular reference
|
||||
.set_frontend_invoker(invoker)
|
||||
.await;
|
||||
|
||||
|
||||
@@ -89,7 +89,7 @@ impl App for Instance {
|
||||
.context(error::StartFrontendSnafu)
|
||||
}
|
||||
|
||||
async fn stop(&self) -> Result<()> {
|
||||
async fn stop(&mut self) -> Result<()> {
|
||||
self.frontend
|
||||
.shutdown()
|
||||
.await
|
||||
@@ -382,7 +382,6 @@ impl StartCommand {
|
||||
|
||||
let servers = Services::new(opts, instance.clone(), plugins)
|
||||
.build()
|
||||
.await
|
||||
.context(error::StartFrontendSnafu)?;
|
||||
|
||||
let frontend = Frontend {
|
||||
@@ -448,8 +447,6 @@ mod tests {
|
||||
fn test_read_from_config_file() {
|
||||
let mut file = create_named_temp_file();
|
||||
let toml_str = r#"
|
||||
mode = "distributed"
|
||||
|
||||
[http]
|
||||
addr = "127.0.0.1:4000"
|
||||
timeout = "0s"
|
||||
@@ -538,8 +535,6 @@ mod tests {
|
||||
fn test_config_precedence_order() {
|
||||
let mut file = create_named_temp_file();
|
||||
let toml_str = r#"
|
||||
mode = "distributed"
|
||||
|
||||
[http]
|
||||
addr = "127.0.0.1:4000"
|
||||
|
||||
|
||||
@@ -74,7 +74,7 @@ pub trait App: Send {
|
||||
true
|
||||
}
|
||||
|
||||
async fn stop(&self) -> Result<()>;
|
||||
async fn stop(&mut self) -> Result<()>;
|
||||
|
||||
async fn run(&mut self) -> Result<()> {
|
||||
info!("Starting app: {}", self.name());
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
@@ -68,7 +69,7 @@ impl App for Instance {
|
||||
self.instance.start().await.context(StartMetaServerSnafu)
|
||||
}
|
||||
|
||||
async fn stop(&self) -> Result<()> {
|
||||
async fn stop(&mut self) -> Result<()> {
|
||||
self.instance
|
||||
.shutdown()
|
||||
.await
|
||||
@@ -131,8 +132,8 @@ impl SubCommand {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Parser)]
|
||||
struct StartCommand {
|
||||
#[derive(Default, Parser)]
|
||||
pub struct StartCommand {
|
||||
/// The address to bind the gRPC server.
|
||||
#[clap(long, alias = "bind-addr")]
|
||||
rpc_bind_addr: Option<String>,
|
||||
@@ -171,8 +172,29 @@ struct StartCommand {
|
||||
backend: Option<BackendImpl>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for StartCommand {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("StartCommand")
|
||||
.field("rpc_bind_addr", &self.rpc_bind_addr)
|
||||
.field("rpc_server_addr", &self.rpc_server_addr)
|
||||
.field("store_addrs", &self.sanitize_store_addrs())
|
||||
.field("config_file", &self.config_file)
|
||||
.field("selector", &self.selector)
|
||||
.field("use_memory_store", &self.use_memory_store)
|
||||
.field("enable_region_failover", &self.enable_region_failover)
|
||||
.field("http_addr", &self.http_addr)
|
||||
.field("http_timeout", &self.http_timeout)
|
||||
.field("env_prefix", &self.env_prefix)
|
||||
.field("data_home", &self.data_home)
|
||||
.field("store_key_prefix", &self.store_key_prefix)
|
||||
.field("max_txn_ops", &self.max_txn_ops)
|
||||
.field("backend", &self.backend)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl StartCommand {
|
||||
fn load_options(&self, global_options: &GlobalOptions) -> Result<MetasrvOptions> {
|
||||
pub fn load_options(&self, global_options: &GlobalOptions) -> Result<MetasrvOptions> {
|
||||
let mut opts = MetasrvOptions::load_layered_options(
|
||||
self.config_file.as_deref(),
|
||||
self.env_prefix.as_ref(),
|
||||
@@ -184,6 +206,15 @@ impl StartCommand {
|
||||
Ok(opts)
|
||||
}
|
||||
|
||||
fn sanitize_store_addrs(&self) -> Option<Vec<String>> {
|
||||
self.store_addrs.as_ref().map(|addrs| {
|
||||
addrs
|
||||
.iter()
|
||||
.map(|addr| common_meta::kv_backend::util::sanitize_connection_string(addr))
|
||||
.collect()
|
||||
})
|
||||
}
|
||||
|
||||
// The precedence order is: cli > config file > environment variables > default values.
|
||||
fn merge_with_cli_options(
|
||||
&self,
|
||||
@@ -261,7 +292,7 @@ impl StartCommand {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn build(&self, opts: MetasrvOptions) -> Result<Instance> {
|
||||
pub async fn build(&self, opts: MetasrvOptions) -> Result<Instance> {
|
||||
common_runtime::init_global_runtimes(&opts.runtime);
|
||||
|
||||
let guard = common_telemetry::init_global_logging(
|
||||
|
||||
@@ -55,7 +55,10 @@ use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, Sto
|
||||
use datanode::datanode::{Datanode, DatanodeBuilder};
|
||||
use datanode::region_server::RegionServer;
|
||||
use file_engine::config::EngineConfig as FileEngineConfig;
|
||||
use flow::{FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendInvoker};
|
||||
use flow::{
|
||||
FlowConfig, FlownodeBuilder, FlownodeInstance, FlownodeOptions, FrontendClient,
|
||||
FrontendInvoker, GrpcQueryHandlerWithBoxedError, StreamingEngine,
|
||||
};
|
||||
use frontend::frontend::{Frontend, FrontendOptions};
|
||||
use frontend::instance::builder::FrontendBuilder;
|
||||
use frontend::instance::{Instance as FeInstance, StandaloneDatanodeManager};
|
||||
@@ -72,12 +75,11 @@ use servers::export_metrics::{ExportMetricsOption, ExportMetricsTask};
|
||||
use servers::grpc::GrpcOptions;
|
||||
use servers::http::HttpOptions;
|
||||
use servers::tls::{TlsMode, TlsOption};
|
||||
use servers::Mode;
|
||||
use snafu::ResultExt;
|
||||
use tokio::sync::{broadcast, RwLock};
|
||||
use tokio::sync::RwLock;
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::error::{Result, StartFlownodeSnafu};
|
||||
use crate::options::{GlobalOptions, GreptimeOptions};
|
||||
use crate::{error, log_versions, App};
|
||||
|
||||
@@ -244,9 +246,7 @@ impl StandaloneOptions {
|
||||
pub struct Instance {
|
||||
datanode: Datanode,
|
||||
frontend: Frontend,
|
||||
// TODO(discord9): wrapped it in flownode instance instead
|
||||
flow_worker_manager: Arc<FlowWorkerManager>,
|
||||
flow_shutdown: broadcast::Sender<()>,
|
||||
flownode: FlownodeInstance,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
wal_options_allocator: WalOptionsAllocatorRef,
|
||||
// Keep the logging guard to prevent the worker from being dropped.
|
||||
@@ -255,8 +255,8 @@ pub struct Instance {
|
||||
|
||||
impl Instance {
|
||||
/// Find the socket addr of a server by its `name`.
|
||||
pub async fn server_addr(&self, name: &str) -> Option<SocketAddr> {
|
||||
self.frontend.server_handlers().addr(name).await
|
||||
pub fn server_addr(&self, name: &str) -> Option<SocketAddr> {
|
||||
self.frontend.server_handlers().addr(name)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -288,14 +288,12 @@ impl App for Instance {
|
||||
.await
|
||||
.context(error::StartFrontendSnafu)?;
|
||||
|
||||
self.flow_worker_manager
|
||||
.clone()
|
||||
.run_background(Some(self.flow_shutdown.subscribe()));
|
||||
self.flownode.start().await.context(StartFlownodeSnafu)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn stop(&self) -> Result<()> {
|
||||
async fn stop(&mut self) -> Result<()> {
|
||||
self.frontend
|
||||
.shutdown()
|
||||
.await
|
||||
@@ -311,14 +309,9 @@ impl App for Instance {
|
||||
.await
|
||||
.context(error::ShutdownDatanodeSnafu)?;
|
||||
|
||||
self.flow_shutdown
|
||||
.send(())
|
||||
.map_err(|_e| {
|
||||
flow::error::InternalSnafu {
|
||||
reason: "Failed to send shutdown signal to flow worker manager, all receiver end already closed".to_string(),
|
||||
}
|
||||
.build()
|
||||
})
|
||||
self.flownode
|
||||
.shutdown()
|
||||
.await
|
||||
.context(error::ShutdownFlownodeSnafu)?;
|
||||
|
||||
info!("Datanode instance stopped.");
|
||||
@@ -503,12 +496,9 @@ impl StartCommand {
|
||||
.build(),
|
||||
);
|
||||
|
||||
let datanode = DatanodeBuilder::new(dn_opts, plugins.clone(), Mode::Standalone)
|
||||
.with_kv_backend(kv_backend.clone())
|
||||
.with_cache_registry(layered_cache_registry.clone())
|
||||
.build()
|
||||
.await
|
||||
.context(error::StartDatanodeSnafu)?;
|
||||
let mut builder = DatanodeBuilder::new(dn_opts, plugins.clone(), kv_backend.clone());
|
||||
builder.with_cache_registry(layered_cache_registry.clone());
|
||||
let datanode = builder.build().await.context(error::StartDatanodeSnafu)?;
|
||||
|
||||
let information_extension = Arc::new(StandaloneInformationExtension::new(
|
||||
datanode.region_server(),
|
||||
@@ -529,32 +519,36 @@ impl StartCommand {
|
||||
flow: opts.flow.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// for standalone not use grpc, but get a handler to frontend grpc client without
|
||||
// actually make a connection
|
||||
let (frontend_client, frontend_instance_handler) =
|
||||
FrontendClient::from_empty_grpc_handler();
|
||||
let flow_builder = FlownodeBuilder::new(
|
||||
flownode_options,
|
||||
plugins.clone(),
|
||||
table_metadata_manager.clone(),
|
||||
catalog_manager.clone(),
|
||||
flow_metadata_manager.clone(),
|
||||
Arc::new(frontend_client.clone()),
|
||||
);
|
||||
let flownode = Arc::new(
|
||||
flow_builder
|
||||
.build()
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::OtherSnafu)?,
|
||||
);
|
||||
let flownode = flow_builder
|
||||
.build()
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::OtherSnafu)?;
|
||||
|
||||
// set the ref to query for the local flow state
|
||||
{
|
||||
let flow_worker_manager = flownode.flow_worker_manager();
|
||||
let flow_streaming_engine = flownode.flow_engine().streaming_engine();
|
||||
information_extension
|
||||
.set_flow_worker_manager(flow_worker_manager.clone())
|
||||
.set_flow_streaming_engine(flow_streaming_engine)
|
||||
.await;
|
||||
}
|
||||
|
||||
let node_manager = Arc::new(StandaloneDatanodeManager {
|
||||
region_server: datanode.region_server(),
|
||||
flow_server: flownode.flow_worker_manager(),
|
||||
flow_server: flownode.flow_engine(),
|
||||
});
|
||||
|
||||
let table_id_sequence = Arc::new(
|
||||
@@ -608,10 +602,19 @@ impl StartCommand {
|
||||
.context(error::StartFrontendSnafu)?;
|
||||
let fe_instance = Arc::new(fe_instance);
|
||||
|
||||
let flow_worker_manager = flownode.flow_worker_manager();
|
||||
// set the frontend client for flownode
|
||||
let grpc_handler = fe_instance.clone() as Arc<dyn GrpcQueryHandlerWithBoxedError>;
|
||||
let weak_grpc_handler = Arc::downgrade(&grpc_handler);
|
||||
frontend_instance_handler
|
||||
.lock()
|
||||
.unwrap()
|
||||
.replace(weak_grpc_handler);
|
||||
|
||||
// set the frontend invoker for flownode
|
||||
let flow_streaming_engine = flownode.flow_engine().streaming_engine();
|
||||
// flow server need to be able to use frontend to write insert requests back
|
||||
let invoker = FrontendInvoker::build_from(
|
||||
flow_worker_manager.clone(),
|
||||
flow_streaming_engine.clone(),
|
||||
catalog_manager.clone(),
|
||||
kv_backend.clone(),
|
||||
layered_cache_registry.clone(),
|
||||
@@ -620,16 +623,13 @@ impl StartCommand {
|
||||
)
|
||||
.await
|
||||
.context(error::StartFlownodeSnafu)?;
|
||||
flow_worker_manager.set_frontend_invoker(invoker).await;
|
||||
|
||||
let (tx, _rx) = broadcast::channel(1);
|
||||
flow_streaming_engine.set_frontend_invoker(invoker).await;
|
||||
|
||||
let export_metrics_task = ExportMetricsTask::try_new(&opts.export_metrics, Some(&plugins))
|
||||
.context(error::ServersSnafu)?;
|
||||
|
||||
let servers = Services::new(opts, fe_instance.clone(), plugins)
|
||||
.build()
|
||||
.await
|
||||
.context(error::StartFrontendSnafu)?;
|
||||
|
||||
let frontend = Frontend {
|
||||
@@ -642,8 +642,7 @@ impl StartCommand {
|
||||
Ok(Instance {
|
||||
datanode,
|
||||
frontend,
|
||||
flow_worker_manager,
|
||||
flow_shutdown: tx,
|
||||
flownode,
|
||||
procedure_manager,
|
||||
wal_options_allocator,
|
||||
_guard: guard,
|
||||
@@ -699,7 +698,7 @@ pub struct StandaloneInformationExtension {
|
||||
region_server: RegionServer,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
start_time_ms: u64,
|
||||
flow_worker_manager: RwLock<Option<Arc<FlowWorkerManager>>>,
|
||||
flow_streaming_engine: RwLock<Option<Arc<StreamingEngine>>>,
|
||||
}
|
||||
|
||||
impl StandaloneInformationExtension {
|
||||
@@ -708,14 +707,14 @@ impl StandaloneInformationExtension {
|
||||
region_server,
|
||||
procedure_manager,
|
||||
start_time_ms: common_time::util::current_time_millis() as u64,
|
||||
flow_worker_manager: RwLock::new(None),
|
||||
flow_streaming_engine: RwLock::new(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the flow worker manager for the standalone instance.
|
||||
pub async fn set_flow_worker_manager(&self, flow_worker_manager: Arc<FlowWorkerManager>) {
|
||||
let mut guard = self.flow_worker_manager.write().await;
|
||||
*guard = Some(flow_worker_manager);
|
||||
/// Set the flow streaming engine for the standalone instance.
|
||||
pub async fn set_flow_streaming_engine(&self, flow_streaming_engine: Arc<StreamingEngine>) {
|
||||
let mut guard = self.flow_streaming_engine.write().await;
|
||||
*guard = Some(flow_streaming_engine);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -784,6 +783,8 @@ impl InformationExtension for StandaloneInformationExtension {
|
||||
sst_size: region_stat.sst_size,
|
||||
index_size: region_stat.index_size,
|
||||
region_manifest: region_stat.manifest.into(),
|
||||
data_topic_latest_entry_id: region_stat.data_topic_latest_entry_id,
|
||||
metadata_topic_latest_entry_id: region_stat.metadata_topic_latest_entry_id,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
@@ -792,7 +793,7 @@ impl InformationExtension for StandaloneInformationExtension {
|
||||
|
||||
async fn flow_stats(&self) -> std::result::Result<Option<FlowStat>, Self::Error> {
|
||||
Ok(Some(
|
||||
self.flow_worker_manager
|
||||
self.flow_streaming_engine
|
||||
.read()
|
||||
.await
|
||||
.as_ref()
|
||||
@@ -852,8 +853,6 @@ mod tests {
|
||||
fn test_read_from_config_file() {
|
||||
let mut file = create_named_temp_file();
|
||||
let toml_str = r#"
|
||||
mode = "distributed"
|
||||
|
||||
enable_memory_catalog = true
|
||||
|
||||
[wal]
|
||||
@@ -984,8 +983,6 @@ mod tests {
|
||||
fn test_config_precedence_order() {
|
||||
let mut file = create_named_temp_file();
|
||||
let toml_str = r#"
|
||||
mode = "standalone"
|
||||
|
||||
[http]
|
||||
addr = "127.0.0.1:4000"
|
||||
|
||||
|
||||
@@ -1,148 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
mod tests {
|
||||
use std::path::PathBuf;
|
||||
use std::process::{Command, Stdio};
|
||||
use std::time::Duration;
|
||||
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use rexpect::session::PtyReplSession;
|
||||
|
||||
struct Repl {
|
||||
repl: PtyReplSession,
|
||||
}
|
||||
|
||||
impl Repl {
|
||||
fn send_line(&mut self, line: &str) {
|
||||
let _ = self.repl.send_line(line).unwrap();
|
||||
|
||||
// read a line to consume the prompt
|
||||
let _ = self.read_line();
|
||||
}
|
||||
|
||||
fn read_line(&mut self) -> String {
|
||||
self.repl.read_line().unwrap()
|
||||
}
|
||||
|
||||
fn read_expect(&mut self, expect: &str) {
|
||||
assert_eq!(self.read_line(), expect);
|
||||
}
|
||||
|
||||
fn read_contains(&mut self, pat: &str) {
|
||||
assert!(self.read_line().contains(pat));
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(LFC): Un-ignore this REPL test.
|
||||
// Ignore this REPL test because some logical plans like create database are not supported yet in Datanode.
|
||||
#[ignore]
|
||||
#[test]
|
||||
fn test_repl() {
|
||||
let data_home = create_temp_dir("data");
|
||||
let wal_dir = create_temp_dir("wal");
|
||||
|
||||
let mut bin_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
bin_path.push("../../target/debug");
|
||||
let bin_path = bin_path.to_str().unwrap();
|
||||
|
||||
let mut datanode = Command::new("./greptime")
|
||||
.current_dir(bin_path)
|
||||
.args([
|
||||
"datanode",
|
||||
"start",
|
||||
"--rpc-bind-addr=0.0.0.0:4321",
|
||||
"--node-id=1",
|
||||
&format!("--data-home={}", data_home.path().display()),
|
||||
&format!("--wal-dir={}", wal_dir.path().display()),
|
||||
])
|
||||
.stdout(Stdio::null())
|
||||
.spawn()
|
||||
.unwrap();
|
||||
|
||||
// wait for Datanode actually started
|
||||
std::thread::sleep(Duration::from_secs(3));
|
||||
|
||||
let mut repl_cmd = Command::new("./greptime");
|
||||
let _ = repl_cmd.current_dir(bin_path).args([
|
||||
"--log-level=off",
|
||||
"cli",
|
||||
"attach",
|
||||
"--grpc-bind-addr=0.0.0.0:4321",
|
||||
// history commands can sneaky into stdout and mess up our tests, so disable it
|
||||
"--disable-helper",
|
||||
]);
|
||||
let pty_session = rexpect::session::spawn_command(repl_cmd, Some(5_000)).unwrap();
|
||||
let repl = PtyReplSession {
|
||||
prompt: "> ".to_string(),
|
||||
pty_session,
|
||||
quit_command: None,
|
||||
echo_on: false,
|
||||
};
|
||||
let repl = &mut Repl { repl };
|
||||
repl.read_expect("Ready for commands. (Hint: try 'help')");
|
||||
|
||||
test_create_database(repl);
|
||||
|
||||
test_use_database(repl);
|
||||
|
||||
test_create_table(repl);
|
||||
|
||||
test_insert(repl);
|
||||
|
||||
test_select(repl);
|
||||
|
||||
datanode.kill().unwrap();
|
||||
let _ = datanode.wait().unwrap();
|
||||
}
|
||||
|
||||
fn test_create_database(repl: &mut Repl) {
|
||||
repl.send_line("CREATE DATABASE db;");
|
||||
repl.read_expect("Affected Rows: 1");
|
||||
repl.read_contains("Cost");
|
||||
}
|
||||
|
||||
fn test_use_database(repl: &mut Repl) {
|
||||
repl.send_line("USE db");
|
||||
repl.read_expect("Total Rows: 0");
|
||||
repl.read_contains("Cost");
|
||||
repl.read_expect("Using db");
|
||||
}
|
||||
|
||||
fn test_create_table(repl: &mut Repl) {
|
||||
repl.send_line("CREATE TABLE t(x STRING, ts TIMESTAMP TIME INDEX);");
|
||||
repl.read_expect("Affected Rows: 0");
|
||||
repl.read_contains("Cost");
|
||||
}
|
||||
|
||||
fn test_insert(repl: &mut Repl) {
|
||||
repl.send_line("INSERT INTO t(x, ts) VALUES ('hello', 1676895812239);");
|
||||
repl.read_expect("Affected Rows: 1");
|
||||
repl.read_contains("Cost");
|
||||
}
|
||||
|
||||
fn test_select(repl: &mut Repl) {
|
||||
repl.send_line("SELECT * FROM t;");
|
||||
|
||||
repl.read_expect("+-------+-------------------------+");
|
||||
repl.read_expect("| x | ts |");
|
||||
repl.read_expect("+-------+-------------------------+");
|
||||
repl.read_expect("| hello | 2023-02-20T12:23:32.239 |");
|
||||
repl.read_expect("+-------+-------------------------+");
|
||||
repl.read_expect("Total Rows: 1");
|
||||
|
||||
repl.read_contains("Cost");
|
||||
}
|
||||
}
|
||||
@@ -74,6 +74,7 @@ fn test_load_datanode_example_config() {
|
||||
RegionEngineConfig::File(FileEngineConfig {}),
|
||||
RegionEngineConfig::Metric(MetricEngineConfig {
|
||||
experimental_sparse_primary_key_encoding: false,
|
||||
flush_metadata_region_interval: Duration::from_secs(30),
|
||||
}),
|
||||
],
|
||||
logging: LoggingOptions {
|
||||
@@ -216,6 +217,7 @@ fn test_load_standalone_example_config() {
|
||||
RegionEngineConfig::File(FileEngineConfig {}),
|
||||
RegionEngineConfig::Metric(MetricEngineConfig {
|
||||
experimental_sparse_primary_key_encoding: false,
|
||||
flush_metadata_region_interval: Duration::from_secs(30),
|
||||
}),
|
||||
],
|
||||
storage: StorageConfig {
|
||||
|
||||
@@ -31,7 +31,8 @@ impl Plugins {
|
||||
}
|
||||
|
||||
pub fn insert<T: 'static + Send + Sync>(&self, value: T) {
|
||||
let _ = self.write().insert(value);
|
||||
let last = self.write().insert(value);
|
||||
assert!(last.is_none(), "each type of plugins must be one and only");
|
||||
}
|
||||
|
||||
pub fn get<T: 'static + Send + Sync + Clone>(&self) -> Option<T> {
|
||||
@@ -137,4 +138,12 @@ mod tests {
|
||||
assert_eq!(plugins.len(), 2);
|
||||
assert!(!plugins.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "each type of plugins must be one and only")]
|
||||
fn test_plugin_uniqueness() {
|
||||
let plugins = Plugins::new();
|
||||
plugins.insert(1i32);
|
||||
plugins.insert(2i32);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -111,11 +111,9 @@ mod tests {
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::*;
|
||||
use crate::Mode;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[derive(Debug, Serialize, Deserialize, Default)]
|
||||
struct TestDatanodeConfig {
|
||||
mode: Mode,
|
||||
node_id: Option<u64>,
|
||||
logging: LoggingOptions,
|
||||
meta_client: Option<MetaClientOptions>,
|
||||
@@ -123,19 +121,6 @@ mod tests {
|
||||
storage: StorageConfig,
|
||||
}
|
||||
|
||||
impl Default for TestDatanodeConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
mode: Mode::Distributed,
|
||||
node_id: None,
|
||||
logging: LoggingOptions::default(),
|
||||
meta_client: None,
|
||||
wal: DatanodeWalConfig::default(),
|
||||
storage: StorageConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Configurable for TestDatanodeConfig {
|
||||
fn env_list_keys() -> Option<&'static [&'static str]> {
|
||||
Some(&["meta_client.metasrv_addrs"])
|
||||
@@ -146,7 +131,6 @@ mod tests {
|
||||
fn test_load_layered_options() {
|
||||
let mut file = create_named_temp_file();
|
||||
let toml_str = r#"
|
||||
mode = "distributed"
|
||||
enable_memory_catalog = false
|
||||
rpc_addr = "127.0.0.1:3001"
|
||||
rpc_hostname = "127.0.0.1"
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user