From c9f6f67ae988a09924b261917616ebc82f8f674c Mon Sep 17 00:00:00 2001
From: LFC <990479+MichaelScofield@users.noreply.github.com>
Date: Tue, 2 Jun 2026 14:16:08 +0800
Subject: [PATCH] fix: nightly jsonbench test (#8212)

* fix: nightly jsonbench test

Signed-off-by: luofucong

* Apply suggestions from code review

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

---------

Signed-off-by: luofucong
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
Review notes (this area is ignored by git-am):

* This patch was restored from a whitespace-collapsed copy. Indentation and
  blank context lines were reconstructed so that every hunk matches the line
  counts in its header; run "git apply --check" before applying.
* JSONBENCH_DATASET is now a quoted YAML string, matching the TODO that
  refers to the value "3".
* The dataset choice is passed to printf as an argument ('%s\n') instead of
  being used as the format string, and is quoted when handed to main.sh.
* These are one-line-for-one-line changes, so hunk headers and the diffstat
  are unchanged; the post-image blob id on the index line still refers to
  the original revision of the patch.

 .github/workflows/nightly-jsonbench.yaml | 157 ++++++++++++++++-------
 1 file changed, 109 insertions(+), 48 deletions(-)

diff --git a/.github/workflows/nightly-jsonbench.yaml b/.github/workflows/nightly-jsonbench.yaml
index 3667ee26a6..a9ce4dd363 100644
--- a/.github/workflows/nightly-jsonbench.yaml
+++ b/.github/workflows/nightly-jsonbench.yaml
@@ -1,19 +1,81 @@
 name: Nightly JSONBench
 
 on:
-  schedule:
-    # Trigger at 00:00(Asia/Shanghai) on every weekday.
-    - cron: "0 16 * * 0-4"
+  workflow_run:
+    workflows: [ "GreptimeDB Nightly Build" ]
+    types: [ completed ]
   workflow_dispatch:
+    inputs:
+      run_id:
+        description: The nightly build workflow run id to download GreptimeDB artifacts from
+        required: true
+        type: string
+
+permissions:
+  actions: read
+  contents: read
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
 
 jobs:
+  resolve-artifact:
+    name: Resolve GreptimeDB nightly artifact
+    if: ${{ github.repository == 'GreptimeTeam/greptimedb' && (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success') }}
+    runs-on: ubuntu-latest
+    outputs:
+      artifact-name: ${{ steps.find-artifact.outputs.artifact-name }}
+      run-id: ${{ steps.resolve-run-id.outputs.run-id }}
+    steps:
+      - name: Resolve nightly build run id
+        id: resolve-run-id
+        shell: bash
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
+          INPUT_RUN_ID: ${{ inputs.run_id }}
+        run: |
+          set -euo pipefail
+
+          if [[ "${EVENT_NAME}" == "workflow_dispatch" ]]; then
+            run_id="${INPUT_RUN_ID}"
+          else
+            run_id="${WORKFLOW_RUN_ID}"
+          fi
+
+          if [[ ! "${run_id}" =~ ^[0-9]+$ ]]; then
+            echo "Invalid workflow run id: ${run_id}"
+            exit 1
+          fi
+
+          echo "run-id=${run_id}" >> "${GITHUB_OUTPUT}"
+
+      - name: Find GreptimeDB nightly artifact
+        id: find-artifact
+        shell: bash
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          RUN_ID: ${{ steps.resolve-run-id.outputs.run-id }}
+        run: |
+          set -euo pipefail
+
+          artifact_name=$(gh api "repos/${GITHUB_REPOSITORY}/actions/runs/${RUN_ID}/artifacts" --paginate \
+            --jq '.artifacts[] | select(.name | test("^greptime-linux-arm64-nightly-[0-9]{8}-[0-9a-f]+$")) | .name' \
+            | head -n 1)
+
+          if [[ -z "${artifact_name}" ]]; then
+            echo "Cannot find linux arm64 nightly artifact in workflow run ${RUN_ID}."
+            exit 1
+          fi
+
+          echo "Download GreptimeDB artifact: ${artifact_name}"
+          echo "artifact-name=${artifact_name}" >> "${GITHUB_OUTPUT}"
+
   allocate-runner:
     name: Allocate runner
-    if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
+    if: ${{ github.repository == 'GreptimeTeam/greptimedb' && (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success') }}
+    needs: [ resolve-artifact ]
     runs-on: ubuntu-latest
     outputs:
       linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -43,55 +105,50 @@ jobs:
 
   jsonbench:
     name: Run JSONBench
-    if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
-    needs: [ allocate-runner ]
+    if: ${{ github.repository == 'GreptimeTeam/greptimedb' && (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success') }}
+    needs: [ resolve-artifact, allocate-runner ]
     runs-on: ${{ needs.allocate-runner.outputs.linux-arm64-runner }}
     timeout-minutes: 120
     env:
-      JSONBENCH_DATA_DIR: /home/runner/data/bluesky
-      JSONBENCH_OUTPUT_PREFIX: _ubuntu-latest
+      JSONBENCH_OUTPUT_PREFIX: _linux-arm64
     steps:
-      - name: Checkout
-        uses: actions/checkout@v4
+      - name: Download GreptimeDB nightly artifact
+        uses: actions/download-artifact@v4
         with:
-          fetch-depth: 0
-          persist-credentials: false
+          name: ${{ needs.resolve-artifact.outputs.artifact-name }}
+          path: greptimedb-artifact
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          run-id: ${{ needs.resolve-artifact.outputs.run-id }}
 
-      - uses: arduino/setup-protoc@v3
-        with:
-          repo-token: ${{ secrets.GITHUB_TOKEN }}
-
-      - uses: actions-rust-lang/setup-rust-toolchain@v1
-
-      - name: Rust Cache
-        uses: Swatinem/rust-cache@v2
-        with:
-          shared-key: "nightly-jsonbench"
-          cache-all-crates: "true"
-          save-if: ${{ github.ref == 'refs/heads/main' }}
-
-      - name: Build GreptimeDB
-        run: cargo build --profile nightly --bin greptime
-
-      - name: Reclaim disk space
+      - name: Prepare GreptimeDB binary
         shell: bash
         run: |
           set -euo pipefail
 
-          mkdir -p "${RUNNER_TEMP}/greptimedb-bin"
-          cp ./target/nightly/greptime "${RUNNER_TEMP}/greptimedb-bin/greptime"
-          chmod +x "${RUNNER_TEMP}/greptimedb-bin/greptime"
-
-          rm -rf ./target
+          tar -xzf "greptimedb-artifact/${{ needs.resolve-artifact.outputs.artifact-name }}.tar.gz"
+          cp "${{ needs.resolve-artifact.outputs.artifact-name }}/greptime" ./greptime
+          chmod +x ./greptime
+          rm -rf greptimedb-artifact "${{ needs.resolve-artifact.outputs.artifact-name }}"
 
       - name: Run JSONBench
+        env:
+          # TODO(LFC): Change to "3" (100m) when JSON2 ingestion performance is optimized.
+          JSONBENCH_DATASET: "2"
         shell: bash
         run: |
           set -euo pipefail
 
-          cd "${RUNNER_TEMP}"
-          cp "${RUNNER_TEMP}/greptimedb-bin/greptime" ./greptime
-          chmod +x ./greptime
+          export JSONBENCH_DATA_DIR="/root/data/bluesky"
+          echo "Use JSONBench data directory ${JSONBENCH_DATA_DIR}"
+
+          echo "Cloning JSONBench"
+          git clone --branch greptimedb-new-json --depth 1 https://github.com/GreptimeTeam/JSONBench.git JSONBench
+
+          echo "Downloading JSONBench dataset choice ${JSONBENCH_DATASET} to ${JSONBENCH_DATA_DIR}"
+          mkdir -p "${JSONBENCH_DATA_DIR}"
+          printf '%s\n' "${JSONBENCH_DATASET}" | ./JSONBench/download_data.sh
+          downloaded_files=$(find "${JSONBENCH_DATA_DIR}" -type f | wc -l)
+          echo "Downloaded JSONBench dataset files: ${downloaded_files}"
 
           export GREPTIMEDB_STANDALONE__WAL__DIR=greptimedb_data/wal
           export GREPTIMEDB_STANDALONE__STORAGE__DATA_HOME=greptimedb_data
@@ -100,10 +157,12 @@ jobs:
           export GREPTIMEDB_STANDALONE__HTTP__BODY_LIMIT=1GB
           export GREPTIMEDB_STANDALONE__HTTP__TIMEOUT=500s
 
+          echo "Starting GreptimeDB standalone"
           ./greptime standalone start > greptimedb.log 2>&1 &
           greptime_pid=$!
           trap 'kill "${greptime_pid}" 2>/dev/null || true' EXIT
 
+          echo "Waiting for GreptimeDB health check"
           until curl -s --fail -o /dev/null http://localhost:4000/health; do
             if ! kill -0 "${greptime_pid}" 2>/dev/null; then
               cat greptimedb.log
@@ -111,12 +170,14 @@ jobs:
             fi
             sleep 1
           done
+          echo "GreptimeDB is ready"
 
-          git clone --branch greptimedb-new-json --depth 1 https://github.com/GreptimeTeam/JSONBench.git JSONBench
           cp ./greptime JSONBench/greptimedb/greptime
           cd JSONBench/greptimedb
 
-          ./main.sh 3 "${JSONBENCH_DATA_DIR}" success.log error.log "${JSONBENCH_OUTPUT_PREFIX}" false
+          echo "Running JSONBench main.sh with dataset choice ${JSONBENCH_DATASET} and install=false"
+          ./main.sh "${JSONBENCH_DATASET}" "${JSONBENCH_DATA_DIR}" success.log error.log "${JSONBENCH_OUTPUT_PREFIX}" false
+          echo "JSONBench finished"
 
       - name: Upload JSONBench results
         if: always()
@@ -124,21 +185,21 @@ jobs:
         with:
           name: jsonbench-results
           path: |
-            ${{ runner.temp }}/greptimedb.log
-            ${{ runner.temp }}/JSONBench/greptimedb/*.log
-            ${{ runner.temp }}/JSONBench/greptimedb/*.total_size
-            ${{ runner.temp }}/JSONBench/greptimedb/*.data_size
-            ${{ runner.temp }}/JSONBench/greptimedb/*.index_size
-            ${{ runner.temp }}/JSONBench/greptimedb/*.count
-            ${{ runner.temp }}/JSONBench/greptimedb/*.results_runtime
-            ${{ runner.temp }}/JSONBench/greptimedb/*.query_results
+            ./greptimedb.log
+            ./JSONBench/greptimedb/*.log
+            ./JSONBench/greptimedb/*.total_size
+            ./JSONBench/greptimedb/*.data_size
+            ./JSONBench/greptimedb/*.index_size
+            ./JSONBench/greptimedb/*.count
+            ./JSONBench/greptimedb/*.results_runtime
+            ./JSONBench/greptimedb/*.query_results
           if-no-files-found: ignore
           retention-days: 7
 
   stop-linux-arm64-runner:
     name: Stop Linux ARM64 runner
     # It's always run as the last job in the workflow to make sure that the runner is released.
-    if: ${{ always() && needs.allocate-runner.outputs.linux-arm64-ec2-runner-instance-id != '' }}
+    if: ${{ always() && needs.allocate-runner.outputs.linux-arm64-ec2-runner-instance-id != '' }}
     runs-on: ubuntu-latest
     needs: [
       allocate-runner,