chore: cherry pick #5625 to v0.12 branch (#6831)

* ci: update 0.12 release ci Signed-off-by: evenyag <realevenyag@gmail.com> * fix: out of bound during bloom search (#5625) Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> --------- Signed-off-by: evenyag <realevenyag@gmail.com> Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>
2026-05-29 19:30:37 +00:00 · 2025-08-27 16:12:25 +08:00
parent 8da5949fc5
commit f4c527cddf
9 changed files with 418 additions and 19 deletions
--- a/.github/scripts/check-version.sh
+++ b/.github/scripts/check-version.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Version being released, passed as the first argument.
+CURRENT_VERSION=$1
+if [ -z "$CURRENT_VERSION" ]; then
+  echo "Error: Failed to get current version"
+  exit 1
+fi
+
+# Fetch the latest release from the GitHub Releases API.
+API_RESPONSE=$(curl -s "https://api.github.com/repos/GreptimeTeam/greptimedb/releases/latest")
+
+if [ -z "$API_RESPONSE" ] || [ "$(echo "$API_RESPONSE" | jq -r '.message')" = "Not Found" ]; then
+  echo "Error: Failed to fetch latest version from GitHub"
+  exit 1
+fi
+
+# Extract the tag name of the latest release.
+LATEST_VERSION=$(echo "$API_RESPONSE" | jq -r '.tag_name')
+
+if [ -z "$LATEST_VERSION" ] || [ "$LATEST_VERSION" = "null" ]; then
+  echo "Error: No valid version found in GitHub releases"
+  exit 1
+fi
+
+# Normalize both versions: strip a leading 'v' and any '-nightly-*' suffix.
+CLEAN_CURRENT=$(echo "$CURRENT_VERSION" | sed 's/^v//' | sed 's/-nightly-.*//')
+CLEAN_LATEST=$(echo "$LATEST_VERSION" | sed 's/^v//' | sed 's/-nightly-.*//')
+
+echo "Current version: $CLEAN_CURRENT"
+echo "Latest release version: $CLEAN_LATEST"
+
+# Use sort -V to compare versions; the last line of the sorted output is the higher one.
+HIGHER_VERSION=$(printf "%s\n%s" "$CLEAN_CURRENT" "$CLEAN_LATEST" | sort -V | tail -n1)
+# Record the decision as a step output; $GITHUB_OUTPUT is quoted so the redirect never word-splits.
+if [ "$HIGHER_VERSION" = "$CLEAN_CURRENT" ]; then
+  echo "Current version ($CLEAN_CURRENT) is NEWER than or EQUAL to latest ($CLEAN_LATEST)"
+  echo "should-push-latest-tag=true" >> "$GITHUB_OUTPUT"
+else
+  echo "Current version ($CLEAN_CURRENT) is OLDER than latest ($CLEAN_LATEST)"
+  echo "should-push-latest-tag=false" >> "$GITHUB_OUTPUT"
+fi
--- a/.github/workflows/develop.yml
+++ b/.github/workflows/develop.yml
@@ -706,7 +706,7 @@ jobs:
      - name: Install toolchain
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
-            cache: false
+          cache: false
      - name: Rust Cache
        uses: Swatinem/rust-cache@v2
        with:
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -110,6 +110,8 @@ jobs:

      # The 'version' use as the global tag name of the release workflow.
      version: ${{ steps.create-version.outputs.version }}
+
+      should-push-latest-tag: ${{ steps.check-version.outputs.should-push-latest-tag }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
@@ -135,6 +137,11 @@ jobs:
          GITHUB_REF_NAME: ${{ github.ref_name }}
          NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }}

+      - name: Check version
+        id: check-version
+        run: |
+          ./.github/scripts/check-version.sh "${{ steps.create-version.outputs.version }}"
+
      - name: Allocate linux-amd64 runner
        if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
        uses: ./.github/actions/start-runner
@@ -314,7 +321,7 @@ jobs:
          image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
          image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
          version: ${{ needs.allocate-runners.outputs.version }}
-          push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
+          push-latest-tag: ${{ needs.allocate-runners.outputs.should-push-latest-tag == 'true' && github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}

      - name: Set build image result
        id: set-build-image-result
@@ -332,7 +339,7 @@ jobs:
      build-windows-artifacts,
      release-images-to-dockerhub,
    ]
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-latest-16-cores
    # When we push to ACR, it's easy to fail due to some unknown network issues.
    # However, we don't want to fail the whole workflow because of this.
    # The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
@@ -361,7 +368,7 @@ jobs:
          dev-mode: false
          upload-to-s3: true
          update-version-info: true
-          push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
+          push-latest-tag: ${{ needs.allocate-runners.outputs.should-push-latest-tag == 'true' && github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}

  publish-github-release:
    name: Create GitHub release and upload artifacts
--- a/.github/workflows/semantic-pull-request.yml
+++ b/.github/workflows/semantic-pull-request.yml
@@ -11,17 +11,17 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

+permissions:
+  issues: write
+  contents: write
+  pull-requests: write
+
 jobs:
  check:
    runs-on: ubuntu-latest
-    permissions:
-      pull-requests: write  # Add permissions to modify PRs
-      issues: write
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v4
-        with:
-          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
      - name: Check Pull Request
        working-directory: cyborg
--- a/cyborg/bin/bump-versions.ts
+++ b/cyborg/bin/bump-versions.ts
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2023 Greptime Team
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import * as core from "@actions/core";
+import {obtainClient} from "@/common";
+
+interface RepoConfig { // Describes how to trigger the version-bump workflow of one downstream repository.
+  tokenEnv: string; // Name of the environment variable handed to obtainClient for this repository.
+  repo: string; // Repository name used for the workflow dispatch (owner is GreptimeTeam).
+  workflowLogic: (version: string) => [string, string] | null; // Returns [workflow file, version input], or null to skip.
+}
+
+const REPO_CONFIGS: Record<string, RepoConfig> = { // Keyed by the repository names accepted in TARGET_REPOS.
+  website: {
+    tokenEnv: "WEBSITE_REPO_TOKEN",
+    repo: "website",
+    workflowLogic: (version: string) => {
+      // The website is not bumped for nightly builds: returning null skips the dispatch.
+      if (version.includes('nightly')) {
+        console.log('Nightly version detected for website, skipping workflow trigger.');
+        return null;
+      }
+      return ['bump-patch-version.yml', version];
+    }
+  },
+  demo: {
+    tokenEnv: "DEMO_REPO_TOKEN",
+    repo: "demo-scene",
+    workflowLogic: (version: string) => {
+      // The demo repo is not bumped for nightly builds: returning null skips the dispatch.
+      if (version.includes('nightly')) {
+        console.log('Nightly version detected for demo, skipping workflow trigger.');
+        return null;
+      }
+      return ['bump-patch-version.yml', version];
+    }
+  },
+  docs: {
+    tokenEnv: "DOCS_REPO_TOKEN",
+    repo: "docs",
+    workflowLogic: (version: string) => {
+      // Nightly builds go to the dedicated nightly workflow with the version unchanged.
+      if (version.includes('nightly')) {
+        return ['bump-nightly-version.yml', version];
+      }
+
+      const parts = version.split('.');
+      if (parts.length !== 3) {
+        throw new Error('Invalid version format');
+      }
+
+      // A patch component (last number) of '0' marks a new major/minor release:
+      // dispatch bump-version.yml with only the major.minor part.
+      if (parts[2] === '0') {
+        return ['bump-version.yml', `${parts[0]}.${parts[1]}`];
+      }
+
+      // Any other patch component is a patch release: pass the full version.
+      return ['bump-patch-version.yml', version];
+    }
+  }
+};
+
+// Dispatch `workflowId` on the `main` branch of the configured repo with `version` as input.
+async function triggerWorkflow(repoConfig: RepoConfig, workflowId: string, version: string) {
+  const { repo, tokenEnv } = repoConfig;
+  const client = obtainClient(tokenEnv);
+  try {
+    await client.rest.actions.createWorkflowDispatch({
+      owner: "GreptimeTeam",
+      repo,
+      workflow_id: workflowId,
+      ref: "main",
+      inputs: { version },
+    });
+    console.log(`Successfully triggered ${workflowId} workflow for ${repo} with version ${version}`);
+  } catch (error) {
+    core.setFailed(`Failed to trigger workflow for ${repo}: ${error.message}`);
+    throw error;
+  }
+}
+
+// Look up the configuration for `repoName`, ask it which workflow to run for `version`,
+// and trigger that workflow. Unknown repository names are rejected with an error.
+async function processRepo(repoName: string, version: string) {
+  const config = REPO_CONFIGS[repoName];
+  if (!config) {
+    throw new Error(`Unknown repository: ${repoName}`);
+  }
+
+  try {
+    const plan = config.workflowLogic(version);
+    // A null plan means this repo is skipped for this version (e.g., nightly version for website).
+    if (plan !== null) {
+      const [workflowId, apiVersion] = plan;
+      await triggerWorkflow(config, workflowId, apiVersion);
+    }
+  } catch (error) {
+    core.setFailed(`Error processing ${repoName} with version ${version}: ${error.message}`);
+    throw error;
+  }
+}
+
+// Entry point: reads VERSION and TARGET_REPOS from the environment and processes each target repo.
+async function main() {
+  const version = process.env.VERSION;
+  if (!version) {
+    core.setFailed("VERSION environment variable is required");
+    process.exit(1);
+  }
+
+  // Remove 'v' prefix if exists
+  const cleanVersion = version.startsWith('v') ? version.slice(1) : version;
+
+  // TARGET_REPOS is a comma-separated list. Blank entries are dropped so that an unset or
+  // empty value (e.g. an omitted workflow input) falls back to the default repositories.
+  const requested = (process.env.TARGET_REPOS ?? '').split(',').map(repo => repo.trim()).filter(repo => repo.length > 0);
+  const targetRepos = requested.length > 0 ? requested : ['website', 'docs'];
+  console.log(`Processing version ${cleanVersion} for repositories: ${targetRepos.join(', ')}`);
+
+  // Process every repository, collecting failures so one bad repo does not stop the rest.
+  const errors: string[] = [];
+  for (const repo of targetRepos) {
+    try {
+      await processRepo(repo, cleanVersion);
+    } catch (error) {
+      errors.push(`${repo}: ${error.message}`);
+    }
+  }
+
+  if (errors.length > 0) {
+    core.setFailed(`Failed to process some repositories: ${errors.join('; ')}`);
+    process.exit(1);
+  }
+
+  console.log('All repositories processed successfully');
+}
+
+// Run main; any error that escapes it is reported as an action failure and the process exits with 1.
+main().catch((error) => {
+  core.setFailed(`Unexpected error: ${error.message}`);
+  process.exit(1);
+});
--- a/cyborg/bin/follow-up-docs-issue.ts
+++ b/cyborg/bin/follow-up-docs-issue.ts
@@ -55,12 +55,25 @@ async function main() {
        await client.rest.issues.addLabels({
            owner, repo, issue_number: number, labels: [labelDocsRequired],
        })
+
+        // Get available assignees for the docs repo
+        const assigneesResponse = await docsClient.rest.issues.listAssignees({
+            owner: 'GreptimeTeam',
+            repo: 'docs',
+        })
+        const validAssignees = assigneesResponse.data.map(assignee => assignee.login)
+        core.info(`Available assignees: ${validAssignees.join(', ')}`)
+
+        // Check if the actor is a valid assignee, otherwise fallback to fengjiachun
+        const assignee = validAssignees.includes(actor) ? actor : 'fengjiachun'
+        core.info(`Assigning issue to: ${assignee}`)
+
        await docsClient.rest.issues.create({
            owner: 'GreptimeTeam',
            repo: 'docs',
            title: `Update docs for ${title}`,
            body: `A document change request is generated from ${html_url}`,
-            assignee: actor,
+            assignee: assignee,
        }).then((res) => {
            core.info(`Created issue ${res.data}`)
        })
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -53,6 +53,54 @@ get_arch_type() {
  esac
 }

+# Compare the SHA256 digest of the file in $1 against the expected digest in $2.
+verify_sha256() {
+  file="$1"
+  expected_sha256="$2"
+
+  # Use sha256sum when available, otherwise shasum; with neither, skip and report success.
+  if command -v sha256sum >/dev/null 2>&1; then
+    actual_sha256=$(sha256sum "$file" | cut -d' ' -f1)
+  elif command -v shasum >/dev/null 2>&1; then
+    actual_sha256=$(shasum -a 256 "$file" | cut -d' ' -f1)
+  else
+    echo "Warning: No SHA256 verification tool found (sha256sum or shasum). Skipping checksum verification."
+    return 0
+  fi
+
+  if [ "$actual_sha256" != "$expected_sha256" ]; then
+    echo "Error: SHA256 checksum verification failed!"
+    echo "Expected: $expected_sha256"
+    echo "Actual: $actual_sha256"
+    return 1
+  fi
+  echo "SHA256 checksum verified successfully."
+  return 0
+}
+
+# Ask a yes/no question ($1) on the terminal; return 0 only for y/yes (any case), 1 otherwise.
+prompt_confirmation() {
+  message="$1"
+  printf "%s (y/N): " "$message"
+
+  # Read from /dev/tty; stderr is silenced first so a failure to open the tty prints nothing.
+  answer=""
+  if read -r answer 2>/dev/null </dev/tty; then
+    case "$answer" in
+      [Yy]|[Yy][Ee][Ss])
+        return 0
+        ;;
+      *)
+        return 1
+        ;;
+    esac
+  else
+    echo ""
+    echo "Cannot read user input. Defaulting to No."
+    return 1
+  fi
+}
+
 download_artifact() {
  if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
    # Use the latest stable released version.
@@ -71,17 +119,104 @@ download_artifact() {
    fi

    echo "Downloading ${BIN}, OS: ${OS_TYPE}, Arch: ${ARCH_TYPE}, Version: ${VERSION}"
-    PACKAGE_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}.tar.gz"
+    PKG_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}"
+    PACKAGE_NAME="${PKG_NAME}.tar.gz"
+    SHA256_FILE="${PKG_NAME}.sha256sum"

    if [ -n "${PACKAGE_NAME}" ]; then
-      wget "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"
+      # Check if files already exist and prompt for override
+      if [ -f "${PACKAGE_NAME}" ]; then
+        echo "File ${PACKAGE_NAME} already exists."
+        if prompt_confirmation "Do you want to override it?"; then
+          echo "Overriding existing file..."
+          rm -f "${PACKAGE_NAME}"
+        else
+          echo "Skipping download. Using existing file."
+        fi
+      fi
+
+      if [ -f "${BIN}" ]; then
+        echo "Binary ${BIN} already exists."
+        if prompt_confirmation "Do you want to override it?"; then
+          echo "Will override existing binary..."
+          rm -f "${BIN}"
+        else
+          echo "Installation cancelled."
+          exit 0
+        fi
+      fi
+
+      # Download the package unless an existing copy is being reused
+      if [ ! -f "${PACKAGE_NAME}" ]; then
+        echo "Downloading ${PACKAGE_NAME}..."
+        # Prefer curl, fall back to wget; -f makes curl fail on HTTP errors instead of saving the error page
+        if command -v curl >/dev/null 2>&1; then
+          if ! curl -f -L -o "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
+            echo "Error: Failed to download ${PACKAGE_NAME}"
+            exit 1
+          fi
+        elif command -v wget >/dev/null 2>&1; then
+          if ! wget -O "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
+            echo "Error: Failed to download ${PACKAGE_NAME}"
+            exit 1
+          fi
+        else
+          echo "Error: Neither curl nor wget is available for downloading."
+          exit 1
+        fi
+      fi
+
+      # Download the SHA256 checksum; -f ensures a missing checksum file counts as a failed download
+      echo "Downloading SHA256 checksum..."
+      sha256_download_success=0
+      if command -v curl >/dev/null 2>&1; then
+        if curl -f -L -s -o "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
+          sha256_download_success=1
+        fi
+      elif command -v wget >/dev/null 2>&1; then
+        if wget -q -O "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
+          sha256_download_success=1
+        fi
+      fi
+
+      if [ $sha256_download_success -eq 1 ] && [ -f "${SHA256_FILE}" ]; then
+        expected_sha256=$(cat "${SHA256_FILE}" | cut -d' ' -f1)
+        if [ -n "$expected_sha256" ]; then
+          if ! verify_sha256 "${PACKAGE_NAME}" "${expected_sha256}"; then
+            echo "SHA256 verification failed. Removing downloaded file."
+            rm -f "${PACKAGE_NAME}" "${SHA256_FILE}"
+            exit 1
+          fi
+        else
+          echo "Warning: Could not parse SHA256 checksum from file."
+        fi
+        rm -f "${SHA256_FILE}"
+      else
+        echo "Warning: Could not download SHA256 checksum file. Skipping verification."
+      fi

      # Extract the binary and clean the rest.
-      tar xvf "${PACKAGE_NAME}" && \
-      mv "${PACKAGE_NAME%.tar.gz}/${BIN}" "${PWD}" && \
-      rm -r "${PACKAGE_NAME}" && \
-      rm -r "${PACKAGE_NAME%.tar.gz}" && \
-      echo "Run './${BIN} --help' to get started"
+      echo "Extracting ${PACKAGE_NAME}..."
+      if ! tar xf "${PACKAGE_NAME}"; then
+        echo "Error: Failed to extract ${PACKAGE_NAME}"
+        exit 1
+      fi
+
+      # Find the binary in the extracted directory
+      extracted_dir="${PACKAGE_NAME%.tar.gz}"
+      if [ -f "${extracted_dir}/${BIN}" ]; then
+        mv "${extracted_dir}/${BIN}" "${PWD}/"
+        rm -f "${PACKAGE_NAME}"
+        rm -rf "${extracted_dir}"
+        chmod +x "${BIN}"
+        echo "Installation completed successfully!"
+        echo "Run './${BIN} --help' to get started"
+      else
+        echo "Error: Binary ${BIN} not found in extracted archive"
+        rm -f "${PACKAGE_NAME}"
+        rm -rf "${extracted_dir}"
+        exit 1
+      fi
    fi
  fi
 }
--- a/src/index/src/bloom_filter/applier.rs
+++ b/src/index/src/bloom_filter/applier.rs
@@ -42,7 +42,16 @@ impl BloomFilterApplier {
    ) -> Result<Vec<Range<usize>>> {
        let rows_per_segment = self.meta.rows_per_segment as usize;
        let start_seg = search_range.start / rows_per_segment;
-        let end_seg = search_range.end.div_ceil(rows_per_segment);
+        let mut end_seg = search_range.end.div_ceil(rows_per_segment);
+
+        if end_seg == self.meta.segment_loc_indices.len() + 1 {
+            // In a previous version, there was a bug where if the last segment was all null,
+            // this segment would not be written into the index. This caused the slice
+            // `self.meta.segment_loc_indices[start_seg..end_seg]` to go out of bounds due to
+            // the missing segment. Since the `search` function does not search for nulls,
+            // we can simply ignore the last segment in this buggy scenario.
+            end_seg -= 1;
+        }

        let locs = &self.meta.segment_loc_indices[start_seg..end_seg];

--- a/src/index/src/bloom_filter/creator.rs
+++ b/src/index/src/bloom_filter/creator.rs
@@ -64,6 +64,9 @@ pub struct BloomFilterCreator {
    /// Storage for finalized Bloom filters.
    finalized_bloom_filters: FinalizedBloomFilterStorage,

+    /// Number of rows covered by the segments finalized so far.
+    finalized_row_count: usize,
+
    /// Global memory usage of the bloom filter creator.
    global_memory_usage: Arc<AtomicUsize>,
 }
@@ -96,6 +99,7 @@ impl BloomFilterCreator {
                global_memory_usage,
                global_memory_usage_threshold,
            ),
+            finalized_row_count: 0,
        }
    }

@@ -136,6 +140,7 @@ impl BloomFilterCreator {

            if self.accumulated_row_count % self.rows_per_segment == 0 {
                self.finalize_segment().await?;
+                self.finalized_row_count = self.accumulated_row_count;
            }
        }

@@ -161,6 +166,7 @@ impl BloomFilterCreator {

        if self.accumulated_row_count % self.rows_per_segment == 0 {
            self.finalize_segment().await?;
+            self.finalized_row_count = self.accumulated_row_count;
        }

        Ok(())
@@ -168,7 +174,7 @@ impl BloomFilterCreator {

    /// Finalizes any remaining segments and writes the bloom filters and metadata to the provided writer.
    pub async fn finish(&mut self, mut writer: impl AsyncWrite + Unpin) -> Result<()> {
-        if !self.cur_seg_distinct_elems.is_empty() {
+        if self.accumulated_row_count > self.finalized_row_count {
            self.finalize_segment().await?;
        }

@@ -406,4 +412,35 @@ mod tests {
            assert!(bf.contains(&b"f"));
        }
    }
+
+    /// A final segment that holds only an empty row must still be finalized, so the
+    /// metadata reports three segments for five rows.
+    #[tokio::test]
+    async fn test_final_seg_all_null() {
+        let mut writer = Cursor::new(Vec::new());
+        let mut creator = BloomFilterCreator::new(
+            2,
+            Arc::new(MockExternalTempFileProvider::new()),
+            Arc::new(AtomicUsize::new(0)),
+            None,
+        );
+
+        creator
+            .push_n_row_elems(4, vec![b"a".to_vec(), b"b".to_vec()])
+            .await
+            .unwrap();
+        creator.push_row_elems(Vec::new()).await.unwrap();
+
+        creator.finish(&mut writer).await.unwrap();
+
+        let bytes = writer.into_inner();
+        // The last four bytes hold the little-endian metadata length; the metadata precedes them.
+        let (payload, footer) = bytes.split_at(bytes.len() - 4);
+        let meta_len = u32::from_le_bytes(footer.try_into().unwrap()) as usize;
+        let meta = BloomFilterMeta::decode(&payload[payload.len() - meta_len..]).unwrap();
+
+        assert_eq!(meta.rows_per_segment, 2);
+        assert_eq!(meta.segment_count, 3);
+        assert_eq!(meta.row_count, 5);
+    }
 }