diff --git a/.github/scripts/check-version.sh b/.github/scripts/check-version.sh
new file mode 100755
index 0000000000..6ea4d154be
--- /dev/null
+++ b/.github/scripts/check-version.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# Compares the given version with the latest GitHub release and writes should-push-latest-tag=true|false to $GITHUB_OUTPUT.
+# Get current version (first positional argument)
+CURRENT_VERSION=$1
+if [ -z "$CURRENT_VERSION" ]; then
+  echo "Error: Failed to get current version" >&2
+  exit 1
+fi
+
+# Get the latest version from GitHub Releases
+API_RESPONSE=$(curl -s "https://api.github.com/repos/GreptimeTeam/greptimedb/releases/latest")
+
+if [ -z "$API_RESPONSE" ] || [ "$(echo "$API_RESPONSE" | jq -r '.message')" = "Not Found" ]; then
+  echo "Error: Failed to fetch latest version from GitHub" >&2
+  exit 1
+fi
+
+# Get the latest version
+LATEST_VERSION=$(echo "$API_RESPONSE" | jq -r '.tag_name')
+
+if [ -z "$LATEST_VERSION" ] || [ "$LATEST_VERSION" = "null" ]; then
+  echo "Error: No valid version found in GitHub releases" >&2
+  exit 1
+fi
+
+# Clean up the version format (strip a leading 'v' and any '-nightly-*' suffix)
+CLEAN_CURRENT=$(echo "$CURRENT_VERSION" | sed 's/^v//' | sed 's/-nightly-.*//')
+CLEAN_LATEST=$(echo "$LATEST_VERSION" | sed 's/^v//' | sed 's/-nightly-.*//')
+
+echo "Current version: $CLEAN_CURRENT"
+echo "Latest release version: $CLEAN_LATEST"
+
+# Use sort -V to compare versions: the last line after sorting is the higher one
+HIGHER_VERSION=$(printf "%s\n%s" "$CLEAN_CURRENT" "$CLEAN_LATEST" | sort -V | tail -n1)
+
+if [ "$HIGHER_VERSION" = "$CLEAN_CURRENT" ]; then
+  echo "Current version ($CLEAN_CURRENT) is NEWER than or EQUAL to latest ($CLEAN_LATEST)"
+  echo "should-push-latest-tag=true" >> "$GITHUB_OUTPUT"
+else
+  echo "Current version ($CLEAN_CURRENT) is OLDER than latest ($CLEAN_LATEST)"
+  echo "should-push-latest-tag=false" >> "$GITHUB_OUTPUT"
+fi
diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 4e7aafc3cc..49d893cf7a 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -706,7 +706,7 @@ jobs: - name: Install toolchain uses: actions-rust-lang/setup-rust-toolchain@v1 with: -
cache: false + cache: false - name: Rust Cache uses: Swatinem/rust-cache@v2 with: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 42828b7f01..2a42b82773 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -110,6 +110,8 @@ jobs: # The 'version' use as the global tag name of the release workflow. version: ${{ steps.create-version.outputs.version }} + + should-push-latest-tag: ${{ steps.check-version.outputs.should-push-latest-tag }} steps: - name: Checkout uses: actions/checkout@v4 @@ -135,6 +137,11 @@ jobs: GITHUB_REF_NAME: ${{ github.ref_name }} NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }} + - name: Check version + id: check-version + run: | + ./.github/scripts/check-version.sh "${{ steps.create-version.outputs.version }}" + - name: Allocate linux-amd64 runner if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }} uses: ./.github/actions/start-runner @@ -314,7 +321,7 @@ jobs: image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }} image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }} version: ${{ needs.allocate-runners.outputs.version }} - push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }} + push-latest-tag: ${{ needs.allocate-runners.outputs.should-push-latest-tag == 'true' && github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }} - name: Set build image result id: set-build-image-result @@ -332,7 +339,7 @@ jobs: build-windows-artifacts, release-images-to-dockerhub, ] - runs-on: ubuntu-latest + runs-on: ubuntu-latest-16-cores # When we push to ACR, it's easy to fail due to some unknown network issues. # However, we don't want to fail the whole workflow because of this. # The ACR have daily sync with DockerHub, so don't worry about the image not being updated. 
@@ -361,7 +368,7 @@ jobs: dev-mode: false upload-to-s3: true update-version-info: true - push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }} + push-latest-tag: ${{ needs.allocate-runners.outputs.should-push-latest-tag == 'true' && github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }} publish-github-release: name: Create GitHub release and upload artifacts diff --git a/.github/workflows/semantic-pull-request.yml b/.github/workflows/semantic-pull-request.yml index 004a3f421b..522ec09007 100644 --- a/.github/workflows/semantic-pull-request.yml +++ b/.github/workflows/semantic-pull-request.yml @@ -11,17 +11,17 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true +permissions: + issues: write + contents: write + pull-requests: write + jobs: check: runs-on: ubuntu-latest - permissions: - pull-requests: write # Add permissions to modify PRs - issues: write timeout-minutes: 10 steps: - uses: actions/checkout@v4 - with: - persist-credentials: false - uses: ./.github/actions/setup-cyborg - name: Check Pull Request working-directory: cyborg diff --git a/cyborg/bin/bump-versions.ts b/cyborg/bin/bump-versions.ts new file mode 100644 index 0000000000..ce9d3d21a0 --- /dev/null +++ b/cyborg/bin/bump-versions.ts @@ -0,0 +1,156 @@ +/* + * Copyright 2023 Greptime Team + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import * as core from "@actions/core";
+import {obtainClient} from "@/common";
+
+interface RepoConfig {
+  tokenEnv: string;
+  repo: string;
+  workflowLogic: (version: string) => [string, string] | null;
+}
+
+const REPO_CONFIGS: Record<string, RepoConfig> = {
+  website: {
+    tokenEnv: "WEBSITE_REPO_TOKEN",
+    repo: "website",
+    workflowLogic: (version: string) => {
+      // Skip nightly versions for website
+      if (version.includes('nightly')) {
+        console.log('Nightly version detected for website, skipping workflow trigger.');
+        return null;
+      }
+      return ['bump-patch-version.yml', version];
+    }
+  },
+  demo: {
+    tokenEnv: "DEMO_REPO_TOKEN",
+    repo: "demo-scene",
+    workflowLogic: (version: string) => {
+      // Skip nightly versions for demo
+      if (version.includes('nightly')) {
+        console.log('Nightly version detected for demo, skipping workflow trigger.');
+        return null;
+      }
+      return ['bump-patch-version.yml', version];
+    }
+  },
+  docs: {
+    tokenEnv: "DOCS_REPO_TOKEN",
+    repo: "docs",
+    workflowLogic: (version: string) => {
+      // Check if it's a nightly version
+      if (version.includes('nightly')) {
+        return ['bump-nightly-version.yml', version];
+      }
+
+      const parts = version.split('.');
+      if (parts.length !== 3) {
+        throw new Error('Invalid version format');
+      }
+
+      // If patch version (last number) is 0, it's a major version
+      // Return only major.minor version
+      if (parts[2] === '0') {
+        return ['bump-version.yml', `${parts[0]}.${parts[1]}`];
+      }
+
+      // Otherwise it's a patch version, use full version
+      return ['bump-patch-version.yml', version];
+    }
+  }
+};
+
+async function triggerWorkflow(repoConfig: RepoConfig, workflowId: string, version: string) {
+  const client = obtainClient(repoConfig.tokenEnv);
+  try {
+    await client.rest.actions.createWorkflowDispatch({
+      owner: "GreptimeTeam",
+      repo: repoConfig.repo,
+      workflow_id: workflowId,
+      ref: "main",
+      inputs: {
+        version,
+      },
+
}); + console.log(`Successfully triggered ${workflowId} workflow for ${repoConfig.repo} with version ${version}`); + } catch (error) { + core.setFailed(`Failed to trigger workflow for ${repoConfig.repo}: ${error.message}`); + throw error; + } +} + +async function processRepo(repoName: string, version: string) { + const repoConfig = REPO_CONFIGS[repoName]; + if (!repoConfig) { + throw new Error(`Unknown repository: ${repoName}`); + } + + try { + const workflowResult = repoConfig.workflowLogic(version); + if (workflowResult === null) { + // Skip this repo (e.g., nightly version for website) + return; + } + + const [workflowId, apiVersion] = workflowResult; + await triggerWorkflow(repoConfig, workflowId, apiVersion); + } catch (error) { + core.setFailed(`Error processing ${repoName} with version ${version}: ${error.message}`); + throw error; + } +} + +async function main() { + const version = process.env.VERSION; + if (!version) { + core.setFailed("VERSION environment variable is required"); + process.exit(1); + } + + // Remove 'v' prefix if exists + const cleanVersion = version.startsWith('v') ? 
version.slice(1) : version; + + // Get target repositories from environment variable + // Default to both if not specified + const targetRepos = process.env.TARGET_REPOS?.split(',').map(repo => repo.trim()) || ['website', 'docs']; + + console.log(`Processing version ${cleanVersion} for repositories: ${targetRepos.join(', ')}`); + + const errors: string[] = []; + + // Process each repository + for (const repo of targetRepos) { + try { + await processRepo(repo, cleanVersion); + } catch (error) { + errors.push(`${repo}: ${error.message}`); + } + } + + if (errors.length > 0) { + core.setFailed(`Failed to process some repositories: ${errors.join('; ')}`); + process.exit(1); + } + + console.log('All repositories processed successfully'); +} + +// Execute main function +main().catch((error) => { + core.setFailed(`Unexpected error: ${error.message}`); + process.exit(1); +}); diff --git a/cyborg/bin/follow-up-docs-issue.ts b/cyborg/bin/follow-up-docs-issue.ts index e08bfee083..acca21fe93 100644 --- a/cyborg/bin/follow-up-docs-issue.ts +++ b/cyborg/bin/follow-up-docs-issue.ts @@ -55,12 +55,25 @@ async function main() { await client.rest.issues.addLabels({ owner, repo, issue_number: number, labels: [labelDocsRequired], }) + + // Get available assignees for the docs repo + const assigneesResponse = await docsClient.rest.issues.listAssignees({ + owner: 'GreptimeTeam', + repo: 'docs', + }) + const validAssignees = assigneesResponse.data.map(assignee => assignee.login) + core.info(`Available assignees: ${validAssignees.join(', ')}`) + + // Check if the actor is a valid assignee, otherwise fallback to fengjiachun + const assignee = validAssignees.includes(actor) ? 
actor : 'fengjiachun' + core.info(`Assigning issue to: ${assignee}`) + await docsClient.rest.issues.create({ owner: 'GreptimeTeam', repo: 'docs', title: `Update docs for ${title}`, body: `A document change request is generated from ${html_url}`, - assignee: actor, + assignee: assignee, }).then((res) => { core.info(`Created issue ${res.data}`) })
diff --git a/scripts/install.sh b/scripts/install.sh
index 6b5103ec44..88bdebd713 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -53,6 +53,54 @@ get_arch_type() {
   esac
 }
 
+# Verify SHA256 checksum of file $1 against expected hex digest $2 (returns 0 on match or when no tool exists, 1 on mismatch)
+verify_sha256() {
+  file="$1"
+  expected_sha256="$2"
+
+  if command -v sha256sum >/dev/null 2>&1; then
+    actual_sha256=$(sha256sum "$file" | cut -d' ' -f1)
+  elif command -v shasum >/dev/null 2>&1; then
+    actual_sha256=$(shasum -a 256 "$file" | cut -d' ' -f1)
+  else
+    echo "Warning: No SHA256 verification tool found (sha256sum or shasum). Skipping checksum verification."
+    return 0
+  fi
+
+  if [ "$actual_sha256" = "$expected_sha256" ]; then
+    echo "SHA256 checksum verified successfully."
+    return 0
+  else
+    echo "Error: SHA256 checksum verification failed!"
+    echo "Expected: $expected_sha256"
+    echo "Actual: $actual_sha256"
+    return 1
+  fi
+}
+
+# Prompt for user confirmation (compatible with different shells); returns 0 only for y/yes
+prompt_confirmation() {
+  message="$1"
+  printf "%s (y/N): " "$message"
+
+  # Read the answer from the terminal so the prompt works even when stdin is not the terminal; fallback if read fails
+  answer=""
+  if read -r answer </dev/tty 2>/dev/null; then
+    case "$answer" in
+      [Yy]|[Yy][Ee][Ss])
+        return 0
+        ;;
+      *)
+        return 1
+        ;;
+    esac
+  else
+    echo ""
+    echo "Cannot read user input. Defaulting to No."
+    return 1
+  fi
+}
+
 download_artifact() {
   if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
     # Use the latest stable released version.
@@ -71,17 +119,104 @@ download_artifact() {
     fi
 
     echo "Downloading ${BIN}, OS: ${OS_TYPE}, Arch: ${ARCH_TYPE}, Version: ${VERSION}"
-    PACKAGE_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}.tar.gz"
+    PKG_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}"
+    PACKAGE_NAME="${PKG_NAME}.tar.gz"
+    SHA256_FILE="${PKG_NAME}.sha256sum"
 
     if [ -n "${PACKAGE_NAME}" ]; then
-      wget "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"
+      # Check if files already exist and prompt for override
+      if [ -f "${PACKAGE_NAME}" ]; then
+        echo "File ${PACKAGE_NAME} already exists."
+        if prompt_confirmation "Do you want to override it?"; then
+          echo "Overriding existing file..."
+          rm -f "${PACKAGE_NAME}"
+        else
+          echo "Skipping download. Using existing file."
+        fi
+      fi
+
+      if [ -f "${BIN}" ]; then
+        echo "Binary ${BIN} already exists."
+        if prompt_confirmation "Do you want to override it?"; then
+          echo "Will override existing binary..."
+          rm -f "${BIN}"
+        else
+          echo "Installation cancelled."
+          exit 0
+        fi
+      fi
+
+      # Download package if not exists
+      if [ ! -f "${PACKAGE_NAME}" ]; then
+        echo "Downloading ${PACKAGE_NAME}..."
+        # Prefer curl (-f: an HTTP error such as 404 must fail instead of saving the error page); fall back to wget
+        if command -v curl >/dev/null 2>&1; then
+          if ! curl -f -L -o "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
+            echo "Error: Failed to download ${PACKAGE_NAME}"
+            exit 1
+          fi
+        elif command -v wget >/dev/null 2>&1; then
+          if ! wget -O "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
+            echo "Error: Failed to download ${PACKAGE_NAME}"
+            exit 1
+          fi
+        else
+          echo "Error: Neither curl nor wget is available for downloading."
+          exit 1
+        fi
+      fi
+
+      # Download and verify SHA256 checksum (-f: a missing checksum file must count as a failed download)
+      echo "Downloading SHA256 checksum..."
+      sha256_download_success=0
+      if command -v curl >/dev/null 2>&1; then
+        if curl -f -L -s -o "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
+          sha256_download_success=1
+        fi
+      elif command -v wget >/dev/null 2>&1; then
+        if wget -q -O "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
+          sha256_download_success=1
+        fi
+      fi
+
+      if [ "$sha256_download_success" -eq 1 ] && [ -f "${SHA256_FILE}" ]; then
+        expected_sha256=$(cut -d' ' -f1 "${SHA256_FILE}")
+        if [ -n "$expected_sha256" ]; then
+          if ! verify_sha256 "${PACKAGE_NAME}" "${expected_sha256}"; then
+            echo "SHA256 verification failed. Removing downloaded file."
+            rm -f "${PACKAGE_NAME}" "${SHA256_FILE}"
+            exit 1
+          fi
+        else
+          echo "Warning: Could not parse SHA256 checksum from file."
+        fi
+        rm -f "${SHA256_FILE}"
+      else
+        echo "Warning: Could not download SHA256 checksum file. Skipping verification."
+      fi
 
       # Extract the binary and clean the rest.
-      tar xvf "${PACKAGE_NAME}" && \
-      mv "${PACKAGE_NAME%.tar.gz}/${BIN}" "${PWD}" && \
-      rm -r "${PACKAGE_NAME}" && \
-      rm -r "${PACKAGE_NAME%.tar.gz}" && \
-      echo "Run './${BIN} --help' to get started"
+      echo "Extracting ${PACKAGE_NAME}..."
+      if ! tar xf "${PACKAGE_NAME}"; then
+        echo "Error: Failed to extract ${PACKAGE_NAME}"
+        exit 1
+      fi
+
+      # Find the binary in the extracted directory
+      extracted_dir="${PACKAGE_NAME%.tar.gz}"
+      if [ -f "${extracted_dir}/${BIN}" ]; then
+        mv "${extracted_dir}/${BIN}" "${PWD}/"
+        rm -f "${PACKAGE_NAME}"
+        rm -rf "${extracted_dir}"
+        chmod +x "${BIN}"
+        echo "Installation completed successfully!"
+ echo "Run './${BIN} --help' to get started" + else + echo "Error: Binary ${BIN} not found in extracted archive" + rm -f "${PACKAGE_NAME}" + rm -rf "${extracted_dir}" + exit 1 + fi fi fi } diff --git a/src/index/src/bloom_filter/applier.rs b/src/index/src/bloom_filter/applier.rs index e87a94cd1b..8829f4e0ee 100644 --- a/src/index/src/bloom_filter/applier.rs +++ b/src/index/src/bloom_filter/applier.rs @@ -42,7 +42,16 @@ impl BloomFilterApplier { ) -> Result>> { let rows_per_segment = self.meta.rows_per_segment as usize; let start_seg = search_range.start / rows_per_segment; - let end_seg = search_range.end.div_ceil(rows_per_segment); + let mut end_seg = search_range.end.div_ceil(rows_per_segment); + + if end_seg == self.meta.segment_loc_indices.len() + 1 { + // In a previous version, there was a bug where if the last segment was all null, + // this segment would not be written into the index. This caused the slice + // `self.meta.segment_loc_indices[start_seg..end_seg]` to go out of bounds due to + // the missing segment. Since the `search` function does not search for nulls, + // we can simply ignore the last segment in this buggy scenario. + end_seg -= 1; + } let locs = &self.meta.segment_loc_indices[start_seg..end_seg]; diff --git a/src/index/src/bloom_filter/creator.rs b/src/index/src/bloom_filter/creator.rs index 0b6810a688..66e892e29f 100644 --- a/src/index/src/bloom_filter/creator.rs +++ b/src/index/src/bloom_filter/creator.rs @@ -64,6 +64,9 @@ pub struct BloomFilterCreator { /// Storage for finalized Bloom filters. finalized_bloom_filters: FinalizedBloomFilterStorage, + /// Row count that finalized so far. + finalized_row_count: usize, + /// Global memory usage of the bloom filter creator. 
global_memory_usage: Arc, } @@ -96,6 +99,7 @@ impl BloomFilterCreator { global_memory_usage, global_memory_usage_threshold, ), + finalized_row_count: 0, } } @@ -136,6 +140,7 @@ impl BloomFilterCreator { if self.accumulated_row_count % self.rows_per_segment == 0 { self.finalize_segment().await?; + self.finalized_row_count = self.accumulated_row_count; } } @@ -161,6 +166,7 @@ impl BloomFilterCreator { if self.accumulated_row_count % self.rows_per_segment == 0 { self.finalize_segment().await?; + self.finalized_row_count = self.accumulated_row_count; } Ok(()) @@ -168,7 +174,7 @@ impl BloomFilterCreator { /// Finalizes any remaining segments and writes the bloom filters and metadata to the provided writer. pub async fn finish(&mut self, mut writer: impl AsyncWrite + Unpin) -> Result<()> { - if !self.cur_seg_distinct_elems.is_empty() { + if self.accumulated_row_count > self.finalized_row_count { self.finalize_segment().await?; } @@ -406,4 +412,35 @@ mod tests { assert!(bf.contains(&b"f")); } } + + #[tokio::test] + async fn test_final_seg_all_null() { + let mut writer = Cursor::new(Vec::new()); + let mut creator = BloomFilterCreator::new( + 2, + Arc::new(MockExternalTempFileProvider::new()), + Arc::new(AtomicUsize::new(0)), + None, + ); + + creator + .push_n_row_elems(4, vec![b"a".to_vec(), b"b".to_vec()]) + .await + .unwrap(); + creator.push_row_elems(Vec::new()).await.unwrap(); + + creator.finish(&mut writer).await.unwrap(); + + let bytes = writer.into_inner(); + let total_size = bytes.len(); + let meta_size_offset = total_size - 4; + let meta_size = u32::from_le_bytes((&bytes[meta_size_offset..]).try_into().unwrap()); + + let meta_bytes = &bytes[total_size - meta_size as usize - 4..total_size - 4]; + let meta = BloomFilterMeta::decode(meta_bytes).unwrap(); + + assert_eq!(meta.rows_per_segment, 2); + assert_eq!(meta.segment_count, 3); + assert_eq!(meta.row_count, 5); + } }