From 9a5b0398eced4a4c72746e20cb2501efd8a6b077 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 16 Mar 2026 06:25:40 -0700 Subject: [PATCH] chore: fix ci (#3139) * Move away from buildjet, which is shutting down runners for GHA [^1] * Add `Cargo.lock` to build jobs, so when we upgrade locked dependencies we check the builds actually pass. CI started failing because dependencies were changed in #3116 without running all build jobs. * Add fixes for aws-lc-rs build in NodeJS. [^1]: https://buildjet.com/for-github-actions/blog/we-are-shutting-down --------- Co-authored-by: Claude Opus 4.6 (1M context) --- .github/workflows/nodejs.yml | 1 + .github/workflows/npm-publish.yml | 17 ++++++++++++----- .github/workflows/pypi-publish.yml | 1 + .github/workflows/python.yml | 1 + .github/workflows/rust.yml | 1 + Cargo.lock | 12 ++++++------ docs/src/js/classes/Table.md | 14 +------------- docs/src/js/interfaces/OptimizeOptions.md | 9 +++++++++ 8 files changed, 32 insertions(+), 24 deletions(-) diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml index 3f0c768d3..5d2f15c77 100644 --- a/.github/workflows/nodejs.yml +++ b/.github/workflows/nodejs.yml @@ -7,6 +7,7 @@ on: pull_request: paths: - Cargo.toml + - Cargo.lock - nodejs/** - rust/** - docs/src/js/** diff --git a/.github/workflows/npm-publish.yml b/.github/workflows/npm-publish.yml index 438eba87c..714d11c6d 100644 --- a/.github/workflows/npm-publish.yml +++ b/.github/workflows/npm-publish.yml @@ -19,6 +19,7 @@ on: paths: - .github/workflows/npm-publish.yml - Cargo.toml # Change in dependency frequently breaks builds + - Cargo.lock concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -124,7 +125,12 @@ jobs: pre_build: |- set -e && apt-get update && - apt-get install -y protobuf-compiler pkg-config + apt-get install -y protobuf-compiler pkg-config && + # The base image (manylinux2014-cross) sets TARGET_CC to the old + # GCC 4.8 cross-compiler. aws-lc-sys checks TARGET_CC before CC, + # so it picks up GCC even though the napi-rs image sets CC=clang. + # Override to use the image's clang-18 which supports -fuse-ld=lld. + export TARGET_CC=clang TARGET_CXX=clang++ - target: x86_64-unknown-linux-musl # This one seems to need some extra memory host: ubuntu-2404-8x-x64 @@ -144,9 +150,10 @@ jobs: set -e && apt-get update && apt-get install -y protobuf-compiler pkg-config && - # https://github.com/aws/aws-lc-rs/issues/737#issuecomment-2725918627 - ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc/aarch64-unknown-linux-gnu/4.8.5/crtbeginS.o /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/crtbeginS.o && - ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/gcc && + export TARGET_CC=clang TARGET_CXX=clang++ && + # The manylinux2014 sysroot has glibc 2.17 headers which lack + # AT_HWCAP2 (added in Linux 3.17). Define it for aws-lc-sys. + export CFLAGS="$CFLAGS -DAT_HWCAP2=26" && rustup target add aarch64-unknown-linux-gnu - target: aarch64-unknown-linux-musl host: ubuntu-2404-8x-x64 @@ -266,7 +273,7 @@ jobs: - target: x86_64-unknown-linux-gnu host: ubuntu-latest - target: aarch64-unknown-linux-gnu - host: buildjet-16vcpu-ubuntu-2204-arm + host: ubuntu-2404-8x-arm64 node: - '20' runs-on: ${{ matrix.settings.host }} diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index ee79166c6..0b2f5616a 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -9,6 +9,7 @@ on: paths: - .github/workflows/pypi-publish.yml - Cargo.toml # Change in dependency frequently breaks builds + - Cargo.lock env: PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/" diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index dd9ea346e..978da7a3d 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -7,6 +7,7 @@ on: pull_request: paths: - Cargo.toml + - Cargo.lock - python/** - rust/** - .github/workflows/python.yml diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 292127b51..19911d3f2 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -7,6 +7,7 @@ on: pull_request: paths: - Cargo.toml + - Cargo.lock - rust/** - .github/workflows/rust.yml diff --git a/Cargo.lock b/Cargo.lock index a06b43ac7..19619af51 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3852,7 +3852,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.0", "system-configuration", "tokio", "tower-service", @@ -6443,7 +6443,7 @@ version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "itertools 0.14.0", "log", "multimap", @@ -6632,7 +6632,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.31", - "socket2 0.5.10", + "socket2 0.6.0", "thiserror 2.0.17", "tokio", "tracing", @@ -6669,7 +6669,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.0", "tracing", "windows-sys 0.60.2", ] @@ -7735,7 +7735,7 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", "syn 2.0.114", @@ -7747,7 +7747,7 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54254b8531cafa275c5e096f62d48c81435d1015405a91198ddb11e967301d40" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", "syn 2.0.114", diff --git a/docs/src/js/classes/Table.md b/docs/src/js/classes/Table.md index 5b6b55089..b2f72012f 100644 --- a/docs/src/js/classes/Table.md +++ b/docs/src/js/classes/Table.md @@ -485,19 +485,7 @@ Modeled after ``VACUUM`` in PostgreSQL. - Prune: Removes old versions of the dataset - Index: Optimizes the indices, adding new data to existing indices - Experimental API - ---------------- - - The optimization process is undergoing active development and may change. - Our goal with these changes is to improve the performance of optimization and - reduce the complexity. - - That being said, it is essential today to run optimize if you want the best - performance. It should be stable and safe to use in production, but it our - hope that the API may be simplified (or not even need to be called) in the - future. - - The frequency an application shoudl call optimize is based on the frequency of + The frequency an application should call optimize is based on the frequency of data modifications. If data is frequently added, deleted, or updated then optimize should be run frequently. A good rule of thumb is to run optimize if you have added or modified 100,000 or more records or run more than 20 data diff --git a/docs/src/js/interfaces/OptimizeOptions.md b/docs/src/js/interfaces/OptimizeOptions.md index e2897970a..700632342 100644 --- a/docs/src/js/interfaces/OptimizeOptions.md +++ b/docs/src/js/interfaces/OptimizeOptions.md @@ -37,3 +37,12 @@ tbl.optimize({cleanupOlderThan: new Date()}); ```ts deleteUnverified: boolean; ``` + +Because they may be part of an in-progress transaction, files newer than +7 days old are not deleted by default. If you are sure that there are no +in-progress transactions, then you can set this to true to delete all +files older than `cleanupOlderThan`. + +**WARNING**: This should only be set to true if you can guarantee that +no other process is currently working on this dataset. Otherwise the +dataset could be put into a corrupted state.