ci: make rust ci faster, get ci green (#2782)

* Add `ci` profile for smaller build caches. This had a meaningful
impact in Lance, and I expect a similar impact here.
https://github.com/lancedb/lance/pull/5236
* Get caching working in Rust. Previously was not working due to
`workspaces: rust`.
* Get caching working in NodeJs lint job. Previously wasn't working
because we installed the toolchain **after** we called `- uses:
Swatinem/rust-cache@v2`, which invalidates the cache locally.
* Fix broken pytest from async io transition
(`pytest.PytestRemovedIn9Warning`)
* Altered `get_num_sub_vectors` to handle a bug in the case of 4-bit PQ. This
was the cause of `rust future panicked: unknown error`. Raised an issue
upstream to change the panic to an error:
https://github.com/lancedb/lance/issues/5257
* Call `npm run docs` to fix doc issue.
* Disable the flaky read-consistency interval test on Windows. It's just an
OS-specific timer issue, not our fault.
* Fix Windows absolute path handling in namespaces. Was causing CI
failure `OSError: [WinError 123] The filename, directory name, or volume
label syntax is incorrect: `
This commit is contained in:
Will Jones
2025-11-18 09:04:56 -08:00
committed by GitHub
parent 92dbec1f95
commit 1cf3917a87
10 changed files with 81 additions and 66 deletions

View File

@@ -16,9 +16,6 @@ concurrency:
cancel-in-progress: true
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
RUST_BACKTRACE: "1"
jobs:
@@ -43,18 +40,20 @@ jobs:
node-version: 20
cache: 'npm'
cache-dependency-path: nodejs/package-lock.json
- uses: Swatinem/rust-cache@v2
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt, clippy
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt, clippy
- name: Lint
- uses: Swatinem/rust-cache@v2
- name: Format Rust
run: cargo fmt --all -- --check
- name: Lint Rust
run: cargo clippy --profile ci --all --all-features -- -D warnings
- name: Lint Typescript
run: |
cargo fmt --all -- --check
cargo clippy --all --all-features -- -D warnings
npm ci
npm run lint-ci
- name: Lint examples
@@ -90,7 +89,8 @@ jobs:
- name: Build
run: |
npm ci
npm run build
npm run build:debug -- --profile ci
npm run tsc
- name: Setup localstack
working-directory: .
run: docker compose up --detach --wait
@@ -147,7 +147,8 @@ jobs:
- name: Build
run: |
npm ci
npm run build
npm run build:debug -- --profile ci
npm run tsc
- name: Test
run: |
npm run test

View File

@@ -19,6 +19,7 @@ env:
PYTEST_ADDOPTS: "--color=yes"
FORCE_COLOR: "1"
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
RUST_BACKTRACE: "1"
jobs:
lint:
@@ -97,9 +98,6 @@ jobs:
run: |
sudo apt update
sudo apt install -y protobuf-compiler
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
- name: Install
run: |
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
@@ -131,10 +129,9 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: 3.${{ matrix.python-minor-version }}
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
- uses: ./.github/workflows/build_linux_wheel
with:
args: --profile ci
- uses: ./.github/workflows/run_tests
with:
integration: true
@@ -169,10 +166,9 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: "3.12"
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
- uses: ./.github/workflows/build_mac_wheel
with:
args: --profile ci
- uses: ./.github/workflows/run_tests
# Make sure wheels are not included in the Rust cache
- name: Delete wheels
@@ -199,10 +195,9 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: "3.12"
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
- uses: ./.github/workflows/build_windows_wheel
with:
args: --profile ci
- uses: ./.github/workflows/run_tests
# Make sure wheels are not included in the Rust cache
- name: Delete wheels

View File

@@ -18,11 +18,7 @@ env:
# This env var is used by Swatinem/rust-cache@v2 for the cache
# key, so we set it to make sure it is always consistent.
CARGO_TERM_COLOR: always
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
RUST_BACKTRACE: "1"
CARGO_INCREMENTAL: 0
jobs:
lint:
@@ -44,8 +40,6 @@ jobs:
with:
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install dependencies
run: |
sudo apt update
@@ -53,7 +47,7 @@ jobs:
- name: Run format
run: cargo fmt --all -- --check
- name: Run clippy
run: cargo clippy --workspace --tests --all-features -- -D warnings
run: cargo clippy --profile ci --workspace --tests --all-features -- -D warnings
build-no-lock:
runs-on: ubuntu-24.04
@@ -80,7 +74,7 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev
- name: Build all
run: |
cargo build --benches --all-features --tests
cargo build --profile ci --benches --all-features --tests
linux:
timeout-minutes: 30
@@ -103,14 +97,8 @@ jobs:
fetch-depth: 0
lfs: true
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install dependencies
run: |
# This shaves 2 minutes off this step in CI. This doesn't seem to be
# necessary in standard runners, but it is in the 4x runners.
sudo rm /var/lib/man-db/auto-update
sudo apt install -y protobuf-compiler libssl-dev
run: sudo apt install -y protobuf-compiler libssl-dev
- uses: rui314/setup-mold@v1
- name: Make Swap
run: |
@@ -119,16 +107,16 @@ jobs:
sudo mkswap /swapfile
sudo swapon /swapfile
- name: Build
run: cargo build --all-features --tests --locked --examples
run: cargo build --profile ci --all-features --tests --locked --examples
- name: Run feature tests
run: make -C ./lancedb feature-tests
run: CARGO_ARGS="--profile ci" make -C ./lancedb feature-tests
- name: Run examples
run: cargo run --example simple --locked
run: cargo run --profile ci --example simple --locked
- name: Run remote tests
# Running this requires access to secrets, so skip if this is
# a PR from a fork.
if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
run: make -C ./lancedb remote-tests
run: CARGO_ARGS="--profile ci" make -C ./lancedb remote-tests
macos:
timeout-minutes: 30
@@ -148,8 +136,6 @@ jobs:
- name: CPU features
run: sysctl -a | grep cpu
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install dependencies
run: brew install protobuf
- name: Run tests
@@ -159,7 +145,7 @@ jobs:
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps \
| jq -r '.packages[] | .features | keys | .[]' \
| grep -v s3-test | sort | uniq | paste -s -d "," -`
cargo test --features $ALL_FEATURES --locked
cargo test --profile ci --features $ALL_FEATURES --locked
windows:
runs-on: windows-2022
@@ -173,22 +159,21 @@ jobs:
working-directory: rust/lancedb
steps:
- uses: actions/checkout@v4
- name: Set target
run: rustup target add ${{ matrix.target }}
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install Protoc v21.12
run: choco install --no-progress protoc
- name: Build
run: |
rustup target add ${{ matrix.target }}
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
cargo build --features remote --tests --locked --target ${{ matrix.target }}
cargo build --profile ci --features remote --tests --locked --target ${{ matrix.target }}
- name: Run tests
# Can only run tests when target matches host
if: ${{ matrix.target == 'x86_64-pc-windows-msvc' }}
run: |
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
cargo test --features remote --locked
cargo test --profile ci --features remote --locked
msrv:
# Check the minimum supported Rust version
@@ -213,6 +198,7 @@ jobs:
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ matrix.msrv }}
- uses: Swatinem/rust-cache@v2
- name: Downgrade dependencies
# These packages have newer requirements for MSRV
run: |
@@ -226,4 +212,4 @@ jobs:
cargo update -p aws-sdk-sts --precise 1.51.0
cargo update -p home --precise 0.5.9
- name: cargo +${{ matrix.msrv }} check
run: cargo check --workspace --tests --benches --all-features
run: cargo check --profile ci --workspace --tests --benches --all-features

View File

@@ -63,3 +63,17 @@ regex = "1.10"
lazy_static = "1"
semver = "1.0.25"
chrono = "0.4"
[profile.ci]
debug = "line-tables-only"
inherits = "dev"
incremental = false
# This rule applies to every package except workspace members (dependencies
# such as `arrow` and `tokio`). It disables debug info and related features on
# dependencies so their binaries stay smaller, improving cache reuse.
[profile.ci.package."*"]
debug = false
debug-assertions = false
strip = "debuginfo"
incremental = false

View File

@@ -1,7 +1,7 @@
# Contributing to LanceDB Typescript
This document outlines the process for contributing to LanceDB Typescript.
For general contribution guidelines, see [CONTRIBUTING.md](../../../../CONTRIBUTING.md).
For general contribution guidelines, see [CONTRIBUTING.md](../CONTRIBUTING.md).
## Project layout

View File

@@ -73,8 +73,10 @@
"scripts": {
"artifacts": "napi artifacts",
"build:debug": "napi build --platform --no-const-enum --dts ../lancedb/native.d.ts --js ../lancedb/native.js lancedb",
"postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
"build:release": "napi build --platform --no-const-enum --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
"build": "npm run build:debug && npm run tsc && shx cp lancedb/*.node dist/",
"postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
"build": "npm run build:debug && npm run tsc",
"build-release": "npm run build:release && npm run tsc",
"tsc": "tsc -b",
"posttsc": "shx cp lancedb/native.d.ts dist/native.d.ts",

View File

@@ -472,6 +472,12 @@ class LanceDBConnection(DBConnection):
uri = uri[7:] # Remove "file://"
elif uri.startswith("file:/"):
uri = uri[5:] # Remove "file:"
if sys.platform == "win32":
# On Windows, a path like /C:/path should become C:/path
if len(uri) >= 3 and uri[0] == "/" and uri[2] == ":":
uri = uri[1:]
uri = Path(uri)
uri = uri.expanduser().absolute()
Path(uri).mkdir(parents=True, exist_ok=True)

View File

@@ -32,6 +32,7 @@ import numpy as np
import pyarrow as pa
import pandas as pd
import pytest
import pytest_asyncio
from utils import exception_output
pytest.importorskip("lancedb.fts")
@@ -90,7 +91,7 @@ def table(tmp_path) -> ldb.table.LanceTable:
return table
@pytest.fixture
@pytest_asyncio.fixture
async def async_table(tmp_path) -> ldb.table.AsyncTable:
# Use local random state to avoid affecting other tests
rng = np.random.RandomState(42)
@@ -253,7 +254,7 @@ def test_search_fts(table, use_tantivy):
@pytest.mark.asyncio
async def test_fts_select_async(async_table):
tbl = await async_table
tbl = async_table
await tbl.create_index("text", config=FTS())
await tbl.create_index("text2", config=FTS())
results = (
@@ -338,7 +339,6 @@ def test_search_fts_phrase_query(table):
@pytest.mark.asyncio
async def test_search_fts_phrase_query_async(async_table):
async_table = await async_table
await async_table.create_index("text", config=FTS(with_position=False))
try:
phrase_results = (
@@ -393,7 +393,6 @@ def test_search_fts_specify_column(table):
@pytest.mark.asyncio
async def test_search_fts_async(async_table):
async_table = await async_table
await async_table.create_index("text", config=FTS())
results = await async_table.query().nearest_to_text("puppy").limit(5).to_list()
assert len(results) == 5
@@ -424,7 +423,6 @@ async def test_search_fts_async(async_table):
@pytest.mark.asyncio
async def test_search_fts_specify_column_async(async_table):
async_table = await async_table
await async_table.create_index("text", config=FTS())
await async_table.create_index("text2", config=FTS())

View File

@@ -9,11 +9,11 @@ all-tests: feature-tests remote-tests
# the environment.
feature-tests:
../../ci/run_with_docker_compose.sh \
cargo test --all-features --tests --locked --examples
cargo test --all-features --tests --locked --examples $(CARGO_ARGS)
.PHONY: feature-tests
# Run tests against remote endpoints.
remote-tests:
../../ci/run_with_test_connection.sh \
cargo test --features remote --locked
cargo test --features remote --locked $(CARGO_ARGS)
.PHONY: remote-tests

View File

@@ -1810,8 +1810,17 @@ impl NativeTable {
}
// Helper to get num_sub_vectors with default calculation
fn get_num_sub_vectors(provided: Option<u32>, dim: u32) -> u32 {
provided.unwrap_or_else(|| suggested_num_sub_vectors(dim))
/// Resolves how many PQ sub-vectors to use for a vector column of dimension `dim`.
///
/// * `provided` - an explicit caller choice; when present it is returned unchanged.
/// * `dim` - the vector dimension, forwarded to `suggested_num_sub_vectors`.
/// * `num_bits` - the PQ bit width, if the caller specified one.
///
/// When nothing is provided, the suggested value is used, rounded up to the
/// next even number if `num_bits` is `Some(4)`.
fn get_num_sub_vectors(provided: Option<u32>, dim: u32, num_bits: Option<u32>) -> u32 {
    provided.unwrap_or_else(|| {
        let default_count = suggested_num_sub_vectors(dim);
        let is_four_bit = num_bits == Some(4);
        let is_odd = default_count % 2 != 0;
        // num_sub_vectors must be even when 4 bits are used
        // NOTE(review): the bumped value is not re-checked against `dim` here;
        // confirm it still satisfies any divisibility requirement downstream.
        if is_four_bit && is_odd {
            default_count + 1
        } else {
            default_count
        }
    })
}
// Helper to extract vector dimension from field
@@ -1834,7 +1843,7 @@ impl NativeTable {
// Use IvfPq as the default for auto vector indices
let dim = Self::get_vector_dimension(field)?;
let ivf_params = lance_index::vector::ivf::IvfBuildParams::default();
let num_sub_vectors = Self::get_num_sub_vectors(None, dim);
let num_sub_vectors = Self::get_num_sub_vectors(None, dim, None);
let pq_params =
lance_index::vector::pq::PQBuildParams::new(num_sub_vectors as usize, 8);
let lance_idx_params =
@@ -1901,7 +1910,8 @@ impl NativeTable {
index.sample_rate,
index.max_iterations,
);
let num_sub_vectors = Self::get_num_sub_vectors(index.num_sub_vectors, dim);
let num_sub_vectors =
Self::get_num_sub_vectors(index.num_sub_vectors, dim, index.num_bits);
let num_bits = index.num_bits.unwrap_or(8) as usize;
let mut pq_params = PQBuildParams::new(num_sub_vectors as usize, num_bits);
pq_params.max_iters = index.max_iterations as usize;
@@ -1937,7 +1947,8 @@ impl NativeTable {
index.sample_rate,
index.max_iterations,
);
let num_sub_vectors = Self::get_num_sub_vectors(index.num_sub_vectors, dim);
let num_sub_vectors =
Self::get_num_sub_vectors(index.num_sub_vectors, dim, index.num_bits);
let hnsw_params = HnswBuildParams::default()
.num_edges(index.m as usize)
.ef_construction(index.ef_construction as usize);
@@ -4122,6 +4133,8 @@ mod tests {
table.prewarm_index("text_idx").await.unwrap();
}
// Windows does not support precise sleep durations due to timer resolution limitations.
#[cfg(not(target_os = "windows"))]
#[tokio::test]
async fn test_read_consistency_interval() {
let intervals = vec![