diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml index 44edfee2..d1e077a5 100644 --- a/.github/workflows/nodejs.yml +++ b/.github/workflows/nodejs.yml @@ -16,9 +16,6 @@ concurrency: cancel-in-progress: true env: - # Disable full debug symbol generation to speed up CI build and keep memory down - # "1" means line tables only, which is useful for panic tracebacks. - RUSTFLAGS: "-C debuginfo=1" RUST_BACKTRACE: "1" jobs: @@ -43,18 +40,20 @@ jobs: node-version: 20 cache: 'npm' cache-dependency-path: nodejs/package-lock.json - - uses: Swatinem/rust-cache@v2 + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + components: rustfmt, clippy - name: Install dependencies run: | sudo apt update sudo apt install -y protobuf-compiler libssl-dev - - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - components: rustfmt, clippy - - name: Lint + - uses: Swatinem/rust-cache@v2 + - name: Format Rust + run: cargo fmt --all -- --check + - name: Lint Rust + run: cargo clippy --profile ci --all --all-features -- -D warnings + - name: Lint Typescript run: | - cargo fmt --all -- --check - cargo clippy --all --all-features -- -D warnings npm ci npm run lint-ci - name: Lint examples @@ -90,7 +89,8 @@ jobs: - name: Build run: | npm ci - npm run build + npm run build:debug -- --profile ci + npm run tsc - name: Setup localstack working-directory: . 
run: docker compose up --detach --wait @@ -147,7 +147,8 @@ jobs: - name: Build run: | npm ci - npm run build + npm run build:debug -- --profile ci + npm run tsc - name: Test run: | npm run test diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 4341f990..dac5e3b4 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -19,6 +19,7 @@ env: PYTEST_ADDOPTS: "--color=yes" FORCE_COLOR: "1" PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/" + RUST_BACKTRACE: "1" jobs: lint: @@ -97,9 +98,6 @@ jobs: run: | sudo apt update sudo apt install -y protobuf-compiler - - uses: Swatinem/rust-cache@v2 - with: - workspaces: python - name: Install run: | pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings] @@ -131,10 +129,9 @@ jobs: uses: actions/setup-python@v5 with: python-version: 3.${{ matrix.python-minor-version }} - - uses: Swatinem/rust-cache@v2 - with: - workspaces: python - uses: ./.github/workflows/build_linux_wheel + with: + args: --profile ci - uses: ./.github/workflows/run_tests with: integration: true @@ -169,10 +166,9 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.12" - - uses: Swatinem/rust-cache@v2 - with: - workspaces: python - uses: ./.github/workflows/build_mac_wheel + with: + args: --profile ci - uses: ./.github/workflows/run_tests # Make sure wheels are not included in the Rust cache - name: Delete wheels @@ -199,10 +195,9 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.12" - - uses: Swatinem/rust-cache@v2 - with: - workspaces: python - uses: ./.github/workflows/build_windows_wheel + with: + args: --profile ci - uses: ./.github/workflows/run_tests # Make sure wheels are not included in the Rust cache - name: Delete wheels diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 0faff65d..3cb9a66e 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -18,11 +18,7 @@ env: # This env var is used by 
Swatinem/rust-cache@v2 for the cache # key, so we set it to make sure it is always consistent. CARGO_TERM_COLOR: always - # Disable full debug symbol generation to speed up CI build and keep memory down - # "1" means line tables only, which is useful for panic tracebacks. - RUSTFLAGS: "-C debuginfo=1" RUST_BACKTRACE: "1" - CARGO_INCREMENTAL: 0 jobs: lint: @@ -44,8 +40,6 @@ jobs: with: components: rustfmt, clippy - uses: Swatinem/rust-cache@v2 - with: - workspaces: rust - name: Install dependencies run: | sudo apt update @@ -53,7 +47,7 @@ jobs: - name: Run format run: cargo fmt --all -- --check - name: Run clippy - run: cargo clippy --workspace --tests --all-features -- -D warnings + run: cargo clippy --profile ci --workspace --tests --all-features -- -D warnings build-no-lock: runs-on: ubuntu-24.04 @@ -80,7 +74,7 @@ jobs: sudo apt install -y protobuf-compiler libssl-dev - name: Build all run: | - cargo build --benches --all-features --tests + cargo build --profile ci --benches --all-features --tests linux: timeout-minutes: 30 @@ -103,14 +97,8 @@ jobs: fetch-depth: 0 lfs: true - uses: Swatinem/rust-cache@v2 - with: - workspaces: rust - name: Install dependencies - run: | - # This shaves 2 minutes off this step in CI. This doesn't seem to be - # necessary in standard runners, but it is in the 4x runners. 
- sudo rm /var/lib/man-db/auto-update - sudo apt install -y protobuf-compiler libssl-dev + run: sudo apt install -y protobuf-compiler libssl-dev - uses: rui314/setup-mold@v1 - name: Make Swap run: | @@ -119,16 +107,16 @@ jobs: sudo mkswap /swapfile sudo swapon /swapfile - name: Build - run: cargo build --all-features --tests --locked --examples + run: cargo build --profile ci --all-features --tests --locked --examples - name: Run feature tests - run: make -C ./lancedb feature-tests + run: CARGO_ARGS="--profile ci" make -C ./lancedb feature-tests - name: Run examples - run: cargo run --example simple --locked + run: cargo run --profile ci --example simple --locked - name: Run remote tests # Running this requires access to secrets, so skip if this is # a PR from a fork. if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork - run: make -C ./lancedb remote-tests + run: CARGO_ARGS="--profile ci" make -C ./lancedb remote-tests macos: timeout-minutes: 30 @@ -148,8 +136,6 @@ jobs: - name: CPU features run: sysctl -a | grep cpu - uses: Swatinem/rust-cache@v2 - with: - workspaces: rust - name: Install dependencies run: brew install protobuf - name: Run tests @@ -159,7 +145,7 @@ jobs: ALL_FEATURES=`cargo metadata --format-version=1 --no-deps \ | jq -r '.packages[] | .features | keys | .[]' \ | grep -v s3-test | sort | uniq | paste -s -d "," -` - cargo test --features $ALL_FEATURES --locked + cargo test --profile ci --features $ALL_FEATURES --locked windows: runs-on: windows-2022 @@ -173,22 +159,21 @@ jobs: working-directory: rust/lancedb steps: - uses: actions/checkout@v4 + - name: Set target + run: rustup target add ${{ matrix.target }} - uses: Swatinem/rust-cache@v2 - with: - workspaces: rust - name: Install Protoc v21.12 run: choco install --no-progress protoc - name: Build run: | - rustup target add ${{ matrix.target }} $env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT - cargo build --features remote --tests --locked --target ${{ matrix.target 
}} + cargo build --profile ci --features remote --tests --locked --target ${{ matrix.target }} - name: Run tests # Can only run tests when target matches host if: ${{ matrix.target == 'x86_64-pc-windows-msvc' }} run: | $env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT - cargo test --features remote --locked + cargo test --profile ci --features remote --locked msrv: # Check the minimum supported Rust version @@ -213,6 +198,7 @@ jobs: uses: dtolnay/rust-toolchain@master with: toolchain: ${{ matrix.msrv }} + - uses: Swatinem/rust-cache@v2 - name: Downgrade dependencies # These packages have newer requirements for MSRV run: | @@ -226,4 +212,4 @@ jobs: cargo update -p aws-sdk-sts --precise 1.51.0 cargo update -p home --precise 0.5.9 - name: cargo +${{ matrix.msrv }} check - run: cargo check --workspace --tests --benches --all-features + run: cargo check --profile ci --workspace --tests --benches --all-features diff --git a/Cargo.toml b/Cargo.toml index 5a10d4c6..33997489 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,3 +63,17 @@ regex = "1.10" lazy_static = "1" semver = "1.0.25" chrono = "0.4" + +[profile.ci] +debug = "line-tables-only" +inherits = "dev" +incremental = false + +# This rule applies to every package except workspace members (dependencies +# such as `arrow` and `tokio`). It disables debug info and related features on +# dependencies so their binaries stay smaller, improving cache reuse. +[profile.ci.package."*"] +debug = false +debug-assertions = false +strip = "debuginfo" +incremental = false diff --git a/docs/src/js/_media/CONTRIBUTING.md b/docs/src/js/_media/CONTRIBUTING.md index 881799ae..c8a347ea 100644 --- a/docs/src/js/_media/CONTRIBUTING.md +++ b/docs/src/js/_media/CONTRIBUTING.md @@ -1,7 +1,7 @@ # Contributing to LanceDB Typescript This document outlines the process for contributing to LanceDB Typescript. -For general contribution guidelines, see [CONTRIBUTING.md](../../../../CONTRIBUTING.md). 
+For general contribution guidelines, see [CONTRIBUTING.md](../CONTRIBUTING.md). ## Project layout diff --git a/nodejs/package.json b/nodejs/package.json index dee21fe3..d228f7b9 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -73,8 +73,10 @@ "scripts": { "artifacts": "napi artifacts", "build:debug": "napi build --platform --no-const-enum --dts ../lancedb/native.d.ts --js ../lancedb/native.js lancedb", + "postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/", "build:release": "napi build --platform --no-const-enum --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/", - "build": "npm run build:debug && npm run tsc && shx cp lancedb/*.node dist/", + "postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/", + "build": "npm run build:debug && npm run tsc", "build-release": "npm run build:release && npm run tsc", "tsc": "tsc -b", "posttsc": "shx cp lancedb/native.d.ts dist/native.d.ts", diff --git a/python/python/lancedb/db.py b/python/python/lancedb/db.py index a4430b22..2015c07a 100644 --- a/python/python/lancedb/db.py +++ b/python/python/lancedb/db.py @@ -472,6 +472,12 @@ class LanceDBConnection(DBConnection): uri = uri[7:] # Remove "file://" elif uri.startswith("file:/"): uri = uri[5:] # Remove "file:" + + if sys.platform == "win32": + # On Windows, a path like /C:/path should become C:/path + if len(uri) >= 3 and uri[0] == "/" and uri[2] == ":": + uri = uri[1:] + uri = Path(uri) uri = uri.expanduser().absolute() Path(uri).mkdir(parents=True, exist_ok=True) diff --git a/python/python/tests/test_fts.py b/python/python/tests/test_fts.py index 04e2416d..a7422823 100644 --- a/python/python/tests/test_fts.py +++ b/python/python/tests/test_fts.py @@ -32,6 +32,7 @@ import numpy as np import pyarrow as pa import pandas as pd import pytest +import pytest_asyncio from utils import exception_output pytest.importorskip("lancedb.fts") @@ -90,7 +91,7 @@ def table(tmp_path) -> ldb.table.LanceTable: return table 
-@pytest.fixture +@pytest_asyncio.fixture async def async_table(tmp_path) -> ldb.table.AsyncTable: # Use local random state to avoid affecting other tests rng = np.random.RandomState(42) @@ -253,7 +254,7 @@ def test_search_fts(table, use_tantivy): @pytest.mark.asyncio async def test_fts_select_async(async_table): - tbl = await async_table + tbl = async_table await tbl.create_index("text", config=FTS()) await tbl.create_index("text2", config=FTS()) results = ( @@ -338,7 +339,6 @@ def test_search_fts_phrase_query(table): @pytest.mark.asyncio async def test_search_fts_phrase_query_async(async_table): - async_table = await async_table await async_table.create_index("text", config=FTS(with_position=False)) try: phrase_results = ( @@ -393,7 +393,6 @@ def test_search_fts_specify_column(table): @pytest.mark.asyncio async def test_search_fts_async(async_table): - async_table = await async_table await async_table.create_index("text", config=FTS()) results = await async_table.query().nearest_to_text("puppy").limit(5).to_list() assert len(results) == 5 @@ -424,7 +423,6 @@ async def test_search_fts_async(async_table): @pytest.mark.asyncio async def test_search_fts_specify_column_async(async_table): - async_table = await async_table await async_table.create_index("text", config=FTS()) await async_table.create_index("text2", config=FTS()) diff --git a/rust/lancedb/Makefile b/rust/lancedb/Makefile index 7c487370..fa80d3bf 100644 --- a/rust/lancedb/Makefile +++ b/rust/lancedb/Makefile @@ -9,11 +9,11 @@ all-tests: feature-tests remote-tests # the environment. feature-tests: ../../ci/run_with_docker_compose.sh \ - cargo test --all-features --tests --locked --examples + cargo test --all-features --tests --locked --examples $(CARGO_ARGS) .PHONY: feature-tests # Run tests against remote endpoints. 
remote-tests: ../../ci/run_with_test_connection.sh \ - cargo test --features remote --locked + cargo test --features remote --locked $(CARGO_ARGS) .PHONY: remote-tests diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index 6cb28286..5aae72f7 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -1810,8 +1810,17 @@ impl NativeTable { } // Helper to get num_sub_vectors with default calculation - fn get_num_sub_vectors(provided: Option<u32>, dim: u32) -> u32 { - provided.unwrap_or_else(|| suggested_num_sub_vectors(dim)) + fn get_num_sub_vectors(provided: Option<u32>, dim: u32, num_bits: Option<u32>) -> u32 { + if let Some(provided) = provided { + return provided; + } + let suggested = suggested_num_sub_vectors(dim); + if num_bits.is_some_and(|num_bits| num_bits == 4) && suggested % 2 != 0 { + // num_sub_vectors must be even when 4 bits are used + suggested + 1 + } else { + suggested + } } // Helper to extract vector dimension from field @@ -1834,7 +1843,7 @@ impl NativeTable { // Use IvfPq as the default for auto vector indices let dim = Self::get_vector_dimension(field)?; let ivf_params = lance_index::vector::ivf::IvfBuildParams::default(); - let num_sub_vectors = Self::get_num_sub_vectors(None, dim); + let num_sub_vectors = Self::get_num_sub_vectors(None, dim, None); let pq_params = lance_index::vector::pq::PQBuildParams::new(num_sub_vectors as usize, 8); let lance_idx_params = @@ -1901,7 +1910,8 @@ impl NativeTable { index.sample_rate, index.max_iterations, ); - let num_sub_vectors = Self::get_num_sub_vectors(index.num_sub_vectors, dim); + let num_sub_vectors = + Self::get_num_sub_vectors(index.num_sub_vectors, dim, index.num_bits); let num_bits = index.num_bits.unwrap_or(8) as usize; let mut pq_params = PQBuildParams::new(num_sub_vectors as usize, num_bits); pq_params.max_iters = index.max_iterations as usize; @@ -1937,7 +1947,8 @@ impl NativeTable { index.sample_rate, index.max_iterations, ); - let num_sub_vectors = 
Self::get_num_sub_vectors(index.num_sub_vectors, dim); + let num_sub_vectors = + Self::get_num_sub_vectors(index.num_sub_vectors, dim, index.num_bits); let hnsw_params = HnswBuildParams::default() .num_edges(index.m as usize) .ef_construction(index.ef_construction as usize); @@ -4122,6 +4133,8 @@ mod tests { table.prewarm_index("text_idx").await.unwrap(); } + // Windows does not support precise sleep durations due to timer resolution limitations. + #[cfg(not(target_os = "windows"))] #[tokio::test] async fn test_read_consistency_interval() { let intervals = vec![