From a92ae0ded5228b92d61dc7721b2723fc6a98c792 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Tue, 21 Apr 2026 08:39:03 -0700 Subject: [PATCH 01/20] fix: enable hostname verification by default (#3304) ## Summary - make `TlsConfig::default()` enable hostname verification by default - align the Rust default with the documented Python and Node behavior - update the Rust unit test to lock in the safe default --- rust/lancedb/src/remote/client.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/rust/lancedb/src/remote/client.rs b/rust/lancedb/src/remote/client.rs index b50ca2206..7fd5c6497 100644 --- a/rust/lancedb/src/remote/client.rs +++ b/rust/lancedb/src/remote/client.rs @@ -16,7 +16,7 @@ use crate::remote::retry::{ResolvedRetryConfig, RetryCounter}; const REQUEST_ID_HEADER: HeaderName = HeaderName::from_static("x-request-id"); /// Configuration for TLS/mTLS settings. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug)] pub struct TlsConfig { /// Path to the client certificate file (PEM format) pub cert_file: Option, @@ -24,10 +24,22 @@ pub struct TlsConfig { pub key_file: Option, /// Path to the CA certificate file for server verification (PEM format) pub ssl_ca_cert: Option, - /// Whether to verify the hostname in the server's certificate + /// Whether to verify the hostname in the server's certificate. + /// Defaults to `true`. 
pub assert_hostname: bool, } +impl Default for TlsConfig { + fn default() -> Self { + Self { + cert_file: None, + key_file: None, + ssl_ca_cert: None, + assert_hostname: true, + } + } +} + /// Trait for providing custom headers for each request #[async_trait::async_trait] pub trait HeaderProvider: Send + Sync + std::fmt::Debug { @@ -926,7 +938,7 @@ mod tests { assert!(config.cert_file.is_none()); assert!(config.key_file.is_none()); assert!(config.ssl_ca_cert.is_none()); - assert!(!config.assert_hostname); + assert!(config.assert_hostname); } #[test] From 0d767abd0e28f5969ca7c6e9489b99e8f454d693 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 24 Apr 2026 20:52:54 -0700 Subject: [PATCH 02/20] ci: add Dependabot config for shipped Rust binaries (#3300) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `.github/dependabot.yml` enabling weekly cargo update PRs for the root workspace, which produces the Rust binaries we ship: the Node.js and Python native extensions. The `rust/lancedb` library crate shares the same lockfile — its consumers pick versions themselves, but bumping transitive deps here keeps the shipped binaries current. Also removes the misleading `exclude = ["python"]` line from the root `Cargo.toml`: `python` is listed in `members`, and `cargo metadata` confirms it's a workspace member, so the exclude was dead code that implied the opposite. Minor/patch updates are grouped to reduce PR noise. Part of #3292. Only covers the cargo ecosystem; pip, npm, and github-actions can follow. 
--- .github/dependabot.yml | 18 ++++++++++++++++++ Cargo.toml | 2 -- 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..4107990ea --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,18 @@ +version: 2 + +# Scope: the root Cargo workspace, which produces the Rust binaries we +# ship to users (the Node.js and Python native extensions). The +# `rust/lancedb` library crate shares the same lockfile; its consumers +# pick their own dependency versions, but bumping transitive deps here +# keeps the binaries we ship current. +updates: + - package-ecosystem: cargo + directory: / + schedule: + interval: weekly + open-pull-requests-limit: 10 + groups: + rust-minor-patch: + update-types: + - minor + - patch diff --git a/Cargo.toml b/Cargo.toml index e8d5a95b4..074b0a869 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,5 @@ [workspace] members = ["rust/lancedb", "nodejs", "python"] -# Python package needs to be built by maturin. -exclude = ["python"] resolver = "2" [workspace.package] From ef399de0920f4e3426850ab4e48da01f9b02f969 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 24 Apr 2026 20:53:06 -0700 Subject: [PATCH 03/20] ci: switch PyPI publish to OIDC trusted publishing (#3302) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Replaces `LANCEDB_PYPI_API_TOKEN` (long-lived token) with OIDC trusted publishing via `pypa/gh-action-pypi-publish` - Adds `id-token: write` permission to linux/mac/windows jobs - Removes `twine`-based upload and the `pypi_token` input from `upload_wheel` composite action - Enables PEP 740 Sigstore attestations on published wheels as a bonus After merging, rotate/revoke the `LANCEDB_PYPI_API_TOKEN` secret. 
Closes #3294 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Sonnet 4.6 --- .github/workflows/pypi-publish.yml | 12 ++++++--- .github/workflows/upload_wheel/action.yml | 31 ++++++++--------------- 2 files changed, 19 insertions(+), 24 deletions(-) diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 976dec77f..ca6e3219b 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -21,6 +21,9 @@ jobs: linux: name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }} timeout-minutes: 60 + permissions: + id-token: write + contents: read strategy: matrix: config: @@ -60,10 +63,12 @@ jobs: - uses: ./.github/workflows/upload_wheel if: startsWith(github.ref, 'refs/tags/python-v') with: - pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }} fury_token: ${{ secrets.FURY_TOKEN }} mac: timeout-minutes: 90 + permissions: + id-token: write + contents: read runs-on: ${{ matrix.config.runner }} strategy: matrix: @@ -88,10 +93,12 @@ jobs: - uses: ./.github/workflows/upload_wheel if: startsWith(github.ref, 'refs/tags/python-v') with: - pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }} fury_token: ${{ secrets.FURY_TOKEN }} windows: timeout-minutes: 60 + permissions: + id-token: write + contents: read runs-on: windows-latest steps: - uses: actions/checkout@v4 @@ -110,7 +117,6 @@ jobs: - uses: ./.github/workflows/upload_wheel if: startsWith(github.ref, 'refs/tags/python-v') with: - pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }} fury_token: ${{ secrets.FURY_TOKEN }} gh-release: if: startsWith(github.ref, 'refs/tags/python-v') diff --git a/.github/workflows/upload_wheel/action.yml b/.github/workflows/upload_wheel/action.yml index 03725d03f..8bcdb7a88 100644 --- a/.github/workflows/upload_wheel/action.yml +++ b/.github/workflows/upload_wheel/action.yml @@ -2,9 +2,6 @@ name: upload-wheel description: "Upload wheels to Pypi" inputs: - pypi_token: - required: 
true - description: "release token for the repo" fury_token: required: true description: "release token for the fury repo" @@ -12,12 +9,6 @@ inputs: runs: using: "composite" steps: - - name: Install dependencies - shell: bash - run: | - python -m pip install --upgrade pip - pip install twine - python3 -m pip install --upgrade pkginfo - name: Choose repo shell: bash id: choose_repo @@ -27,19 +18,17 @@ runs: else echo "repo=pypi" >> $GITHUB_OUTPUT fi - - name: Publish to PyPI + - name: Publish to Fury + if: steps.choose_repo.outputs.repo == 'fury' shell: bash env: FURY_TOKEN: ${{ inputs.fury_token }} - PYPI_TOKEN: ${{ inputs.pypi_token }} run: | - if [[ ${{ steps.choose_repo.outputs.repo }} == fury ]]; then - WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1) - echo "Uploading $WHEEL to Fury" - curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/ - else - twine upload --repository ${{ steps.choose_repo.outputs.repo }} \ - --username __token__ \ - --password $PYPI_TOKEN \ - target/wheels/lancedb-*.whl - fi + WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1) + echo "Uploading $WHEEL to Fury" + curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/ + - name: Publish to PyPI + if: steps.choose_repo.outputs.repo == 'pypi' + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: target/wheels/ From d135c18db621064d7d035974f262767e45cd193c Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 24 Apr 2026 20:53:15 -0700 Subject: [PATCH 04/20] ci: add cargo-deny configuration and CI check (#3307) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a `deny.toml` at the workspace root and a `deny` CI job that runs `cargo deny check` on every PR. Catches yanked crates, license drift, banned or wildcard dependencies, unapproved sources, and new RUSTSEC advisories. 
As part of wiring this up: - Updated `aws-lc-rs` 1.13.0 → 1.16.3 / `aws-lc-sys` 0.28.0 → 0.40.0 to clear four 2026 AWS-LC advisories (timing side-channel, PKCS7 bypass, CRL scope). Removed the `=0.28.0` workaround pin; the original build failure no longer reproduces. - Updated `bytes`, `zlib-rs`, `rand`, `rustls-webpki`, `lz4_flex` to clear their current advisories. - Marked `lancedb-nodejs` and `lancedb-python` as `publish = false` and pinned `lzma-sys` from `*` to `0.1` so `bans.wildcards = "deny"` can be enforced. 10 remaining advisories have no safe upgrade available (transitive via opendal, lance, datafusion, async-openai, aws-sdk on the legacy rustls 0.21 chain). Each is ignored in `deny.toml` with a per-entry rationale and a link to the RUSTSEC advisory. New advisories still fail CI. Fixes #3297 --------- Co-authored-by: Claude Opus 4.7 (1M context) --- .github/workflows/rust.yml | 14 ++++ Cargo.lock | 98 +++++++++++----------- deny.toml | 166 +++++++++++++++++++++++++++++++++++++ nodejs/Cargo.toml | 5 +- python/Cargo.toml | 1 + 5 files changed, 233 insertions(+), 51 deletions(-) create mode 100644 deny.toml diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 61d52754c..937124f5a 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -9,7 +9,10 @@ on: - Cargo.toml - Cargo.lock - rust-toolchain.toml + - deny.toml - rust/** + - nodejs/Cargo.toml + - python/Cargo.toml - .github/workflows/rust.yml permissions: @@ -56,6 +59,17 @@ jobs: - name: Run clippy (without remote feature) run: cargo clippy --profile ci --workspace --tests -- -D warnings + deny: + # Supply-chain checks: advisories, licenses, banned crates, and source + # restrictions. Configuration lives in `deny.toml` at the workspace root. 
+ timeout-minutes: 10 + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + - uses: EmbarkStudios/cargo-deny-action@v2 + with: + command: check advisories bans licenses sources + build-no-lock: runs-on: ubuntu-24.04 timeout-minutes: 30 diff --git a/Cargo.lock b/Cargo.lock index a749b0208..da1d390d5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -572,9 +572,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.16.1" +version = "1.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94bffc006df10ac2a68c83692d734a465f8ee6c5b384d8545a636f81d858f4bf" +checksum = "0ec6fb3fe69024a75fa7e1bfb48aa6cf59706a101658ea01bfd33b2b248a038f" dependencies = [ "aws-lc-sys", "untrusted 0.7.1", @@ -583,9 +583,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.38.0" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4321e568ed89bb5a7d291a7f37997c2c0df89809d7b6d12062c81ddb54aa782e" +checksum = "f50037ee5e1e41e7b8f9d161680a725bd1626cb6f8c7e901f91f942850852fe7" dependencies = [ "cc", "cmake", @@ -1373,7 +1373,7 @@ dependencies = [ "memmap2 0.9.10", "num-traits", "num_cpus", - "rand 0.9.2", + "rand 0.9.4", "rand_distr 0.5.1", "rayon", "safetensors", @@ -1409,7 +1409,7 @@ dependencies = [ "candle-nn", "fancy-regex", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "rayon", "serde", "serde_json", @@ -1966,7 +1966,7 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand 0.9.2", + "rand 0.9.4", "regex", "sqlparser 0.59.0", "tempfile", @@ -2080,7 +2080,7 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "rand 0.9.2", + "rand 0.9.4", "tokio", "url", ] @@ -2176,7 +2176,7 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand 0.9.2", + "rand 0.9.4", "tempfile", "url", ] @@ -2240,7 +2240,7 @@ dependencies = [ "log", "md-5", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "regex", "sha2", "unicode-segmentation", @@ -2642,7 +2642,7 @@ dependencies = [ 
"libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2830,7 +2830,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2965,7 +2965,7 @@ checksum = "719a903cc23e4a89e87962c2a80fdb45cdaad0983a89bd150bb57b4c8571a7d5" dependencies = [ "half", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "rand_distr 0.5.1", ] @@ -3014,7 +3014,7 @@ version = "6.0.0-beta.1" source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" dependencies = [ "arrow-array", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -3387,7 +3387,7 @@ dependencies = [ "cfg-if", "crunchy", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "rand_distr 0.5.1", "zerocopy", ] @@ -3470,7 +3470,7 @@ dependencies = [ "libc", "log", "num_cpus", - "rand 0.9.2", + "rand 0.9.4", "reqwest", "serde", "serde_json", @@ -3980,7 +3980,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4043,7 +4043,7 @@ dependencies = [ "nom 8.0.0", "num-traits", "ordered-float", - "rand 0.9.2", + "rand 0.9.4", "ryu", "serde", "serde_json", @@ -4119,7 +4119,7 @@ dependencies = [ "prost", "prost-build", "prost-types", - "rand 0.9.2", + "rand 0.9.4", "roaring", "semver", "serde", @@ -4152,7 +4152,7 @@ dependencies = [ "half", "jsonb", "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -4191,7 +4191,7 @@ dependencies = [ "object_store", "pin-project", "prost", - "rand 0.9.2", + "rand 0.9.4", "roaring", "serde_json", "snafu 0.9.0", @@ -4248,7 +4248,7 @@ dependencies = [ "futures", "half", "hex", - "rand 0.9.2", + "rand 0.9.4", "rand_distr 0.5.1", "rand_xoshiro", "random_word 0.5.2", @@ -4283,7 +4283,7 @@ dependencies = [ "prost", "prost-build", "prost-types", 
- "rand 0.9.2", + "rand 0.9.4", "snafu 0.9.0", "strum", "tokio", @@ -4374,7 +4374,7 @@ dependencies = [ "prost", "prost-build", "prost-types", - "rand 0.9.2", + "rand 0.9.4", "rand_distr 0.5.1", "rangemap", "rayon", @@ -4426,7 +4426,7 @@ dependencies = [ "path_abs", "pin-project", "prost", - "rand 0.9.2", + "rand 0.9.4", "serde", "snafu 0.9.0", "tempfile", @@ -4449,7 +4449,7 @@ dependencies = [ "lance-arrow", "lance-core", "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -4488,7 +4488,7 @@ dependencies = [ "lance-table", "log", "object_store", - "rand 0.9.2", + "rand 0.9.4", "reqwest", "serde", "serde_json", @@ -4539,7 +4539,7 @@ dependencies = [ "prost", "prost-build", "prost-types", - "rand 0.9.2", + "rand 0.9.4", "rangemap", "roaring", "semver", @@ -4561,7 +4561,7 @@ dependencies = [ "arrow-schema", "lance-arrow", "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -4637,7 +4637,7 @@ dependencies = [ "pin-project", "polars", "polars-arrow", - "rand 0.9.2", + "rand 0.9.4", "random_word 0.4.3", "regex", "reqwest", @@ -5235,7 +5235,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5357,7 +5357,7 @@ dependencies = [ "parking_lot", "percent-encoding", "quick-xml 0.38.4", - "rand 0.9.2", + "rand 0.9.4", "reqwest", "ring", "rustls-pemfile", @@ -6199,8 +6199,8 @@ version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ - "heck 0.5.0", - "itertools 0.11.0", + "heck 0.4.1", + "itertools 0.14.0", "log", "multimap", "petgraph", @@ -6219,7 +6219,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 
0.11.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.117", @@ -6402,7 +6402,7 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand 0.9.2", + "rand 0.9.4", "ring", "rustc-hash", "rustls 0.23.37", @@ -6425,7 +6425,7 @@ dependencies = [ "once_cell", "socket2 0.6.3", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -6468,9 +6468,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.2" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.5", @@ -6531,7 +6531,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" dependencies = [ "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -6566,7 +6566,7 @@ dependencies = [ "ahash", "brotli 8.0.2", "paste", - "rand 0.9.2", + "rand 0.9.4", "unicase", ] @@ -6954,7 +6954,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -7465,7 +7465,7 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.117", @@ -7477,7 +7477,7 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54254b8531cafa275c5e096f62d48c81435d1015405a91198ddb11e967301d40" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.117", @@ -7818,7 +7818,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -8242,7 +8242,7 @@ version = "2.1.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" dependencies = [ - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -8298,9 +8298,9 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.13.1" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da36089a805484bcccfffe0739803392c8298778a2d2f09febf76fac5ad9025b" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" [[package]] name = "unicode-width" @@ -8632,7 +8632,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/deny.toml b/deny.toml new file mode 100644 index 000000000..a2542235d --- /dev/null +++ b/deny.toml @@ -0,0 +1,166 @@ +# cargo-deny configuration for LanceDB. +# +# Run locally with `cargo deny check`. See +# https://embarkstudios.github.io/cargo-deny/ for the full reference. + +# The set of target triples we care about. cargo-deny will only consider +# dependencies that are used on at least one of these targets. Keeping this +# explicit avoids noise from platform-specific crates (e.g. wasm, android, +# ios) that we never actually ship. +[graph] +targets = [ + "x86_64-unknown-linux-gnu", + "aarch64-unknown-linux-gnu", + "x86_64-apple-darwin", + "aarch64-apple-darwin", + "x86_64-pc-windows-msvc", + "aarch64-pc-windows-msvc", +] +all-features = true + +[output] +feature-depth = 1 + +# --------------------------------------------------------------------------- +# Advisories: security vulnerabilities and yanked crates. +# --------------------------------------------------------------------------- +[advisories] +version = 2 +# Fail the check if any crate in the lockfile has been yanked from crates.io. 
+# Yanked crates are a signal the author retracted the release (often due to +# bugs or security issues) and should not be depended on. +yanked = "deny" +# Advisory IDs we have explicitly reviewed and chosen to accept. Every +# entry must include a rationale and, where possible, an upstream issue +# pointing to a fix. Revisit this list whenever dependencies are updated. +ignore = [ + # rsa: Marvin Attack timing side-channel in PKCS#1 v1.5 decryption. + # Reached only through opendal → reqsign → rsa. We do not use RSA + # decryption in LanceDB ourselves; this is dormant in the signing path. + # No fixed release exists upstream as of this writing. + # https://rustsec.org/advisories/RUSTSEC-2023-0071 + { id = "RUSTSEC-2023-0071", reason = "rsa crate via opendal/reqsign; no fixed upstream release" }, + + # instant: unmaintained. Pulled in via backoff → instant. Upstream + # recommends switching to `web-time`; fix has to come from backoff. + # https://rustsec.org/advisories/RUSTSEC-2024-0384 + { id = "RUSTSEC-2024-0384", reason = "transitive via backoff; waiting on backoff replacement" }, + + # paste: unmaintained (author archived the repo). Used transitively by + # datafusion and the arrow ecosystem; widespread, no drop-in replacement. + # https://rustsec.org/advisories/RUSTSEC-2024-0436 + { id = "RUSTSEC-2024-0436", reason = "transitive via datafusion; awaiting ecosystem migration" }, + + # tantivy: segfault on malformed input due to missing bounds check. + # Pulled in via lance for full-text search. We only feed tantivy + # documents we construct ourselves, not attacker-controlled bytes. + # Tracked for a lance dependency bump. + # https://rustsec.org/advisories/RUSTSEC-2025-0003 + { id = "RUSTSEC-2025-0003", reason = "tantivy via lance; inputs are internally produced, not user-supplied bytes" }, + + # backoff: unmaintained. Reached only via async-openai. Replacement + # requires async-openai to migrate (or us to drop async-openai). 
+ # https://rustsec.org/advisories/RUSTSEC-2025-0012 + { id = "RUSTSEC-2025-0012", reason = "transitive via async-openai; waiting on upstream migration" }, + + # number_prefix: unmaintained. Transitive via indicatif → hf-hub. + # No security impact, just maintenance status. + # https://rustsec.org/advisories/RUSTSEC-2025-0119 + { id = "RUSTSEC-2025-0119", reason = "transitive via hf-hub/indicatif; cosmetic formatting crate" }, + + # rustls-pemfile: unmaintained. Reached from two separate chains: + # rustls-native-certs 0.6 (via hyper-rustls 0.24) and object_store 0.12. + # Both upstream dependencies need to move before we can drop it. + # https://rustsec.org/advisories/RUSTSEC-2025-0134 + { id = "RUSTSEC-2025-0134", reason = "transitive via rustls-native-certs/object_store; waiting on upstream migration" }, + + # rustls-webpki 0.101.7 (old major line): name-constraint checks for + # URI / wildcard names. Pulled in only via the legacy rustls 0.21 chain + # from aws-smithy-http-client. The 0.103 line we actively use is patched. + # Clearing the 0.101 copy requires the aws-sdk chain to migrate off + # rustls 0.21. + # https://rustsec.org/advisories/RUSTSEC-2026-0098 + # https://rustsec.org/advisories/RUSTSEC-2026-0099 + { id = "RUSTSEC-2026-0098", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" }, + { id = "RUSTSEC-2026-0099", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" }, +] + +# --------------------------------------------------------------------------- +# Licenses: only allow licenses we've reviewed as compatible with Apache-2.0. +# --------------------------------------------------------------------------- +[licenses] +version = 2 +# SPDX identifiers for licenses that are compatible with our Apache-2.0 +# distribution. Additions require legal review. 
+allow = [ + "Apache-2.0", + "Apache-2.0 WITH LLVM-exception", + "MIT", + "BSD-2-Clause", + "BSD-3-Clause", + "ISC", + "Unicode-3.0", + "Unicode-DFS-2016", + "Zlib", + "CC0-1.0", + "MPL-2.0", + "BSL-1.0", + "OpenSSL", + # 0BSD ("BSD Zero Clause") is effectively public domain — no attribution + # required. Pulled in by `mock_instant`. + "0BSD", + # bzip2-1.0.6 is the permissive upstream bzip2 license (BSD-like). Pulled + # in by `libbz2-rs-sys`, the pure-Rust bzip2 implementation. + "bzip2-1.0.6", + # CDLA-Permissive-2.0 is a permissive data license used by `webpki-roots` + # for the Mozilla CA root bundle. Data-only, distribution-compatible. + "CDLA-Permissive-2.0", +] +confidence-threshold = 0.8 +# Crates whose license cannot be determined from Cargo metadata but whose +# license we've manually confirmed from upstream. Keep this list minimal. +[[licenses.clarify]] +# polars-arrow-format omits the `license` field in its Cargo.toml, but the +# upstream repo (pola-rs/polars-arrow-format) is dual-licensed Apache-2.0 OR +# MIT. See https://github.com/pola-rs/polars-arrow-format/blob/main/LICENSE +crate = "polars-arrow-format" +expression = "Apache-2.0 OR MIT" +license-files = [] + +# --------------------------------------------------------------------------- +# Bans: disallow specific crates and flag dependency hygiene issues. +# --------------------------------------------------------------------------- +[bans] +# Warn (not deny) on duplicate versions of the same crate. In a large +# workspace like this one, duplicates are common and often unavoidable +# transitively. We surface them to discourage growth, but don't fail CI. +multiple-versions = "warn" +# Wildcard version requirements (`foo = "*"`) are a footgun — they let any +# future release in without review. Ban them outright. +wildcards = "deny" +# Internal workspace crates reference each other via `path = "..."`, which +# cargo-deny sees as a wildcard version. 
That's fine for private workspace +members (not published to crates.io), so allow it specifically for paths. +allow-wildcard-paths = true +# Crates that are explicitly banned; any match causes the check to fail. +deny = [] +# Crates to skip when checking for duplicate versions. +skip = [] +# Similar to `skip`, but also skips the entire transitive subtree. +skip-tree = [] + +# --------------------------------------------------------------------------- +# Sources: restrict where crates can come from. +# --------------------------------------------------------------------------- +[sources] +# Deny any registry other than the ones explicitly listed below. +unknown-registry = "deny" +# Deny any git dependency whose repository URL isn't in the allow-list below. +# This prevents accidental pulls from arbitrary forks. +unknown-git = "deny" +allow-registry = ["https://github.com/rust-lang/crates.io-index"] +# Lance is developed in a sibling repo and pulled as a git dependency until +# releases are cut to crates.io. Allow that specific repository. +allow-git = [ + "https://github.com/lance-format/lance", +] diff --git a/nodejs/Cargo.toml b/nodejs/Cargo.toml index 627e7b256..6231fcf87 100644 --- a/nodejs/Cargo.toml +++ b/nodejs/Cargo.toml @@ -2,6 +2,7 @@ name = "lancedb-nodejs" edition.workspace = true version = "0.28.0-beta.9" +publish = false license.workspace = true description.workspace = true repository.workspace = true @@ -31,8 +32,8 @@ lzma-sys = { version = "0.1", features = ["static"] } log.workspace = true # Pin to resolve build failures; update periodically for security patches.
-aws-lc-sys = "=0.38.0" -aws-lc-rs = "=1.16.1" +aws-lc-sys = "=0.40.0" +aws-lc-rs = "=1.16.3" [build-dependencies] napi-build = "2.3.1" diff --git a/python/Cargo.toml b/python/Cargo.toml index 481f4ebfe..4ac95f22c 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,6 +1,7 @@ [package] name = "lancedb-python" version = "0.31.0-beta.9" +publish = false edition.workspace = true description = "Python bindings for LanceDB" license.workspace = true From b84150a53e749f71ea11d7c3ca3720bebe9acce4 Mon Sep 17 00:00:00 2001 From: LanceDB Robot Date: Mon, 27 Apr 2026 15:13:07 -0700 Subject: [PATCH 05/20] chore: update lance dependency to v6.0.0-beta.4 (#3325) ## Summary - Updates Lance Rust dependencies to `6.0.0-beta.4` using `ci/set_lance_version.py`. - Updates the Java `lance-core.version` property to `6.0.0-beta.4`. - Triggering Lance tag: https://github.com/lance-format/lance/releases/tag/v6.0.0-beta.4 ## Verification - `cargo clippy --workspace --tests --all-features -- -D warnings` - `cargo fmt --all` --- Cargo.lock | 72 ++++++++++++++++++++++++++-------------------------- Cargo.toml | 28 ++++++++++---------- java/pom.xml | 2 +- 3 files changed, 51 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index da1d390d5..42ad17f65 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3010,8 +3010,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow-array", "rand 0.9.4", @@ -4066,8 +4066,8 @@ dependencies = [ [[package]] name = "lance" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" 
+source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow", "arrow-arith", @@ -4135,8 +4135,8 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow-array", "arrow-buffer", @@ -4157,8 +4157,8 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrayref", "paste", @@ -4167,8 +4167,8 @@ dependencies = [ [[package]] name = "lance-core" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow-array", "arrow-buffer", @@ -4205,8 +4205,8 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow", "arrow-array", @@ -4237,8 +4237,8 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version 
= "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow", "arrow-array", @@ -4256,8 +4256,8 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow-arith", "arrow-array", @@ -4294,8 +4294,8 @@ dependencies = [ [[package]] name = "lance-file" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow-arith", "arrow-array", @@ -4327,8 +4327,8 @@ dependencies = [ [[package]] name = "lance-index" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow", "arrow-arith", @@ -4392,8 +4392,8 @@ dependencies = [ [[package]] name = "lance-io" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow", "arrow-arith", @@ -4437,8 +4437,8 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" 
+version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow-array", "arrow-buffer", @@ -4454,8 +4454,8 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow", "async-trait", @@ -4468,8 +4468,8 @@ dependencies = [ [[package]] name = "lance-namespace-impls" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow", "arrow-ipc", @@ -4501,9 +4501,9 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.6.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee2e48de899e2931afb67fcddd0a08e439bf5d8b6ea2a2ed9cb8f4df669bd5cc" +checksum = "0f061dd6fe63e3ba4052702a9d40973ee4ac57f612f04222897a149576213832" dependencies = [ "reqwest", "serde", @@ -4514,8 +4514,8 @@ dependencies = [ [[package]] name = "lance-table" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow", "arrow-array", @@ -4554,8 +4554,8 @@ dependencies = [ [[package]] name = "lance-testing" -version = "6.0.0-beta.1" -source = 
"git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "arrow-array", "arrow-schema", @@ -4566,8 +4566,8 @@ dependencies = [ [[package]] name = "lance-tokenizer" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "6.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" dependencies = [ "rust-stemmers", "serde", diff --git a/Cargo.toml b/Cargo.toml index 074b0a869..94a42ad7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,20 +13,20 @@ categories = ["database-implementations"] rust-version = "1.91.0" [workspace.dependencies] -lance = { "version" = "=6.0.0-beta.1", default-features = false, "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-core = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-datagen = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-file = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-io = { "version" = "=6.0.0-beta.1", default-features = false, "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-index = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-linalg = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-namespace = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-namespace-impls = { 
"version" = "=6.0.0-beta.1", default-features = false, "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-table = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-testing = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-datafusion = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-encoding = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-arrow = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } +lance = { "version" = "=6.0.0-beta.4", default-features = false, "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-core = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-datagen = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-file = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-io = { "version" = "=6.0.0-beta.4", default-features = false, "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-index = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-linalg = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-namespace = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-namespace-impls = { "version" = "=6.0.0-beta.4", default-features = false, "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-table = { 
"version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-testing = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-datafusion = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-encoding = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-arrow = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } ahash = "0.8" # Note that this one does not include pyarrow arrow = { version = "57.2", optional = false } diff --git a/java/pom.xml b/java/pom.xml index 3684452bd..e288cfb96 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -28,7 +28,7 @@ UTF-8 15.0.0 - 6.0.0-beta.1 + 6.0.0-beta.4 false 2.30.0 1.7 From f31e27768ade19388eb2af202a86d61a7451ee69 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Mon, 27 Apr 2026 17:56:10 -0700 Subject: [PATCH 06/20] fix: address RUSTSEC-2026-0104 cargo-deny advisory (#3326) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Update `rustls-webpki` 0.103.10 → 0.103.13 to fix RUSTSEC-2026-0104 (reachable panic in CRL parsing) - Add advisory ignore for the legacy `rustls-webpki` 0.101.7 copy pinned to the aws-smithy/rustls 0.21 chain (same chain already exempted for RUSTSEC-2026-0098/0099) Fixes the `deny` CI job failure seen in #3325. 
## Test plan - [x] `cargo deny check advisories` passes locally 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.6 (1M context) --- Cargo.lock | 6 +++--- deny.toml | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 42ad17f65..a6825a4b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6980,7 +6980,7 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.10", + "rustls-webpki 0.103.13", "subtle", "zeroize", ] @@ -7028,9 +7028,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.10" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" dependencies = [ "aws-lc-rs", "ring", diff --git a/deny.toml b/deny.toml index a2542235d..85231f920 100644 --- a/deny.toml +++ b/deny.toml @@ -83,6 +83,12 @@ ignore = [ # https://rustsec.org/advisories/RUSTSEC-2026-0099 { id = "RUSTSEC-2026-0098", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" }, { id = "RUSTSEC-2026-0099", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" }, + + # rustls-webpki 0.101.7: reachable panic in CRL parsing. Same legacy + # rustls 0.21 chain from aws-smithy-http-client as above. The 0.103 line + # we actively use is upgraded to 0.103.13 which contains the fix. 
+ # https://rustsec.org/advisories/RUSTSEC-2026-0104 + { id = "RUSTSEC-2026-0104", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" }, ] # --------------------------------------------------------------------------- From 2e36cd9dad93700a781c3172f85a7667618f252c Mon Sep 17 00:00:00 2001 From: Lance Release Date: Tue, 28 Apr 2026 13:29:00 +0000 Subject: [PATCH 07/20] =?UTF-8?q?Bump=20version:=200.31.0-beta.9=20?= =?UTF-8?q?=E2=86=92=200.31.0-beta.10?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/.bumpversion.toml | 2 +- python/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/.bumpversion.toml b/python/.bumpversion.toml index 2dc1443bb..5fa2e4e1c 100644 --- a/python/.bumpversion.toml +++ b/python/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.31.0-beta.9" +current_version = "0.31.0-beta.10" parse = """(?x) (?P0|[1-9]\\d*)\\. (?P0|[1-9]\\d*)\\. 
diff --git a/python/Cargo.toml b/python/Cargo.toml index 4ac95f22c..fd38fa641 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lancedb-python" -version = "0.31.0-beta.9" +version = "0.31.0-beta.10" publish = false edition.workspace = true description = "Python bindings for LanceDB" From 4dcd7f431488bd5970939f0eabd83e58481e1946 Mon Sep 17 00:00:00 2001 From: Lance Release Date: Tue, 28 Apr 2026 13:29:17 +0000 Subject: [PATCH 08/20] =?UTF-8?q?Bump=20version:=200.28.0-beta.9=20?= =?UTF-8?q?=E2=86=92=200.28.0-beta.10?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.toml | 2 +- Cargo.lock | 6 +++--- docs/src/java/java.md | 2 +- java/lancedb-core/pom.xml | 2 +- java/pom.xml | 2 +- nodejs/Cargo.toml | 2 +- nodejs/npm/darwin-arm64/package.json | 2 +- nodejs/npm/linux-arm64-gnu/package.json | 2 +- nodejs/npm/linux-arm64-musl/package.json | 2 +- nodejs/npm/linux-x64-gnu/package.json | 2 +- nodejs/npm/linux-x64-musl/package.json | 2 +- nodejs/npm/win32-arm64-msvc/package.json | 2 +- nodejs/npm/win32-x64-msvc/package.json | 2 +- nodejs/package-lock.json | 4 ++-- nodejs/package.json | 2 +- rust/lancedb/Cargo.toml | 2 +- 16 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.bumpversion.toml b/.bumpversion.toml index f07c90c64..003efe386 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.28.0-beta.9" +current_version = "0.28.0-beta.10" parse = """(?x) (?P0|[1-9]\\d*)\\. (?P0|[1-9]\\d*)\\. 
diff --git a/Cargo.lock b/Cargo.lock index a6825a4b1..db63697ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4576,7 +4576,7 @@ dependencies = [ [[package]] name = "lancedb" -version = "0.28.0-beta.9" +version = "0.28.0-beta.10" dependencies = [ "ahash", "anyhow", @@ -4658,7 +4658,7 @@ dependencies = [ [[package]] name = "lancedb-nodejs" -version = "0.28.0-beta.9" +version = "0.28.0-beta.10" dependencies = [ "arrow-array", "arrow-buffer", @@ -4680,7 +4680,7 @@ dependencies = [ [[package]] name = "lancedb-python" -version = "0.31.0-beta.9" +version = "0.31.0-beta.10" dependencies = [ "arrow", "async-trait", diff --git a/docs/src/java/java.md b/docs/src/java/java.md index f56804eae..62c77153b 100644 --- a/docs/src/java/java.md +++ b/docs/src/java/java.md @@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`: com.lancedb lancedb-core - 0.28.0-beta.9 + 0.28.0-beta.10 ``` diff --git a/java/lancedb-core/pom.xml b/java/lancedb-core/pom.xml index 7b29c682d..7aaaab5c2 100644 --- a/java/lancedb-core/pom.xml +++ b/java/lancedb-core/pom.xml @@ -8,7 +8,7 @@ com.lancedb lancedb-parent - 0.28.0-beta.9 + 0.28.0-beta.10 ../pom.xml diff --git a/java/pom.xml b/java/pom.xml index e288cfb96..822d553e2 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -6,7 +6,7 @@ com.lancedb lancedb-parent - 0.28.0-beta.9 + 0.28.0-beta.10 pom ${project.artifactId} LanceDB Java SDK Parent POM diff --git a/nodejs/Cargo.toml b/nodejs/Cargo.toml index 6231fcf87..a4603e080 100644 --- a/nodejs/Cargo.toml +++ b/nodejs/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "lancedb-nodejs" edition.workspace = true -version = "0.28.0-beta.9" +version = "0.28.0-beta.10" publish = false license.workspace = true description.workspace = true diff --git a/nodejs/npm/darwin-arm64/package.json b/nodejs/npm/darwin-arm64/package.json index d8bec57fd..e41bb4053 100644 --- a/nodejs/npm/darwin-arm64/package.json +++ b/nodejs/npm/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-darwin-arm64", - "version": 
"0.28.0-beta.9", + "version": "0.28.0-beta.10", "os": ["darwin"], "cpu": ["arm64"], "main": "lancedb.darwin-arm64.node", diff --git a/nodejs/npm/linux-arm64-gnu/package.json b/nodejs/npm/linux-arm64-gnu/package.json index 4873e1c6e..565c2cc3e 100644 --- a/nodejs/npm/linux-arm64-gnu/package.json +++ b/nodejs/npm/linux-arm64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-arm64-gnu", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.10", "os": ["linux"], "cpu": ["arm64"], "main": "lancedb.linux-arm64-gnu.node", diff --git a/nodejs/npm/linux-arm64-musl/package.json b/nodejs/npm/linux-arm64-musl/package.json index b41857807..e3da9a674 100644 --- a/nodejs/npm/linux-arm64-musl/package.json +++ b/nodejs/npm/linux-arm64-musl/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-arm64-musl", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.10", "os": ["linux"], "cpu": ["arm64"], "main": "lancedb.linux-arm64-musl.node", diff --git a/nodejs/npm/linux-x64-gnu/package.json b/nodejs/npm/linux-x64-gnu/package.json index bd765ba07..d85b3fa74 100644 --- a/nodejs/npm/linux-x64-gnu/package.json +++ b/nodejs/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-x64-gnu", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.10", "os": ["linux"], "cpu": ["x64"], "main": "lancedb.linux-x64-gnu.node", diff --git a/nodejs/npm/linux-x64-musl/package.json b/nodejs/npm/linux-x64-musl/package.json index 48e8f6721..02023c768 100644 --- a/nodejs/npm/linux-x64-musl/package.json +++ b/nodejs/npm/linux-x64-musl/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-x64-musl", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.10", "os": ["linux"], "cpu": ["x64"], "main": "lancedb.linux-x64-musl.node", diff --git a/nodejs/npm/win32-arm64-msvc/package.json b/nodejs/npm/win32-arm64-msvc/package.json index 481324a8c..5547cb1d5 100644 --- a/nodejs/npm/win32-arm64-msvc/package.json +++ 
b/nodejs/npm/win32-arm64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-win32-arm64-msvc", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.10", "os": [ "win32" ], diff --git a/nodejs/npm/win32-x64-msvc/package.json b/nodejs/npm/win32-x64-msvc/package.json index 3cb420e8f..9f76092a7 100644 --- a/nodejs/npm/win32-x64-msvc/package.json +++ b/nodejs/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-win32-x64-msvc", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.10", "os": ["win32"], "cpu": ["x64"], "main": "lancedb.win32-x64-msvc.node", diff --git a/nodejs/package-lock.json b/nodejs/package-lock.json index aae49f278..1ff7cf5c6 100644 --- a/nodejs/package-lock.json +++ b/nodejs/package-lock.json @@ -1,12 +1,12 @@ { "name": "@lancedb/lancedb", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.10", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@lancedb/lancedb", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.10", "cpu": [ "x64", "arm64" diff --git a/nodejs/package.json b/nodejs/package.json index 48aa86146..3fa38c959 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -11,7 +11,7 @@ "ann" ], "private": false, - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.10", "main": "dist/index.js", "exports": { ".": "./dist/index.js", diff --git a/rust/lancedb/Cargo.toml b/rust/lancedb/Cargo.toml index 9bb30ba07..cf6f8c44d 100644 --- a/rust/lancedb/Cargo.toml +++ b/rust/lancedb/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lancedb" -version = "0.28.0-beta.9" +version = "0.28.0-beta.10" edition.workspace = true description = "LanceDB: A serverless, low-latency vector database for AI applications" license.workspace = true From 25dfe2cfd408f26862b1fd723a1c954e86d82bc3 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Wed, 29 Apr 2026 09:22:06 -0700 Subject: [PATCH 09/20] feat: add manifest-enabled directory namespace mode (#3332) Adds manifest_enabled for local/native connections 
so directory namespace manifests can be the source of truth, including migration from directory listing and Azure credential vending feature wiring. Also exposes the option through Rust, Python, and Node bindings with focused validation. --- Cargo.lock | 4 + docs/src/js/interfaces/ConnectionOptions.md | 23 +++ nodejs/src/connection.rs | 6 + nodejs/src/lib.rs | 7 + python/python/lancedb/__init__.py | 34 +++- python/python/lancedb/_lancedb.pyi | 2 + python/python/lancedb/db.py | 10 +- python/src/connection.rs | 10 +- rust/lancedb/Cargo.toml | 7 +- rust/lancedb/src/connection.rs | 175 ++++++++++++++++++++ rust/lancedb/src/database/listing.rs | 152 ++++++++++++++++- rust/lancedb/src/database/namespace.rs | 111 ++++++++++++- 12 files changed, 524 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index db63697ab..315556ed8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4476,9 +4476,11 @@ dependencies = [ "arrow-schema", "async-trait", "axum", + "base64 0.22.1", "bytes", "chrono", "futures", + "hmac", "lance", "lance-core", "lance-index", @@ -4488,10 +4490,12 @@ dependencies = [ "lance-table", "log", "object_store", + "quick-xml 0.38.4", "rand 0.9.4", "reqwest", "serde", "serde_json", + "sha2", "snafu 0.9.0", "tokio", "tower", diff --git a/docs/src/js/interfaces/ConnectionOptions.md b/docs/src/js/interfaces/ConnectionOptions.md index d617e8a19..1ad0e127a 100644 --- a/docs/src/js/interfaces/ConnectionOptions.md +++ b/docs/src/js/interfaces/ConnectionOptions.md @@ -41,6 +41,29 @@ for testing purposes. *** +### manifestEnabled? + +```ts +optional manifestEnabled: boolean; +``` + +(For LanceDB OSS only): use directory namespace manifests as the source +of truth for table metadata. Existing directory-listed root tables are +migrated into the manifest on access. + +*** + +### namespaceClientProperties? 
+ +```ts +optional namespaceClientProperties: Record; +``` + +(For LanceDB OSS only): extra properties for the backing namespace +client used by manifest-enabled native connections. + +*** + ### readConsistencyInterval? ```ts diff --git a/nodejs/src/connection.rs b/nodejs/src/connection.rs index 19b2a5440..09be9465f 100644 --- a/nodejs/src/connection.rs +++ b/nodejs/src/connection.rs @@ -67,6 +67,12 @@ impl Connection { builder = builder.storage_option(key, value); } } + if let Some(manifest_enabled) = options.manifest_enabled { + builder = builder.manifest_enabled(manifest_enabled); + } + if let Some(namespace_client_properties) = options.namespace_client_properties { + builder = builder.namespace_client_properties(namespace_client_properties); + } // Create client config, optionally with header provider let client_config = options.client_config.unwrap_or_default(); diff --git a/nodejs/src/lib.rs b/nodejs/src/lib.rs index 055a6a3d3..87bc97ce7 100644 --- a/nodejs/src/lib.rs +++ b/nodejs/src/lib.rs @@ -37,6 +37,13 @@ pub struct ConnectionOptions { /// /// The available options are described at https://docs.lancedb.com/storage/ pub storage_options: Option>, + /// (For LanceDB OSS only): use directory namespace manifests as the source + /// of truth for table metadata. Existing directory-listed root tables are + /// migrated into the manifest on access. + pub manifest_enabled: Option, + /// (For LanceDB OSS only): extra properties for the backing namespace + /// client used by manifest-enabled native connections. + pub namespace_client_properties: Option>, /// (For LanceDB OSS only): the session to use for this connection. Holds /// shared caches and other session-specific state. 
pub session: Option, diff --git a/python/python/lancedb/__init__.py b/python/python/lancedb/__init__.py index ebf292b05..9e8ee0dd8 100644 --- a/python/python/lancedb/__init__.py +++ b/python/python/lancedb/__init__.py @@ -73,6 +73,7 @@ def connect( client_config: Union[ClientConfig, Dict[str, Any], None] = None, storage_options: Optional[Dict[str, str]] = None, session: Optional[Session] = None, + manifest_enabled: bool = False, namespace_client_impl: Optional[str] = None, namespace_client_properties: Optional[Dict[str, str]] = None, namespace_client_pushdown_operations: Optional[List[str]] = None, @@ -111,6 +112,10 @@ def connect( storage_options: dict, optional Additional options for the storage backend. See available options at + manifest_enabled : bool, default False + When true for local/native connections, use directory namespace + manifests as the source of truth for table metadata. Existing + directory-listed root tables are migrated into the manifest on access. session: Session, optional (For LanceDB OSS only) A session to use for this connection. Sessions allow you to configure @@ -158,11 +163,11 @@ def connect( conn : DBConnection A connection to a LanceDB database. 
""" - if namespace_client_impl is not None or namespace_client_properties is not None: - if namespace_client_impl is None or namespace_client_properties is None: + if namespace_client_impl is not None: + if namespace_client_properties is None: raise ValueError( - "Both namespace_client_impl and " - "namespace_client_properties must be provided" + "namespace_client_properties must be provided when " + "namespace_client_impl is set" ) if kwargs: raise ValueError(f"Unknown keyword arguments: {kwargs}") @@ -175,6 +180,12 @@ def connect( namespace_client_pushdown_operations=namespace_client_pushdown_operations, ) + if namespace_client_properties is not None and not manifest_enabled: + raise ValueError( + "namespace_client_impl must be provided when using " + "namespace_client_properties unless manifest_enabled=True" + ) + if namespace_client_pushdown_operations is not None: raise ValueError( "namespace_client_pushdown_operations is only valid when " @@ -212,6 +223,8 @@ def connect( read_consistency_interval=read_consistency_interval, storage_options=storage_options, session=session, + manifest_enabled=manifest_enabled, + namespace_client_properties=namespace_client_properties, ) @@ -289,6 +302,8 @@ def deserialize_conn( parsed["uri"], read_consistency_interval=rci, storage_options=storage_options, + manifest_enabled=parsed.get("manifest_enabled", False), + namespace_client_properties=parsed.get("namespace_client_properties"), ) else: raise ValueError(f"Unknown connection_type: {connection_type}") @@ -304,6 +319,8 @@ async def connect_async( client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None, storage_options: Optional[Dict[str, str]] = None, session: Optional[Session] = None, + manifest_enabled: bool = False, + namespace_client_properties: Optional[Dict[str, str]] = None, ) -> AsyncConnection: """Connect to a LanceDB database. 
@@ -343,6 +360,13 @@ async def connect_async( cache sizes for index and metadata caches, which can significantly impact memory use and performance. They can also be re-used across multiple connections to share the same cache state. + manifest_enabled : bool, default False + When true for local/native connections, use directory namespace + manifests as the source of truth for table metadata. Existing + directory-listed root tables are migrated into the manifest on access. + namespace_client_properties : dict, optional + Additional directory namespace client properties to use with + ``manifest_enabled=True``. Examples -------- @@ -385,6 +409,8 @@ async def connect_async( client_config, storage_options, session, + manifest_enabled, + namespace_client_properties, ) ) diff --git a/python/python/lancedb/_lancedb.pyi b/python/python/lancedb/_lancedb.pyi index 76c08041b..2298a9473 100644 --- a/python/python/lancedb/_lancedb.pyi +++ b/python/python/lancedb/_lancedb.pyi @@ -242,6 +242,8 @@ async def connect( client_config: Optional[Union[ClientConfig, Dict[str, Any]]], storage_options: Optional[Dict[str, str]], session: Optional[Session], + manifest_enabled: bool = False, + namespace_client_properties: Optional[Dict[str, str]] = None, ) -> Connection: ... 
class RecordBatchStream: diff --git a/python/python/lancedb/db.py b/python/python/lancedb/db.py index b07d409eb..276116db7 100644 --- a/python/python/lancedb/db.py +++ b/python/python/lancedb/db.py @@ -590,8 +590,13 @@ class LanceDBConnection(DBConnection): read_consistency_interval: Optional[timedelta] = None, storage_options: Optional[Dict[str, str]] = None, session: Optional[Session] = None, + manifest_enabled: bool = False, + namespace_client_properties: Optional[Dict[str, str]] = None, _inner: Optional[LanceDbConnection] = None, ): + self.storage_options = storage_options + self._manifest_enabled = manifest_enabled + self._namespace_client_properties = namespace_client_properties if _inner is not None: self._conn = _inner self._cached_namespace_client = None @@ -633,6 +638,8 @@ class LanceDBConnection(DBConnection): None, storage_options, session, + manifest_enabled, + namespace_client_properties, ) # TODO: It would be nice if we didn't store self.storage_options but it is @@ -640,7 +647,6 @@ class LanceDBConnection(DBConnection): # work because some paths like LanceDBConnection.from_inner will lose the # storage_options. Also, this class really shouldn't be holding any state # beyond _conn. 
- self.storage_options = storage_options self._conn = AsyncConnection(LOOP.run(do_connect())) self._cached_namespace_client: Optional[LanceNamespace] = None @@ -677,6 +683,8 @@ class LanceDBConnection(DBConnection): "connection_type": "local", "uri": self.uri, "storage_options": self.storage_options, + "manifest_enabled": self._manifest_enabled, + "namespace_client_properties": self._namespace_client_properties, "read_consistency_interval_seconds": ( rci.total_seconds() if rci else None ), diff --git a/python/src/connection.rs b/python/src/connection.rs index 9c67f38c7..1b12c33ab 100644 --- a/python/src/connection.rs +++ b/python/src/connection.rs @@ -525,7 +525,7 @@ impl Connection { } #[pyfunction] -#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))] +#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None, manifest_enabled=false, namespace_client_properties=None))] #[allow(clippy::too_many_arguments)] pub fn connect( py: Python<'_>, @@ -537,6 +537,8 @@ pub fn connect( client_config: Option, storage_options: Option>, session: Option, + manifest_enabled: bool, + namespace_client_properties: Option>, ) -> PyResult> { future_into_py(py, async move { let mut builder = lancedb::connect(&uri); @@ -556,6 +558,12 @@ pub fn connect( if let Some(storage_options) = storage_options { builder = builder.storage_options(storage_options); } + if manifest_enabled { + builder = builder.manifest_enabled(true); + } + if let Some(namespace_client_properties) = namespace_client_properties { + builder = builder.namespace_client_properties(namespace_client_properties); + } #[cfg(feature = "remote")] if let Some(client_config) = client_config { builder = builder.client_config(client_config.into()); diff --git a/rust/lancedb/Cargo.toml b/rust/lancedb/Cargo.toml index 
cf6f8c44d..b652ed1a5 100644 --- a/rust/lancedb/Cargo.toml +++ b/rust/lancedb/Cargo.toml @@ -111,7 +111,12 @@ default = [] aws = ["lance/aws", "lance-io/aws", "lance-namespace-impls/dir-aws"] oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"] gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"] -azure = ["lance/azure", "lance-io/azure", "lance-namespace-impls/dir-azure"] +azure = [ + "lance/azure", + "lance-io/azure", + "lance-namespace-impls/dir-azure", + "lance-namespace-impls/credential-vendor-azure", +] huggingface = [ "lance/huggingface", "lance-io/huggingface", diff --git a/rust/lancedb/src/connection.rs b/rust/lancedb/src/connection.rs index 9e0d3ea3f..8034c2a53 100644 --- a/rust/lancedb/src/connection.rs +++ b/rust/lancedb/src/connection.rs @@ -590,6 +590,15 @@ pub struct ConnectRequest { /// storage options. pub namespace_client_properties: HashMap, + /// Use directory namespace manifests as the source of truth for native + /// LanceDB table metadata. + /// + /// When enabled for a local/native connection, LanceDB returns a + /// namespace-backed database directly. Directory listing fallback remains + /// enabled for migration, and directory-listing-to-manifest migration is + /// forced on. + pub manifest_enabled: bool, + /// The interval at which to check for updates from other processes. /// /// If None, then consistency is not checked. For performance @@ -630,6 +639,7 @@ impl ConnectBuilder { read_consistency_interval: None, options: HashMap::new(), namespace_client_properties: HashMap::new(), + manifest_enabled: false, session: None, }, embedding_registry: None, @@ -791,6 +801,17 @@ impl ConnectBuilder { self } + /// Enable or disable manifest-backed directory namespace mode for local + /// native connections. + /// + /// When enabled, the connection uses the directory namespace database + /// directly for all table operations and forces + /// `dir_listing_to_manifest_migration_enabled=true`. 
+ pub fn manifest_enabled(mut self, enabled: bool) -> Self { + self.request.manifest_enabled = enabled; + self + } + /// The interval at which to check for updates from other processes. This /// only affects LanceDB OSS. /// @@ -886,6 +907,16 @@ impl ConnectBuilder { pub async fn execute(self) -> Result { if self.request.uri.starts_with("db") { self.execute_remote() + } else if self.request.manifest_enabled { + let internal = Arc::new( + ListingDatabase::connect_manifest_enabled_namespace_database(&self.request).await?, + ); + Ok(Connection { + internal, + embedding_registry: self + .embedding_registry + .unwrap_or_else(|| Arc::new(MemoryRegistry::new())), + }) } else { let internal = Arc::new(ListingDatabase::connect_with_options(&self.request).await?); Ok(Connection { @@ -1132,6 +1163,9 @@ mod tests { use lance_testing::datagen::{BatchGenerator, IncrementingInt32}; use tempfile::tempdir; + use crate::database::listing::{ListingDatabaseOptions, OPT_NEW_TABLE_V2_MANIFEST_PATHS}; + use crate::database::namespace::LanceNamespaceDatabase; + use crate::table::NativeTable; use crate::test_utils::connection::new_test_connection; use super::*; @@ -1204,6 +1238,147 @@ mod tests { ); } + #[tokio::test] + async fn test_connect_with_manifest_enabled_uses_directory_namespace() { + let tmp_dir = tempdir().unwrap(); + let uri = tmp_dir.path().to_str().unwrap(); + + let db = connect(uri) + .manifest_enabled(true) + .storage_option("timeout", "30s") + .namespace_client_property("manifest_enabled", "false") + .namespace_client_property("dir_listing_to_manifest_migration_enabled", "false") + .execute() + .await + .unwrap(); + + assert!( + db.database() + .as_any() + .downcast_ref::() + .is_some() + ); + assert_eq!(db.uri(), uri); + + let (ns_impl, properties) = db.namespace_client_config().await.unwrap(); + assert_eq!(ns_impl, "dir"); + assert_eq!(properties.get("root"), Some(&uri.to_string())); + assert_eq!( + properties.get("manifest_enabled"), + Some(&"true".to_string()) + ); + 
assert_eq!( + properties.get("dir_listing_to_manifest_migration_enabled"), + Some(&"true".to_string()) + ); + assert_eq!(properties.get("storage.timeout"), Some(&"30s".to_string())); + } + + #[tokio::test] + async fn test_manifest_enabled_rejects_commit_engine_uri() { + let Err(err) = connect("s3+ddb://bucket/db?ddbTableName=manifest") + .manifest_enabled(true) + .execute() + .await + else { + panic!("expected manifest-enabled s3+ddb connection to fail"); + }; + assert!( + matches!(err, Error::NotSupported { message } if message.contains("commit engine URI schemes")) + ); + + let Err(err) = connect("s3://bucket/db?engine=ddb&ddbTableName=manifest") + .manifest_enabled(true) + .execute() + .await + else { + panic!("expected manifest-enabled engine query connection to fail"); + }; + assert!( + matches!(err, Error::NotSupported { message } if message.contains("commit engine")) + ); + } + + #[tokio::test] + async fn test_manifest_enabled_connection_migrates_root_listing_table() { + let tmp_dir = tempdir().unwrap(); + let uri = tmp_dir.path().to_str().unwrap(); + let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)])); + + connect(uri) + .execute() + .await + .unwrap() + .create_empty_table("legacy", schema) + .execute() + .await + .unwrap(); + + let db = connect(uri).manifest_enabled(true).execute().await.unwrap(); + let tables = db.table_names().execute().await.unwrap(); + assert_eq!(tables, vec!["legacy".to_string()]); + db.open_table("legacy").execute().await.unwrap(); + } + + #[tokio::test] + async fn test_manifest_enabled_preserves_new_table_options() { + let tmp_dir = tempdir().unwrap(); + let uri = tmp_dir.path().to_str().unwrap(); + let options = ListingDatabaseOptions::builder() + .enable_v2_manifest_paths(true) + .build(); + let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)])); + + let table = connect(uri) + .manifest_enabled(true) + .database_options(&options) + .execute() + .await + .unwrap() + 
.create_empty_table("v1_manifest", schema) + .storage_option(OPT_NEW_TABLE_V2_MANIFEST_PATHS, "false") + .execute() + .await + .unwrap(); + + let native_table = table + .base_table() + .as_any() + .downcast_ref::() + .unwrap(); + assert!(!native_table.uses_v2_manifest_paths().await.unwrap()); + } + + #[tokio::test] + async fn test_manifest_enabled_vend_input_storage_options() { + let tmp_dir = tempdir().unwrap(); + let uri = tmp_dir.path().to_str().unwrap(); + let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)])); + + let table = connect(uri) + .manifest_enabled(true) + .storage_option("test_storage_option", "test_value") + .namespace_client_property("vend_input_storage_options", "true") + .namespace_client_property( + "vend_input_storage_options_refresh_interval_millis", + "60000", + ) + .execute() + .await + .unwrap() + .create_empty_table("vended", schema) + .execute() + .await + .unwrap(); + + let storage_options = table.latest_storage_options().await.unwrap().unwrap(); + assert_eq!( + storage_options.get("test_storage_option"), + Some(&"test_value".to_string()) + ); + assert!(storage_options.contains_key("expires_at_millis")); + } + #[tokio::test] async fn test_table_names() { let tc = new_test_connection().await.unwrap(); diff --git a/rust/lancedb/src/database/listing.rs b/rust/lancedb/src/database/listing.rs index 02884bb63..73fad6eb9 100644 --- a/rust/lancedb/src/database/listing.rs +++ b/rust/lancedb/src/database/listing.rs @@ -285,7 +285,7 @@ const MIRRORED_STORE: &str = "mirroredStore"; /// A connection to LanceDB impl ListingDatabase { - fn build_namespace_client_properties( + pub(crate) fn build_namespace_client_properties( uri: &str, storage_options: &HashMap, namespace_client_properties: HashMap, @@ -298,6 +298,24 @@ impl ListingDatabase { properties } + pub(crate) fn build_manifest_enabled_namespace_client_properties( + uri: &str, + storage_options: &HashMap, + namespace_client_properties: HashMap, + ) -> HashMap { + let 
mut properties = Self::build_namespace_client_properties( + uri, + storage_options, + namespace_client_properties, + ); + properties.insert("manifest_enabled".to_string(), "true".to_string()); + properties.insert( + "dir_listing_to_manifest_migration_enabled".to_string(), + "true".to_string(), + ); + properties + } + async fn connect_namespace_database( uri: &str, storage_options: HashMap, @@ -323,6 +341,119 @@ impl ListingDatabase { )) } + async fn prepare_namespace_root( + uri: &str, + storage_options: &HashMap, + session: Arc, + ) -> Result { + match url::Url::parse(uri) { + Ok(url) if url.scheme().len() == 1 && cfg!(windows) => { + let (object_store, _) = ObjectStore::from_uri_and_params( + session.store_registry(), + uri, + &ObjectStoreParams::default(), + ) + .await?; + if object_store.is_local() { + Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?; + } + Ok(uri.to_string()) + } + Ok(mut url) => { + if url.scheme().contains('+') { + return Err(Error::NotSupported { + message: "commit engine URI schemes are not supported for manifest-enabled namespace connections".to_string(), + }); + } + + for (key, value) in url.query_pairs() { + if key == ENGINE { + return Err(Error::NotSupported { + message: format!( + "commit engine '{}' is not supported for manifest-enabled namespace connections", + value + ), + }); + } else if key == MIRRORED_STORE { + return Err(Error::NotSupported { + message: "mirrored store is not supported for manifest-enabled namespace connections" + .to_string(), + }); + } + } + + url.set_query(None); + let plain_uri = url.to_string(); + + let os_params = ObjectStoreParams { + storage_options_accessor: if storage_options.is_empty() { + None + } else { + Some(Arc::new(StorageOptionsAccessor::with_static_options( + storage_options.clone(), + ))) + }, + ..Default::default() + }; + let (object_store, _) = ObjectStore::from_uri_and_params( + session.store_registry(), + &plain_uri, + &os_params, + ) + .await?; + if 
object_store.is_local() { + Self::try_create_dir(&plain_uri).context(CreateDirSnafu { + path: plain_uri.clone(), + })?; + } + + Ok(plain_uri) + } + Err(_) => { + let (object_store, _) = ObjectStore::from_uri_and_params( + session.store_registry(), + uri, + &ObjectStoreParams::default(), + ) + .await?; + if object_store.is_local() { + Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?; + } + Ok(uri.to_string()) + } + } + } + + pub(crate) async fn connect_manifest_enabled_namespace_database( + request: &ConnectRequest, + ) -> Result { + let options = ListingDatabaseOptions::parse_from_map(&request.options)?; + let session = request + .session + .clone() + .unwrap_or_else(|| Arc::new(lance::session::Session::default())); + let namespace_root = + Self::prepare_namespace_root(&request.uri, &options.storage_options, session.clone()) + .await?; + let ns_properties = Self::build_manifest_enabled_namespace_client_properties( + &namespace_root, + &options.storage_options, + request.namespace_client_properties.clone(), + ); + + LanceNamespaceDatabase::connect_with_new_table_config( + "dir", + ns_properties, + options.storage_options, + request.read_consistency_interval, + Some(session), + HashSet::new(), + options.new_table_config, + ) + .await + .map(|db| db.with_uri(request.uri.clone())) + } + /// Connect to a listing database /// /// The URI should be a path to a directory where the tables are stored. 
@@ -690,15 +821,12 @@ impl ListingDatabase { store_params.storage_options_accessor = Some(Arc::new(accessor)); } - write_params.data_storage_version = self - .new_table_config - .data_storage_version - .or(storage_version_override); + write_params.data_storage_version = storage_version_override + .or(write_params.data_storage_version) + .or(self.new_table_config.data_storage_version); - if let Some(enable_v2_manifest_paths) = self - .new_table_config - .enable_v2_manifest_paths - .or(v2_manifest_override) + if let Some(enable_v2_manifest_paths) = + v2_manifest_override.or(self.new_table_config.enable_v2_manifest_paths) { write_params.enable_v2_manifest_paths = enable_v2_manifest_paths; } @@ -1158,6 +1286,7 @@ mod tests { client_config: Default::default(), options: Default::default(), namespace_client_properties: Default::default(), + manifest_enabled: false, read_consistency_interval: None, session: None, }; @@ -1292,6 +1421,7 @@ mod tests { client_config: Default::default(), options: options.clone(), namespace_client_properties: Default::default(), + manifest_enabled: false, read_consistency_interval: None, session: None, }; @@ -1827,6 +1957,7 @@ mod tests { client_config: Default::default(), options, namespace_client_properties: Default::default(), + manifest_enabled: false, read_consistency_interval: None, session: None, }; @@ -1933,6 +2064,7 @@ mod tests { client_config: Default::default(), options, namespace_client_properties: Default::default(), + manifest_enabled: false, read_consistency_interval: None, session: None, }; @@ -2005,6 +2137,7 @@ mod tests { client_config: Default::default(), options, namespace_client_properties: Default::default(), + manifest_enabled: false, read_consistency_interval: None, session: None, }; @@ -2202,6 +2335,7 @@ mod tests { client_config: Default::default(), options: Default::default(), namespace_client_properties, + manifest_enabled: false, read_consistency_interval: None, session: None, }; diff --git 
a/rust/lancedb/src/database/namespace.rs b/rust/lancedb/src/database/namespace.rs index 19dc1f174..de18f8db8 100644 --- a/rust/lancedb/src/database/namespace.rs +++ b/rust/lancedb/src/database/namespace.rs @@ -24,6 +24,10 @@ use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler; use crate::connection::NamespaceClientPushdownOperation; use crate::database::ReadConsistency; +use crate::database::listing::{ + NewTableConfig, OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS, OPT_NEW_TABLE_STORAGE_VERSION, + OPT_NEW_TABLE_V2_MANIFEST_PATHS, +}; use crate::error::{Error, Result}; use crate::table::NativeTable; use lance::dataset::WriteMode; @@ -50,6 +54,8 @@ pub struct LanceNamespaceDatabase { ns_impl: String, // Namespace properties used to construct the namespace client ns_properties: HashMap, + // Options for tables created by this connection + new_table_config: NewTableConfig, } impl LanceNamespaceDatabase { @@ -71,9 +77,15 @@ impl LanceNamespaceDatabase { pushdown_operations: namespace_client_pushdown_operations, ns_impl: namespace_client_impl, ns_properties: namespace_client_properties, + new_table_config: NewTableConfig::default(), } } + pub(crate) fn with_uri(mut self, uri: impl Into) -> Self { + self.uri = uri.into(); + self + } + pub async fn connect( ns_impl: &str, ns_properties: HashMap, @@ -81,6 +93,27 @@ impl LanceNamespaceDatabase { read_consistency_interval: Option, session: Option>, pushdown_operations: HashSet, + ) -> Result { + Self::connect_with_new_table_config( + ns_impl, + ns_properties, + storage_options, + read_consistency_interval, + session, + pushdown_operations, + NewTableConfig::default(), + ) + .await + } + + pub(crate) async fn connect_with_new_table_config( + ns_impl: &str, + ns_properties: HashMap, + storage_options: HashMap, + read_consistency_interval: Option, + session: Option>, + pushdown_operations: HashSet, + new_table_config: NewTableConfig, ) -> Result { let mut builder = ConnectBuilder::new(ns_impl); for (key, value) 
in ns_properties.clone() { @@ -102,8 +135,79 @@ impl LanceNamespaceDatabase { pushdown_operations, ns_impl: ns_impl.to_string(), ns_properties, + new_table_config, }) } + + fn extract_storage_overrides( + &self, + request: &DbCreateTableRequest, + ) -> Result<( + Option, + Option, + Option, + )> { + let storage_options = request + .write_options + .lance_write_params + .as_ref() + .and_then(|p| p.store_params.as_ref()) + .and_then(|sp| sp.storage_options()); + + let storage_version_override = storage_options + .and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION)) + .map(|s| s.parse::()) + .transpose()?; + + let v2_manifest_override = storage_options + .and_then(|opts| opts.get(OPT_NEW_TABLE_V2_MANIFEST_PATHS)) + .map(|s| s.parse::()) + .transpose() + .map_err(|_| Error::InvalidInput { + message: "enable_v2_manifest_paths must be a boolean".to_string(), + })?; + + let stable_row_ids_override = storage_options + .and_then(|opts| opts.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS)) + .map(|s| s.parse::()) + .transpose() + .map_err(|_| Error::InvalidInput { + message: "enable_stable_row_ids must be a boolean".to_string(), + })?; + + Ok(( + storage_version_override, + v2_manifest_override, + stable_row_ids_override, + )) + } + + fn apply_new_table_config( + &self, + params: &mut lance::dataset::WriteParams, + request: &DbCreateTableRequest, + ) -> Result<()> { + let (storage_version_override, v2_manifest_override, stable_row_ids_override) = + self.extract_storage_overrides(request)?; + + params.data_storage_version = storage_version_override + .or(params.data_storage_version) + .or(self.new_table_config.data_storage_version); + + if let Some(enable_v2_manifest_paths) = + v2_manifest_override.or(self.new_table_config.enable_v2_manifest_paths) + { + params.enable_v2_manifest_paths = enable_v2_manifest_paths; + } + + if let Some(enable_stable_row_ids) = + stable_row_ids_override.or(self.new_table_config.enable_stable_row_ids) + { + params.enable_stable_row_ids = 
enable_stable_row_ids; + } + + Ok(()) + } } impl std::fmt::Debug for LanceNamespaceDatabase { @@ -299,7 +403,12 @@ impl Database for LanceNamespaceDatabase { }; // Build write params with storage options and commit handler - let mut params = request.write_options.lance_write_params.unwrap_or_default(); + let mut params = request + .write_options + .lance_write_params + .clone() + .unwrap_or_default(); + self.apply_new_table_config(&mut params, &request)?; if matches!(request.mode, CreateTableMode::Overwrite) { params.mode = WriteMode::Overwrite; From 4a5341edb1fcc063be635a98259fd27b880114f8 Mon Sep 17 00:00:00 2001 From: LanceDB Robot Date: Wed, 29 Apr 2026 10:52:25 -0700 Subject: [PATCH 10/20] chore: update lance dependency to v6.0.0-beta.7 (#3334) ## Summary - Update Lance Rust dependencies to `6.0.0-beta.7` using `ci/set_lance_version.py`. - Update Java `lance-core.version` to `6.0.0-beta.7`. - Align Arrow/DataFusion/PyO3 dependency versions and apply required compatibility fixes for the Lance upgrade. 
Triggering tag: [v6.0.0-beta.7](https://github.com/lance-format/lance/releases/tag/v6.0.0-beta.7) ## Verification - `cargo clippy --workspace --tests --all-features -- -D warnings` - `cargo fmt --all` --- Cargo.lock | 406 +++++++++--------- Cargo.toml | 62 +-- java/pom.xml | 2 +- nodejs/Cargo.toml | 2 +- python/Cargo.toml | 8 +- python/src/expr.rs | 2 +- python/src/header.rs | 2 +- python/src/index.rs | 4 +- python/src/namespace.rs | 4 +- python/src/permutation.rs | 10 +- python/src/query.rs | 17 +- python/src/session.rs | 2 +- python/src/table.rs | 20 +- python/src/table/scannable.rs | 7 +- rust/lancedb/src/remote/table/insert.rs | 6 +- rust/lancedb/src/table/datafusion.rs | 13 +- rust/lancedb/src/table/datafusion/insert.rs | 6 +- .../src/table/datafusion/scannable_exec.rs | 6 +- 18 files changed, 301 insertions(+), 278 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 315556ed8..0c8354b78 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -166,9 +166,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" +checksum = "d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" dependencies = [ "arrow-arith", "arrow-array", @@ -188,9 +188,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" +checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" dependencies = [ "arrow-array", "arrow-buffer", @@ -202,9 +202,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" +checksum = "772bd34cacdda8baec9418d80d23d0fb4d50ef0735685bd45158b83dfeb6e62d" dependencies = [ "ahash", "arrow-buffer", @@ -221,9 +221,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" +checksum = "898f4cf1e9598fdb77f356fdf2134feedfd0ee8d5a4e0a5f573e7d0aec16baa4" dependencies = [ "bytes", "half", @@ -233,9 +233,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" +checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" dependencies = [ "arrow-array", "arrow-buffer", @@ -255,9 +255,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" +checksum = "ca025bd0f38eeecb57c2153c0123b960494138e6a957bbda10da2b25415209fe" dependencies = [ "arrow-array", "arrow-cast", @@ -270,9 +270,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" +checksum = "42d10beeab2b1c3bb0b53a00f7c944a178b622173a5c7bcabc3cb45d90238df4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -283,9 +283,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" +checksum = 
"609a441080e338147a84e8e6904b6da482cefb957c5cdc0f3398872f69a315d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -299,9 +299,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" +checksum = "6ead0914e4861a531be48fe05858265cf854a4880b9ed12618b1d08cba9bebc8" dependencies = [ "arrow-array", "arrow-buffer", @@ -323,9 +323,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" +checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" dependencies = [ "arrow-array", "arrow-buffer", @@ -336,9 +336,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d18c442b4c266aaf3d7f7dd40fd7ae058cef7f113b00ff0cd8256e1e218ec544" +checksum = "e63351dc11981a316c828a6032a5021345bba882f68bc4a36c36825a50725089" dependencies = [ "arrow-array", "arrow-data", @@ -348,9 +348,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" +checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" dependencies = [ "arrow-array", "arrow-buffer", @@ -361,9 +361,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" +checksum = "c30a1365d7a7dc50cc847e54154e6af49e4c4b0fddc9f607b687f29212082743" dependencies = [ "bitflags", 
"serde_core", @@ -372,9 +372,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" +checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" dependencies = [ "ahash", "arrow-array", @@ -386,9 +386,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" +checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" dependencies = [ "arrow-array", "arrow-buffer", @@ -1928,9 +1928,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c18ba387f9c05ac1f3be32a73f8f3cc6c1cfc43e5d4b7a8e5b0d3a5eb48dc7" +checksum = "93db0e623840612f7f2cd757f7e8a8922064192363732c88692e0870016e141b" dependencies = [ "arrow", "arrow-schema", @@ -1964,11 +1964,11 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "rand 0.9.4", "regex", - "sqlparser 0.59.0", + "sqlparser 0.61.0", "tempfile", "tokio", "url", @@ -1977,9 +1977,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c75a4ce672b27fb8423810efb92a3600027717a1664d06a2c307eeeabcec694" +checksum = "37cefde60b26a7f4ff61e9d2ff2833322f91df2b568d7238afe67bde5bdffb66" dependencies = [ "arrow", "async-trait", @@ -1995,16 +1995,16 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "tokio", ] [[package]] name = "datafusion-catalog-listing" -version = "52.4.0" +version = "53.1.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c8b9a3795ffb46bf4957a34c67d89a67558b311ae455c8d4295ff2115eeea50" +checksum = "17e112307715d6a7a331111a4c2330ff54bc237183511c319e3708a4cff431fb" dependencies = [ "arrow", "async-trait", @@ -2020,14 +2020,14 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", ] [[package]] name = "datafusion-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "205dc1e20441973f470e6b7ef87626a3b9187970e5106058fef1b713047f770c" +checksum = "d72a11ca44a95e1081870d3abb80c717496e8a7acb467a1d3e932bb636af5cc2" dependencies = [ "ahash", "arrow", @@ -2036,20 +2036,21 @@ dependencies = [ "half", "hashbrown 0.16.1", "indexmap 2.13.0", + "itertools 0.14.0", "libc", "log", - "object_store", + "object_store 0.13.2", "paste", - "sqlparser 0.59.0", + "sqlparser 0.61.0", "tokio", "web-time", ] [[package]] name = "datafusion-common-runtime" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cf5880c02ff6f5f11fb5bc19211789fb32fd3c53d79b7d6cb2b12e401312ba0" +checksum = "89f4afaed29670ec4fd6053643adc749fe3f4bc9d1ce1b8c5679b22c67d12def" dependencies = [ "futures", "log", @@ -2058,9 +2059,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc614d6e709450e29b7b032a42c1bdb705f166a6b2edef7bed7c7897eb905499" +checksum = "e9fb386e1691355355a96419978a0022b7947b44d4a24a6ea99f00b6b485cbb6" dependencies = [ "arrow", "async-trait", @@ -2079,7 +2080,7 @@ dependencies = [ "glob", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "rand 0.9.4", "tokio", "url", @@ -2087,9 +2088,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.4.0" +version = "53.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e497d5fc48dac7ce86f6b4fb09a3a494385774af301ff20ec91aebfae9b05b4" +checksum = "ffa6c52cfed0734c5f93754d1c0175f558175248bf686c944fb05c373e5fc096" dependencies = [ "arrow", "arrow-ipc", @@ -2105,15 +2106,15 @@ dependencies = [ "datafusion-session", "futures", "itertools 0.14.0", - "object_store", + "object_store 0.13.2", "tokio", ] [[package]] name = "datafusion-datasource-csv" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dfc250cad940d0327ca2e9109dc98830892d17a3d6b2ca11d68570e872cf379" +checksum = "503f29e0582c1fc189578d665ff57d9300da1f80c282777d7eb67bb79fb8cdca" dependencies = [ "arrow", "async-trait", @@ -2127,16 +2128,16 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.2", "regex", "tokio", ] [[package]] name = "datafusion-datasource-json" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91e9677ed62833b0e8129dec0d1a8f3c9bb7590bd6dd714a43e4c3b663e4aa0" +checksum = "e33804749abc8d0c8cb7473228483cb8070e524c6f6086ee1b85a64debe2b3d2" dependencies = [ "arrow", "async-trait", @@ -2150,31 +2151,35 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.2", + "serde_json", "tokio", + "tokio-stream", ] [[package]] name = "datafusion-doc" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e13e5fe3447baa0584b61ee8644086e007e1ef6e58f4be48bc8a72417854729" +checksum = "8de6ac0df1662b9148ad3c987978b32cbec7c772f199b1d53520c8fa764a87ee" [[package]] name = "datafusion-execution" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48a6cc03e34899a54546b229235f7b192634c8e832f78a267f0989b18216c56d" +checksum = 
"c03c7fbdaefcca4ef6ffe425a5fc2325763bfb426599bb0bf4536466efabe709" dependencies = [ "arrow", + "arrow-buffer", "async-trait", "chrono", "dashmap", "datafusion-common", "datafusion-expr", + "datafusion-physical-expr-common", "futures", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "rand 0.9.4", "tempfile", @@ -2183,9 +2188,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee3315d87eca7a7df58e52a1fb43b4c4171b545fd30ffc3102945c162a9f6ddb" +checksum = "574b9b6977fedbd2a611cbff12e5caf90f31640ad9dc5870f152836d94bad0dd" dependencies = [ "arrow", "async-trait", @@ -2200,14 +2205,14 @@ dependencies = [ "itertools 0.14.0", "paste", "serde_json", - "sqlparser 0.59.0", + "sqlparser 0.61.0", ] [[package]] name = "datafusion-expr-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c6d83feae0753799f933a2c47dfd15980c6947960cb95ed60f5c1f885548b3" +checksum = "7d7c3adf3db8bf61e92eb90cb659c8e8b734593a8f7c8e12a843c7ddba24b87e" dependencies = [ "arrow", "datafusion-common", @@ -2218,9 +2223,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b82962015cc3db4d7662459c9f7fcda0591b5edacb8af1cf3bc3031f274800" +checksum = "f28aa4e10384e782774b10e72aca4d93ef7b31aa653095d9d4536b0a3dbc51b6" dependencies = [ "arrow", "arrow-buffer", @@ -2239,6 +2244,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "memchr", "num-traits", "rand 0.9.4", "regex", @@ -2249,9 +2255,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e42c227d9e55a6c8041785d4a8a117e4de531033d480aae10984247ac62e27e" +checksum = 
"00aa6217e56098ba84e0a338176fe52f0a84cca398021512c6c8c5eff806d0ad" dependencies = [ "ahash", "arrow", @@ -2265,14 +2271,15 @@ dependencies = [ "datafusion-physical-expr-common", "half", "log", + "num-traits", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cead3cfed825b0b688700f4338d281cd7857e4907775a5b9554c083edd5f3f95" +checksum = "b511250349407db7c43832ab2de63f5557b19a20dfd236b39ca2c04468b50d47" dependencies = [ "ahash", "arrow", @@ -2283,9 +2290,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62ea99612970aebab8cf864d02eb3d296bbab7f4881e1023d282b57fe431b201" +checksum = "ef13a858e20d50f0a9bb5e96e7ac82b4e7597f247515bccca4fdd2992df0212a" dependencies = [ "arrow", "arrow-ord", @@ -2299,16 +2306,18 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", + "hashbrown 0.16.1", "itertools 0.14.0", + "itoa", "log", "paste", ] [[package]] name = "datafusion-functions-table" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d83dbf3ab8b9af6f209b068825a7adbd3b88bf276f2a1ec14ba09567b97f5674" +checksum = "72b40d3f5bbb3905f9ccb1ce9485a9595c77b69758a7c24d3ba79e334ff51e7e" dependencies = [ "arrow", "async-trait", @@ -2322,9 +2331,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732edabe07496e2fc5a1e57a284d7a36edcea445a2821119770a0dea624b472c" +checksum = "d4e88ec9d57c9b685d02f58bfee7be62d72610430ddcedb82a08e5d9925dbfb6" dependencies = [ "arrow", "datafusion-common", @@ -2340,9 +2349,9 @@ dependencies = [ [[package]] name = 
"datafusion-functions-window-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c6e30e09700799bd52adce8c377ab03dda96e73a623e4803a31ad94fe7ce14" +checksum = "8307bb93519b1a91913723a1130cfafeee3f72200d870d88e91a6fc5470ede5c" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2350,9 +2359,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402f2a8ed70fb99a18f71580a1fe338604222a3d32ddeac6e72c5b34feea2d4d" +checksum = "2e367e6a71051d0ebdd29b2f85d12059b38b1d1f172c6906e80016da662226bd" dependencies = [ "datafusion-doc", "quote", @@ -2361,9 +2370,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99f32edb8ba12f08138f86c09b80fae3d4a320551262fa06b91d8a8cb3065a5b" +checksum = "e929015451a67f77d9d8b727b2bf3a40c4445fdef6cdc53281d7d97c76888ace" dependencies = [ "arrow", "chrono", @@ -2380,9 +2389,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "987c5e29e96186589301b42e25aa7d11bbe319a73eb02ef8d755edc55b5b89fc" +checksum = "4b1e68aba7a4b350401cfdf25a3d6f989ad898a7410164afe9ca52080244cb59" dependencies = [ "ahash", "arrow", @@ -2403,9 +2412,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1de89d0afa08b6686697bd8a6bac4ba2cd44c7003356e1bce6114d5a93f94b5c" +checksum = "ea22315f33cf2e0adc104e8ec42e285f6ed93998d565c65e82fec6a9ee9f9db4" dependencies = [ "arrow", "datafusion-common", @@ -2418,9 +2427,9 @@ dependencies = [ [[package]] name = 
"datafusion-physical-expr-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602d1970c0fe87f1c3a36665d131fbfe1c4379d35f8fc5ec43a362229ad2954d" +checksum = "b04b45ea8ad3ac2d78f2ea2a76053e06591c9629c7a603eda16c10649ecf4362" dependencies = [ "ahash", "arrow", @@ -2435,9 +2444,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b24d704b6385ebe27c756a12e5ba15684576d3b47aeca79cc9fb09480236dc32" +checksum = "7cb13397809a425918f608dfe8653f332015a3e330004ab191b4404187238b95" dependencies = [ "arrow", "datafusion-common", @@ -2453,9 +2462,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c21d94141ea5043e98793f170798e9c1887095813b8291c5260599341e383a38" +checksum = "5edc023675791af9d5fb4cc4c24abf5f7bd3bd4dcf9e5bd90ea1eff6976dcc79" dependencies = [ "ahash", "arrow", @@ -2477,6 +2486,7 @@ dependencies = [ "indexmap 2.13.0", "itertools 0.14.0", "log", + "num-traits", "parking_lot", "pin-project-lite", "tokio", @@ -2484,9 +2494,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a68cce43d18c0dfac95cacd74e70565f7e2fb12b9ed41e2d312f0fa837626b1" +checksum = "ac8c76860e355616555081cab5968cec1af7a80701ff374510860bcd567e365a" dependencies = [ "arrow", "datafusion-common", @@ -2501,9 +2511,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4e1c40a0b1896aed4a4504145c2eb7fa9b9da13c2d04b40a4767a09f076199" +checksum = "5412111aa48e2424ba926112e192f7a6b7e4ccb450145d25ce5ede9f19dc491e" 
dependencies = [ "async-trait", "datafusion-common", @@ -2515,19 +2525,20 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f1891e5b106d1d73c7fe403bd8a265d19c3977edc17f60808daf26c2fe65ffb" +checksum = "fa0d133ddf8b9b3b872acac900157f783e7b879fe9a6bccf389abebbfac45ec1" dependencies = [ "arrow", "bigdecimal", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-functions-nested", "indexmap 2.13.0", "log", "regex", - "sqlparser 0.59.0", + "sqlparser 0.61.0", ] [[package]] @@ -3010,8 +3021,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow-array", "rand 0.9.4", @@ -3859,15 +3870,6 @@ dependencies = [ "web-time", ] -[[package]] -name = "indoc" -version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" -dependencies = [ - "rustversion", -] - [[package]] name = "inout" version = "0.1.4" @@ -4066,8 +4068,8 @@ dependencies = [ [[package]] name = "lance" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow", "arrow-arith", @@ -4113,7 +4115,7 @@ dependencies = [ "lance-tokenizer", "log", "moka", - "object_store", + "object_store 0.12.5", "permutation", "pin-project", "prost", @@ -4135,8 +4137,8 @@ dependencies = [ [[package]] 
name = "lance-arrow" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow-array", "arrow-buffer", @@ -4157,8 +4159,8 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrayref", "paste", @@ -4167,8 +4169,8 @@ dependencies = [ [[package]] name = "lance-core" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow-array", "arrow-buffer", @@ -4188,7 +4190,7 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store", + "object_store 0.12.5", "pin-project", "prost", "rand 0.9.4", @@ -4205,8 +4207,8 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow", "arrow-array", @@ -4237,8 +4239,8 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = 
"git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow", "arrow-array", @@ -4256,8 +4258,8 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow-arith", "arrow-array", @@ -4294,8 +4296,8 @@ dependencies = [ [[package]] name = "lance-file" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow-arith", "arrow-array", @@ -4316,7 +4318,7 @@ dependencies = [ "lance-io", "log", "num-traits", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", @@ -4327,8 +4329,8 @@ dependencies = [ [[package]] name = "lance-index" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow", "arrow-arith", @@ -4370,7 +4372,7 @@ dependencies = [ "log", "ndarray", "num-traits", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", @@ -4392,8 +4394,8 @@ dependencies = [ [[package]] name = "lance-io" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = 
"git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow", "arrow-arith", @@ -4420,7 +4422,7 @@ dependencies = [ "libc", "log", "moka", - "object_store", + "object_store 0.12.5", "object_store_opendal", "opendal", "path_abs", @@ -4437,8 +4439,8 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow-array", "arrow-buffer", @@ -4454,8 +4456,8 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow", "async-trait", @@ -4468,8 +4470,8 @@ dependencies = [ [[package]] name = "lance-namespace-impls" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow", "arrow-ipc", @@ -4489,7 +4491,7 @@ dependencies = [ "lance-namespace", "lance-table", "log", - "object_store", + "object_store 0.12.5", "quick-xml 0.38.4", "rand 0.9.4", "reqwest", @@ -4518,8 +4520,8 @@ dependencies = [ [[package]] name = "lance-table" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = 
"git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow", "arrow-array", @@ -4539,7 +4541,7 @@ dependencies = [ "lance-file", "lance-io", "log", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", @@ -4558,8 +4560,8 @@ dependencies = [ [[package]] name = "lance-testing" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "arrow-array", "arrow-schema", @@ -4570,8 +4572,8 @@ dependencies = [ [[package]] name = "lance-tokenizer" -version = "6.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.4#226dafe0a00f995d9e3230c2d61cc06be51994d4" +version = "6.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" dependencies = [ "rust-stemmers", "serde", @@ -4637,7 +4639,7 @@ dependencies = [ "log", "moka", "num-traits", - "object_store", + "object_store 0.12.5", "pin-project", "polars", "polars-arrow", @@ -4892,9 +4894,9 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" +checksum = "db9a0d582c2874f68138a16ce1867e0ffde6c0bb0a0df85e1f36d04146db488a" dependencies = [ "twox-hash", ] @@ -4989,15 +4991,6 @@ dependencies = [ "stable_deref_trait", ] -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - [[package]] name = "mime" version = "0.3.17" @@ -5377,6 +5370,32 @@ 
dependencies = [ "web-time", ] +[[package]] +name = "object_store" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures-channel", + "futures-core", + "futures-util", + "http 1.4.0", + "humantime", + "itertools 0.14.0", + "parking_lot", + "percent-encoding", + "thiserror 2.0.18", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + [[package]] name = "object_store_opendal" version = "0.55.0" @@ -5387,7 +5406,7 @@ dependencies = [ "bytes", "chrono", "futures", - "object_store", + "object_store 0.12.5", "opendal", "pin-project", "tokio", @@ -6203,7 +6222,7 @@ version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "itertools 0.14.0", "log", "multimap", @@ -6273,28 +6292,26 @@ checksum = "40e24eee682d89fb193496edf918a7f407d30175b2e785fe057e4392dfd182e0" [[package]] name = "pyo3" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" +checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" dependencies = [ - "indoc", "libc", - "memoffset", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent", ] [[package]] name = "pyo3-async-runtimes" -version = "0.26.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ee6d4cb3e8d5b925f5cdb38da183e0ff18122eb2048d4041c9e7034d026e23" +checksum = "9e7364a95bf00e8377bbf9b0f09d7ff9715a29d8fcf93b47d1a967363b973178" dependencies = [ - "futures", + "futures-channel", + "futures-util", "once_cell", "pin-project-lite", "pyo3", @@ -6304,9 
+6321,9 @@ dependencies = [ [[package]] name = "pyo3-async-runtimes-macros" -version = "0.26.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c29bc5c673e36a8102d0b9179149c1bb59990d8db4f3ae58bd7dceccab90b951" +checksum = "c23399970eea9c31d0ac84cee4a9d8dd05f89b1da2f4dd5bb44b32a3f66db4f8" dependencies = [ "proc-macro2", "quote", @@ -6315,18 +6332,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" +checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e" dependencies = [ "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" +checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e" dependencies = [ "libc", "pyo3-build-config", @@ -6334,9 +6351,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" +checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -6346,9 +6363,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" +checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -7469,7 +7486,7 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", "syn 2.0.117", @@ -7481,7 +7498,7 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54254b8531cafa275c5e096f62d48c81435d1015405a91198ddb11e967301d40" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", "syn 2.0.117", @@ -7573,9 +7590,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.59.0" +version = "0.61.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" dependencies = [ "log", "sqlparser_derive", @@ -7583,9 +7600,9 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" dependencies = [ "proc-macro2", "quote", @@ -8059,6 +8076,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] @@ -8324,12 +8342,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" -[[package]] -name = "unindent" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" - [[package]] name = "untrusted" version = "0.7.1" diff --git a/Cargo.toml b/Cargo.toml index 94a42ad7c..2f75c2ee3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,40 +13,40 @@ categories = ["database-implementations"] rust-version = "1.91.0" [workspace.dependencies] -lance = { "version" = "=6.0.0-beta.4", default-features 
= false, "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-core = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-datagen = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-file = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-io = { "version" = "=6.0.0-beta.4", default-features = false, "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-index = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-linalg = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-namespace = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-namespace-impls = { "version" = "=6.0.0-beta.4", default-features = false, "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-table = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-testing = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-datafusion = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-encoding = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-arrow = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance = { "version" = "=6.0.0-beta.7", default-features = false, "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-core = { "version" = "=6.0.0-beta.7", "tag" = 
"v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-datagen = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-file = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-io = { "version" = "=6.0.0-beta.7", default-features = false, "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-index = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-linalg = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-namespace = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-namespace-impls = { "version" = "=6.0.0-beta.7", default-features = false, "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-table = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-testing = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-datafusion = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-encoding = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-arrow = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } ahash = "0.8" # Note that this one does not include pyarrow -arrow = { version = "57.2", optional = false } -arrow-array = "57.2" -arrow-data = "57.2" -arrow-ipc = "57.2" -arrow-ord = "57.2" -arrow-schema = "57.2" -arrow-select = "57.2" -arrow-cast = "57.2" +arrow = { version = "58.0.0", optional = false } +arrow-array = "58.0.0" +arrow-data = 
"58.0.0" +arrow-ipc = "58.0.0" +arrow-ord = "58.0.0" +arrow-schema = "58.0.0" +arrow-select = "58.0.0" +arrow-cast = "58.0.0" async-trait = "0" -datafusion = { version = "52.1", default-features = false } -datafusion-catalog = "52.1" -datafusion-common = { version = "52.1", default-features = false } -datafusion-execution = "52.1" -datafusion-expr = "52.1" -datafusion-functions = "52.1" -datafusion-physical-plan = "52.1" -datafusion-physical-expr = "52.1" -datafusion-sql = "52.1" +datafusion = { version = "53.0.0", default-features = false } +datafusion-catalog = "53.0.0" +datafusion-common = { version = "53.0.0", default-features = false } +datafusion-execution = "53.0.0" +datafusion-expr = "53.0.0" +datafusion-functions = "53.0.0" +datafusion-physical-plan = "53.0.0" +datafusion-physical-expr = "53.0.0" +datafusion-sql = "53.0.0" env_logger = "0.11" half = { "version" = "2.7.1", default-features = false, features = [ "num-traits", diff --git a/java/pom.xml b/java/pom.xml index 822d553e2..886f64a8d 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -28,7 +28,7 @@ UTF-8 15.0.0 - 6.0.0-beta.4 + 6.0.0-beta.7 false 2.30.0 1.7 diff --git a/nodejs/Cargo.toml b/nodejs/Cargo.toml index a4603e080..2b54311ec 100644 --- a/nodejs/Cargo.toml +++ b/nodejs/Cargo.toml @@ -16,7 +16,7 @@ crate-type = ["cdylib"] async-trait.workspace = true arrow-ipc.workspace = true arrow-array.workspace = true -arrow-buffer = "57.2" +arrow-buffer = "58.0.0" half.workspace = true arrow-schema.workspace = true env_logger.workspace = true diff --git a/python/Cargo.toml b/python/Cargo.toml index fd38fa641..57195bf63 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -15,7 +15,7 @@ name = "_lancedb" crate-type = ["cdylib"] [dependencies] -arrow = { version = "57.2", features = ["pyarrow"] } +arrow = { version = "58.0.0", features = ["pyarrow"] } async-trait = "0.1" bytes = "1" lancedb = { path = "../rust/lancedb", default-features = false } @@ -25,8 +25,8 @@ lance-namespace-impls.workspace = true 
lance-io.workspace = true env_logger.workspace = true log.workspace = true -pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] } -pyo3-async-runtimes = { version = "0.26", features = [ +pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39"] } +pyo3-async-runtimes = { version = "0.28", features = [ "attributes", "tokio-runtime", ] } @@ -38,7 +38,7 @@ snafu.workspace = true tokio = { version = "1.40", features = ["sync"] } [build-dependencies] -pyo3-build-config = { version = "0.26", features = [ +pyo3-build-config = { version = "0.28", features = [ "extension-module", "abi3-py39", ] } diff --git a/python/src/expr.rs b/python/src/expr.rs index e12c7d0a8..7d29fcd05 100644 --- a/python/src/expr.rs +++ b/python/src/expr.rs @@ -17,7 +17,7 @@ use pyo3::{Bound, PyAny, PyResult, exceptions::PyValueError, prelude::*, pyfunct /// [`expr_lit`] and combined with the methods on this struct. On the Python /// side a thin wrapper class (`lancedb.expr.Expr`) delegates to these methods /// and adds Python operator overloads. 
-#[pyclass(name = "PyExpr")] +#[pyclass(name = "PyExpr", from_py_object)] #[derive(Clone)] pub struct PyExpr(pub DfExpr); diff --git a/python/src/header.rs b/python/src/header.rs index 13338f4ec..85ad14358 100644 --- a/python/src/header.rs +++ b/python/src/header.rs @@ -33,7 +33,7 @@ impl PyHeaderProvider { Ok(headers_py) => { // Convert Python dict to Rust HashMap let bound_headers = headers_py.bind(py); - let dict: &Bound = bound_headers.downcast().map_err(|e| { + let dict: &Bound = bound_headers.cast().map_err(|e| { format!("HeaderProvider.get_headers must return a dict: {}", e) })?; diff --git a/python/src/index.rs b/python/src/index.rs index 602b5e420..ce90280b0 100644 --- a/python/src/index.rs +++ b/python/src/index.rs @@ -13,7 +13,7 @@ use pyo3::{ Bound, FromPyObject, PyAny, PyResult, Python, exceptions::{PyKeyError, PyValueError}, intern, pyclass, pymethods, - types::PyAnyMethods, + types::{PyAnyMethods, PyString}, }; use crate::util::parse_distance_type; @@ -22,7 +22,7 @@ pub fn class_name(ob: &'_ Bound<'_, PyAny>) -> PyResult { let full_name = ob .getattr(intern!(ob.py(), "__class__"))? 
.getattr(intern!(ob.py(), "__name__"))?; - let full_name = full_name.downcast()?.to_string_lossy(); + let full_name = full_name.cast::()?.to_string_lossy(); match full_name.rsplit_once('.') { Some((_, name)) => Ok(name.to_string()), diff --git a/python/src/namespace.rs b/python/src/namespace.rs index e96c667a6..bb0e4d6d3 100644 --- a/python/src/namespace.rs +++ b/python/src/namespace.rs @@ -183,7 +183,7 @@ async fn call_py_method_primitive( ) -> lance_core::Result where Req: serde::Serialize + Send + 'static, - Resp: for<'py> pyo3::FromPyObject<'py> + Send + 'static, + Resp: for<'a, 'py> pyo3::FromPyObject<'a, 'py> + Send + 'static, { let request_json = serde_json::to_string(&request).map_err(|e| { lance_core::Error::io(format!( @@ -203,7 +203,7 @@ where // Call the Python method let result = py_namespace.call_method1(py, method_name, (request_arg,))?; - let value: Resp = result.extract(py)?; + let value: Resp = result.extract(py).map_err(Into::into)?; Ok::<_, PyErr>(value) }) }) diff --git a/python/src/permutation.rs b/python/src/permutation.rs index 21b8c9c47..ac20a2cc9 100644 --- a/python/src/permutation.rs +++ b/python/src/permutation.rs @@ -25,12 +25,12 @@ use pyo3_async_runtimes::tokio::future_into_py; fn table_from_py<'a>(table: Bound<'a, PyAny>) -> PyResult> { if table.hasattr("_inner")? { - Ok(table.getattr("_inner")?.downcast_into::()?) + Ok(table.getattr("_inner")?.cast_into::
()?) } else if table.hasattr("_table")? { Ok(table .getattr("_table")? .getattr("_inner")? - .downcast_into::
()?) + .cast_into::
()?) } else { Err(PyRuntimeError::new_err( "Provided table does not appear to be a Table or RemoteTable instance", @@ -90,9 +90,9 @@ impl PyAsyncPermutationBuilder { database .getattr("_conn")? .getattr("_inner")? - .downcast_into::()? + .cast_into::()? } else { - database.getattr("_inner")?.downcast_into::()? + database.getattr("_inner")?.cast_into::()? }; let database = conn.borrow().database()?; slf.modify(|builder| builder.persist(database, table_name)) @@ -243,7 +243,7 @@ impl PyPermutationReader { let Some(selection) = selection else { return Ok(Select::All); }; - let selection = selection.downcast_into::()?; + let selection = selection.cast_into::()?; let selection = selection .iter() .map(|(key, value)| { diff --git a/python/src/query.rs b/python/src/query.rs index 98876739b..1b64b5eaa 100644 --- a/python/src/query.rs +++ b/python/src/query.rs @@ -33,7 +33,7 @@ use pyo3::pyfunction; use pyo3::pymethods; use pyo3::types::PyList; use pyo3::types::{PyDict, PyString}; -use pyo3::{FromPyObject, exceptions::PyRuntimeError}; +use pyo3::{Borrowed, FromPyObject, exceptions::PyRuntimeError}; use pyo3::{PyErr, pyclass}; use pyo3::{exceptions::PyValueError, intern}; use pyo3_async_runtimes::tokio::future_into_py; @@ -43,9 +43,12 @@ use crate::util::parse_distance_type; use crate::{arrow::RecordBatchStream, util::PyLanceDB}; use crate::{error::PythonErrorExt, index::class_name}; -impl FromPyObject<'_> for PyLanceDB { - fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult { - match class_name(ob)?.as_str() { +impl<'a, 'py> FromPyObject<'a, 'py> for PyLanceDB { + type Error = PyErr; + + fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult { + let ob = ob.to_owned(); + match class_name(&ob)?.as_str() { "MatchQuery" => { let query = ob.getattr("query")?.extract()?; let column = ob.getattr("column")?.extract()?; @@ -424,7 +427,7 @@ impl Query { "Query text is required for nearest_to_text", ))?; - let query = if let Ok(query_text) = fts_query.downcast::() { + let query = if 
let Ok(query_text) = fts_query.cast::() { let mut query_text = query_text.to_string(); let columns = query .get_item("columns")? @@ -606,7 +609,7 @@ impl TakeQuery { } } -#[pyclass] +#[pyclass(from_py_object)] #[derive(Clone)] pub struct FTSQuery { inner: LanceDbQuery, @@ -735,7 +738,7 @@ impl FTSQuery { } } -#[pyclass] +#[pyclass(from_py_object)] #[derive(Clone)] pub struct VectorQuery { inner: LanceDbVectorQuery, diff --git a/python/src/session.rs b/python/src/session.rs index 2433114b6..891e61e44 100644 --- a/python/src/session.rs +++ b/python/src/session.rs @@ -11,7 +11,7 @@ use pyo3::{PyResult, pyclass, pymethods}; /// Sessions allow you to configure cache sizes for index and metadata caches, /// which can significantly impact memory use and performance. They can /// also be re-used across multiple connections to share the same cache state. -#[pyclass] +#[pyclass(from_py_object)] #[derive(Clone)] pub struct Session { pub(crate) inner: Arc, diff --git a/python/src/table.rs b/python/src/table.rs index d44b6c1fd..715ac79cc 100644 --- a/python/src/table.rs +++ b/python/src/table.rs @@ -29,7 +29,7 @@ use pyo3_async_runtimes::tokio::future_into_py; mod scannable; /// Statistics about a compaction operation. 
-#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct CompactionStats { /// The number of fragments removed @@ -43,7 +43,7 @@ pub struct CompactionStats { } /// Statistics about a cleanup operation -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct RemovalStats { /// The number of bytes removed @@ -53,7 +53,7 @@ pub struct RemovalStats { } /// Statistics about an optimize operation -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct OptimizeStats { /// Statistics about the compaction operation @@ -62,7 +62,7 @@ pub struct OptimizeStats { pub prune: RemovalStats, } -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct UpdateResult { pub rows_updated: u64, @@ -88,7 +88,7 @@ impl From for UpdateResult { } } -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct AddResult { pub version: u64, @@ -109,7 +109,7 @@ impl From for AddResult { } } -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct DeleteResult { pub num_deleted_rows: u64, @@ -135,7 +135,7 @@ impl From for DeleteResult { } } -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct MergeResult { pub version: u64, @@ -171,7 +171,7 @@ impl From for MergeResult { } } -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct AddColumnsResult { pub version: u64, @@ -192,7 +192,7 @@ impl From for AddColumnsResult { } } -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct AlterColumnsResult { pub version: u64, @@ -213,7 +213,7 @@ impl From for AlterColumnsResult { } } -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct DropColumnsResult { pub version: u64, diff --git a/python/src/table/scannable.rs b/python/src/table/scannable.rs 
index 5d02ca024..faf2c7fae 100644 --- a/python/src/table/scannable.rs +++ b/python/src/table/scannable.rs @@ -126,8 +126,11 @@ impl Scannable for PyScannable { } } -impl<'py> FromPyObject<'py> for PyScannable { - fn extract_bound(ob: &pyo3::Bound<'py, PyAny>) -> pyo3::PyResult { +impl<'a, 'py> FromPyObject<'a, 'py> for PyScannable { + type Error = pyo3::PyErr; + + fn extract(ob: pyo3::Borrowed<'a, 'py, PyAny>) -> pyo3::PyResult { + let ob = ob.to_owned(); // Convert from Scannable dataclass. let schema: PyArrowType = ob.getattr("schema")?.extract()?; let schema = Arc::new(schema.0); diff --git a/rust/lancedb/src/remote/table/insert.rs b/rust/lancedb/src/remote/table/insert.rs index 8aec28609..49ebb2015 100644 --- a/rust/lancedb/src/remote/table/insert.rs +++ b/rust/lancedb/src/remote/table/insert.rs @@ -43,7 +43,7 @@ pub struct RemoteInsertExec { client: RestfulLanceDbClient, input: Arc, overwrite: bool, - properties: PlanProperties, + properties: Arc, add_result: Arc>>, metrics: ExecutionPlanMetricsSet, upload_id: Option, @@ -118,7 +118,7 @@ impl RemoteInsertExec { client, input, overwrite, - properties, + properties: Arc::new(properties), add_result: Arc::new(Mutex::new(None)), metrics: ExecutionPlanMetricsSet::new(), upload_id, @@ -232,7 +232,7 @@ impl ExecutionPlan for RemoteInsertExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lancedb/src/table/datafusion.rs b/rust/lancedb/src/table/datafusion.rs index bd93dd05d..d8956acb2 100644 --- a/rust/lancedb/src/table/datafusion.rs +++ b/rust/lancedb/src/table/datafusion.rs @@ -39,21 +39,26 @@ use lance_index::scalar::FullTextSearchQuery; struct MetadataEraserExec { input: Arc, schema: Arc, - properties: PlanProperties, + properties: Arc, } impl MetadataEraserExec { fn compute_properties_from_input( input: &Arc, schema: &Arc, - ) -> PlanProperties { + ) -> Arc { let input_properties = input.properties(); let eq_properties = input_properties 
.eq_properties .clone() .with_new_schema(schema.clone()) .unwrap(); - input_properties.clone().with_eq_properties(eq_properties) + Arc::new( + input_properties + .as_ref() + .clone() + .with_eq_properties(eq_properties), + ) } fn new(input: Arc) -> Self { @@ -87,7 +92,7 @@ impl ExecutionPlan for MetadataEraserExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lancedb/src/table/datafusion/insert.rs b/rust/lancedb/src/table/datafusion/insert.rs index 51be4abb8..f2cf21f13 100644 --- a/rust/lancedb/src/table/datafusion/insert.rs +++ b/rust/lancedb/src/table/datafusion/insert.rs @@ -81,7 +81,7 @@ pub struct InsertExec { dataset: Arc, input: Arc, write_params: WriteParams, - properties: PlanProperties, + properties: Arc, partial_transactions: Arc>>, metrics: ExecutionPlanMetricsSet, } @@ -107,7 +107,7 @@ impl InsertExec { dataset, input, write_params, - properties, + properties: Arc::new(properties), partial_transactions: Arc::new(Mutex::new(Vec::with_capacity(num_partitions))), metrics: ExecutionPlanMetricsSet::new(), } @@ -136,7 +136,7 @@ impl ExecutionPlan for InsertExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lancedb/src/table/datafusion/scannable_exec.rs b/rust/lancedb/src/table/datafusion/scannable_exec.rs index a55b6e13f..8a7177480 100644 --- a/rust/lancedb/src/table/datafusion/scannable_exec.rs +++ b/rust/lancedb/src/table/datafusion/scannable_exec.rs @@ -20,7 +20,7 @@ pub(crate) struct ScannableExec { // We don't require Scannable to be Sync, so we wrap it in a Mutex to allow safe concurrent access. 
source: Mutex>, num_rows: Option, - properties: PlanProperties, + properties: Arc, tracker: Option>, } @@ -49,7 +49,7 @@ impl ScannableExec { Self { source, num_rows, - properties, + properties: Arc::new(properties), tracker, } } @@ -70,7 +70,7 @@ impl ExecutionPlan for ScannableExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } From a2aea7b4e5a7dd3a7af53765368d1fb96eed460b Mon Sep 17 00:00:00 2001 From: Lance Release Date: Wed, 29 Apr 2026 17:53:22 +0000 Subject: [PATCH 11/20] =?UTF-8?q?Bump=20version:=200.31.0-beta.10=20?= =?UTF-8?q?=E2=86=92=200.31.0-beta.11?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/.bumpversion.toml | 2 +- python/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/.bumpversion.toml b/python/.bumpversion.toml index 5fa2e4e1c..fbc8e545c 100644 --- a/python/.bumpversion.toml +++ b/python/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.31.0-beta.10" +current_version = "0.31.0-beta.11" parse = """(?x) (?P0|[1-9]\\d*)\\. (?P0|[1-9]\\d*)\\. 
diff --git a/python/Cargo.toml b/python/Cargo.toml index 57195bf63..0811264e6 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lancedb-python" -version = "0.31.0-beta.10" +version = "0.31.0-beta.11" publish = false edition.workspace = true description = "Python bindings for LanceDB" From c091243d5bf8070887a8ff3bf2cb8e0ffe1b167b Mon Sep 17 00:00:00 2001 From: Lance Release Date: Wed, 29 Apr 2026 17:53:38 +0000 Subject: [PATCH 12/20] =?UTF-8?q?Bump=20version:=200.28.0-beta.10=20?= =?UTF-8?q?=E2=86=92=200.28.0-beta.11?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.toml | 2 +- Cargo.lock | 6 +++--- docs/src/java/java.md | 2 +- java/lancedb-core/pom.xml | 2 +- java/pom.xml | 2 +- nodejs/Cargo.toml | 2 +- nodejs/npm/darwin-arm64/package.json | 2 +- nodejs/npm/linux-arm64-gnu/package.json | 2 +- nodejs/npm/linux-arm64-musl/package.json | 2 +- nodejs/npm/linux-x64-gnu/package.json | 2 +- nodejs/npm/linux-x64-musl/package.json | 2 +- nodejs/npm/win32-arm64-msvc/package.json | 2 +- nodejs/npm/win32-x64-msvc/package.json | 2 +- nodejs/package-lock.json | 4 ++-- nodejs/package.json | 2 +- rust/lancedb/Cargo.toml | 2 +- 16 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.bumpversion.toml b/.bumpversion.toml index 003efe386..1877b33b2 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.28.0-beta.10" +current_version = "0.28.0-beta.11" parse = """(?x) (?P0|[1-9]\\d*)\\. (?P0|[1-9]\\d*)\\. 
diff --git a/Cargo.lock b/Cargo.lock index 0c8354b78..2c2c3fd3c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4582,7 +4582,7 @@ dependencies = [ [[package]] name = "lancedb" -version = "0.28.0-beta.10" +version = "0.28.0-beta.11" dependencies = [ "ahash", "anyhow", @@ -4664,7 +4664,7 @@ dependencies = [ [[package]] name = "lancedb-nodejs" -version = "0.28.0-beta.10" +version = "0.28.0-beta.11" dependencies = [ "arrow-array", "arrow-buffer", @@ -4686,7 +4686,7 @@ dependencies = [ [[package]] name = "lancedb-python" -version = "0.31.0-beta.10" +version = "0.31.0-beta.11" dependencies = [ "arrow", "async-trait", diff --git a/docs/src/java/java.md b/docs/src/java/java.md index 62c77153b..364d268fb 100644 --- a/docs/src/java/java.md +++ b/docs/src/java/java.md @@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`: com.lancedb lancedb-core - 0.28.0-beta.10 + 0.28.0-beta.11 ``` diff --git a/java/lancedb-core/pom.xml b/java/lancedb-core/pom.xml index 7aaaab5c2..8fc29d6e7 100644 --- a/java/lancedb-core/pom.xml +++ b/java/lancedb-core/pom.xml @@ -8,7 +8,7 @@ com.lancedb lancedb-parent - 0.28.0-beta.10 + 0.28.0-beta.11 ../pom.xml diff --git a/java/pom.xml b/java/pom.xml index 886f64a8d..d8b82a408 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -6,7 +6,7 @@ com.lancedb lancedb-parent - 0.28.0-beta.10 + 0.28.0-beta.11 pom ${project.artifactId} LanceDB Java SDK Parent POM diff --git a/nodejs/Cargo.toml b/nodejs/Cargo.toml index 2b54311ec..c73121607 100644 --- a/nodejs/Cargo.toml +++ b/nodejs/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "lancedb-nodejs" edition.workspace = true -version = "0.28.0-beta.10" +version = "0.28.0-beta.11" publish = false license.workspace = true description.workspace = true diff --git a/nodejs/npm/darwin-arm64/package.json b/nodejs/npm/darwin-arm64/package.json index e41bb4053..5eab89996 100644 --- a/nodejs/npm/darwin-arm64/package.json +++ b/nodejs/npm/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-darwin-arm64", - 
"version": "0.28.0-beta.10", + "version": "0.28.0-beta.11", "os": ["darwin"], "cpu": ["arm64"], "main": "lancedb.darwin-arm64.node", diff --git a/nodejs/npm/linux-arm64-gnu/package.json b/nodejs/npm/linux-arm64-gnu/package.json index 565c2cc3e..0624f62ca 100644 --- a/nodejs/npm/linux-arm64-gnu/package.json +++ b/nodejs/npm/linux-arm64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-arm64-gnu", - "version": "0.28.0-beta.10", + "version": "0.28.0-beta.11", "os": ["linux"], "cpu": ["arm64"], "main": "lancedb.linux-arm64-gnu.node", diff --git a/nodejs/npm/linux-arm64-musl/package.json b/nodejs/npm/linux-arm64-musl/package.json index e3da9a674..9b5bba426 100644 --- a/nodejs/npm/linux-arm64-musl/package.json +++ b/nodejs/npm/linux-arm64-musl/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-arm64-musl", - "version": "0.28.0-beta.10", + "version": "0.28.0-beta.11", "os": ["linux"], "cpu": ["arm64"], "main": "lancedb.linux-arm64-musl.node", diff --git a/nodejs/npm/linux-x64-gnu/package.json b/nodejs/npm/linux-x64-gnu/package.json index d85b3fa74..4b8f2dfb3 100644 --- a/nodejs/npm/linux-x64-gnu/package.json +++ b/nodejs/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-x64-gnu", - "version": "0.28.0-beta.10", + "version": "0.28.0-beta.11", "os": ["linux"], "cpu": ["x64"], "main": "lancedb.linux-x64-gnu.node", diff --git a/nodejs/npm/linux-x64-musl/package.json b/nodejs/npm/linux-x64-musl/package.json index 02023c768..a4c120c50 100644 --- a/nodejs/npm/linux-x64-musl/package.json +++ b/nodejs/npm/linux-x64-musl/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-x64-musl", - "version": "0.28.0-beta.10", + "version": "0.28.0-beta.11", "os": ["linux"], "cpu": ["x64"], "main": "lancedb.linux-x64-musl.node", diff --git a/nodejs/npm/win32-arm64-msvc/package.json b/nodejs/npm/win32-arm64-msvc/package.json index 5547cb1d5..c315922ab 100644 --- a/nodejs/npm/win32-arm64-msvc/package.json +++ 
b/nodejs/npm/win32-arm64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-win32-arm64-msvc", - "version": "0.28.0-beta.10", + "version": "0.28.0-beta.11", "os": [ "win32" ], diff --git a/nodejs/npm/win32-x64-msvc/package.json b/nodejs/npm/win32-x64-msvc/package.json index 9f76092a7..8d450d50e 100644 --- a/nodejs/npm/win32-x64-msvc/package.json +++ b/nodejs/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-win32-x64-msvc", - "version": "0.28.0-beta.10", + "version": "0.28.0-beta.11", "os": ["win32"], "cpu": ["x64"], "main": "lancedb.win32-x64-msvc.node", diff --git a/nodejs/package-lock.json b/nodejs/package-lock.json index 1ff7cf5c6..19e7fcb55 100644 --- a/nodejs/package-lock.json +++ b/nodejs/package-lock.json @@ -1,12 +1,12 @@ { "name": "@lancedb/lancedb", - "version": "0.28.0-beta.10", + "version": "0.28.0-beta.11", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@lancedb/lancedb", - "version": "0.28.0-beta.10", + "version": "0.28.0-beta.11", "cpu": [ "x64", "arm64" diff --git a/nodejs/package.json b/nodejs/package.json index 3fa38c959..a57ccf48e 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -11,7 +11,7 @@ "ann" ], "private": false, - "version": "0.28.0-beta.10", + "version": "0.28.0-beta.11", "main": "dist/index.js", "exports": { ".": "./dist/index.js", diff --git a/rust/lancedb/Cargo.toml b/rust/lancedb/Cargo.toml index b652ed1a5..285fc7ecc 100644 --- a/rust/lancedb/Cargo.toml +++ b/rust/lancedb/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lancedb" -version = "0.28.0-beta.10" +version = "0.28.0-beta.11" edition.workspace = true description = "LanceDB: A serverless, low-latency vector database for AI applications" license.workspace = true From 59db036118c856abb40ede720e30a969e194e6d7 Mon Sep 17 00:00:00 2001 From: Nitesh Yadav <78202277+n1teshy@users.noreply.github.com> Date: Sun, 3 May 2026 04:21:00 +0530 Subject: [PATCH 13/20] fix(python): add missing space in hybrid query error 
message (#3340) Hi, the hybrid query error message looks like it can use a space, just added it. ```python def _validate_query(self, query, vector=None, text=None): if query is not None and (vector is not None or text is not None): raise ValueError( "You can either provide a string query in search() method" "or set `vector()` and `text()` explicitly for hybrid search." "But not both." ) ``` --- python/python/lancedb/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/python/lancedb/query.py b/python/python/lancedb/query.py index b796fc40c..0a9473a0a 100644 --- a/python/python/lancedb/query.py +++ b/python/python/lancedb/query.py @@ -1643,7 +1643,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder): def _validate_query(self, query, vector=None, text=None): if query is not None and (vector is not None or text is not None): raise ValueError( - "You can either provide a string query in search() method" + "You can either provide a string query in search() method " "or set `vector()` and `text()` explicitly for hybrid search." "But not both." ) From 87b831bcae38af683b3da673f4d8a106bb711611 Mon Sep 17 00:00:00 2001 From: qingfeng-occ Date: Tue, 5 May 2026 00:37:18 +0800 Subject: [PATCH 14/20] fix(node): remove redundant `postbuild:release` script to fix build failure (#3285) The `build:release` command already outputs the `*.node` files directly to the `dist/` directory via the `--output-dir dist` flag. Therefore, the `postbuild:release` script, which attempts to copy `*.node` files from the `lancedb/` source directory, fails with a "no such file or directory" error because the source files do not exist there. This commit removes the redundant `postbuild:release` script to resolve the build failure. 
fix #3284 Signed-off-by: qingfeng-occ --- nodejs/package.json | 1 - 1 file changed, 1 deletion(-) diff --git a/nodejs/package.json b/nodejs/package.json index a57ccf48e..8c8b7a8d2 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -75,7 +75,6 @@ "build:debug": "napi build --platform --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir lancedb", "postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/", "build:release": "napi build --platform --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir dist", - "postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/", "build": "npm run build:debug && npm run tsc", "build-release": "npm run build:release && npm run tsc", "tsc": "tsc -b", From 1fc23e5473275c95a22f2601e536de13dd2fd949 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Mon, 4 May 2026 21:37:58 -0700 Subject: [PATCH 15/20] fix(python): make Permutation picklable for PyTorch multiprocessing (#3335) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary When PyTorch is used with multiprocessing and the start method is `spawn`, the Permutation needs to be pickled. It could not be pickled because `Table` and `Connection` are not serializable. This PR adds pickle support to Permutation without adding general pickle support to `Table` or `Connection`. To add general support we probably need to start by adding serialization in the namespace client. In the meantime this PR enables pickling by adding special cases for: * In-memory tables (just serialize as Arrow IPC) * Native tables (serialize the URI) If a user is not using one of the above cases (e.g. using a remote connection) then they will need to provide a connection factory that can be pickled. ## Breaking change `PermutationBuilder.persist(...)` is removed from the Python bindings; the permutation table is now always in-memory.
The underlying Rust `PermutationBuilder::persist` API is untouched and can be re-exposed later if needed. It probably won't make sense to do that until we have a way to serialize `Table` and `Connection`. 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.7 (1M context) --- python/python/lancedb/_lancedb.pyi | 2 +- python/python/lancedb/permutation.py | 288 ++++++++++++++++++++---- python/python/tests/test_permutation.py | 15 -- python/python/tests/test_torch.py | 108 ++++++++- python/src/permutation.rs | 22 +- 5 files changed, 354 insertions(+), 81 deletions(-) diff --git a/python/python/lancedb/_lancedb.pyi b/python/python/lancedb/_lancedb.pyi index 2298a9473..b33f89e40 100644 --- a/python/python/lancedb/_lancedb.pyi +++ b/python/python/lancedb/_lancedb.pyi @@ -442,7 +442,7 @@ class AsyncPermutationBuilder: async def execute(self) -> Table: ... def async_permutation_builder( - table: Table, dest_table_name: str + table: Table, ) -> AsyncPermutationBuilder: ... def fts_query_to_json(query: Any) -> str: ... diff --git a/python/python/lancedb/permutation.py b/python/python/lancedb/permutation.py index 724a0fd25..91532f0a7 100644 --- a/python/python/lancedb/permutation.py +++ b/python/python/lancedb/permutation.py @@ -1,11 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright The LanceDB Authors -from deprecation import deprecated -from lancedb import AsyncConnection, DBConnection -import pyarrow as pa +import copy import json +from deprecation import deprecated +import pyarrow as pa + from ._lancedb import async_permutation_builder, PermutationReader from .table import LanceTable from .background_loop import LOOP @@ -36,10 +37,7 @@ class PermutationBuilder: be referenced by name in the future. If names are not provided then they can only be referenced by their ordinal index. There is no requirement to name every split. 
- By default, the permutation will be stored in memory and will be lost when the - program exits. To persist the permutation (for very large datasets or to share - the permutation across multiple workers) use the [persist](#persist) method to - create a permanent table. + The permutation is stored in memory and will be lost when the program exits. """ def __init__(self, table: LanceTable): @@ -51,15 +49,6 @@ class PermutationBuilder: """ self._async = async_permutation_builder(table) - def persist( - self, database: Union[DBConnection, AsyncConnection], table_name: str - ) -> "PermutationBuilder": - """ - Persist the permutation to the given database. - """ - self._async.persist(database, table_name) - return self - def split_random( self, *, @@ -380,20 +369,44 @@ class Permutation: def __init__( self, - reader: PermutationReader, + base_table: LanceTable, + permutation_table: Optional[LanceTable], + split: int, selection: dict[str, str], batch_size: int, transform_fn: Callable[pa.RecordBatch, Any], + offset: Optional[int] = None, + limit: Optional[int] = None, + connection_factory: Optional[Callable[[str], LanceTable]] = None, + _reader: Optional[PermutationReader] = None, ): """ Internal constructor. Use [from_tables](#from_tables) instead. 
""" - assert reader is not None, "reader is required" + assert base_table is not None, "base_table is required" assert selection is not None, "selection is required" - self.reader = reader + self.base_table = base_table + self.permutation_table = permutation_table + self.split = split self.selection = selection self.transform_fn = transform_fn self.batch_size = batch_size + self.offset = offset + self.limit = limit + self.connection_factory = connection_factory + if _reader is None: + _reader = LOOP.run(self._build_reader()) + self.reader: PermutationReader = _reader + + async def _build_reader(self) -> PermutationReader: + reader = await PermutationReader.from_tables( + self.base_table, self.permutation_table, self.split + ) + if self.offset is not None: + reader = await reader.with_offset(self.offset) + if self.limit is not None: + reader = await reader.with_limit(self.limit) + return reader def _with_selection(self, selection: dict[str, str]) -> "Permutation": """ @@ -402,21 +415,97 @@ class Permutation: Does not validation of the selection and it replaces it entirely. This is not intended for public use. """ - return Permutation(self.reader, selection, self.batch_size, self.transform_fn) - - def _with_reader(self, reader: PermutationReader) -> "Permutation": - """ - Creates a new permutation with the given reader - - This is an internal method and should not be used directly. 
- """ - return Permutation(reader, self.selection, self.batch_size, self.transform_fn) + new = copy.copy(self) + new.selection = selection + return new def with_batch_size(self, batch_size: int) -> "Permutation": """ Creates a new permutation with the given batch size """ - return Permutation(self.reader, self.selection, batch_size, self.transform_fn) + new = copy.copy(self) + new.batch_size = batch_size + return new + + def with_connection_factory( + self, connection_factory: Callable[[str], LanceTable] + ) -> "Permutation": + """ + Creates a new permutation that will use ``connection_factory`` to reopen + the base table when this permutation is unpickled in a worker process. + + The factory is a callable that takes a single argument — the base table + name — and returns a [LanceTable]. It must be picklable; the worker + will pickle it via standard ``pickle`` and call it to recover the base + table. Picklable callables in practice means top-level (module-level) + functions, ``functools.partial`` of such functions, or instances of + picklable classes implementing ``__call__``. Lambdas and closures over + local variables don't pickle with the default protocol. + + Setting a factory is necessary when the URI alone is not enough to + re-open the connection — most importantly for LanceDB Cloud (``db://``) + connections, where ``api_key`` and ``region`` aren't recoverable from + the connection object after construction. + + For local file or cloud-storage paths the factory is optional: if not + set, ``__getstate__`` falls back to capturing + ``(uri, storage_options, namespace_path)`` and re-opening via + ``lancedb.connect(uri, storage_options=...)``. 
+ + Examples + -------- + Basic native (file-system path), parameterized via ``functools.partial``:: + + import functools, lancedb + from lancedb.permutation import Permutation + + def open_native_table(uri: str, table_name: str): + return lancedb.connect(uri).open_table(table_name) + + factory = functools.partial(open_native_table, "/data/lance_db") + permutation = Permutation.identity( + factory("training") + ).with_connection_factory(factory) + + Native via :func:`lancedb.connect_namespace` (e.g. a directory- or + REST-backed namespace client). The factory takes the + implementation name and properties dict as partial-bound args so + the worker can rebuild the same namespace connection:: + + def open_via_namespace( + impl: str, properties: dict[str, str], table_name: str, + ): + return lancedb.connect_namespace(impl, properties).open_table( + table_name, + ) + + factory = functools.partial( + open_via_namespace, + "dir", + {"root": "/data/lance_db"}, + ) + + LanceDB Cloud, reading credentials from env vars at worker startup + so secrets aren't pickled into the dataset:: + + import os, lancedb + + def open_remote_table(table_name: str): + db = lancedb.connect( + "db://my-database", + api_key=os.environ["LANCEDB_API_KEY"], + region=os.environ.get("LANCEDB_REGION", "us-east-1"), + ) + return db.open_table(table_name) + + permutation = Permutation.identity( + open_remote_table("training") + ).with_connection_factory(open_remote_table) + """ + assert connection_factory is not None, "connection_factory is required" + new = copy.copy(self) + new.connection_factory = connection_factory + return new @classmethod def identity(cls, table: LanceTable) -> "Permutation": @@ -489,11 +578,126 @@ class Permutation: schema = await reader.output_schema(None) initial_selection = {name: name for name in schema.names} return cls( - reader, initial_selection, DEFAULT_BATCH_SIZE, Transforms.arrow2python + base_table, + permutation_table, + split, + initial_selection, + 
DEFAULT_BATCH_SIZE, + Transforms.arrow2python, + _reader=reader, ) return LOOP.run(do_from_tables()) + def __getstate__(self) -> dict[str, Any]: + """Build a picklable state dict for this permutation. + + The base table is captured either via a user-supplied + ``connection_factory`` (see [with_connection_factory]) or, as a + fallback, by introspecting ``(uri, storage_options, namespace_path)`` + on the connection. The permutation table — always an in-memory + LanceDB table — is captured as a pyarrow Table (which pickles via + Arrow IPC natively). The reader is dropped from the wire format; + ``__setstate__`` rebuilds it from the restored tables. + """ + permutation_data: Optional[pa.Table] = None + if self.permutation_table is not None: + permutation_data = self.permutation_table.to_arrow() + + common = { + "base_table_name": self.base_table.name, + "permutation_data": permutation_data, + "split": self.split, + "selection": self.selection, + "batch_size": self.batch_size, + "transform_fn": self.transform_fn, + "offset": self.offset, + "limit": self.limit, + "connection_factory": self.connection_factory, + } + + if self.connection_factory is not None: + # The factory carries enough state to recover the base table on + # its own; we don't need to capture the URI / storage options / + # namespace from the existing connection. + return common + + # URI-introspection fallback: only viable for native (OSS) connections + # where (uri, storage_options) is enough to reopen. Remote / cloud + # connections don't expose recoverable api_key / region — those users + # must call with_connection_factory(). + try: + base_uri = self.base_table._conn.uri + storage_options = self.base_table._conn.storage_options + except AttributeError as e: + raise ValueError( + "Cannot pickle this Permutation: the base table's connection " + "does not expose a uri/storage_options, which usually means it " + "is a remote (LanceDB Cloud) connection. Call " + "Permutation.with_connection_factory(...) 
first to provide a " + "picklable callable that re-opens the base table from a worker " + "process." + ) from e + + if base_uri.startswith("memory://"): + # In-memory base tables don't exist in any worker process by + # default, so dump the entire base table into the pickle. This + # can be expensive for large datasets — users with large + # in-memory base tables should either persist them or set a + # connection_factory. + return { + **common, + "base_table_data": self.base_table.to_arrow(), + } + + return { + **common, + "base_table_uri": base_uri, + "base_table_namespace": self.base_table._namespace_path, + "base_table_storage_options": storage_options, + } + + def __setstate__(self, state: dict[str, Any]) -> None: + from . import connect + + connection_factory = state["connection_factory"] + if connection_factory is not None: + base_table = connection_factory(state["base_table_name"]) + elif "base_table_data" in state: + # In-memory base table inlined into the pickle; rebuild the same + # way we rebuild the in-memory permutation table. 
+ mem_db = connect("memory://") + base_table = mem_db.create_table( + state["base_table_name"], state["base_table_data"] + ) + else: + base_db = connect( + state["base_table_uri"], + storage_options=state["base_table_storage_options"], + ) + base_table = base_db.open_table( + state["base_table_name"], + namespace_path=state["base_table_namespace"] or None, + ) + + permutation_table: Optional[LanceTable] = None + if state["permutation_data"] is not None: + mem_db = connect("memory://") + permutation_table = mem_db.create_table( + "permutation", state["permutation_data"] + ) + + self.base_table = base_table + self.permutation_table = permutation_table + self.split = state["split"] + self.selection = state["selection"] + self.batch_size = state["batch_size"] + self.transform_fn = state["transform_fn"] + self.offset = state["offset"] + self.limit = state["limit"] + self.connection_factory = connection_factory + self.reader = LOOP.run(self._build_reader()) + @property def schema(self) -> pa.Schema: async def do_output_schema(): @@ -760,7 +964,9 @@ class Permutation: for expensive operations such as image decoding. 
""" assert transform is not None, "transform is required" - return Permutation(self.reader, self.selection, self.batch_size, transform) + new = copy.copy(self) + new.transform_fn = transform + return new def __getitem__(self, index: int) -> Any: """ @@ -795,12 +1001,10 @@ class Permutation: """ Skip the first `skip` rows of the permutation """ - - async def do_with_skip(): - reader = await self.reader.with_offset(skip) - return self._with_reader(reader) - - return LOOP.run(do_with_skip()) + new = copy.copy(self) + new.offset = skip + new.reader = LOOP.run(new._build_reader()) + return new @deprecated(details="Use with_take instead") def take(self, limit: int) -> "Permutation": @@ -818,12 +1022,10 @@ class Permutation: """ Limit the permutation to `limit` rows (following any `skip`) """ - - async def do_with_take(): - reader = await self.reader.with_limit(limit) - return self._with_reader(reader) - - return LOOP.run(do_with_take()) + new = copy.copy(self) + new.limit = limit + new.reader = LOOP.run(new._build_reader()) + return new @deprecated(details="Use with_repeat instead") def repeat(self, times: int) -> "Permutation": diff --git a/python/python/tests/test_permutation.py b/python/python/tests/test_permutation.py index bb92ba0ba..96d77f9d1 100644 --- a/python/python/tests/test_permutation.py +++ b/python/python/tests/test_permutation.py @@ -9,21 +9,6 @@ from lancedb import DBConnection, Table, connect from lancedb.permutation import Permutation, Permutations, permutation_builder -def test_permutation_persistence(tmp_path): - db = connect(tmp_path) - tbl = db.create_table("test_table", pa.table({"x": range(100), "y": range(100)})) - - permutation_tbl = ( - permutation_builder(tbl).shuffle().persist(db, "test_permutation").execute() - ) - assert permutation_tbl.count_rows() == 100 - - re_open = db.open_table("test_permutation") - assert re_open.count_rows() == 100 - - assert permutation_tbl.to_arrow() == re_open.to_arrow() - - def test_split_random_ratios(mem_db): 
"""Test random splitting with ratios.""" tbl = mem_db.create_table( diff --git a/python/python/tests/test_torch.py b/python/python/tests/test_torch.py index ef1c5e73b..0ca1de3e8 100644 --- a/python/python/tests/test_torch.py +++ b/python/python/tests/test_torch.py @@ -1,14 +1,27 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright The LanceDB Authors +import functools +import pickle + +import lancedb import pyarrow as pa import pytest from lancedb.util import tbl_to_tensor -from lancedb.permutation import Permutation +from lancedb.permutation import Permutation, Permutations, permutation_builder torch = pytest.importorskip("torch") +def _open_native_table(uri: str, table_name: str): + """Top-level connection factory used by the explicit-factory pickle test. + + Defined at module scope so that pickle can resolve it by name in the + worker / unpickling process. + """ + return lancedb.connect(uri).open_table(table_name) + + def test_table_dataloader(mem_db): table = mem_db.create_table("test_table", pa.table({"a": range(1000)})) dataloader = torch.utils.data.DataLoader( @@ -40,3 +53,96 @@ def test_permutation_dataloader(mem_db): for batch in dataloader: assert batch.size(0) == 1 assert batch.size(1) == 10 + + +def test_permutation_is_picklable(tmp_db): + """A Permutation must be picklable so it can be used with PyTorch's + DataLoader when num_workers > 0 (which uses multiprocessing and pickles + the dataset to pass it to worker processes).""" + table = tmp_db.create_table("test_table", pa.table({"a": range(1000)})) + permutation = Permutation.identity(table) + + pickled = pickle.dumps(permutation) + restored = pickle.loads(pickled) + + assert len(restored) == 1000 + rows = restored.__getitems__([0, 1, 2]) + assert rows == [{"a": 0}, {"a": 1}, {"a": 2}] + + +def test_permutation_with_memory_base_is_picklable(mem_db): + """An in-memory base table is inlined into the pickle as Arrow IPC bytes + and rebuilt on the other side as an in-memory 
LanceTable, so the + Permutation round-trips even though the original database can't be + reopened across processes.""" + table = mem_db.create_table("test_table", pa.table({"a": range(50)})) + permutation = Permutation.identity(table) + + restored = pickle.loads(pickle.dumps(permutation)) + + assert len(restored) == 50 + assert restored.__getitems__([0, 10, 49]) == [{"a": 0}, {"a": 10}, {"a": 49}] + + +def test_permutation_dataloader_multiprocessing(tmp_db): + """Using a Permutation with a PyTorch DataLoader that has num_workers > 0 + must work end-to-end. Each worker process gets a pickled copy of the + dataset and reads batches from it.""" + table = tmp_db.create_table("test_table", pa.table({"a": range(1000)})) + permutation = Permutation.identity(table) + + dataloader = torch.utils.data.DataLoader( + permutation, + batch_size=10, + shuffle=True, + num_workers=2, + multiprocessing_context="spawn", + ) + seen = 0 + for batch in dataloader: + assert batch["a"].size(0) == 10 + seen += batch["a"].size(0) + assert seen == 1000 + + +def test_permutation_pickle_with_connection_factory(tmp_path): + """When the user provides a connection_factory, pickling should round-trip + through that factory rather than introspecting the connection URI. Useful + for remote / cloud connections where the URI alone isn't reopenable.""" + db = lancedb.connect(tmp_path) + db.create_table("test_table", pa.table({"a": range(50)})) + + factory = functools.partial(_open_native_table, str(tmp_path)) + permutation = Permutation.identity(factory("test_table")).with_connection_factory( + factory + ) + + restored = pickle.loads(pickle.dumps(permutation)) + + assert len(restored) == 50 + # The factory survives pickling and is what powered base-table reopen. 
+ assert restored.connection_factory is not None + assert restored.connection_factory.func is _open_native_table + assert restored.__getitems__([0, 1, 2]) == [{"a": 0}, {"a": 1}, {"a": 2}] + + +def test_permutation_with_builder_is_picklable(tmp_db): + """A Permutation built from a non-identity permutation table must round-trip + through pickle while preserving the row order defined by the permutation.""" + table = tmp_db.create_table("test_table", pa.table({"a": range(100)})) + perm_tbl = ( + permutation_builder(table) + .split_random(ratios=[0.8, 0.2], seed=42, split_names=["train", "test"]) + .shuffle(seed=42) + .execute() + ) + permutations = Permutations(table, perm_tbl) + permutation = permutations["train"] + + indices = list(range(len(permutation))) + expected = permutation.__getitems__(indices) + + restored = pickle.loads(pickle.dumps(permutation)) + + assert len(restored) == len(permutation) + assert restored.__getitems__(indices) == expected diff --git a/python/src/permutation.rs b/python/src/permutation.rs index ac20a2cc9..114825938 100644 --- a/python/src/permutation.rs +++ b/python/src/permutation.rs @@ -3,9 +3,7 @@ use std::sync::{Arc, Mutex}; -use crate::{ - arrow::RecordBatchStream, connection::Connection, error::PythonErrorExt, table::Table, -}; +use crate::{arrow::RecordBatchStream, error::PythonErrorExt, table::Table}; use arrow::pyarrow::{PyArrowType, ToPyArrow}; use lancedb::{ dataloader::permutation::{ @@ -80,24 +78,6 @@ impl PyAsyncPermutationBuilder { #[pymethods] impl PyAsyncPermutationBuilder { - #[pyo3(signature = (database, table_name))] - pub fn persist( - slf: PyRefMut<'_, Self>, - database: Bound<'_, PyAny>, - table_name: String, - ) -> PyResult { - let conn = if database.hasattr("_conn")? { - database - .getattr("_conn")? - .getattr("_inner")? - .cast_into::()? - } else { - database.getattr("_inner")?.cast_into::()? 
- }; - let database = conn.borrow().database()?; - slf.modify(|builder| builder.persist(database, table_name)) - } - #[pyo3(signature = (*, ratios=None, counts=None, fixed=None, seed=None, split_names=None))] pub fn split_random( slf: PyRefMut<'_, Self>, From a17c241e8628c790c5a9d24d5d9d01ec8f230c44 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 5 May 2026 13:44:10 -0700 Subject: [PATCH 16/20] feat(python): make Permutation fork-safe for PyTorch DataLoader workers (#3339) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary PyTorch's `DataLoader` uses fork-based multiprocessing by default on Linux, but threads do not survive `fork()`. LanceDB's Python bindings drive async work through two threaded layers, both of which become inert in a forked child: - `BackgroundEventLoop` runs an asyncio loop on a Python `threading.Thread`. - `pyo3-async-runtimes::tokio` holds a global multi-threaded tokio runtime whose worker threads also die on fork — and its runtime lives in a `OnceLock` that cannot be replaced after first use. As a result, any `Permutation` (or other async API) used inside a fork-based `DataLoader` worker hangs indefinitely. This PR makes both layers fork-safe so `Permutation` works as a `torch.utils.data.Dataset` with `num_workers > 0`. ## Approach ### Rust — new `python/src/runtime.rs` Mirrors the pattern used in [Lance's Python bindings](https://github.com/lance-format/lance/blob/456198cd6f42be07f99617a6d7e39d6209cdf3cc/python/src/lib.rs#L139), adapted for the async-bridge use case. - `LanceRuntime` implements `pyo3_async_runtimes::generic::Runtime + ContextExt`, backed by an `AtomicPtr` we own (sidestepping `pyo3-async-runtimes`'s frozen `OnceLock` global). - A `pthread_atfork(after_in_child)` handler nulls the pointer; the next `spawn` rebuilds the runtime in the child. The previous runtime is intentionally **leaked** — calling `Drop` would try to join now-dead worker threads and hang. 
- `runtime::future_into_py` is a drop-in for `pyo3_async_runtimes::tokio::future_into_py`. All ~80 call sites in `arrow.rs` / `connection.rs` / `permutation.rs` / `query.rs` / `table.rs` are updated to route through it. - `python/Cargo.toml` adds `libc = "0.2"` and the tokio `rt-multi-thread` feature. ### Python — `lancedb/background_loop.py` - Refactors `BackgroundEventLoop.__init__` to a reusable `_start()` method. - An `os.register_at_fork(after_in_child=…)` hook calls `LOOP._start()` to give the singleton a fresh asyncio loop and thread **in place**. This matters because the rest of the codebase imports `LOOP` via `from .background_loop import LOOP` — rebinding the module attribute would leave those references holding the dead loop. ### Python — `lancedb/__init__.py` Removes the `__warn_on_fork` pre-fork warning (and the now-unused `import warnings`). Fork is supported. ## Test plan - [x] New `test_permutation_dataloader_fork_workers` in `python/tests/test_torch.py`: runs a `Permutation` through `torch.utils.data.DataLoader(num_workers=2, multiprocessing_context="fork")` inside a spawn-isolated child with a 30s hang detector. **Pre-fix**: timed out at 36s. **Post-fix**: passes in ~3.6s. - [x] New `test_remote_connection_after_fork` in `python/tests/test_remote_db.py`: forks a child that creates a fresh `lancedb.connect(...)` against a mock HTTP server and calls `table_names()`; passes in <1s, validates the runtime reset is sufficient for fresh remote clients. - [x] All 62 tests in `test_torch.py` + `test_permutation.py` pass. - [x] All 35 tests in `test_remote_db.py` pass. - [x] `test_table.py` (87) + `test_db.py` + `test_query.py` (157, minus one unrelated `sentence_transformers` import skip) — 244 passing. - [x] `cargo clippy -p lancedb-python --tests` clean. - [x] `cargo fmt`, `ruff check`, `ruff format` all clean. ## Known limitation (follow-up) This PR makes a **freshly-built** `lancedb.connect(...)` work in a forked child. 
An **inherited** `Connection` from the parent still carries an inherited `reqwest::Client` whose hyper connection pool references socket FDs and TCP/TLS state shared with the parent — using it from the child after fork is unsafe (especially with HTTP/1.1 keep-alive). The recommended pattern for fork-based `DataLoader` workers that hit a remote DB is to construct a new connection inside the worker. Auto-clearing inherited HTTP client pools on fork would require tracking live `Connection` instances in `lancedb` core and is left for a follow-up PR. 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.7 (1M context) --- Cargo.lock | 1 + python/Cargo.toml | 3 +- python/python/lancedb/__init__.py | 11 -- python/python/lancedb/background_loop.py | 32 +++++ python/python/tests/test_remote_db.py | 81 +++++++++++++ python/python/tests/test_torch.py | 64 +++++++++- python/src/arrow.rs | 5 +- python/src/connection.rs | 13 +-- python/src/lib.rs | 1 + python/src/permutation.rs | 5 +- python/src/query.rs | 11 +- python/src/runtime.rs | 142 +++++++++++++++++++++++ python/src/table.rs | 2 +- 13 files changed, 339 insertions(+), 32 deletions(-) create mode 100644 python/src/runtime.rs diff --git a/Cargo.lock b/Cargo.lock index 2c2c3fd3c..ce8f0bdb5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4698,6 +4698,7 @@ dependencies = [ "lance-namespace", "lance-namespace-impls", "lancedb", + "libc", "log", "pin-project", "pyo3", diff --git a/python/Cargo.toml b/python/Cargo.toml index 0811264e6..fce27e65a 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -35,7 +35,8 @@ futures.workspace = true serde = "1" serde_json = "1" snafu.workspace = true -tokio = { version = "1.40", features = ["sync"] } +tokio = { version = "1.40", features = ["sync", "rt-multi-thread"] } +libc = "0.2" [build-dependencies] pyo3-build-config = { version = "0.28", features = [ diff --git a/python/python/lancedb/__init__.py b/python/python/lancedb/__init__.py 
index 9e8ee0dd8..efeed258f 100644 --- a/python/python/lancedb/__init__.py +++ b/python/python/lancedb/__init__.py @@ -7,7 +7,6 @@ import os from concurrent.futures import ThreadPoolExecutor from datetime import timedelta from typing import Dict, Optional, Union, Any, List -import warnings __version__ = importlib.metadata.version("lancedb") @@ -438,13 +437,3 @@ __all__ = [ "Table", "__version__", ] - - -def __warn_on_fork(): - warnings.warn( - "lance is not fork-safe. If you are using multiprocessing, use spawn instead.", - ) - - -if hasattr(os, "register_at_fork"): - os.register_at_fork(before=__warn_on_fork) # type: ignore[attr-defined] diff --git a/python/python/lancedb/background_loop.py b/python/python/lancedb/background_loop.py index d132dd82d..b39da229d 100644 --- a/python/python/lancedb/background_loop.py +++ b/python/python/lancedb/background_loop.py @@ -2,7 +2,9 @@ # SPDX-FileCopyrightText: Copyright The LanceDB Authors import asyncio +import os import threading +import warnings class BackgroundEventLoop: @@ -13,6 +15,9 @@ class BackgroundEventLoop: """ def __init__(self): + self._start() + + def _start(self): self.loop = asyncio.new_event_loop() self.thread = threading.Thread( target=self.loop.run_forever, @@ -31,3 +36,30 @@ class BackgroundEventLoop: LOOP = BackgroundEventLoop() + +_FORK_WARNED = False + + +def _reset_after_fork(): + # Threads do not survive fork(), so the asyncio loop in LOOP.thread is + # dead in the child. Re-initialize the singleton in place so existing + # `from .background_loop import LOOP` references in other modules see + # the new state. The Rust-side tokio runtime is reset analogously by a + # pthread_atfork hook installed in the _lancedb extension. 
+ LOOP._start() + global _FORK_WARNED + if not _FORK_WARNED: + _FORK_WARNED = True + warnings.warn( + "lancedb fork support is experimental: the internal async " + "runtime has been reset in the forked child, but a small chance " + "of deadlock remains if other state was mid-operation at fork " + "time. The 'forkserver' or 'spawn' multiprocessing start method " + "is likely a safer alternative.", + RuntimeWarning, + stacklevel=2, + ) + + +if hasattr(os, "register_at_fork"): + os.register_at_fork(after_in_child=_reset_after_fork) diff --git a/python/python/tests/test_remote_db.py b/python/python/tests/test_remote_db.py index 0dd880cc0..a499275c5 100644 --- a/python/python/tests/test_remote_db.py +++ b/python/python/tests/test_remote_db.py @@ -6,6 +6,8 @@ import contextlib from datetime import timedelta import http.server import json +import multiprocessing as mp +import sys import threading import time from unittest.mock import MagicMock, patch @@ -1230,3 +1232,82 @@ def test_background_loop_cancellation(exception): with pytest.raises(exception): loop.run(None) mock_future.cancel.assert_called_once() + + +def _remote_fork_child(port: int, queue) -> None: + # Build a fresh Connection in the child so we exercise the at-fork-child + # tokio runtime reset rather than relying on an inherited reqwest client. + db = lancedb.connect( + "db://dev", + api_key="fake", + host_override=f"http://localhost:{port}", + client_config={ + "retry_config": {"retries": 0}, + "timeout_config": {"connect_timeout": 2, "read_timeout": 2}, + }, + ) + queue.put(db.table_names()) + + +@pytest.mark.skipif( + sys.platform != "linux", + reason=( + "fork() is unavailable on Windows and unsafe on macOS " + "(Apple frameworks/TLS are not fork-safe)" + ), +) +def test_remote_connection_after_fork(): + """A freshly-built remote Connection in a forked child should not hang. 
+ + The pyo3-async-runtimes tokio runtime would otherwise be inherited from + the parent with dead worker threads; the at-fork-child handler in our + runtime module rebuilds it on first use in the child. + """ + + def handler(request): + request.send_response(200) + request.send_header("Content-Type", "application/json") + request.end_headers() + request.wfile.write(b'{"tables": []}') + + server = http.server.HTTPServer(("localhost", 0), make_mock_http_handler(handler)) + port = server.server_address[1] + server_thread = threading.Thread(target=server.serve_forever) + server_thread.start() + try: + # Hit the server in the parent first so the runtime + LOOP are warm + # before fork; a fresh child must still succeed. + parent_db = lancedb.connect( + "db://dev", + api_key="fake", + host_override=f"http://localhost:{port}", + client_config={ + "retry_config": {"retries": 0}, + "timeout_config": {"connect_timeout": 2, "read_timeout": 2}, + }, + ) + assert parent_db.table_names() == [] + + ctx = mp.get_context("fork") + queue = ctx.Queue() + proc = ctx.Process(target=_remote_fork_child, args=(port, queue)) + proc.start() + proc.join(timeout=15) + + if proc.is_alive(): + proc.terminate() + proc.join(timeout=5) + if proc.is_alive(): + proc.kill() + proc.join() + pytest.fail("Remote connection hung after fork") + + assert proc.exitcode == 0, f"child exited with code {proc.exitcode}" + assert not queue.empty(), "child produced no result" + assert queue.get() == [] + + # Parent connection must still be usable after the child returned. 
+ assert parent_db.table_names() == [] + finally: + server.shutdown() + server_thread.join() diff --git a/python/python/tests/test_torch.py b/python/python/tests/test_torch.py index 0ca1de3e8..d17e60bbd 100644 --- a/python/python/tests/test_torch.py +++ b/python/python/tests/test_torch.py @@ -2,13 +2,15 @@ # SPDX-FileCopyrightText: Copyright The LanceDB Authors import functools +import multiprocessing as mp import pickle +import sys import lancedb import pyarrow as pa import pytest -from lancedb.util import tbl_to_tensor from lancedb.permutation import Permutation, Permutations, permutation_builder +from lancedb.util import tbl_to_tensor torch = pytest.importorskip("torch") @@ -146,3 +148,63 @@ def test_permutation_with_builder_is_picklable(tmp_db): assert len(restored) == len(permutation) assert restored.__getitems__(indices) == expected + + +def _multiworker_dataloader_target(db_uri: str, result_queue): + import lancedb + from lancedb.permutation import Permutation + + db = lancedb.connect(db_uri) + table = db.open_table("test_table") + permutation = Permutation.identity(table) + + dataloader = torch.utils.data.DataLoader( + permutation, + batch_size=10, + num_workers=2, + multiprocessing_context="fork", + ) + count = 0 + for batch in dataloader: + assert batch["a"].size(0) == 10 + count += 1 + result_queue.put(count) + + +@pytest.mark.skipif( + sys.platform != "linux", + reason=( + "fork() is unavailable on Windows and unsafe on macOS " + "(Apple frameworks/TLS are not fork-safe)" + ), +) +def test_permutation_dataloader_fork_workers(tmp_path): + """A Permutation used by a fork-based DataLoader should not hang. + + PyTorch's DataLoader uses fork-based multiprocessing by default on Linux. + LanceDB drives async work through a background asyncio thread that does + not survive a fork, so any LOOP.run() in a worker blocks forever. 
+ """ + import lancedb + + db_uri = str(tmp_path / "db") + db = lancedb.connect(db_uri) + db.create_table("test_table", pa.table({"a": list(range(1000))})) + + ctx = mp.get_context("spawn") + queue = ctx.Queue() + proc = ctx.Process(target=_multiworker_dataloader_target, args=(db_uri, queue)) + proc.start() + proc.join(timeout=30) + + if proc.is_alive(): + proc.terminate() + proc.join(timeout=5) + if proc.is_alive(): + proc.kill() + proc.join() + pytest.fail("Permutation hung when iterated in a fork-based DataLoader worker") + + assert proc.exitcode == 0, f"child exited with code {proc.exitcode}" + assert not queue.empty(), "child produced no batches" + assert queue.get() == 100 diff --git a/python/src/arrow.rs b/python/src/arrow.rs index fd3a05964..f0b4bceed 100644 --- a/python/src/arrow.rs +++ b/python/src/arrow.rs @@ -3,6 +3,8 @@ use std::sync::Arc; +use crate::error::PythonErrorExt; +use crate::runtime::future_into_py; use arrow::{ datatypes::SchemaRef, pyarrow::{IntoPyArrow, ToPyArrow}, @@ -12,9 +14,6 @@ use lancedb::arrow::SendableRecordBatchStream; use pyo3::{ Bound, Py, PyAny, PyRef, PyResult, Python, exceptions::PyStopAsyncIteration, pyclass, pymethods, }; -use pyo3_async_runtimes::tokio::future_into_py; - -use crate::error::PythonErrorExt; #[pyclass] pub struct RecordBatchStream { diff --git a/python/src/connection.rs b/python/src/connection.rs index 1b12c33ab..703b44424 100644 --- a/python/src/connection.rs +++ b/python/src/connection.rs @@ -7,6 +7,12 @@ use std::{ time::Duration, }; +use crate::{ + error::PythonErrorExt, + namespace::{create_namespace_storage_options_provider, extract_namespace_arc}, + runtime::future_into_py, + table::Table, +}; use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::FromPyArrow}; use lancedb::{ connection::Connection as LanceConnection, @@ -20,13 +26,6 @@ use pyo3::{ pyclass, pyfunction, pymethods, types::{PyDict, PyDictMethods}, }; -use pyo3_async_runtimes::tokio::future_into_py; - -use crate::{ - 
error::PythonErrorExt, - namespace::{create_namespace_storage_options_provider, extract_namespace_arc}, - table::Table, -}; #[pyclass] pub struct Connection { diff --git a/python/src/lib.rs b/python/src/lib.rs index 7dd52bdc2..d0e933dba 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -28,6 +28,7 @@ pub mod index; pub mod namespace; pub mod permutation; pub mod query; +pub mod runtime; pub mod session; pub mod table; pub mod util; diff --git a/python/src/permutation.rs b/python/src/permutation.rs index 114825938..75e1fe1b7 100644 --- a/python/src/permutation.rs +++ b/python/src/permutation.rs @@ -3,7 +3,9 @@ use std::sync::{Arc, Mutex}; -use crate::{arrow::RecordBatchStream, error::PythonErrorExt, table::Table}; +use crate::{ + arrow::RecordBatchStream, error::PythonErrorExt, runtime::future_into_py, table::Table, +}; use arrow::pyarrow::{PyArrowType, ToPyArrow}; use lancedb::{ dataloader::permutation::{ @@ -19,7 +21,6 @@ use pyo3::{ pyclass, pymethods, types::{PyAnyMethods, PyDict, PyDictMethods, PyType}, }; -use pyo3_async_runtimes::tokio::future_into_py; fn table_from_py<'a>(table: Bound<'a, PyAny>) -> PyResult> { if table.hasattr("_inner")? 
{ diff --git a/python/src/query.rs b/python/src/query.rs index 1b64b5eaa..1dc4f08db 100644 --- a/python/src/query.rs +++ b/python/src/query.rs @@ -4,6 +4,11 @@ use std::sync::Arc; use std::time::Duration; +use crate::expr::PyExpr; +use crate::runtime::future_into_py; +use crate::util::parse_distance_type; +use crate::{arrow::RecordBatchStream, util::PyLanceDB}; +use crate::{error::PythonErrorExt, index::class_name}; use arrow::array::Array; use arrow::array::ArrayData; use arrow::array::make_array; @@ -36,12 +41,6 @@ use pyo3::types::{PyDict, PyString}; use pyo3::{Borrowed, FromPyObject, exceptions::PyRuntimeError}; use pyo3::{PyErr, pyclass}; use pyo3::{exceptions::PyValueError, intern}; -use pyo3_async_runtimes::tokio::future_into_py; - -use crate::expr::PyExpr; -use crate::util::parse_distance_type; -use crate::{arrow::RecordBatchStream, util::PyLanceDB}; -use crate::{error::PythonErrorExt, index::class_name}; impl<'a, 'py> FromPyObject<'a, 'py> for PyLanceDB { type Error = PyErr; diff --git a/python/src/runtime.rs b/python/src/runtime.rs new file mode 100644 index 000000000..39ebfdaa8 --- /dev/null +++ b/python/src/runtime.rs @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The LanceDB Authors + +//! Fork-safe wrapper around tokio + pyo3-async-runtimes. +//! +//! `pyo3_async_runtimes::tokio` keeps its multi-threaded runtime in a +//! `OnceLock` that can never be replaced. Tokio's worker threads do not +//! survive `fork()`, so once a child inherits a "frozen" runtime, every +//! `future_into_py` call hangs forever. +//! +//! We sidestep the global by routing every future through our own +//! [`LanceRuntime`] (a [`pyo3_async_runtimes::generic::Runtime`] impl) backed +//! by an [`AtomicPtr`] to a tokio runtime that we own. A `pthread_atfork` +//! child handler nulls the pointer; the next `spawn` rebuilds the runtime in +//! the child. This mirrors the pattern used in the Lance Python bindings. 
+ +use std::future::Future; +use std::pin::Pin; +use std::sync::atomic::{AtomicBool, AtomicPtr, Ordering}; + +use pyo3::{Bound, PyAny, PyResult, Python, conversion::IntoPyObject}; +use pyo3_async_runtimes::{ + TaskLocals, + generic::{ContextExt, JoinError, Runtime}, +}; +use tokio::{runtime, task}; + +static RUNTIME: AtomicPtr = AtomicPtr::new(std::ptr::null_mut()); +static RUNTIME_INSTALLING: AtomicBool = AtomicBool::new(false); +static ATFORK_INSTALLED: AtomicBool = AtomicBool::new(false); + +fn create_runtime() -> runtime::Runtime { + runtime::Builder::new_multi_thread() + .enable_all() + .thread_name("lancedb-tokio-worker") + .build() + .expect("Failed to build tokio runtime") +} + +fn get_runtime() -> &'static runtime::Runtime { + loop { + let ptr = RUNTIME.load(Ordering::SeqCst); + if !ptr.is_null() { + return unsafe { &*ptr }; + } + if !RUNTIME_INSTALLING.fetch_or(true, Ordering::SeqCst) { + break; + } + std::thread::yield_now(); + } + if !ATFORK_INSTALLED.fetch_or(true, Ordering::SeqCst) { + install_atfork(); + } + let new_ptr = Box::into_raw(Box::new(create_runtime())); + RUNTIME.store(new_ptr, Ordering::SeqCst); + unsafe { &*new_ptr } +} + +/// Runs in async-signal context after `fork()` in the child. We can only +/// touch atomics here; we deliberately leak the previous runtime because +/// dropping a tokio `Runtime` would try to join its (now-dead) worker +/// threads and hang. +extern "C" fn atfork_child() { + RUNTIME.store(std::ptr::null_mut(), Ordering::SeqCst); + RUNTIME_INSTALLING.store(false, Ordering::SeqCst); +} + +#[cfg(not(windows))] +fn install_atfork() { + unsafe { libc::pthread_atfork(None, None, Some(atfork_child)) }; +} + +#[cfg(windows)] +fn install_atfork() {} + +/// Marker type implementing [`Runtime`] over our fork-safe runtime slot. +pub struct LanceRuntime; + +/// Newtype wrapper around `tokio::task::JoinError` so we can implement the +/// foreign [`JoinError`] trait without violating orphan rules. 
+pub struct LanceJoinError(task::JoinError); + +impl JoinError for LanceJoinError { + fn is_panic(&self) -> bool { + self.0.is_panic() + } + fn into_panic(self) -> Box { + self.0.into_panic() + } +} + +impl Runtime for LanceRuntime { + type JoinError = LanceJoinError; + type JoinHandle = Pin> + Send>>; + + fn spawn(fut: F) -> Self::JoinHandle + where + F: Future + Send + 'static, + { + let handle = get_runtime().spawn(fut); + Box::pin(async move { handle.await.map_err(LanceJoinError) }) + } + + fn spawn_blocking(f: F) -> Self::JoinHandle + where + F: FnOnce() + Send + 'static, + { + let handle = get_runtime().spawn_blocking(f); + Box::pin(async move { handle.await.map_err(LanceJoinError) }) + } +} + +tokio::task_local! { + static TASK_LOCALS: std::cell::OnceCell; +} + +impl ContextExt for LanceRuntime { + fn scope(locals: TaskLocals, fut: F) -> Pin + Send>> + where + F: Future + Send + 'static, + { + let cell = std::cell::OnceCell::new(); + cell.set(locals).unwrap(); + Box::pin(TASK_LOCALS.scope(cell, fut)) + } + + fn get_task_locals() -> Option { + TASK_LOCALS + .try_with(|c| c.get().cloned()) + .unwrap_or_default() + } +} + +/// Drop-in replacement for `pyo3_async_runtimes::tokio::future_into_py` that +/// uses our fork-safe runtime. 
+pub fn future_into_py(py: Python<'_>, fut: F) -> PyResult> +where + F: Future> + Send + 'static, + T: for<'py> IntoPyObject<'py> + Send + 'static, +{ + pyo3_async_runtimes::generic::future_into_py::(py, fut) +} diff --git a/python/src/table.rs b/python/src/table.rs index 715ac79cc..9ac5af807 100644 --- a/python/src/table.rs +++ b/python/src/table.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright The LanceDB Authors use std::{collections::HashMap, sync::Arc}; +use crate::runtime::future_into_py; use crate::{ connection::Connection, error::PythonErrorExt, @@ -24,7 +25,6 @@ use pyo3::{ pyclass, pymethods, types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods}, }; -use pyo3_async_runtimes::tokio::future_into_py; mod scannable; From 47a34f5ccaa1cd6971b36739b273167db921cd90 Mon Sep 17 00:00:00 2001 From: LanceDB Robot Date: Tue, 5 May 2026 18:36:39 -0700 Subject: [PATCH 17/20] chore: update lance dependency to v7.0.0-beta.4 (#3348) ## Summary - Update Lance Rust dependencies to `v7.0.0-beta.4` using `ci/set_lance_version.py`. - Update the Java `lance-core` dependency property to `7.0.0-beta.4`. - Align LanceDB with dependency updates required by Lance 7, including `object_store` 0.13 API compatibility. 
Triggering tag: https://github.com/lance-format/lance/releases/tag/v7.0.0-beta.4 ## Verification - `cargo clippy --workspace --tests --all-features -- -D warnings` - `cargo fmt --all` --- Cargo.lock | 1407 ++++++++++++++--- Cargo.toml | 30 +- java/pom.xml | 2 +- rust/lancedb/Cargo.toml | 7 +- rust/lancedb/src/database/listing.rs | 2 +- rust/lancedb/src/io/object_store.rs | 64 +- .../src/io/object_store/io_tracking.rs | 71 +- 7 files changed, 1284 insertions(+), 299 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ce8f0bdb5..7eb588570 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,9 +14,9 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "cipher", - "cpufeatures", + "cpufeatures 0.2.17", ] [[package]] @@ -25,7 +25,7 @@ version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "const-random", "getrandom 0.3.4", "once_cell", @@ -108,7 +108,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -119,7 +119,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -128,6 +128,15 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "ar_archive_writer" version = "0.5.1" @@ -458,7 +467,7 @@ dependencies = [ "derive_builder", "futures", "rand 0.8.5", - "reqwest", + "reqwest 0.12.28", "reqwest-eventsource", "secrecy", "serde", @@ -1246,9 +1255,9 @@ dependencies = [ "arrayref", "arrayvec", "cc", - "cfg-if", + "cfg-if 1.0.4", "constant_time_eq", - "cpufeatures", + "cpufeatures 0.2.17", ] [[package]] @@ -1311,6 +1320,17 @@ dependencies = [ "alloc-stdlib", ] +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -1438,6 +1458,12 @@ dependencies = [ "shlex", ] +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + [[package]] name = "cfg-if" version = "1.0.4" @@ -1450,6 +1476,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if 1.0.4", + "cpufeatures 0.3.0", + "rand_core 0.10.1", +] + [[package]] name = "chrono" version = "0.4.44" @@ -1506,6 +1543,46 @@ dependencies = [ "inout", ] +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = 
"clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + [[package]] name = "cmake" version = "0.1.57" @@ -1521,6 +1598,25 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "colored" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + [[package]] name = "comfy-table" version = "7.2.2" @@ -1597,6 +1693,21 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const-str" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18f12cc9948ed9604230cdddc7c86e270f9401ccbe3c2e98a4378c5e7632212f" + +[[package]] +name = "const_panic" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e262cdaac42494e3ae34c43969f9cdeb7da178bdb4b66fa6a1ea2edb4c8ae652" +dependencies = [ + "typewit", +] + 
[[package]] name = "constant_time_eq" version = "0.4.2" @@ -1638,6 +1749,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "countio" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9702aee5d1d744c01d82f6915644f950f898e014903385464c773b96fefdecb" +dependencies = [ + "futures-io", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -1647,6 +1767,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crc" version = "3.3.0" @@ -1689,7 +1818,7 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -1918,7 +2047,7 @@ version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "crossbeam-utils", "hashbrown 0.14.5", "lock_api", @@ -1964,7 +2093,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.2", + "object_store", "parking_lot", "rand 0.9.4", "regex", @@ -1995,7 +2124,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.2", + "object_store", "parking_lot", "tokio", ] @@ -2020,7 +2149,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.2", + "object_store", ] [[package]] @@ -2039,7 +2168,7 @@ dependencies = [ "itertools 0.14.0", "libc", "log", - "object_store 0.13.2", + "object_store", "paste", "sqlparser 0.61.0", "tokio", @@ -2080,7 +2209,7 @@ 
dependencies = [ "glob", "itertools 0.14.0", "log", - "object_store 0.13.2", + "object_store", "rand 0.9.4", "tokio", "url", @@ -2106,7 +2235,7 @@ dependencies = [ "datafusion-session", "futures", "itertools 0.14.0", - "object_store 0.13.2", + "object_store", "tokio", ] @@ -2128,7 +2257,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store 0.13.2", + "object_store", "regex", "tokio", ] @@ -2151,7 +2280,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store 0.13.2", + "object_store", "serde_json", "tokio", "tokio-stream", @@ -2179,7 +2308,7 @@ dependencies = [ "datafusion-physical-expr-common", "futures", "log", - "object_store 0.13.2", + "object_store", "parking_lot", "rand 0.9.4", "tempfile", @@ -2653,7 +2782,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2778,7 +2907,7 @@ version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -2841,7 +2970,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -3021,8 +3150,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow-array", "rand 0.9.4", @@ -3137,6 +3266,15 @@ dependencies = [ "slab", ] +[[package]] +name = "gearhash" +version = "0.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8cf82cf76cd16485e56295a1377c775ce708c9f1a0be6b029076d60a245d213" +dependencies = [ + "cfg-if 0.1.10", +] + [[package]] name = "gemm" version = "0.19.0" @@ -3263,7 +3401,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" dependencies = [ "cc", - "cfg-if", + "cfg-if 1.0.4", "libc", "log", "rustversion", @@ -3287,10 +3425,10 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "js-sys", "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -3300,7 +3438,7 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "js-sys", "libc", "r-efi 5.3.0", @@ -3314,11 +3452,34 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", + "js-sys", "libc", "r-efi 6.0.0", + "rand_core 0.10.1", "wasip2", "wasip3", + "wasm-bindgen", +] + +[[package]] +name = "git-version" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad568aa3db0fcbc81f2f116137f263d7304f512a1209b35b85150d3ef88ad19" +dependencies = [ + "git-version-macro", +] + +[[package]] +name = "git-version-macro" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53010ccb100b96a67bc32c0175f0ed1426b31b655d562898e57325f81c023ac0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] @@ -3395,7 +3556,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "bytemuck", - "cfg-if", + "cfg-if 1.0.4", "crunchy", "num-traits", "rand 0.9.4", @@ -3444,6 +3605,12 @@ dependencies = [ "serde_core", ] +[[package]] +name = "heapify" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0049b265b7f201ca9ab25475b22b47fe444060126a51abe00f77d986fc5cc52e" + [[package]] name = "heck" version = "0.4.1" @@ -3482,7 +3649,7 @@ dependencies = [ "log", "num_cpus", "rand 0.9.4", - "reqwest", + "reqwest 0.12.28", "serde", "serde_json", "thiserror 2.0.18", @@ -3491,6 +3658,28 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "hf-xet" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "430b33fa84f92796d4d263070b6c0d3ca219df7b9a0e1853ee431029b1612bcd" +dependencies = [ + "async-trait", + "bytes", + "http 1.4.0", + "more-asserts", + "serde", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tracing", + "uuid", + "xet-client", + "xet-core-structures", + "xet-data", + "xet-runtime", +] + [[package]] name = "hmac" version = "0.12.1" @@ -3679,7 +3868,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.3", + "socket2 0.5.10", "system-configuration", "tokio", "tower-service", @@ -3886,7 +4075,7 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -3896,7 +4085,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdd7bddefd0a8833b88a4b68f90dae22c7450d11b354198baee3874fd811b344" dependencies = [ "bitflags", - "cfg-if", + "cfg-if 1.0.4", "libc", ] @@ -3978,11 +4167,13 @@ checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "jiff-tzdb-platform", + "js-sys", "log", 
"portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.61.2", + "wasm-bindgen", + "windows-sys 0.52.0", ] [[package]] @@ -4011,6 +4202,55 @@ dependencies = [ "jiff-tzdb", ] +[[package]] +name = "jni" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efd9a482cf3a427f00d6b35f14332adc7902ce91efb778580e180ff90fa3498" +dependencies = [ + "cfg-if 1.0.4", + "combine", + "jni-macros", + "jni-sys", + "log", + "simd_cesu8", + "thiserror 2.0.18", + "walkdir", + "windows-link", +] + +[[package]] +name = "jni-macros" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00109accc170f0bdb141fed3e393c565b6f5e072365c3bd58f5b062591560a3" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "simd_cesu8", + "syn 2.0.117", +] + +[[package]] +name = "jni-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = "jni-sys-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn 2.0.117", +] + [[package]] name = "jobserver" version = "0.1.34" @@ -4053,23 +4293,42 @@ dependencies = [ [[package]] name = "jsonwebtoken" -version = "9.3.1" +version = "10.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" +checksum = "0529410abe238729a60b108898784df8984c87f6054c9c4fcacc47e4803c1ce1" dependencies = [ + "aws-lc-rs", "base64 0.22.1", + "getrandom 0.2.17", "js-sys", "pem", - "ring", "serde", "serde_json", + "signature 2.2.0", "simple_asn1", ] +[[package]] +name = "konst" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f660d5f887e3562f9ab6f4a14988795b694099d66b4f5dedc02d197ba9becb1d" +dependencies = [ + "const_panic", + "konst_proc_macros", + "typewit", +] + +[[package]] +name = "konst_proc_macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e037a2e1d8d5fdbd49b16a4ea09d5d6401c1f29eca5ff29d03d3824dba16256a" + [[package]] name = "lance" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow", "arrow-arith", @@ -4115,7 +4374,7 @@ dependencies = [ "lance-tokenizer", "log", "moka", - "object_store 0.12.5", + "object_store", "permutation", "pin-project", "prost", @@ -4137,8 +4396,8 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow-array", "arrow-buffer", @@ -4159,8 +4418,8 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrayref", "paste", @@ -4169,8 +4428,8 @@ dependencies = [ [[package]] name = "lance-core" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = 
"git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow-array", "arrow-buffer", @@ -4190,7 +4449,7 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store 0.12.5", + "object_store", "pin-project", "prost", "rand 0.9.4", @@ -4207,8 +4466,8 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow", "arrow-array", @@ -4239,8 +4498,8 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow", "arrow-array", @@ -4258,8 +4517,8 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow-arith", "arrow-array", @@ -4296,8 +4555,8 @@ dependencies = [ [[package]] name = "lance-file" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow-arith", "arrow-array", @@ -4318,7 +4577,7 @@ dependencies = [ "lance-io", "log", "num-traits", 
- "object_store 0.12.5", + "object_store", "prost", "prost-build", "prost-types", @@ -4329,8 +4588,8 @@ dependencies = [ [[package]] name = "lance-index" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow", "arrow-arith", @@ -4372,7 +4631,7 @@ dependencies = [ "log", "ndarray", "num-traits", - "object_store 0.12.5", + "object_store", "prost", "prost-build", "prost-types", @@ -4394,8 +4653,8 @@ dependencies = [ [[package]] name = "lance-io" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow", "arrow-arith", @@ -4422,7 +4681,7 @@ dependencies = [ "libc", "log", "moka", - "object_store 0.12.5", + "object_store", "object_store_opendal", "opendal", "path_abs", @@ -4439,8 +4698,8 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow-array", "arrow-buffer", @@ -4456,8 +4715,8 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow", "async-trait", @@ 
-4470,8 +4729,8 @@ dependencies = [ [[package]] name = "lance-namespace-impls" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow", "arrow-ipc", @@ -4491,10 +4750,10 @@ dependencies = [ "lance-namespace", "lance-table", "log", - "object_store 0.12.5", + "object_store", "quick-xml 0.38.4", "rand 0.9.4", - "reqwest", + "reqwest 0.12.28", "serde", "serde_json", "sha2", @@ -4507,21 +4766,22 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.7.2" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f061dd6fe63e3ba4052702a9d40973ee4ac57f612f04222897a149576213832" +checksum = "f65e31bdaa13e01dab6e7cf566da31df243c34a542f0d915d3601ec0e01e61d2" dependencies = [ - "reqwest", + "reqwest 0.12.28", "serde", "serde_json", "serde_repr", + "serde_with", "url", ] [[package]] name = "lance-table" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "arrow", "arrow-array", @@ -4541,7 +4801,7 @@ dependencies = [ "lance-file", "lance-io", "log", - "object_store 0.12.5", + "object_store", "prost", "prost-build", "prost-types", @@ -4560,8 +4820,8 @@ dependencies = [ [[package]] name = "lance-testing" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ 
"arrow-array", "arrow-schema", @@ -4572,8 +4832,8 @@ dependencies = [ [[package]] name = "lance-tokenizer" -version = "6.0.0-beta.7" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.7#bdcc31608cd00eac60aefbb8da2551f7eb6fc8aa" +version = "7.0.0-beta.4" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" dependencies = [ "rust-stemmers", "serde", @@ -4639,14 +4899,14 @@ dependencies = [ "log", "moka", "num-traits", - "object_store 0.12.5", + "object_store", "pin-project", "polars", "polars-arrow", "rand 0.9.4", "random_word 0.4.3", "regex", - "reqwest", + "reqwest 0.12.28", "rstest", "semver", "serde", @@ -4794,7 +5054,7 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "windows-link", ] @@ -4852,7 +5112,7 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "generator", "scoped-tls", "tracing", @@ -4963,10 +5223,19 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "digest", ] +[[package]] +name = "mea" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6747f54621d156e1b47eb6b25f39a941b9fc347f98f67d25d8881ff99e8ed832" +dependencies = [ + "slab", +] + [[package]] name = "memchr" version = "2.8.0" @@ -5031,7 +5300,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.61.2", ] @@ 
-5083,6 +5352,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "more-asserts" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fafa6961cabd9c63bcd77a45d7e3b7f3b552b70417831fb0f56db717e72407e" + [[package]] name = "multimap" version = "0.10.1" @@ -5233,7 +5508,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -5324,6 +5599,34 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags", +] + +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" +dependencies = [ + "libc", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-system-configuration" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7216bd11cbda54ccabcab84d523dc93b858ec75ecfb3a7d89513fa22464da396" +dependencies = [ + "objc2-core-foundation", +] + [[package]] name = "object" version = "0.37.3" @@ -5335,16 +5638,18 @@ dependencies = [ [[package]] name = "object_store" -version = "0.12.5" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" +checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" dependencies = [ "async-trait", "base64 0.22.1", "bytes", "chrono", "form_urlencoded", - 
"futures", + "futures-channel", + "futures-core", + "futures-util", "http 1.4.0", "http-body-util", "httparse", @@ -5354,11 +5659,11 @@ dependencies = [ "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.38.4", - "rand 0.9.4", - "reqwest", + "quick-xml 0.39.3", + "rand 0.10.1", + "reqwest 0.12.28", "ring", - "rustls-pemfile", + "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", @@ -5371,43 +5676,18 @@ dependencies = [ "web-time", ] -[[package]] -name = "object_store" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures-channel", - "futures-core", - "futures-util", - "http 1.4.0", - "humantime", - "itertools 0.14.0", - "parking_lot", - "percent-encoding", - "thiserror 2.0.18", - "tokio", - "tracing", - "url", - "walkdir", - "wasm-bindgen-futures", - "web-time", -] - [[package]] name = "object_store_opendal" -version = "0.55.0" +version = "0.56.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "113ab0769e972eee585e57407b98de08bda5354fa28e8ba4d89038d6cb6a8991" +checksum = "08298874eee5935c95bcaa393148834f9c53d904461ca15584a041d8a1c907c2" dependencies = [ "async-trait", "bytes", "chrono", "futures", - "object_store 0.12.5", + "mea", + "object_store", "opendal", "pin-project", "tokio", @@ -5425,6 +5705,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oneshot" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" + [[package]] name = "onig" version = "6.5.1" @@ -5449,32 +5735,219 @@ dependencies = [ [[package]] name = "opendal" -version = "0.55.0" +version = "0.56.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d075ab8a203a6ab4bc1bce0a4b9fe486a72bf8b939037f4b78d95386384bc80a" +checksum = "97b31d3d8e99a85d83b73ec26647f5607b80578ed9375810b6e44ffa3590a236" +dependencies = [ + "ctor", + "opendal-core", + "opendal-layer-concurrent-limit", + "opendal-layer-logging", + "opendal-layer-retry", + "opendal-layer-timeout", + "opendal-service-azblob", + "opendal-service-azdls", + "opendal-service-gcs", + "opendal-service-hf", + "opendal-service-oss", + "opendal-service-s3", +] + +[[package]] +name = "opendal-core" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1849dd2687e173e776d3af5fce1ba3ae47b9dd37a09d1c4deba850ef45fe00ca" dependencies = [ "anyhow", - "backon", "base64 0.22.1", "bytes", - "crc32c", "futures", - "getrandom 0.2.17", "http 1.4.0", "http-body 1.0.1", "jiff", "log", "md-5", + "mea", "percent-encoding", "quick-xml 0.38.4", - "reqsign", - "reqwest", + "reqsign-core", + "reqwest 0.13.3", "serde", "serde_json", - "sha2", "tokio", "url", "uuid", + "web-time", +] + +[[package]] +name = "opendal-layer-concurrent-limit" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "048b1b29c503263bdd80a9afe46a68cd02ea9bd361185b1feab4b151078998e9" +dependencies = [ + "futures", + "http 1.4.0", + "mea", + "opendal-core", +] + +[[package]] +name = "opendal-layer-logging" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2645adc988b12eda106e2679ae529facfbbaa868ceb706f6f8125c6af15c47b" +dependencies = [ + "log", + "opendal-core", +] + +[[package]] +name = "opendal-layer-retry" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eac134ffa4ddda6131a640a84a5315996424b9416c85052f8c64c1a33b70ad4" +dependencies = [ + "backon", + "log", + "opendal-core", +] + +[[package]] +name = "opendal-layer-timeout" +version = "0.56.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "619586ab7480c2e3009f6d18eabab18957bc094778fd130bcc38924970a90f4c" +dependencies = [ + "opendal-core", + "tokio", +] + +[[package]] +name = "opendal-service-azblob" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7452bf3ec61cfd81ac9ad9ada17825931e9e371d44a045c6bfab9596c0a2ac3b" +dependencies = [ + "base64 0.22.1", + "bytes", + "http 1.4.0", + "log", + "opendal-core", + "opendal-service-azure-common", + "quick-xml 0.38.4", + "reqsign-azure-storage", + "reqsign-core", + "reqsign-file-read-tokio", + "serde", + "sha2", + "uuid", +] + +[[package]] +name = "opendal-service-azdls" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f9884c2d8cf8ba2bb077d79c877dac5863ba3bab9e2c9c1e41a2e0491404772" +dependencies = [ + "bytes", + "http 1.4.0", + "log", + "opendal-core", + "opendal-service-azure-common", + "quick-xml 0.38.4", + "reqsign-azure-storage", + "reqsign-core", + "reqsign-file-read-tokio", + "serde", + "serde_json", +] + +[[package]] +name = "opendal-service-azure-common" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb0e45d6c8dcf66ce2da20e241bcb80e6e540e109a4ff20f318f6c9b4c54e0c" +dependencies = [ + "http 1.4.0", + "opendal-core", +] + +[[package]] +name = "opendal-service-gcs" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70a49477a10163431896d106136117f5670717f9c9e49cf6f710528800c6633a" +dependencies = [ + "async-trait", + "bytes", + "http 1.4.0", + "log", + "opendal-core", + "percent-encoding", + "quick-xml 0.38.4", + "reqsign-core", + "reqsign-file-read-tokio", + "reqsign-google", + "serde", + "serde_json", + "tokio", +] + +[[package]] +name = "opendal-service-hf" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7b2ab7a2a8a11dfe257ef4db5c0de798acbcd0d6429c37382dad2154bc06a388" +dependencies = [ + "bytes", + "hf-xet", + "http 1.4.0", + "log", + "opendal-core", + "percent-encoding", + "reqwest 0.13.3", + "serde", + "serde_json", +] + +[[package]] +name = "opendal-service-oss" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c8a917829ad06d21b639558532cb0101fe49b040d946d673a73018683fac05" +dependencies = [ + "bytes", + "http 1.4.0", + "log", + "opendal-core", + "quick-xml 0.38.4", + "reqsign-aliyun-oss", + "reqsign-core", + "reqsign-file-read-tokio", + "serde", +] + +[[package]] +name = "opendal-service-s3" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dadddeb9bb50b0d30927dd914c298c4ddca47e4c1cfa7674d311f0cf9b051c8" +dependencies = [ + "base64 0.22.1", + "bytes", + "crc32c", + "http 1.4.0", + "log", + "md-5", + "opendal-core", + "quick-xml 0.38.4", + "reqsign-aws-v4", + "reqsign-core", + "reqsign-file-read-tokio", + "serde", + "url", ] [[package]] @@ -5508,6 +5981,15 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "os_str_bytes" +version = "6.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" +dependencies = [ + "memchr", +] + [[package]] name = "outref" version = "0.5.2" @@ -5547,7 +6029,7 @@ version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "libc", "redox_syscall", "smallvec", @@ -6136,7 +6618,7 @@ dependencies = [ "rayon", "smartstring", "stacker", - "sysinfo", + "sysinfo 0.30.13", "version_check", ] @@ -6223,8 +6705,8 @@ version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ - 
"heck 0.5.0", - "itertools 0.14.0", + "heck 0.4.1", + "itertools 0.11.0", "log", "multimap", "petgraph", @@ -6243,7 +6725,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.11.0", "proc-macro2", "quote", "syn 2.0.117", @@ -6275,7 +6757,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e205bb30d5b916c55e584c22201771bcf2bad9aabd5d4127f38387140c38632" dependencies = [ "bytemuck", - "cfg-if", + "cfg-if 1.0.4", "libm", "num-complex", "paste", @@ -6377,9 +6859,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.37.5" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" dependencies = [ "memchr", "serde", @@ -6387,9 +6869,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.38.4" +version = "0.39.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" +checksum = "721da970c312655cde9b4ffe0547f20a8494866a4af5ff51f18b7c633d0c870b" dependencies = [ "memchr", "serde", @@ -6408,7 +6890,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.37", - "socket2 0.6.3", + "socket2 0.5.10", "thiserror 2.0.18", "tokio", "tracing", @@ -6421,6 +6903,7 @@ version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ + "aws-lc-rs", "bytes", "getrandom 0.3.4", "lru-slab", @@ -6445,7 +6928,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.3", + "socket2 0.5.10", "tracing", "windows-sys 0.60.2", ] @@ -6498,6 +6981,17 @@ dependencies = [ "rand_core 
0.9.5", ] +[[package]] +name = "rand" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" +dependencies = [ + "chacha20", + "getrandom 0.4.2", + "rand_core 0.10.1", +] + [[package]] name = "rand_chacha" version = "0.3.1" @@ -6536,6 +7030,12 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rand_core" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" + [[package]] name = "rand_distr" version = "0.4.3" @@ -6670,6 +7170,15 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "redb" +version = "3.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ba239c1c1693315d3cc0e601db3b3965543afbf48c41730fdca2f069f510f4a" +dependencies = [ + "libc", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -6752,33 +7261,114 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" [[package]] -name = "reqsign" -version = "0.16.5" +name = "reqsign-aliyun-oss" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" +checksum = "57ac2757f3140aa2e213b554148ae0b52733e624fc6723f0cc6bb3d440176c95" dependencies = [ "anyhow", - "async-trait", - "base64 0.22.1", - "chrono", "form_urlencoded", - "getrandom 0.2.17", - "hex", - "hmac", - "home", "http 1.4.0", - "jsonwebtoken", "log", - "once_cell", "percent-encoding", - "quick-xml 0.37.5", - "rand 0.8.5", - "reqwest", - "rsa", + "reqsign-core", "rust-ini", "serde", "serde_json", +] + +[[package]] +name = "reqsign-aws-v4" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"44eaca382e94505a49f1a4849658d153aebf79d9c1a58e5dd3b10361511e9f43" +dependencies = [ + "anyhow", + "bytes", + "form_urlencoded", + "http 1.4.0", + "log", + "percent-encoding", + "quick-xml 0.39.3", + "reqsign-core", + "rust-ini", + "serde", + "serde_json", + "serde_urlencoded", "sha1", +] + +[[package]] +name = "reqsign-azure-storage" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a321980405d596bd34aaf95c4722a3de4128a67fd19e74a81a83aa3fdf082e6" +dependencies = [ + "anyhow", + "base64 0.22.1", + "bytes", + "form_urlencoded", + "http 1.4.0", + "jsonwebtoken", + "log", + "pem", + "percent-encoding", + "reqsign-core", + "rsa", + "serde", + "serde_json", + "sha1", +] + +[[package]] +name = "reqsign-core" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b10302cf0a7d7e7352ba211fc92c3c5bebf1286153e49cc5aa87348078a8e102" +dependencies = [ + "anyhow", + "base64 0.22.1", + "bytes", + "form_urlencoded", + "futures", + "hex", + "hmac", + "http 1.4.0", + "jiff", + "log", + "percent-encoding", + "sha1", + "sha2", + "windows-sys 0.61.2", +] + +[[package]] +name = "reqsign-file-read-tokio" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d89295b3d17abea31851cc8de55d843d89c52132c864963c38d41920613dc5" +dependencies = [ + "anyhow", + "reqsign-core", + "tokio", +] + +[[package]] +name = "reqsign-google" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35cc609b49c69e76ecaceb775a03f792d1ed3e7755ab3548d4534fd801e3242e" +dependencies = [ + "form_urlencoded", + "http 1.4.0", + "jsonwebtoken", + "log", + "percent-encoding", + "reqsign-aws-v4", + "reqsign-core", + "rsa", + "serde", + "serde_json", "sha2", "tokio", ] @@ -6824,11 +7414,51 @@ dependencies = [ "url", "wasm-bindgen", "wasm-bindgen-futures", - "wasm-streams", + "wasm-streams 0.4.2", "web-sys", "webpki-roots 1.0.6", ] +[[package]] 
+name = "reqwest" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62e0021ea2c22aed41653bc7e1419abb2c97e038ff2c33d0e1309e49a97deec0" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-rustls 0.27.7", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls 0.23.37", + "rustls-pki-types", + "rustls-platform-verifier", + "serde", + "serde_json", + "sync_wrapper", + "tokio", + "tokio-rustls 0.26.4", + "tokio-util", + "tower", + "tower-http 0.6.8", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams 0.5.0", + "web-sys", +] + [[package]] name = "reqwest-eventsource" version = "0.6.0" @@ -6841,10 +7471,24 @@ dependencies = [ "mime", "nom 7.1.3", "pin-project-lite", - "reqwest", + "reqwest 0.12.28", "thiserror 1.0.69", ] +[[package]] +name = "reqwest-middleware" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199dda04a536b532d0cc04d7979e39b1c763ea749bf91507017069c00b96056f" +dependencies = [ + "anyhow", + "async-trait", + "http 1.4.0", + "reqwest 0.13.3", + "thiserror 2.0.18", + "tower-service", +] + [[package]] name = "rfc6979" version = "0.3.1" @@ -6863,7 +7507,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", - "cfg-if", + "cfg-if 1.0.4", "getrandom 0.2.17", "libc", "untrusted 0.9.0", @@ -6872,9 +7516,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.11.3" +version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ba9ce64a8f45d7fc86358410bb1a82e8c987504c0d4900e9141d69a9f26c885" +checksum = "1dedc5658c6ecb3bdb5ef5f3295bb9253f42dcf3fd1402c03f6b1f7659c3c4a9" dependencies = [ "bytemuck", "byteorder", @@ 
-6919,7 +7563,7 @@ version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "825ea780781b15345a146be27eaefb05085e337e869bff01b4306a4fd4a9ad5a" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "glob", "proc-macro-crate", "proc-macro2", @@ -6937,7 +7581,7 @@ version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "ordered-multimap", ] @@ -6976,7 +7620,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -7019,15 +7663,6 @@ dependencies = [ "security-framework", ] -[[package]] -name = "rustls-pemfile" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" -dependencies = [ - "rustls-pki-types", -] - [[package]] name = "rustls-pki-types" version = "1.14.0" @@ -7038,6 +7673,33 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-platform-verifier" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d1e2536ce4f35f4846aa13bff16bd0ff40157cdb14cc056c7b14ba41233ba0" +dependencies = [ + "core-foundation 0.10.1", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls 0.23.37", + "rustls-native-certs", + "rustls-platform-verifier-android", + "rustls-webpki 0.103.13", + "security-framework", + "security-framework-sys", + "webpki-root-certs", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustls-platform-verifier-android" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" + [[package]] name = "rustls-webpki" version = "0.101.7" @@ -7072,6 +7734,12 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "safe-transmute" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3944826ff8fa8093089aba3acb4ef44b9446a99a16f3bf4e74af3f77d340ab7d" + [[package]] name = "safetensors" version = "0.7.0" @@ -7349,8 +8017,8 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ - "cfg-if", - "cpufeatures", + "cfg-if 1.0.4", + "cpufeatures 0.2.17", "digest", ] @@ -7360,9 +8028,19 @@ version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ - "cfg-if", - "cpufeatures", + "cfg-if 1.0.4", + "cpufeatures 0.2.17", "digest", + "sha2-asm", +] + +[[package]] +name = "sha2-asm" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b845214d6175804686b2bd482bcffe96651bb2d1200742b712003504a2dac1ab" +dependencies = [ + "cc", ] [[package]] @@ -7374,6 +8052,17 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shellexpand" +version = "3.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32824fab5e16e6c4d86dc1ba84489390419a39f97699852b66480bb87d297ed8" +dependencies = [ + "bstr", + "dirs", + "os_str_bytes", +] + [[package]] name = "shlex" version = "1.3.0" @@ -7416,6 +8105,16 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "simd_cesu8" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f90157bb87cddf702797c5dadfa0be7d266cdf49e22da2fcaa32eff75b2c33" +dependencies = [ + "rustc_version", + "simdutf8", +] + [[package]] name = "simdutf8" version = 
"0.1.5" @@ -7487,7 +8186,7 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.117", @@ -7499,7 +8198,7 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54254b8531cafa275c5e096f62d48c81435d1015405a91198ddb11e967301d40" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.117", @@ -7522,7 +8221,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -7623,7 +8322,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" dependencies = [ "cc", - "cfg-if", + "cfg-if 1.0.4", "libc", "psm", "windows-sys 0.59.0", @@ -7635,6 +8334,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "statrs" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a3fe7c28c6512e766b0874335db33c94ad7b8f9054228ae1c2abd47ce7d335e" +dependencies = [ + "approx", + "num-traits", +] + [[package]] name = "std_prelude" version = "0.2.12" @@ -7715,6 +8424,12 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "symlink" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7973cce6668464ea31f176d85b13c7ab3bba2cb3b77a2ed26abd7801688010a" + [[package]] name = "syn" version = "1.0.109" @@ -7777,12 +8492,26 @@ version = 
"0.30.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "core-foundation-sys", "libc", "ntapi", "once_cell", - "windows", + "windows 0.52.0", +] + +[[package]] +name = "sysinfo" +version = "0.38.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ab6a2f8bfe508deb3c6406578252e491d299cbbf3bc0529ecc3313aee4a52f" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows 0.62.2", ] [[package]] @@ -7840,7 +8569,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -7920,7 +8649,7 @@ version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -8048,6 +8777,17 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "tokio-retry" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40f644c762e9d396831ae2f8935c954b0d758c4532e924bead0f666d0c1c8640" +dependencies = [ + "pin-project-lite", + "rand 0.10.1", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.24.1" @@ -8203,6 +8943,19 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-appender" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "050686193eb999b4bb3bc2acfa891a13da00f79734704c4b8b4ef1a10b368a3c" +dependencies = [ + "crossbeam-channel", + "symlink", + "thiserror 2.0.18", + "time", + "tracing-subscriber", +] + [[package]] name = "tracing-attributes" version = "0.1.31" @@ -8235,6 +8988,16 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.23" @@ -8245,12 +9008,15 @@ dependencies = [ "nu-ansi-term", "once_cell", "regex-automata", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", "tracing", "tracing-core", "tracing-log", + "tracing-serde", ] [[package]] @@ -8280,6 +9046,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "typewit" +version = "1.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "214ca0b2191785cbc06209b9ca1861e048e39b5ba33574b3cedd58363d5bb5f6" + [[package]] name = "unicase" version = "2.9.0" @@ -8465,6 +9237,15 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasi" +version = "0.14.7+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +dependencies = [ + "wasip2", +] + [[package]] name = "wasip2" version = "1.0.2+wasi-0.2.9" @@ -8483,13 +9264,22 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasite" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fe902b4a6b8028a753d5424909b764ccf79b7a209eac9bf97e59cda9f71a42" +dependencies = [ + "wasi 0.14.7+wasi-0.2.4", +] + [[package]] name = "wasm-bindgen" version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "once_cell", "rustversion", 
"wasm-bindgen-macro", @@ -8502,7 +9292,7 @@ version = "0.4.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "futures-util", "js-sys", "once_cell", @@ -8577,6 +9367,19 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasm-streams" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wasmparser" version = "0.244.0" @@ -8609,6 +9412,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-root-certs" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31141ce3fc3e300ae89b78c0dd67f9708061d1d2eda54b8209346fd6be9a92c" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "webpki-roots" version = "0.26.11" @@ -8627,6 +9439,19 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "whoami" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6a5b12f9df4f978d2cfdb1bd3bac52433f44393342d7ee9c25f5a1c14c0f45d" +dependencies = [ + "libc", + "libredox", + "objc2-system-configuration", + "wasite", + "web-sys", +] + [[package]] name = "winapi" version = "0.3.9" @@ -8649,7 +9474,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -8668,6 +9493,27 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" +dependencies = [ + "windows-collections", + "windows-core 0.62.2", + "windows-future", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" +dependencies = [ + "windows-core 0.62.2", +] + [[package]] name = "windows-core" version = "0.52.0" @@ -8690,6 +9536,17 @@ dependencies = [ "windows-strings", ] +[[package]] +name = "windows-future" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" +dependencies = [ + "windows-core 0.62.2", + "windows-link", + "windows-threading", +] + [[package]] name = "windows-implement" version = "0.60.2" @@ -8718,6 +9575,16 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-numerics" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" +dependencies = [ + "windows-core 0.62.2", + "windows-link", +] + [[package]] name = "windows-registry" version = "0.6.1" @@ -8816,6 +9683,15 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] +[[package]] +name = "windows-threading" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" +dependencies = [ + "windows-link", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -9024,6 +9900,153 @@ dependencies = [ "tap", ] +[[package]] +name = "xet-client" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3e1e496dcbe6a09017acdfaf48e1a646735e7ff5b2a49e2c7e081cca77a59bc8" +dependencies = [ + "anyhow", + "async-trait", + "base64 0.22.1", + "bytes", + "clap", + "crc32fast", + "futures", + "http 1.4.0", + "hyper 1.8.1", + "lazy_static", + "more-asserts", + "rand 0.10.1", + "redb", + "reqwest 0.13.3", + "reqwest-middleware", + "serde", + "serde_json", + "serde_repr", + "statrs", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tokio-retry", + "tracing", + "tracing-subscriber", + "url", + "urlencoding", + "web-time", + "xet-core-structures", + "xet-runtime", +] + +[[package]] +name = "xet-core-structures" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb838aa8eb67d730af301584cf003caad407487606058292a6750711b603fbee" +dependencies = [ + "async-trait", + "base64 0.22.1", + "blake3", + "bytemuck", + "bytes", + "clap", + "countio", + "csv", + "futures", + "futures-util", + "getrandom 0.4.2", + "heapify", + "itertools 0.14.0", + "lazy_static", + "lz4_flex", + "more-asserts", + "rand 0.10.1", + "regex", + "safe-transmute", + "serde", + "static_assertions", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tracing", + "uuid", + "web-time", + "xet-runtime", +] + +[[package]] +name = "xet-data" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67fd409bef621411a9d9013798540bb8036cb2678f03ab39af89a5e88034ed8c" +dependencies = [ + "anyhow", + "async-trait", + "bytes", + "chrono", + "clap", + "gearhash", + "http 1.4.0", + "itertools 0.14.0", + "lazy_static", + "more-asserts", + "rand 0.10.1", + "serde", + "serde_json", + "sha2", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tracing", + "url", + "uuid", + "walkdir", + "xet-client", + "xet-core-structures", + "xet-runtime", +] + +[[package]] +name = "xet-runtime" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"15d8f121c33866f7648b737abe70d0e2dd9c0af4ffdd7219207531d0283aa63d" +dependencies = [ + "anyhow", + "async-trait", + "bytes", + "chrono", + "colored", + "const-str", + "ctor", + "dirs", + "futures", + "git-version", + "humantime", + "konst", + "lazy_static", + "libc", + "more-asserts", + "oneshot", + "pin-project", + "rand 0.10.1", + "reqwest 0.13.3", + "serde", + "serde_json", + "shellexpand", + "sysinfo 0.38.4", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tracing", + "tracing-appender", + "tracing-subscriber", + "whoami", + "winapi", +] + [[package]] name = "xmlparser" version = "0.13.6" diff --git a/Cargo.toml b/Cargo.toml index 2f75c2ee3..6922ba487 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,20 +13,20 @@ categories = ["database-implementations"] rust-version = "1.91.0" [workspace.dependencies] -lance = { "version" = "=6.0.0-beta.7", default-features = false, "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } -lance-core = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } -lance-datagen = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } -lance-file = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } -lance-io = { "version" = "=6.0.0-beta.7", default-features = false, "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } -lance-index = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } -lance-linalg = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } -lance-namespace = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } -lance-namespace-impls = { "version" = "=6.0.0-beta.7", default-features = false, "tag" = "v6.0.0-beta.7", "git" = 
"https://github.com/lance-format/lance.git" } -lance-table = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } -lance-testing = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } -lance-datafusion = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } -lance-encoding = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } -lance-arrow = { "version" = "=6.0.0-beta.7", "tag" = "v6.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance = { "version" = "=7.0.0-beta.4", default-features = false, "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-core = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-datagen = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-file = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-io = { "version" = "=7.0.0-beta.4", default-features = false, "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-index = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-linalg = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-namespace = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-namespace-impls = { "version" = "=7.0.0-beta.4", default-features = false, "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-table = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = 
"https://github.com/lance-format/lance.git" } +lance-testing = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-datafusion = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-encoding = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance-arrow = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } ahash = "0.8" # Note that this one does not include pyarrow arrow = { version = "58.0.0", optional = false } @@ -54,7 +54,7 @@ half = { "version" = "2.7.1", default-features = false, features = [ futures = "0" log = "0.4" moka = { version = "0.12", features = ["future"] } -object_store = "0.12.0" +object_store = "0.13.2" pin-project = "1.0.7" rand = "0.9" snafu = "0.8" diff --git a/java/pom.xml b/java/pom.xml index d8b82a408..b3e7f99c2 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -28,7 +28,7 @@ UTF-8 15.0.0 - 6.0.0-beta.7 + 7.0.0-beta.4 false 2.30.0 1.7 diff --git a/rust/lancedb/Cargo.toml b/rust/lancedb/Cargo.toml index 285fc7ecc..292226ede 100644 --- a/rust/lancedb/Cargo.toml +++ b/rust/lancedb/Cargo.toml @@ -108,7 +108,12 @@ test-log = "0.2" [features] default = [] -aws = ["lance/aws", "lance-io/aws", "lance-namespace-impls/dir-aws"] +aws = [ + "lance/aws", + "lance-io/aws", + "lance-namespace-impls/dir-aws", + "object_store/aws", +] oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"] gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"] azure = [ diff --git a/rust/lancedb/src/database/listing.rs b/rust/lancedb/src/database/listing.rs index 73fad6eb9..d1831f52d 100644 --- a/rust/lancedb/src/database/listing.rs +++ b/rust/lancedb/src/database/listing.rs @@ -715,7 +715,7 @@ impl ListingDatabase { let commit_handler = commit_handler_from_url(&uri, &Some(object_store_params)).await?; 
for name in names { let dir_name = format!("{}.{}", name, LANCE_EXTENSION); - let full_path = self.base_path.child(dir_name.clone()); + let full_path = self.base_path.clone().join(dir_name.clone()); commit_handler.delete(&full_path).await?; diff --git a/rust/lancedb/src/io/object_store.rs b/rust/lancedb/src/io/object_store.rs index d4739291a..d27357b82 100644 --- a/rust/lancedb/src/io/object_store.rs +++ b/rust/lancedb/src/io/object_store.rs @@ -5,11 +5,12 @@ use std::{fmt::Formatter, sync::Arc}; -use futures::{TryFutureExt, stream::BoxStream}; +use futures::{StreamExt, TryFutureExt, stream::BoxStream}; use lance::io::WrappingObjectStore; use object_store::{ - Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, - PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart, path::Path, + CopyOptions, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, + ObjectStore, ObjectStoreExt, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, + UploadPart, path::Path, }; use async_trait::async_trait; @@ -93,20 +94,6 @@ impl ObjectStore for MirroringObjectStore { self.primary.get_opts(location, options).await } - async fn head(&self, location: &Path) -> Result { - self.primary.head(location).await - } - - async fn delete(&self, location: &Path) -> Result<()> { - if !location.primary_only() { - match self.secondary.delete(location).await { - Err(Error::NotFound { .. 
}) | Ok(_) => {} - Err(e) => return Err(e), - } - } - self.primary.delete(location).await - } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { self.primary.list(prefix) } @@ -115,21 +102,40 @@ impl ObjectStore for MirroringObjectStore { self.primary.list_with_delimiter(prefix).await } - async fn copy(&self, from: &Path, to: &Path) -> Result<()> { - if to.primary_only() { - self.primary.copy(from, to).await - } else { - self.secondary.copy(from, to).await?; - self.primary.copy(from, to).await?; - Ok(()) - } + fn delete_stream( + &self, + locations: BoxStream<'static, Result>, + ) -> BoxStream<'static, Result> { + let primary = self.primary.clone(); + let secondary = self.secondary.clone(); + locations + .map(move |location| { + let primary = primary.clone(); + let secondary = secondary.clone(); + async move { + let location = location?; + if !location.primary_only() { + match secondary.delete(&location).await { + Err(Error::NotFound { .. }) | Ok(_) => {} + Err(e) => return Err(e), + } + } + primary.delete(&location).await?; + Ok(location) + } + }) + .buffered(10) + .boxed() } - async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { - if !to.primary_only() { - self.secondary.copy(from, to).await?; + async fn copy_opts(&self, from: &Path, to: &Path, options: CopyOptions) -> Result<()> { + if to.primary_only() { + self.primary.copy_opts(from, to, options).await + } else { + self.secondary.copy_opts(from, to, options.clone()).await?; + self.primary.copy_opts(from, to, options).await?; + Ok(()) } - self.primary.copy_if_not_exists(from, to).await } } diff --git a/rust/lancedb/src/io/object_store/io_tracking.rs b/rust/lancedb/src/io/object_store/io_tracking.rs index 20f0a020a..bd4f8f54a 100644 --- a/rust/lancedb/src/io/object_store/io_tracking.rs +++ b/rust/lancedb/src/io/object_store/io_tracking.rs @@ -10,9 +10,9 @@ use bytes::Bytes; use futures::stream::BoxStream; use lance::io::WrappingObjectStore; use object_store::{ - 
GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, - PutMultipartOptions, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart, - path::Path, + CopyOptions, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, + PutMultipartOptions, PutOptions, PutPayload, PutResult, RenameOptions, Result as OSResult, + UploadPart, path::Path, }; #[derive(Debug, Default)] @@ -81,11 +81,6 @@ impl IoTrackingStore { #[async_trait::async_trait] #[deny(clippy::missing_trait_methods)] impl ObjectStore for IoTrackingStore { - async fn put(&self, location: &Path, bytes: PutPayload) -> OSResult { - self.record_write(bytes.content_length() as u64); - self.target.put(location, bytes).await - } - async fn put_opts( &self, location: &Path, @@ -96,14 +91,6 @@ impl ObjectStore for IoTrackingStore { self.target.put_opts(location, bytes, opts).await } - async fn put_multipart(&self, location: &Path) -> OSResult> { - let target = self.target.put_multipart(location).await?; - Ok(Box::new(IoTrackingMultipartUpload { - target, - stats: self.stats.clone(), - })) - } - async fn put_multipart_opts( &self, location: &Path, @@ -116,15 +103,6 @@ impl ObjectStore for IoTrackingStore { })) } - async fn get(&self, location: &Path) -> OSResult { - let result = self.target.get(location).await; - if let Ok(result) = &result { - let num_bytes = result.range.end - result.range.start; - self.record_read(num_bytes); - } - result - } - async fn get_opts(&self, location: &Path, options: GetOptions) -> OSResult { let result = self.target.get_opts(location, options).await; if let Ok(result) = &result { @@ -134,14 +112,6 @@ impl ObjectStore for IoTrackingStore { result } - async fn get_range(&self, location: &Path, range: std::ops::Range) -> OSResult { - let result = self.target.get_range(location, range).await; - if let Ok(result) = &result { - self.record_read(result.len() as u64); - } - result - } - async fn get_ranges( &self, location: &Path, @@ -154,20 
+124,11 @@ impl ObjectStore for IoTrackingStore { result } - async fn head(&self, location: &Path) -> OSResult { - self.record_read(0); - self.target.head(location).await - } - - async fn delete(&self, location: &Path) -> OSResult<()> { + fn delete_stream( + &self, + locations: BoxStream<'static, OSResult>, + ) -> BoxStream<'static, OSResult> { self.record_write(0); - self.target.delete(location).await - } - - fn delete_stream<'a>( - &'a self, - locations: BoxStream<'a, OSResult>, - ) -> BoxStream<'a, OSResult> { self.target.delete_stream(locations) } @@ -190,24 +151,14 @@ impl ObjectStore for IoTrackingStore { self.target.list_with_delimiter(prefix).await } - async fn copy(&self, from: &Path, to: &Path) -> OSResult<()> { + async fn copy_opts(&self, from: &Path, to: &Path, options: CopyOptions) -> OSResult<()> { self.record_write(0); - self.target.copy(from, to).await + self.target.copy_opts(from, to, options).await } - async fn rename(&self, from: &Path, to: &Path) -> OSResult<()> { + async fn rename_opts(&self, from: &Path, to: &Path, options: RenameOptions) -> OSResult<()> { self.record_write(0); - self.target.rename(from, to).await - } - - async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> OSResult<()> { - self.record_write(0); - self.target.rename_if_not_exists(from, to).await - } - - async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> OSResult<()> { - self.record_write(0); - self.target.copy_if_not_exists(from, to).await + self.target.rename_opts(from, to, options).await } } From 5338aeb0069a3abb380f60c6b602e806e51735ed Mon Sep 17 00:00:00 2001 From: Octopus Date: Thu, 7 May 2026 23:45:27 +0800 Subject: [PATCH 18/20] ci: avoid passing GPG passphrase on command line in Java publish workflow (#3313) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #3299 ## Problem Two security issues exist in `.github/workflows/java-publish.yml`: 1. 
**`gpg-passphrase` input is misused**: `actions/setup-java`'s `gpg-passphrase` input expects the **name** of an environment variable (default: `GPG_PASSPHRASE`), not the secret value itself. The previous value `${{ secrets.GPG_PASSPHRASE }}` was setting the env var name to the actual secret, which is incorrect. 2. **Passphrase visible on the command line**: `-Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }}` passes the GPG passphrase as a Maven system property argument, making it visible in process listings and potentially echoed in debug logs — a supply-chain security risk for release workflows. ## Solution - Fix `gpg-passphrase: MAVEN_GPG_PASSPHRASE` — use the correct env var name so `actions/setup-java` generates a proper Maven `settings.xml` entry that reads from `MAVEN_GPG_PASSPHRASE`. - Remove `-Dgpg.passphrase=...` from the Maven CLI invocation. - Add `MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}` to the `env:` block of the Publish step, so the passphrase is available as an environment variable rather than a CLI argument. ## Testing The Java publish workflow only runs on tag pushes, so this cannot be exercised in a PR build. The logic change is straightforward: `actions/setup-java` is documented to write a `settings.xml` that reads the `<passphrase>` value from the named env var, and `maven-gpg-plugin` picks it up from there without any CLI argument.
Co-authored-by: octo-patch --- .github/workflows/java-publish.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/java-publish.yml b/.github/workflows/java-publish.yml index 0f21d34fb..2d435eb8d 100644 --- a/.github/workflows/java-publish.yml +++ b/.github/workflows/java-publish.yml @@ -43,7 +43,7 @@ jobs: server-username: SONATYPE_USER server-password: SONATYPE_TOKEN gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }} - gpg-passphrase: ${{ secrets.GPG_PASSPHRASE }} + gpg-passphrase: MAVEN_GPG_PASSPHRASE - name: Set git config run: | git config --global user.email "dev+gha@lancedb.com" @@ -58,10 +58,11 @@ jobs: echo "use-agent" >> ~/.gnupg/gpg.conf echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf export GPG_TTY=$(tty) - ./mvnw --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -pl lancedb-core -am -P deploy-to-ossrh + ./mvnw --batch-mode -DskipTests -DpushChanges=false deploy -pl lancedb-core -am -P deploy-to-ossrh env: SONATYPE_USER: ${{ secrets.SONATYPE_USER }} SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }} + MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} report-failure: name: Report Workflow Failure From 455ba5abbf7c501342a177f647eeeea7823a64ee Mon Sep 17 00:00:00 2001 From: LanceDB Robot Date: Thu, 7 May 2026 16:04:38 -0700 Subject: [PATCH 19/20] chore: update lance dependency to v7.0.0-beta.7 (#3356) ## Summary - Update Lance Rust workspace dependencies to `7.0.0-beta.7` using `ci/set_lance_version.py`. - Update the Java `lance-core` Maven property to `7.0.0-beta.7`. 
- Refresh `Cargo.lock` for the new Lance tag: https://github.com/lance-format/lance/releases/tag/v7.0.0-beta.7 ## Verification - `cargo clippy --workspace --tests --all-features -- -D warnings` - `cargo fmt --all` --- Cargo.lock | 78 +++++++++++++++++++++++++++++----------------------- Cargo.toml | 28 +++++++++---------- java/pom.xml | 2 +- 3 files changed, 59 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7eb588570..930e1494a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,6 +146,15 @@ dependencies = [ "object", ] +[[package]] +name = "arc-swap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" +dependencies = [ + "rustversion", +] + [[package]] name = "argminmax" version = "0.6.3" @@ -3150,8 +3159,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-array", "rand 0.9.4", @@ -4327,8 +4336,8 @@ checksum = "e037a2e1d8d5fdbd49b16a4ea09d5d6401c1f29eca5ff29d03d3824dba16256a" [[package]] name = "lance" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "arrow-arith", @@ -4396,8 +4405,8 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = 
"git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-array", "arrow-buffer", @@ -4418,8 +4427,8 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrayref", "paste", @@ -4428,8 +4437,8 @@ dependencies = [ [[package]] name = "lance-core" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-array", "arrow-buffer", @@ -4466,8 +4475,8 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "arrow-array", @@ -4498,8 +4507,8 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "arrow-array", @@ -4517,8 +4526,8 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = 
"7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-arith", "arrow-array", @@ -4555,8 +4564,8 @@ dependencies = [ [[package]] name = "lance-file" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-arith", "arrow-array", @@ -4588,9 +4597,10 @@ dependencies = [ [[package]] name = "lance-index" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ + "arc-swap", "arrow", "arrow-arith", "arrow-array", @@ -4653,8 +4663,8 @@ dependencies = [ [[package]] name = "lance-io" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "arrow-arith", @@ -4698,8 +4708,8 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-array", "arrow-buffer", @@ -4715,8 +4725,8 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "7.0.0-beta.4" -source = 
"git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "async-trait", @@ -4729,8 +4739,8 @@ dependencies = [ [[package]] name = "lance-namespace-impls" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "arrow-ipc", @@ -4780,8 +4790,8 @@ dependencies = [ [[package]] name = "lance-table" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "arrow-array", @@ -4820,8 +4830,8 @@ dependencies = [ [[package]] name = "lance-testing" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-array", "arrow-schema", @@ -4832,8 +4842,8 @@ dependencies = [ [[package]] name = "lance-tokenizer" -version = "7.0.0-beta.4" -source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.4#f29339b540867bb3e75b2729ee87f3bcaf0e9f96" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "rust-stemmers", "serde", diff --git a/Cargo.toml b/Cargo.toml index 6922ba487..a7b038307 100644 --- a/Cargo.toml 
+++ b/Cargo.toml @@ -13,20 +13,20 @@ categories = ["database-implementations"] rust-version = "1.91.0" [workspace.dependencies] -lance = { "version" = "=7.0.0-beta.4", default-features = false, "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-core = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-datagen = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-file = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-io = { "version" = "=7.0.0-beta.4", default-features = false, "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-index = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-linalg = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-namespace = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-namespace-impls = { "version" = "=7.0.0-beta.4", default-features = false, "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-table = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-testing = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-datafusion = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-encoding = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } -lance-arrow = { "version" = "=7.0.0-beta.4", "tag" = "v7.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" } +lance = { "version" = 
"=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-core = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-datagen = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-file = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-io = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-index = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-linalg = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-namespace = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-namespace-impls = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-table = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-testing = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-datafusion = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-encoding = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-arrow = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } ahash = "0.8" # Note that this one does not include pyarrow arrow = { version = "58.0.0", optional = false } diff --git a/java/pom.xml b/java/pom.xml index b3e7f99c2..00f1df078 
100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -28,7 +28,7 @@ UTF-8 15.0.0 - 7.0.0-beta.4 + 7.0.0-beta.7 false 2.30.0 1.7 From 694aa48e198f3e90fad378e221ad7771ec1c7371 Mon Sep 17 00:00:00 2001 From: Heng Ge Date: Thu, 7 May 2026 23:29:29 -0700 Subject: [PATCH 20/20] fix(database): drop spurious trailing `?` from listing-database URIs (#3357) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary `url::Url::query_pairs_mut()` leaves the URL with `query=Some("")` after `.clear()` even when the input had no query string. The listing-database connect path then captured that empty query into `ListingDatabase::query_string`, and `table_uri()` blindly appended `?` to every per-table URI — producing URIs like `s3://bucket/prefix/foo.lance?`. The trailing `?` is benign for normal table operations, but it breaks any caller that constructs a sub-path from the table URI. In particular, MemWAL flushes write to `<table_uri>/_mem_wal/<id>/_gen_<n>`, which `url::Url::parse` then re-parses as `path=<table path>` + `query=/_mem_wal/...`. `Dataset::write` resolves the base table dataset, finds it already exists, and fails with `Dataset already exists: …_gen_1` on the very first MemTable flush (observed deterministically against S3 across all merge_insert LSM modes; tracked in [lance-format/lance#6713](https://github.com/lance-format/lance/pull/6715)). ## Fix Treat `Some("")` query the same as no query when capturing `query_string`. A real `?foo=bar` query is still propagated unchanged. Adds a regression test covering both the empty-query and non-empty-query paths. ## Verification - `url::Url::parse("s3://bucket/prefix/").query()` → `None`, but after `query_pairs_mut().clear()` → `Some("")`. Confirmed in a standalone repro. - Without this fix, every `table_uri()` for an `s3://`-style connection ends with `?`, breaking MemWAL and any future sub-path consumer in the same way.
- New unit test `test_table_uri_url_path_has_no_trailing_question_mark` exercises both code paths. --- rust/lancedb/src/database/listing.rs | 138 ++++++++++++++++++++++++++- 1 file changed, 136 insertions(+), 2 deletions(-) diff --git a/rust/lancedb/src/database/listing.rs b/rust/lancedb/src/database/listing.rs index d1831f52d..7b7657bf3 100644 --- a/rust/lancedb/src/database/listing.rs +++ b/rust/lancedb/src/database/listing.rs @@ -505,8 +505,15 @@ impl ListingDatabase { // Filter out the commit store query param -- it's a lancedb param url.query_pairs_mut().clear(); url.query_pairs_mut().extend_pairs(filtered_querys); - // Take a copy of the query string so we can propagate it to lance - let query_string = url.query().map(|s| s.to_string()); + // Take a copy of the query string so we can propagate it to lance. + // `query_pairs_mut()` leaves the URL with `Some("")` even when no + // pairs survive (or none existed in the first place), so an empty + // string here must be treated the same as "no query" — otherwise + // every table URI ends up with a trailing `?`, which makes downstream + // sub-paths (e.g. MemWAL gen paths) re-parse as path= + + // query=, causing Lance to find the base table dataset + // when looking up the sub-path. + let query_string = url.query().filter(|q| !q.is_empty()).map(|s| s.to_string()); // clear the query string so we can use the url as the base uri // use .set_query(None) instead of .set_query("") because the latter // will add a trailing '?' to the url @@ -2213,6 +2220,133 @@ mod tests { assert_eq!(uri, expected); } + /// Regression: connecting via a URL-style URI (which goes through + /// `url::Url::parse` and the `query_pairs_mut()` path) must not + /// append a trailing `?` to per-table URIs when the input URI has + /// no query string. + /// + /// Earlier, `query_pairs_mut().clear()` left the URL with + /// `query=Some("")`, which then propagated as a trailing `?` on + /// every table URI. Sub-path lookups against that URI (e.g. 
MemWAL + /// `/_mem_wal//_gen_`) re-parsed as + /// `path=` + `query=/_mem_wal/...`, causing + /// `Dataset::write` to find the base table dataset and falsely + /// report `Dataset already exists`. + /// Mirrors the URL-mutation step from + /// [`ListingDatabase::connect_with_options`] so we can assert the + /// fix without going through filesystem setup (which is awkward + /// across platforms — see the `file://` test below). + fn capture_query_like_connect(input_uri: &str) -> Option { + let mut url = url::Url::parse(input_uri).unwrap(); + let mut filtered_querys = Vec::new(); + for (key, value) in url.query_pairs() { + if key == ENGINE || key == MIRRORED_STORE { + continue; + } + filtered_querys.push((key.to_string(), value.to_string())); + } + url.query_pairs_mut().clear(); + url.query_pairs_mut().extend_pairs(filtered_querys); + url.query().filter(|q| !q.is_empty()).map(|s| s.to_string()) + } + + #[test] + fn test_capture_query_treats_empty_as_none() { + // No query at all. With the bug, `query_pairs_mut()` left the + // URL with `query=Some("")` and we used to propagate that. + assert_eq!( + capture_query_like_connect("s3://bucket/prefix/"), + None, + "empty query after mutation must be treated as no query" + ); + + // Real query is propagated. + assert_eq!( + capture_query_like_connect("s3://bucket/prefix/?foo=bar"), + Some("foo=bar".to_string()) + ); + + // lancedb-internal `engine=` is stripped; nothing remains, so + // query_string is None — not Some(""). + assert_eq!( + capture_query_like_connect(&format!("s3://bucket/prefix/?{}=mem", ENGINE)), + None + ); + + // Mixed: drop `engine=`, keep the rest. 
+ let captured = + capture_query_like_connect(&format!("s3://bucket/prefix/?{}=mem&foo=bar", ENGINE)); + assert_eq!(captured.as_deref(), Some("foo=bar")); + } + + /// Regression: connecting via a URL-style URI (which goes through + /// `url::Url::parse` and the `query_pairs_mut()` path) must not + /// append a trailing `?` to per-table URIs when the input URI has + /// no query string. Sub-path lookups against such a URI (e.g. + /// MemWAL `/_mem_wal//_gen_`) re-parse + /// as `path=` + `query=/_mem_wal/...`, causing + /// `Dataset::write` to find the base table dataset and falsely + /// report `Dataset already exists`. + /// + /// Skipped on Windows: `try_create_dir` does not understand + /// `file:///C:/…` paths so `connect_with_options` fails before + /// even reaching the URL-mutation logic. The pure URL-mutation + /// invariant is covered by + /// `test_capture_query_treats_empty_as_none` above, which runs + /// on all platforms. + #[cfg(not(windows))] + #[tokio::test] + async fn test_table_uri_url_path_has_no_trailing_question_mark() { + let tempdir = tempdir().unwrap(); + let uri = format!("file://{}", tempdir.path().to_str().unwrap()); + + let request = ConnectRequest { + uri: uri.clone(), + #[cfg(feature = "remote")] + client_config: Default::default(), + options: Default::default(), + namespace_client_properties: Default::default(), + manifest_enabled: false, + read_consistency_interval: None, + session: None, + }; + let db = ListingDatabase::connect_with_options(&request) + .await + .unwrap(); + + assert_eq!( + db.query_string, None, + "no input query → no captured query_string" + ); + + let table_uri = db.table_uri("test").unwrap(); + assert!( + !table_uri.ends_with('?'), + "table_uri must not have a trailing `?`: {}", + table_uri + ); + assert_eq!(table_uri, format!("{}/test.lance", uri)); + + // A real query string should still be propagated. 
+ let with_query = format!("{}?foo=bar", uri); + let request_with_query = ConnectRequest { + uri: with_query, + #[cfg(feature = "remote")] + client_config: Default::default(), + options: Default::default(), + namespace_client_properties: Default::default(), + manifest_enabled: false, + read_consistency_interval: None, + session: None, + }; + let db_with_query = ListingDatabase::connect_with_options(&request_with_query) + .await + .unwrap(); + assert_eq!(db_with_query.query_string.as_deref(), Some("foo=bar")); + let table_uri = db_with_query.table_uri("test").unwrap(); + assert_eq!(table_uri, format!("{}/test.lance?foo=bar", uri)); + } + #[tokio::test] async fn test_namespace_client() { let (_tempdir, db) = setup_database().await;