diff --git a/.bumpversion.toml b/.bumpversion.toml index f07c90c64..1877b33b2 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.28.0-beta.9" +current_version = "0.28.0-beta.11" parse = """(?x) (?P0|[1-9]\\d*)\\. (?P0|[1-9]\\d*)\\. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..4107990ea --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,18 @@ +version: 2 + +# Scope: the root Cargo workspace, which produces the Rust binaries we +# ship to users (the Node.js and Python native extensions). The +# `rust/lancedb` library crate shares the same lockfile; its consumers +# pick their own dependency versions, but bumping transitive deps here +# keeps the binaries we ship current. +updates: + - package-ecosystem: cargo + directory: / + schedule: + interval: weekly + open-pull-requests-limit: 10 + groups: + rust-minor-patch: + update-types: + - minor + - patch diff --git a/.github/workflows/java-publish.yml b/.github/workflows/java-publish.yml index 0f21d34fb..2d435eb8d 100644 --- a/.github/workflows/java-publish.yml +++ b/.github/workflows/java-publish.yml @@ -43,7 +43,7 @@ jobs: server-username: SONATYPE_USER server-password: SONATYPE_TOKEN gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }} - gpg-passphrase: ${{ secrets.GPG_PASSPHRASE }} + gpg-passphrase: MAVEN_GPG_PASSPHRASE - name: Set git config run: | git config --global user.email "dev+gha@lancedb.com" @@ -58,10 +58,11 @@ jobs: echo "use-agent" >> ~/.gnupg/gpg.conf echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf export GPG_TTY=$(tty) - ./mvnw --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -pl lancedb-core -am -P deploy-to-ossrh + ./mvnw --batch-mode -DskipTests -DpushChanges=false deploy -pl lancedb-core -am -P deploy-to-ossrh env: SONATYPE_USER: ${{ secrets.SONATYPE_USER }} SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }} + MAVEN_GPG_PASSPHRASE: ${{ 
secrets.GPG_PASSPHRASE }} report-failure: name: Report Workflow Failure diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 976dec77f..ca6e3219b 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -21,6 +21,9 @@ jobs: linux: name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }} timeout-minutes: 60 + permissions: + id-token: write + contents: read strategy: matrix: config: @@ -60,10 +63,12 @@ jobs: - uses: ./.github/workflows/upload_wheel if: startsWith(github.ref, 'refs/tags/python-v') with: - pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }} fury_token: ${{ secrets.FURY_TOKEN }} mac: timeout-minutes: 90 + permissions: + id-token: write + contents: read runs-on: ${{ matrix.config.runner }} strategy: matrix: @@ -88,10 +93,12 @@ jobs: - uses: ./.github/workflows/upload_wheel if: startsWith(github.ref, 'refs/tags/python-v') with: - pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }} fury_token: ${{ secrets.FURY_TOKEN }} windows: timeout-minutes: 60 + permissions: + id-token: write + contents: read runs-on: windows-latest steps: - uses: actions/checkout@v4 @@ -110,7 +117,6 @@ jobs: - uses: ./.github/workflows/upload_wheel if: startsWith(github.ref, 'refs/tags/python-v') with: - pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }} fury_token: ${{ secrets.FURY_TOKEN }} gh-release: if: startsWith(github.ref, 'refs/tags/python-v') diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 61d52754c..937124f5a 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -9,7 +9,10 @@ on: - Cargo.toml - Cargo.lock - rust-toolchain.toml + - deny.toml - rust/** + - nodejs/Cargo.toml + - python/Cargo.toml - .github/workflows/rust.yml permissions: @@ -56,6 +59,17 @@ jobs: - name: Run clippy (without remote feature) run: cargo clippy --profile ci --workspace --tests -- -D warnings + deny: + # Supply-chain checks: advisories, licenses, banned 
crates, and source + # restrictions. Configuration lives in `deny.toml` at the workspace root. + timeout-minutes: 10 + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + - uses: EmbarkStudios/cargo-deny-action@v2 + with: + command: check advisories bans licenses sources + build-no-lock: runs-on: ubuntu-24.04 timeout-minutes: 30 diff --git a/.github/workflows/upload_wheel/action.yml b/.github/workflows/upload_wheel/action.yml index 03725d03f..8bcdb7a88 100644 --- a/.github/workflows/upload_wheel/action.yml +++ b/.github/workflows/upload_wheel/action.yml @@ -2,9 +2,6 @@ name: upload-wheel description: "Upload wheels to Pypi" inputs: - pypi_token: - required: true - description: "release token for the repo" fury_token: required: true description: "release token for the fury repo" @@ -12,12 +9,6 @@ inputs: runs: using: "composite" steps: - - name: Install dependencies - shell: bash - run: | - python -m pip install --upgrade pip - pip install twine - python3 -m pip install --upgrade pkginfo - name: Choose repo shell: bash id: choose_repo @@ -27,19 +18,17 @@ runs: else echo "repo=pypi" >> $GITHUB_OUTPUT fi - - name: Publish to PyPI + - name: Publish to Fury + if: steps.choose_repo.outputs.repo == 'fury' shell: bash env: FURY_TOKEN: ${{ inputs.fury_token }} - PYPI_TOKEN: ${{ inputs.pypi_token }} run: | - if [[ ${{ steps.choose_repo.outputs.repo }} == fury ]]; then - WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1) - echo "Uploading $WHEEL to Fury" - curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/ - else - twine upload --repository ${{ steps.choose_repo.outputs.repo }} \ - --username __token__ \ - --password $PYPI_TOKEN \ - target/wheels/lancedb-*.whl - fi + WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1) + echo "Uploading $WHEEL to Fury" + curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/ + - name: Publish to PyPI + if: steps.choose_repo.outputs.repo == 'pypi' + uses: 
pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: target/wheels/ diff --git a/Cargo.lock b/Cargo.lock index ee87b9f25..d78a5057f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,9 +14,9 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "cipher", - "cpufeatures", + "cpufeatures 0.2.17", ] [[package]] @@ -25,7 +25,7 @@ version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "const-random", "getrandom 0.3.4", "once_cell", @@ -108,7 +108,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -119,7 +119,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -128,6 +128,15 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "ar_archive_writer" version = "0.5.1" @@ -137,6 +146,15 @@ dependencies = [ "object", ] +[[package]] +name = "arc-swap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" +dependencies = [ + "rustversion", +] + [[package]] name = 
"argminmax" version = "0.6.3" @@ -166,9 +184,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" +checksum = "d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" dependencies = [ "arrow-arith", "arrow-array", @@ -188,9 +206,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" +checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" dependencies = [ "arrow-array", "arrow-buffer", @@ -202,9 +220,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" +checksum = "772bd34cacdda8baec9418d80d23d0fb4d50ef0735685bd45158b83dfeb6e62d" dependencies = [ "ahash", "arrow-buffer", @@ -221,9 +239,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" +checksum = "898f4cf1e9598fdb77f356fdf2134feedfd0ee8d5a4e0a5f573e7d0aec16baa4" dependencies = [ "bytes", "half", @@ -233,9 +251,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" +checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" dependencies = [ "arrow-array", "arrow-buffer", @@ -255,9 +273,9 @@ 
dependencies = [ [[package]] name = "arrow-csv" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" +checksum = "ca025bd0f38eeecb57c2153c0123b960494138e6a957bbda10da2b25415209fe" dependencies = [ "arrow-array", "arrow-cast", @@ -270,9 +288,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" +checksum = "42d10beeab2b1c3bb0b53a00f7c944a178b622173a5c7bcabc3cb45d90238df4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -283,9 +301,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" +checksum = "609a441080e338147a84e8e6904b6da482cefb957c5cdc0f3398872f69a315d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -299,9 +317,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" +checksum = "6ead0914e4861a531be48fe05858265cf854a4880b9ed12618b1d08cba9bebc8" dependencies = [ "arrow-array", "arrow-buffer", @@ -323,9 +341,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" +checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" dependencies = [ "arrow-array", "arrow-buffer", @@ -336,9 +354,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "57.3.0" +version = "58.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d18c442b4c266aaf3d7f7dd40fd7ae058cef7f113b00ff0cd8256e1e218ec544" +checksum = "e63351dc11981a316c828a6032a5021345bba882f68bc4a36c36825a50725089" dependencies = [ "arrow-array", "arrow-data", @@ -348,9 +366,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" +checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" dependencies = [ "arrow-array", "arrow-buffer", @@ -361,9 +379,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" +checksum = "c30a1365d7a7dc50cc847e54154e6af49e4c4b0fddc9f607b687f29212082743" dependencies = [ "bitflags 2.11.0", "serde_core", @@ -372,9 +390,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" +checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" dependencies = [ "ahash", "arrow-array", @@ -386,9 +404,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" +checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" dependencies = [ "arrow-array", "arrow-buffer", @@ -458,7 +476,7 @@ dependencies = [ "derive_builder", "futures", "rand 0.8.5", - "reqwest", + "reqwest 0.12.28", "reqwest-eventsource", "secrecy", "serde", @@ -572,9 +590,9 @@ dependencies = [ [[package]] name = 
"aws-lc-rs" -version = "1.16.1" +version = "1.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94bffc006df10ac2a68c83692d734a465f8ee6c5b384d8545a636f81d858f4bf" +checksum = "0ec6fb3fe69024a75fa7e1bfb48aa6cf59706a101658ea01bfd33b2b248a038f" dependencies = [ "aws-lc-sys", "untrusted 0.7.1", @@ -583,9 +601,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.38.0" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4321e568ed89bb5a7d291a7f37997c2c0df89809d7b6d12062c81ddb54aa782e" +checksum = "f50037ee5e1e41e7b8f9d161680a725bd1626cb6f8c7e901f91f942850852fe7" dependencies = [ "cc", "cmake", @@ -1272,9 +1290,9 @@ dependencies = [ "arrayref", "arrayvec", "cc", - "cfg-if", + "cfg-if 1.0.4", "constant_time_eq", - "cpufeatures", + "cpufeatures 0.2.17", ] [[package]] @@ -1337,6 +1355,17 @@ dependencies = [ "alloc-stdlib", ] +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -1399,7 +1428,7 @@ dependencies = [ "memmap2 0.9.10", "num-traits", "num_cpus", - "rand 0.9.2", + "rand 0.9.4", "rand_distr 0.5.1", "rayon", "safetensors", @@ -1435,7 +1464,7 @@ dependencies = [ "candle-nn", "fancy-regex", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "rayon", "serde", "serde_json", @@ -1473,6 +1502,12 @@ dependencies = [ "smallvec", ] +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + [[package]] name = "cfg-if" version = "1.0.4" @@ -1485,6 +1520,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if 1.0.4", + "cpufeatures 0.3.0", + "rand_core 0.10.1", +] + [[package]] name = "chrono" version = "0.4.44" @@ -1541,6 +1587,46 @@ dependencies = [ "inout", ] +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + [[package]] name = "cmake" version = "0.1.57" @@ -1556,6 +1642,25 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "colored" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "combine" +version = "4.6.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + [[package]] name = "comfy-table" version = "7.2.2" @@ -1632,6 +1737,21 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const-str" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18f12cc9948ed9604230cdddc7c86e270f9401ccbe3c2e98a4378c5e7632212f" + +[[package]] +name = "const_panic" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e262cdaac42494e3ae34c43969f9cdeb7da178bdb4b66fa6a1ea2edb4c8ae652" +dependencies = [ + "typewit", +] + [[package]] name = "constant_time_eq" version = "0.4.2" @@ -1673,6 +1793,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "countio" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9702aee5d1d744c01d82f6915644f950f898e014903385464c773b96fefdecb" +dependencies = [ + "futures-io", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -1682,6 +1811,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crc" version = "3.3.0" @@ -1724,7 +1862,7 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -1953,7 +2091,7 @@ version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "crossbeam-utils", "hashbrown 0.14.5", "lock_api", @@ -1963,9 +2101,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c18ba387f9c05ac1f3be32a73f8f3cc6c1cfc43e5d4b7a8e5b0d3a5eb48dc7" +checksum = "93db0e623840612f7f2cd757f7e8a8922064192363732c88692e0870016e141b" dependencies = [ "arrow", "arrow-schema", @@ -2001,9 +2139,9 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand 0.9.2", + "rand 0.9.4", "regex", - "sqlparser 0.59.0", + "sqlparser 0.61.0", "tempfile", "tokio", "url", @@ -2012,9 +2150,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c75a4ce672b27fb8423810efb92a3600027717a1664d06a2c307eeeabcec694" +checksum = "37cefde60b26a7f4ff61e9d2ff2833322f91df2b568d7238afe67bde5bdffb66" dependencies = [ "arrow", "async-trait", @@ -2037,9 +2175,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c8b9a3795ffb46bf4957a34c67d89a67558b311ae455c8d4295ff2115eeea50" +checksum = "17e112307715d6a7a331111a4c2330ff54bc237183511c319e3708a4cff431fb" dependencies = [ "arrow", "async-trait", @@ -2060,9 +2198,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "205dc1e20441973f470e6b7ef87626a3b9187970e5106058fef1b713047f770c" +checksum = "d72a11ca44a95e1081870d3abb80c717496e8a7acb467a1d3e932bb636af5cc2" dependencies = [ "ahash", "arrow", @@ -2071,20 +2209,21 @@ dependencies = [ "half", "hashbrown 0.16.1", "indexmap 2.13.0", + "itertools 0.14.0", "libc", 
"log", "object_store", "paste", - "sqlparser 0.59.0", + "sqlparser 0.61.0", "tokio", "web-time", ] [[package]] name = "datafusion-common-runtime" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cf5880c02ff6f5f11fb5bc19211789fb32fd3c53d79b7d6cb2b12e401312ba0" +checksum = "89f4afaed29670ec4fd6053643adc749fe3f4bc9d1ce1b8c5679b22c67d12def" dependencies = [ "futures", "log", @@ -2093,9 +2232,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc614d6e709450e29b7b032a42c1bdb705f166a6b2edef7bed7c7897eb905499" +checksum = "e9fb386e1691355355a96419978a0022b7947b44d4a24a6ea99f00b6b485cbb6" dependencies = [ "arrow", "async-trait", @@ -2115,16 +2254,16 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "rand 0.9.2", + "rand 0.9.4", "tokio", "url", ] [[package]] name = "datafusion-datasource-arrow" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e497d5fc48dac7ce86f6b4fb09a3a494385774af301ff20ec91aebfae9b05b4" +checksum = "ffa6c52cfed0734c5f93754d1c0175f558175248bf686c944fb05c373e5fc096" dependencies = [ "arrow", "arrow-ipc", @@ -2146,9 +2285,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dfc250cad940d0327ca2e9109dc98830892d17a3d6b2ca11d68570e872cf379" +checksum = "503f29e0582c1fc189578d665ff57d9300da1f80c282777d7eb67bb79fb8cdca" dependencies = [ "arrow", "async-trait", @@ -2169,9 +2308,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91e9677ed62833b0e8129dec0d1a8f3c9bb7590bd6dd714a43e4c3b663e4aa0" +checksum = 
"e33804749abc8d0c8cb7473228483cb8070e524c6f6086ee1b85a64debe2b3d2" dependencies = [ "arrow", "async-trait", @@ -2186,41 +2325,45 @@ dependencies = [ "datafusion-session", "futures", "object_store", + "serde_json", "tokio", + "tokio-stream", ] [[package]] name = "datafusion-doc" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e13e5fe3447baa0584b61ee8644086e007e1ef6e58f4be48bc8a72417854729" +checksum = "8de6ac0df1662b9148ad3c987978b32cbec7c772f199b1d53520c8fa764a87ee" [[package]] name = "datafusion-execution" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48a6cc03e34899a54546b229235f7b192634c8e832f78a267f0989b18216c56d" +checksum = "c03c7fbdaefcca4ef6ffe425a5fc2325763bfb426599bb0bf4536466efabe709" dependencies = [ "arrow", + "arrow-buffer", "async-trait", "chrono", "dashmap", "datafusion-common", "datafusion-expr", + "datafusion-physical-expr-common", "futures", "log", "object_store", "parking_lot", - "rand 0.9.2", + "rand 0.9.4", "tempfile", "url", ] [[package]] name = "datafusion-expr" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee3315d87eca7a7df58e52a1fb43b4c4171b545fd30ffc3102945c162a9f6ddb" +checksum = "574b9b6977fedbd2a611cbff12e5caf90f31640ad9dc5870f152836d94bad0dd" dependencies = [ "arrow", "async-trait", @@ -2235,14 +2378,14 @@ dependencies = [ "itertools 0.14.0", "paste", "serde_json", - "sqlparser 0.59.0", + "sqlparser 0.61.0", ] [[package]] name = "datafusion-expr-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c6d83feae0753799f933a2c47dfd15980c6947960cb95ed60f5c1f885548b3" +checksum = "7d7c3adf3db8bf61e92eb90cb659c8e8b734593a8f7c8e12a843c7ddba24b87e" dependencies = [ "arrow", "datafusion-common", @@ -2253,9 +2396,9 @@ dependencies = [ [[package]] name = 
"datafusion-functions" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b82962015cc3db4d7662459c9f7fcda0591b5edacb8af1cf3bc3031f274800" +checksum = "f28aa4e10384e782774b10e72aca4d93ef7b31aa653095d9d4536b0a3dbc51b6" dependencies = [ "arrow", "arrow-buffer", @@ -2274,8 +2417,9 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "memchr", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "regex", "sha2", "unicode-segmentation", @@ -2284,9 +2428,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e42c227d9e55a6c8041785d4a8a117e4de531033d480aae10984247ac62e27e" +checksum = "00aa6217e56098ba84e0a338176fe52f0a84cca398021512c6c8c5eff806d0ad" dependencies = [ "ahash", "arrow", @@ -2300,14 +2444,15 @@ dependencies = [ "datafusion-physical-expr-common", "half", "log", + "num-traits", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cead3cfed825b0b688700f4338d281cd7857e4907775a5b9554c083edd5f3f95" +checksum = "b511250349407db7c43832ab2de63f5557b19a20dfd236b39ca2c04468b50d47" dependencies = [ "ahash", "arrow", @@ -2318,9 +2463,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62ea99612970aebab8cf864d02eb3d296bbab7f4881e1023d282b57fe431b201" +checksum = "ef13a858e20d50f0a9bb5e96e7ac82b4e7597f247515bccca4fdd2992df0212a" dependencies = [ "arrow", "arrow-ord", @@ -2334,16 +2479,18 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", + "hashbrown 0.16.1", "itertools 0.14.0", + "itoa", "log", "paste", ] [[package]] name = 
"datafusion-functions-table" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d83dbf3ab8b9af6f209b068825a7adbd3b88bf276f2a1ec14ba09567b97f5674" +checksum = "72b40d3f5bbb3905f9ccb1ce9485a9595c77b69758a7c24d3ba79e334ff51e7e" dependencies = [ "arrow", "async-trait", @@ -2357,9 +2504,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732edabe07496e2fc5a1e57a284d7a36edcea445a2821119770a0dea624b472c" +checksum = "d4e88ec9d57c9b685d02f58bfee7be62d72610430ddcedb82a08e5d9925dbfb6" dependencies = [ "arrow", "datafusion-common", @@ -2375,9 +2522,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c6e30e09700799bd52adce8c377ab03dda96e73a623e4803a31ad94fe7ce14" +checksum = "8307bb93519b1a91913723a1130cfafeee3f72200d870d88e91a6fc5470ede5c" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2385,9 +2532,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402f2a8ed70fb99a18f71580a1fe338604222a3d32ddeac6e72c5b34feea2d4d" +checksum = "2e367e6a71051d0ebdd29b2f85d12059b38b1d1f172c6906e80016da662226bd" dependencies = [ "datafusion-doc", "quote", @@ -2396,9 +2543,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99f32edb8ba12f08138f86c09b80fae3d4a320551262fa06b91d8a8cb3065a5b" +checksum = "e929015451a67f77d9d8b727b2bf3a40c4445fdef6cdc53281d7d97c76888ace" dependencies = [ "arrow", "chrono", @@ -2415,9 +2562,9 @@ dependencies = [ [[package]] name = 
"datafusion-physical-expr" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "987c5e29e96186589301b42e25aa7d11bbe319a73eb02ef8d755edc55b5b89fc" +checksum = "4b1e68aba7a4b350401cfdf25a3d6f989ad898a7410164afe9ca52080244cb59" dependencies = [ "ahash", "arrow", @@ -2438,9 +2585,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1de89d0afa08b6686697bd8a6bac4ba2cd44c7003356e1bce6114d5a93f94b5c" +checksum = "ea22315f33cf2e0adc104e8ec42e285f6ed93998d565c65e82fec6a9ee9f9db4" dependencies = [ "arrow", "datafusion-common", @@ -2453,9 +2600,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602d1970c0fe87f1c3a36665d131fbfe1c4379d35f8fc5ec43a362229ad2954d" +checksum = "b04b45ea8ad3ac2d78f2ea2a76053e06591c9629c7a603eda16c10649ecf4362" dependencies = [ "ahash", "arrow", @@ -2470,9 +2617,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b24d704b6385ebe27c756a12e5ba15684576d3b47aeca79cc9fb09480236dc32" +checksum = "7cb13397809a425918f608dfe8653f332015a3e330004ab191b4404187238b95" dependencies = [ "arrow", "datafusion-common", @@ -2488,9 +2635,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c21d94141ea5043e98793f170798e9c1887095813b8291c5260599341e383a38" +checksum = "5edc023675791af9d5fb4cc4c24abf5f7bd3bd4dcf9e5bd90ea1eff6976dcc79" dependencies = [ "ahash", "arrow", @@ -2512,6 +2659,7 @@ dependencies = [ "indexmap 2.13.0", "itertools 0.14.0", "log", + 
"num-traits", "parking_lot", "pin-project-lite", "tokio", @@ -2519,9 +2667,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a68cce43d18c0dfac95cacd74e70565f7e2fb12b9ed41e2d312f0fa837626b1" +checksum = "ac8c76860e355616555081cab5968cec1af7a80701ff374510860bcd567e365a" dependencies = [ "arrow", "datafusion-common", @@ -2536,9 +2684,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4e1c40a0b1896aed4a4504145c2eb7fa9b9da13c2d04b40a4767a09f076199" +checksum = "5412111aa48e2424ba926112e192f7a6b7e4ccb450145d25ce5ede9f19dc491e" dependencies = [ "async-trait", "datafusion-common", @@ -2550,19 +2698,20 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f1891e5b106d1d73c7fe403bd8a265d19c3977edc17f60808daf26c2fe65ffb" +checksum = "fa0d133ddf8b9b3b872acac900157f783e7b879fe9a6bccf389abebbfac45ec1" dependencies = [ "arrow", "bigdecimal", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-functions-nested", "indexmap 2.13.0", "log", "regex", - "sqlparser 0.59.0", + "sqlparser 0.61.0", ] [[package]] @@ -2866,7 +3015,7 @@ version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -2938,7 +3087,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -3084,7 +3233,7 @@ checksum = "719a903cc23e4a89e87962c2a80fdb45cdaad0983a89bd150bb57b4c8571a7d5" 
dependencies = [ "half", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "rand_distr 0.5.1", ] @@ -3129,11 +3278,11 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-array", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -3245,6 +3394,15 @@ dependencies = [ "slab", ] +[[package]] +name = "gearhash" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8cf82cf76cd16485e56295a1377c775ce708c9f1a0be6b029076d60a245d213" +dependencies = [ + "cfg-if 0.1.10", +] + [[package]] name = "gemm" version = "0.19.0" @@ -3371,7 +3529,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" dependencies = [ "cc", - "cfg-if", + "cfg-if 1.0.4", "libc", "log", "rustversion", @@ -3395,10 +3553,10 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "js-sys", "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -3408,7 +3566,7 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "js-sys", "libc", "r-efi 5.3.0", @@ -3422,11 +3580,34 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", + "js-sys", "libc", 
"r-efi 6.0.0", + "rand_core 0.10.1", "wasip2", "wasip3", + "wasm-bindgen", +] + +[[package]] +name = "git-version" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad568aa3db0fcbc81f2f116137f263d7304f512a1209b35b85150d3ef88ad19" +dependencies = [ + "git-version-macro", +] + +[[package]] +name = "git-version-macro" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53010ccb100b96a67bc32c0175f0ed1426b31b655d562898e57325f81c023ac0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] @@ -3503,10 +3684,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "bytemuck", - "cfg-if", + "cfg-if 1.0.4", "crunchy", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "rand_distr 0.5.1", "zerocopy", ] @@ -3552,6 +3733,12 @@ dependencies = [ "serde_core", ] +[[package]] +name = "heapify" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0049b265b7f201ca9ab25475b22b47fe444060126a51abe00f77d986fc5cc52e" + [[package]] name = "heck" version = "0.4.1" @@ -3589,8 +3776,8 @@ dependencies = [ "libc", "log", "num_cpus", - "rand 0.9.2", - "reqwest", + "rand 0.9.4", + "reqwest 0.12.28", "serde", "serde_json", "thiserror 2.0.18", @@ -3599,6 +3786,28 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "hf-xet" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "430b33fa84f92796d4d263070b6c0d3ca219df7b9a0e1853ee431029b1612bcd" +dependencies = [ + "async-trait", + "bytes", + "http 1.4.0", + "more-asserts", + "serde", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tracing", + "uuid", + "xet-client", + "xet-core-structures", + "xet-data", + "xet-runtime", +] + [[package]] name = "hmac" version = "0.12.1" @@ -3787,7 +3996,7 @@ dependencies = [ "libc", 
"percent-encoding", "pin-project-lite", - "socket2 0.6.3", + "socket2 0.5.10", "system-configuration", "tokio", "tower-service", @@ -3978,15 +4187,6 @@ dependencies = [ "web-time", ] -[[package]] -name = "indoc" -version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" -dependencies = [ - "rustversion", -] - [[package]] name = "inout" version = "0.1.4" @@ -4003,7 +4203,7 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -4013,7 +4213,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdd7bddefd0a8833b88a4b68f90dae22c7450d11b354198baee3874fd811b344" dependencies = [ "bitflags 2.11.0", - "cfg-if", + "cfg-if 1.0.4", "libc", ] @@ -4117,11 +4317,13 @@ checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "jiff-tzdb-platform", + "js-sys", "log", "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.59.0", + "wasm-bindgen", + "windows-sys 0.52.0", ] [[package]] @@ -4150,6 +4352,55 @@ dependencies = [ "jiff-tzdb", ] +[[package]] +name = "jni" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efd9a482cf3a427f00d6b35f14332adc7902ce91efb778580e180ff90fa3498" +dependencies = [ + "cfg-if 1.0.4", + "combine", + "jni-macros", + "jni-sys", + "log", + "simd_cesu8", + "thiserror 2.0.18", + "walkdir", + "windows-link", +] + +[[package]] +name = "jni-macros" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00109accc170f0bdb141fed3e393c565b6f5e072365c3bd58f5b062591560a3" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "simd_cesu8", + "syn 2.0.117", +] + +[[package]] +name = "jni-sys" 
+version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = "jni-sys-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn 2.0.117", +] + [[package]] name = "jobserver" version = "0.1.34" @@ -4184,7 +4435,7 @@ dependencies = [ "nom 8.0.0", "num-traits", "ordered-float", - "rand 0.9.2", + "rand 0.9.4", "ryu", "serde", "serde_json", @@ -4192,16 +4443,18 @@ dependencies = [ [[package]] name = "jsonwebtoken" -version = "9.3.1" +version = "10.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" +checksum = "0529410abe238729a60b108898784df8984c87f6054c9c4fcacc47e4803c1ce1" dependencies = [ + "aws-lc-rs", "base64 0.22.1", + "getrandom 0.2.17", "js-sys", "pem", - "ring", "serde", "serde_json", + "signature 2.2.0", "simple_asn1", ] @@ -4214,10 +4467,27 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "konst" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f660d5f887e3562f9ab6f4a14988795b694099d66b4f5dedc02d197ba9becb1d" +dependencies = [ + "const_panic", + "konst_proc_macros", + "typewit", +] + +[[package]] +name = "konst_proc_macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e037a2e1d8d5fdbd49b16a4ea09d5d6401c1f29eca5ff29d03d3824dba16256a" + [[package]] name = "lance" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = 
"git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "arrow-arith", @@ -4269,7 +4539,7 @@ dependencies = [ "prost", "prost-build", "prost-types", - "rand 0.9.2", + "rand 0.9.4", "roaring", "semver", "serde", @@ -4285,8 +4555,8 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-array", "arrow-buffer", @@ -4302,13 +4572,13 @@ dependencies = [ "half", "jsonb", "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] name = "lance-bitpacking" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrayref", "paste", @@ -4317,8 +4587,8 @@ dependencies = [ [[package]] name = "lance-core" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-array", "arrow-buffer", @@ -4341,7 +4611,7 @@ dependencies = [ "object_store", "pin-project", "prost", - "rand 0.9.2", + "rand 0.9.4", "roaring", "serde_json", "snafu 0.9.0", @@ -4355,8 +4625,8 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = 
"git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "arrow-array", @@ -4387,8 +4657,8 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "arrow-array", @@ -4398,7 +4668,7 @@ dependencies = [ "futures", "half", "hex", - "rand 0.9.2", + "rand 0.9.4", "rand_distr 0.5.1", "rand_xoshiro", "random_word 0.5.2", @@ -4406,8 +4676,8 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-arith", "arrow-array", @@ -4433,7 +4703,7 @@ dependencies = [ "prost", "prost-build", "prost-types", - "rand 0.9.2", + "rand 0.9.4", "snafu 0.9.0", "strum 0.26.3", "tokio", @@ -4444,8 +4714,8 @@ dependencies = [ [[package]] name = "lance-file" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-arith", "arrow-array", @@ -4477,9 +4747,10 @@ dependencies = [ [[package]] name = "lance-index" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = 
"git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ + "arc-swap", "arrow", "arrow-arith", "arrow-array", @@ -4525,7 +4796,7 @@ dependencies = [ "prost", "prost-build", "prost-types", - "rand 0.9.2", + "rand 0.9.4", "rand_distr 0.5.1", "rangemap", "rayon", @@ -4543,8 +4814,8 @@ dependencies = [ [[package]] name = "lance-io" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "arrow-arith", @@ -4577,7 +4848,7 @@ dependencies = [ "path_abs", "pin-project", "prost", - "rand 0.9.2", + "rand 0.9.4", "serde", "snafu 0.9.0", "tempfile", @@ -4588,8 +4859,8 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-array", "arrow-buffer", @@ -4600,13 +4871,13 @@ dependencies = [ "lance-arrow", "lance-core", "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] name = "lance-namespace" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "async-trait", @@ -4619,17 +4890,19 @@ dependencies = [ [[package]] name = "lance-namespace-impls" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = 
"7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "arrow-ipc", "arrow-schema", "async-trait", "axum", + "base64 0.22.1", "bytes", "chrono", "futures", + "hmac", "lance", "lance-core", "lance-index", @@ -4639,10 +4912,12 @@ dependencies = [ "lance-table", "log", "object_store", - "rand 0.9.2", - "reqwest", + "quick-xml 0.38.4", + "rand 0.9.4", + "reqwest 0.12.28", "serde", "serde_json", + "sha2", "snafu 0.9.0", "tokio", "tower", @@ -4652,21 +4927,22 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.6.1" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee2e48de899e2931afb67fcddd0a08e439bf5d8b6ea2a2ed9cb8f4df669bd5cc" +checksum = "f65e31bdaa13e01dab6e7cf566da31df243c34a542f0d915d3601ec0e01e61d2" dependencies = [ - "reqwest", + "reqwest 0.12.28", "serde", "serde_json", "serde_repr", + "serde_with", "url", ] [[package]] name = "lance-table" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow", "arrow-array", @@ -4690,7 +4966,7 @@ dependencies = [ "prost", "prost-build", "prost-types", - "rand 0.9.2", + "rand 0.9.4", "rangemap", "roaring", "semver", @@ -4705,20 +4981,20 @@ dependencies = [ [[package]] name = "lance-testing" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "arrow-array", "arrow-schema", "lance-arrow", "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] name 
= "lance-tokenizer" -version = "6.0.0-beta.1" -source = "git+https://github.com/lance-format/lance.git?tag=v6.0.0-beta.1#c7a7d3a0e944646e793d297d4a2e2cf7e4fb28a3" +version = "7.0.0-beta.7" +source = "git+https://github.com/lance-format/lance.git?tag=v7.0.0-beta.7#f6932459689b5568c89baa435ff85a4abf067b45" dependencies = [ "jieba-rs", "lindera", @@ -4729,7 +5005,7 @@ dependencies = [ [[package]] name = "lancedb" -version = "0.28.0-beta.9" +version = "0.28.0-beta.11" dependencies = [ "ahash", "anyhow", @@ -4790,10 +5066,10 @@ dependencies = [ "pin-project", "polars", "polars-arrow", - "rand 0.9.2", + "rand 0.9.4", "random_word 0.4.3", "regex", - "reqwest", + "reqwest 0.12.28", "rstest", "semver", "serde", @@ -4811,7 +5087,7 @@ dependencies = [ [[package]] name = "lancedb-nodejs" -version = "0.28.0-beta.9" +version = "0.28.0-beta.11" dependencies = [ "arrow-array", "arrow-buffer", @@ -4833,7 +5109,7 @@ dependencies = [ [[package]] name = "lancedb-python" -version = "0.31.0-beta.9" +version = "0.31.0-beta.11" dependencies = [ "arrow", "async-trait", @@ -4845,6 +5121,7 @@ dependencies = [ "lance-namespace", "lance-namespace-impls", "lancedb", + "libc", "log", "pin-project", "pyo3", @@ -4940,7 +5217,7 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "windows-link", ] @@ -5125,7 +5402,7 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "generator", "scoped-tls", "tracing", @@ -5168,9 +5445,9 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" +checksum = 
"db9a0d582c2874f68138a16ce1867e0ffde6c0bb0a0df85e1f36d04146db488a" dependencies = [ "twox-hash", ] @@ -5236,7 +5513,7 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "digest", ] @@ -5246,6 +5523,15 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae960838283323069879657ca3de837e9f7bbb4c7bf6ea7f1b290d5e9476d2e0" +[[package]] +name = "mea" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6747f54621d156e1b47eb6b25f39a941b9fc347f98f67d25d8881ff99e8ed832" +dependencies = [ + "slab", +] + [[package]] name = "memchr" version = "2.8.0" @@ -5271,15 +5557,6 @@ dependencies = [ "stable_deref_trait", ] -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - [[package]] name = "mime" version = "0.3.17" @@ -5319,7 +5596,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.61.2", ] @@ -5371,6 +5648,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "more-asserts" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fafa6961cabd9c63bcd77a45d7e3b7f3b552b70417831fb0f56db717e72407e" + [[package]] name = "multimap" version = "0.10.1" @@ -5612,6 +5895,34 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags", +] + +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" +dependencies = [ + "libc", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-system-configuration" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7216bd11cbda54ccabcab84d523dc93b858ec75ecfb3a7d89513fa22464da396" +dependencies = [ + "objc2-core-foundation", +] + [[package]] name = "object" version = "0.37.3" @@ -5623,16 +5934,18 @@ dependencies = [ [[package]] name = "object_store" -version = "0.12.5" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" +checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" dependencies = [ "async-trait", "base64 0.22.1", "bytes", "chrono", "form_urlencoded", - "futures", + "futures-channel", + "futures-core", + "futures-util", "http 1.4.0", "http-body-util", "httparse", @@ -5642,11 +5955,11 @@ dependencies = [ "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.38.4", - "rand 0.9.2", - "reqwest", + "quick-xml 0.39.3", + "rand 0.10.1", + "reqwest 0.12.28", "ring", - "rustls-pemfile", + "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", @@ -5661,14 +5974,15 @@ dependencies = [ [[package]] name = "object_store_opendal" -version = "0.55.0" +version = "0.56.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "113ab0769e972eee585e57407b98de08bda5354fa28e8ba4d89038d6cb6a8991" +checksum = "08298874eee5935c95bcaa393148834f9c53d904461ca15584a041d8a1c907c2" dependencies = [ "async-trait", "bytes", "chrono", "futures", + "mea", 
"object_store", "opendal", "pin-project", @@ -5687,6 +6001,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oneshot" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" + [[package]] name = "onig" version = "6.5.1" @@ -5711,32 +6031,219 @@ dependencies = [ [[package]] name = "opendal" -version = "0.55.0" +version = "0.56.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d075ab8a203a6ab4bc1bce0a4b9fe486a72bf8b939037f4b78d95386384bc80a" +checksum = "97b31d3d8e99a85d83b73ec26647f5607b80578ed9375810b6e44ffa3590a236" +dependencies = [ + "ctor", + "opendal-core", + "opendal-layer-concurrent-limit", + "opendal-layer-logging", + "opendal-layer-retry", + "opendal-layer-timeout", + "opendal-service-azblob", + "opendal-service-azdls", + "opendal-service-gcs", + "opendal-service-hf", + "opendal-service-oss", + "opendal-service-s3", +] + +[[package]] +name = "opendal-core" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1849dd2687e173e776d3af5fce1ba3ae47b9dd37a09d1c4deba850ef45fe00ca" dependencies = [ "anyhow", - "backon", "base64 0.22.1", "bytes", - "crc32c", "futures", - "getrandom 0.2.17", "http 1.4.0", "http-body 1.0.1", "jiff", "log", "md-5", + "mea", "percent-encoding", "quick-xml 0.38.4", - "reqsign", - "reqwest", + "reqsign-core", + "reqwest 0.13.3", "serde", "serde_json", - "sha2", "tokio", "url", "uuid", + "web-time", +] + +[[package]] +name = "opendal-layer-concurrent-limit" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "048b1b29c503263bdd80a9afe46a68cd02ea9bd361185b1feab4b151078998e9" +dependencies = [ + "futures", + "http 1.4.0", + "mea", + "opendal-core", +] + +[[package]] +name = 
"opendal-layer-logging" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2645adc988b12eda106e2679ae529facfbbaa868ceb706f6f8125c6af15c47b" +dependencies = [ + "log", + "opendal-core", +] + +[[package]] +name = "opendal-layer-retry" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eac134ffa4ddda6131a640a84a5315996424b9416c85052f8c64c1a33b70ad4" +dependencies = [ + "backon", + "log", + "opendal-core", +] + +[[package]] +name = "opendal-layer-timeout" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "619586ab7480c2e3009f6d18eabab18957bc094778fd130bcc38924970a90f4c" +dependencies = [ + "opendal-core", + "tokio", +] + +[[package]] +name = "opendal-service-azblob" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7452bf3ec61cfd81ac9ad9ada17825931e9e371d44a045c6bfab9596c0a2ac3b" +dependencies = [ + "base64 0.22.1", + "bytes", + "http 1.4.0", + "log", + "opendal-core", + "opendal-service-azure-common", + "quick-xml 0.38.4", + "reqsign-azure-storage", + "reqsign-core", + "reqsign-file-read-tokio", + "serde", + "sha2", + "uuid", +] + +[[package]] +name = "opendal-service-azdls" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f9884c2d8cf8ba2bb077d79c877dac5863ba3bab9e2c9c1e41a2e0491404772" +dependencies = [ + "bytes", + "http 1.4.0", + "log", + "opendal-core", + "opendal-service-azure-common", + "quick-xml 0.38.4", + "reqsign-azure-storage", + "reqsign-core", + "reqsign-file-read-tokio", + "serde", + "serde_json", +] + +[[package]] +name = "opendal-service-azure-common" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb0e45d6c8dcf66ce2da20e241bcb80e6e540e109a4ff20f318f6c9b4c54e0c" +dependencies = [ + "http 1.4.0", + "opendal-core", +] + +[[package]] +name = "opendal-service-gcs" 
+version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70a49477a10163431896d106136117f5670717f9c9e49cf6f710528800c6633a" +dependencies = [ + "async-trait", + "bytes", + "http 1.4.0", + "log", + "opendal-core", + "percent-encoding", + "quick-xml 0.38.4", + "reqsign-core", + "reqsign-file-read-tokio", + "reqsign-google", + "serde", + "serde_json", + "tokio", +] + +[[package]] +name = "opendal-service-hf" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2ab7a2a8a11dfe257ef4db5c0de798acbcd0d6429c37382dad2154bc06a388" +dependencies = [ + "bytes", + "hf-xet", + "http 1.4.0", + "log", + "opendal-core", + "percent-encoding", + "reqwest 0.13.3", + "serde", + "serde_json", +] + +[[package]] +name = "opendal-service-oss" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c8a917829ad06d21b639558532cb0101fe49b040d946d673a73018683fac05" +dependencies = [ + "bytes", + "http 1.4.0", + "log", + "opendal-core", + "quick-xml 0.38.4", + "reqsign-aliyun-oss", + "reqsign-core", + "reqsign-file-read-tokio", + "serde", +] + +[[package]] +name = "opendal-service-s3" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dadddeb9bb50b0d30927dd914c298c4ddca47e4c1cfa7674d311f0cf9b051c8" +dependencies = [ + "base64 0.22.1", + "bytes", + "crc32c", + "http 1.4.0", + "log", + "md-5", + "opendal-core", + "quick-xml 0.38.4", + "reqsign-aws-v4", + "reqsign-core", + "reqsign-file-read-tokio", + "serde", + "url", ] [[package]] @@ -5770,6 +6277,15 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "os_str_bytes" +version = "6.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" +dependencies = [ + "memchr", +] + [[package]] name = "outref" version = "0.5.2" @@ -5809,7 +6325,7 @@ version = "0.9.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "libc", "redox_syscall 0.5.18", "smallvec", @@ -6443,7 +6959,7 @@ dependencies = [ "rayon", "smartstring", "stacker", - "sysinfo", + "sysinfo 0.30.13", "version_check", ] @@ -6530,7 +7046,7 @@ version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "itertools 0.11.0", "log", "multimap", @@ -6582,7 +7098,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e205bb30d5b916c55e584c22201771bcf2bad9aabd5d4127f38387140c38632" dependencies = [ "bytemuck", - "cfg-if", + "cfg-if 1.0.4", "libm", "num-complex", "paste", @@ -6600,28 +7116,26 @@ checksum = "40e24eee682d89fb193496edf918a7f407d30175b2e785fe057e4392dfd182e0" [[package]] name = "pyo3" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" +checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" dependencies = [ - "indoc", "libc", - "memoffset", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent", ] [[package]] name = "pyo3-async-runtimes" -version = "0.26.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ee6d4cb3e8d5b925f5cdb38da183e0ff18122eb2048d4041c9e7034d026e23" +checksum = "9e7364a95bf00e8377bbf9b0f09d7ff9715a29d8fcf93b47d1a967363b973178" dependencies = [ - "futures", + "futures-channel", + "futures-util", "once_cell", "pin-project-lite", "pyo3", @@ -6631,9 +7145,9 @@ dependencies = [ [[package]] name = "pyo3-async-runtimes-macros" -version = "0.26.0" +version = "0.28.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c29bc5c673e36a8102d0b9179149c1bb59990d8db4f3ae58bd7dceccab90b951" +checksum = "c23399970eea9c31d0ac84cee4a9d8dd05f89b1da2f4dd5bb44b32a3f66db4f8" dependencies = [ "proc-macro2", "quote", @@ -6642,18 +7156,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" +checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e" dependencies = [ "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" +checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e" dependencies = [ "libc", "pyo3-build-config", @@ -6661,9 +7175,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" +checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -6673,9 +7187,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" +checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -6686,9 +7200,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.37.5" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" dependencies = [ "memchr", "serde", @@ -6696,9 +7210,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.38.4" +version = "0.39.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" +checksum = "721da970c312655cde9b4ffe0547f20a8494866a4af5ff51f18b7c633d0c870b" dependencies = [ "memchr", "serde", @@ -6717,7 +7231,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.37", - "socket2 0.6.3", + "socket2 0.5.10", "thiserror 2.0.18", "tokio", "tracing", @@ -6730,10 +7244,11 @@ version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ + "aws-lc-rs", "bytes", "getrandom 0.3.4", "lru-slab", - "rand 0.9.2", + "rand 0.9.4", "ring", "rustc-hash", "rustls 0.23.37", @@ -6754,9 +7269,9 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.3", + "socket2 0.5.10", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -6799,14 +7314,25 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.2" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.5", ] +[[package]] +name = "rand" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" +dependencies = [ + "chacha20", + "getrandom 0.4.2", + "rand_core 0.10.1", +] + [[package]] name = "rand_chacha" version = "0.3.1" @@ -6845,6 +7371,12 @@ dependencies = [ "getrandom 
0.3.4", ] +[[package]] +name = "rand_core" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" + [[package]] name = "rand_distr" version = "0.4.3" @@ -6862,7 +7394,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" dependencies = [ "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -6897,7 +7429,7 @@ dependencies = [ "ahash", "brotli 8.0.2", "paste", - "rand 0.9.2", + "rand 0.9.4", "unicase", ] @@ -6979,6 +7511,15 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "redb" +version = "3.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ba239c1c1693315d3cc0e601db3b3965543afbf48c41730fdca2f069f510f4a" +dependencies = [ + "libc", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -7070,33 +7611,114 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" [[package]] -name = "reqsign" -version = "0.16.5" +name = "reqsign-aliyun-oss" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" +checksum = "57ac2757f3140aa2e213b554148ae0b52733e624fc6723f0cc6bb3d440176c95" dependencies = [ "anyhow", - "async-trait", - "base64 0.22.1", - "chrono", "form_urlencoded", - "getrandom 0.2.17", - "hex", - "hmac", - "home", "http 1.4.0", - "jsonwebtoken", "log", - "once_cell", "percent-encoding", - "quick-xml 0.37.5", - "rand 0.8.5", - "reqwest", - "rsa", + "reqsign-core", "rust-ini", "serde", "serde_json", +] + +[[package]] +name = "reqsign-aws-v4" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44eaca382e94505a49f1a4849658d153aebf79d9c1a58e5dd3b10361511e9f43" +dependencies = 
[ + "anyhow", + "bytes", + "form_urlencoded", + "http 1.4.0", + "log", + "percent-encoding", + "quick-xml 0.39.3", + "reqsign-core", + "rust-ini", + "serde", + "serde_json", + "serde_urlencoded", "sha1", +] + +[[package]] +name = "reqsign-azure-storage" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a321980405d596bd34aaf95c4722a3de4128a67fd19e74a81a83aa3fdf082e6" +dependencies = [ + "anyhow", + "base64 0.22.1", + "bytes", + "form_urlencoded", + "http 1.4.0", + "jsonwebtoken", + "log", + "pem", + "percent-encoding", + "reqsign-core", + "rsa", + "serde", + "serde_json", + "sha1", +] + +[[package]] +name = "reqsign-core" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b10302cf0a7d7e7352ba211fc92c3c5bebf1286153e49cc5aa87348078a8e102" +dependencies = [ + "anyhow", + "base64 0.22.1", + "bytes", + "form_urlencoded", + "futures", + "hex", + "hmac", + "http 1.4.0", + "jiff", + "log", + "percent-encoding", + "sha1", + "sha2", + "windows-sys 0.61.2", +] + +[[package]] +name = "reqsign-file-read-tokio" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d89295b3d17abea31851cc8de55d843d89c52132c864963c38d41920613dc5" +dependencies = [ + "anyhow", + "reqsign-core", + "tokio", +] + +[[package]] +name = "reqsign-google" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35cc609b49c69e76ecaceb775a03f792d1ed3e7755ab3548d4534fd801e3242e" +dependencies = [ + "form_urlencoded", + "http 1.4.0", + "jsonwebtoken", + "log", + "percent-encoding", + "reqsign-aws-v4", + "reqsign-core", + "rsa", + "serde", + "serde_json", "sha2", "tokio", ] @@ -7142,11 +7764,51 @@ dependencies = [ "url", "wasm-bindgen", "wasm-bindgen-futures", - "wasm-streams", + "wasm-streams 0.4.2", "web-sys", "webpki-roots 1.0.6", ] +[[package]] +name = "reqwest" +version = "0.13.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "62e0021ea2c22aed41653bc7e1419abb2c97e038ff2c33d0e1309e49a97deec0" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-rustls 0.27.7", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls 0.23.37", + "rustls-pki-types", + "rustls-platform-verifier", + "serde", + "serde_json", + "sync_wrapper", + "tokio", + "tokio-rustls 0.26.4", + "tokio-util", + "tower", + "tower-http 0.6.8", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams 0.5.0", + "web-sys", +] + [[package]] name = "reqwest-eventsource" version = "0.6.0" @@ -7159,10 +7821,24 @@ dependencies = [ "mime", "nom 7.1.3", "pin-project-lite", - "reqwest", + "reqwest 0.12.28", "thiserror 1.0.69", ] +[[package]] +name = "reqwest-middleware" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199dda04a536b532d0cc04d7979e39b1c763ea749bf91507017069c00b96056f" +dependencies = [ + "anyhow", + "async-trait", + "http 1.4.0", + "reqwest 0.13.3", + "thiserror 2.0.18", + "tower-service", +] + [[package]] name = "rfc6979" version = "0.3.1" @@ -7181,7 +7857,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", - "cfg-if", + "cfg-if 1.0.4", "getrandom 0.2.17", "libc", "untrusted 0.9.0", @@ -7190,9 +7866,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.11.3" +version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ba9ce64a8f45d7fc86358410bb1a82e8c987504c0d4900e9141d69a9f26c885" +checksum = "1dedc5658c6ecb3bdb5ef5f3295bb9253f42dcf3fd1402c03f6b1f7659c3c4a9" dependencies = [ "bytemuck", "byteorder", @@ -7237,7 +7913,7 @@ version = "0.23.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "825ea780781b15345a146be27eaefb05085e337e869bff01b4306a4fd4a9ad5a" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "glob", "proc-macro-crate", "proc-macro2", @@ -7255,7 +7931,7 @@ version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "ordered-multimap", ] @@ -7294,7 +7970,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -7320,7 +7996,7 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.10", + "rustls-webpki 0.103.13", "subtle", "zeroize", ] @@ -7337,15 +8013,6 @@ dependencies = [ "security-framework", ] -[[package]] -name = "rustls-pemfile" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" -dependencies = [ - "rustls-pki-types", -] - [[package]] name = "rustls-pki-types" version = "1.14.0" @@ -7356,6 +8023,33 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-platform-verifier" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d1e2536ce4f35f4846aa13bff16bd0ff40157cdb14cc056c7b14ba41233ba0" +dependencies = [ + "core-foundation 0.10.1", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls 0.23.37", + "rustls-native-certs", + "rustls-platform-verifier-android", + "rustls-webpki 0.103.13", + "security-framework", + "security-framework-sys", + "webpki-root-certs", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustls-platform-verifier-android" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" + [[package]] name = "rustls-webpki" version = "0.101.7" 
@@ -7368,9 +8062,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.10" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" dependencies = [ "aws-lc-rs", "ring", @@ -7390,6 +8084,12 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "safe-transmute" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3944826ff8fa8093089aba3acb4ef44b9446a99a16f3bf4e74af3f77d340ab7d" + [[package]] name = "safetensors" version = "0.7.0" @@ -7680,8 +8380,8 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ - "cfg-if", - "cpufeatures", + "cfg-if 1.0.4", + "cpufeatures 0.2.17", "digest", ] @@ -7691,9 +8391,19 @@ version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ - "cfg-if", - "cpufeatures", + "cfg-if 1.0.4", + "cpufeatures 0.2.17", "digest", + "sha2-asm", +] + +[[package]] +name = "sha2-asm" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b845214d6175804686b2bd482bcffe96651bb2d1200742b712003504a2dac1ab" +dependencies = [ + "cc", ] [[package]] @@ -7705,6 +8415,17 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shellexpand" +version = "3.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32824fab5e16e6c4d86dc1ba84489390419a39f97699852b66480bb87d297ed8" +dependencies = [ + "bstr", + "dirs", + "os_str_bytes", +] + [[package]] name = "shlex" version = 
"1.3.0" @@ -7747,6 +8468,16 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "simd_cesu8" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f90157bb87cddf702797c5dadfa0be7d266cdf49e22da2fcaa32eff75b2c33" +dependencies = [ + "rustc_version", + "simdutf8", +] + [[package]] name = "simdutf8" version = "0.1.5" @@ -7818,7 +8549,7 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.117", @@ -7830,7 +8561,7 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54254b8531cafa275c5e096f62d48c81435d1015405a91198ddb11e967301d40" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.117", @@ -7853,7 +8584,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -7922,9 +8653,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.59.0" +version = "0.61.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" dependencies = [ "log", "sqlparser_derive", @@ -7932,9 +8663,9 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +checksum = 
"a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" dependencies = [ "proc-macro2", "quote", @@ -7954,7 +8685,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" dependencies = [ "cc", - "cfg-if", + "cfg-if 1.0.4", "libc", "psm", "windows-sys 0.59.0", @@ -7966,6 +8697,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "statrs" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a3fe7c28c6512e766b0874335db33c94ad7b8f9054228ae1c2abd47ce7d335e" +dependencies = [ + "approx", + "num-traits", +] + [[package]] name = "std_prelude" version = "0.2.12" @@ -8067,6 +8808,12 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "symlink" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7973cce6668464ea31f176d85b13c7ab3bba2cb3b77a2ed26abd7801688010a" + [[package]] name = "syn" version = "1.0.109" @@ -8129,12 +8876,26 @@ version = "0.30.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "core-foundation-sys", "libc", "ntapi", "once_cell", - "windows", + "windows 0.52.0", +] + +[[package]] +name = "sysinfo" +version = "0.38.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ab6a2f8bfe508deb3c6406578252e491d299cbbf3bc0529ecc3313aee4a52f" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows 0.62.2", ] [[package]] @@ -8203,7 +8964,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", 
"rustix", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -8283,7 +9044,7 @@ version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -8411,6 +9172,17 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "tokio-retry" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40f644c762e9d396831ae2f8935c954b0d758c4532e924bead0f666d0c1c8640" +dependencies = [ + "pin-project-lite", + "rand 0.10.1", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.24.1" @@ -8440,6 +9212,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] @@ -8565,6 +9338,19 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-appender" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "050686193eb999b4bb3bc2acfa891a13da00f79734704c4b8b4ef1a10b368a3c" +dependencies = [ + "crossbeam-channel", + "symlink", + "thiserror 2.0.18", + "time", + "tracing-subscriber", +] + [[package]] name = "tracing-attributes" version = "0.1.31" @@ -8597,6 +9383,16 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.23" @@ -8607,12 +9403,15 @@ dependencies = [ "nu-ansi-term", "once_cell", "regex-automata", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", "tracing", "tracing-core", "tracing-log", + "tracing-serde", ] [[package]] @@ -8627,7 +9426,7 @@ version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" dependencies = [ - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -8642,6 +9441,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "typewit" +version = "1.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "214ca0b2191785cbc06209b9ca1861e048e39b5ba33574b3cedd58363d5bb5f6" + [[package]] name = "unicase" version = "2.9.0" @@ -8689,9 +9494,9 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.13.1" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da36089a805484bcccfffe0739803392c8298778a2d2f09febf76fac5ad9025b" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" [[package]] name = "unicode-width" @@ -8857,6 +9662,15 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasi" +version = "0.14.7+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +dependencies = [ + "wasip2", +] + [[package]] name = "wasip2" version = "1.0.2+wasi-0.2.9" @@ -8875,13 +9689,22 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasite" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fe902b4a6b8028a753d5424909b764ccf79b7a209eac9bf97e59cda9f71a42" +dependencies = [ + "wasi 0.14.7+wasi-0.2.4", +] + [[package]] name = "wasm-bindgen" version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", 
"once_cell", "rustversion", "wasm-bindgen-macro", @@ -8894,7 +9717,7 @@ version = "0.4.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "futures-util", "js-sys", "once_cell", @@ -8969,6 +9792,19 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasm-streams" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wasmparser" version = "0.244.0" @@ -9001,6 +9837,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-root-certs" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31141ce3fc3e300ae89b78c0dd67f9708061d1d2eda54b8209346fd6be9a92c" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "webpki-roots" version = "0.26.11" @@ -9019,6 +9864,19 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "whoami" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6a5b12f9df4f978d2cfdb1bd3bac52433f44393342d7ee9c25f5a1c14c0f45d" +dependencies = [ + "libc", + "libredox", + "objc2-system-configuration", + "wasite", + "web-sys", +] + [[package]] name = "winapi" version = "0.3.9" @@ -9041,7 +9899,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -9060,6 +9918,27 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" +dependencies = [ + "windows-collections", + "windows-core 0.62.2", + "windows-future", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" +dependencies = [ + "windows-core 0.62.2", +] + [[package]] name = "windows-core" version = "0.52.0" @@ -9082,6 +9961,17 @@ dependencies = [ "windows-strings", ] +[[package]] +name = "windows-future" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" +dependencies = [ + "windows-core 0.62.2", + "windows-link", + "windows-threading", +] + [[package]] name = "windows-implement" version = "0.60.2" @@ -9110,6 +10000,16 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-numerics" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" +dependencies = [ + "windows-core 0.62.2", + "windows-link", +] + [[package]] name = "windows-registry" version = "0.6.1" @@ -9208,6 +10108,15 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] +[[package]] +name = "windows-threading" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" +dependencies = [ + "windows-link", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -9426,6 +10335,153 @@ dependencies = [ "rustix", ] +[[package]] +name = "xet-client" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3e1e496dcbe6a09017acdfaf48e1a646735e7ff5b2a49e2c7e081cca77a59bc8" +dependencies = [ + "anyhow", + "async-trait", + "base64 0.22.1", + "bytes", + "clap", + "crc32fast", + "futures", + "http 1.4.0", + "hyper 1.8.1", + "lazy_static", + "more-asserts", + "rand 0.10.1", + "redb", + "reqwest 0.13.3", + "reqwest-middleware", + "serde", + "serde_json", + "serde_repr", + "statrs", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tokio-retry", + "tracing", + "tracing-subscriber", + "url", + "urlencoding", + "web-time", + "xet-core-structures", + "xet-runtime", +] + +[[package]] +name = "xet-core-structures" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb838aa8eb67d730af301584cf003caad407487606058292a6750711b603fbee" +dependencies = [ + "async-trait", + "base64 0.22.1", + "blake3", + "bytemuck", + "bytes", + "clap", + "countio", + "csv", + "futures", + "futures-util", + "getrandom 0.4.2", + "heapify", + "itertools 0.14.0", + "lazy_static", + "lz4_flex", + "more-asserts", + "rand 0.10.1", + "regex", + "safe-transmute", + "serde", + "static_assertions", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tracing", + "uuid", + "web-time", + "xet-runtime", +] + +[[package]] +name = "xet-data" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67fd409bef621411a9d9013798540bb8036cb2678f03ab39af89a5e88034ed8c" +dependencies = [ + "anyhow", + "async-trait", + "bytes", + "chrono", + "clap", + "gearhash", + "http 1.4.0", + "itertools 0.14.0", + "lazy_static", + "more-asserts", + "rand 0.10.1", + "serde", + "serde_json", + "sha2", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tracing", + "url", + "uuid", + "walkdir", + "xet-client", + "xet-core-structures", + "xet-runtime", +] + +[[package]] +name = "xet-runtime" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"15d8f121c33866f7648b737abe70d0e2dd9c0af4ffdd7219207531d0283aa63d" +dependencies = [ + "anyhow", + "async-trait", + "bytes", + "chrono", + "colored", + "const-str", + "ctor", + "dirs", + "futures", + "git-version", + "humantime", + "konst", + "lazy_static", + "libc", + "more-asserts", + "oneshot", + "pin-project", + "rand 0.10.1", + "reqwest 0.13.3", + "serde", + "serde_json", + "shellexpand", + "sysinfo 0.38.4", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tracing", + "tracing-appender", + "tracing-subscriber", + "whoami", + "winapi", +] + [[package]] name = "xmlparser" version = "0.13.6" diff --git a/Cargo.toml b/Cargo.toml index e8d5a95b4..a7b038307 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,5 @@ [workspace] members = ["rust/lancedb", "nodejs", "python"] -# Python package needs to be built by maturin. -exclude = ["python"] resolver = "2" [workspace.package] @@ -15,40 +13,40 @@ categories = ["database-implementations"] rust-version = "1.91.0" [workspace.dependencies] -lance = { "version" = "=6.0.0-beta.1", default-features = false, "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-core = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-datagen = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-file = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-io = { "version" = "=6.0.0-beta.1", default-features = false, "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-index = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-linalg = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-namespace = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" 
= "https://github.com/lance-format/lance.git" } -lance-namespace-impls = { "version" = "=6.0.0-beta.1", default-features = false, "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-table = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-testing = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-datafusion = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-encoding = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } -lance-arrow = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" } +lance = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-core = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-datagen = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-file = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-io = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-index = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-linalg = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-namespace = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-namespace-impls = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", 
"git" = "https://github.com/lance-format/lance.git" } +lance-table = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-testing = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-datafusion = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-encoding = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } +lance-arrow = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" } ahash = "0.8" # Note that this one does not include pyarrow -arrow = { version = "57.2", optional = false } -arrow-array = "57.2" -arrow-data = "57.2" -arrow-ipc = "57.2" -arrow-ord = "57.2" -arrow-schema = "57.2" -arrow-select = "57.2" -arrow-cast = "57.2" +arrow = { version = "58.0.0", optional = false } +arrow-array = "58.0.0" +arrow-data = "58.0.0" +arrow-ipc = "58.0.0" +arrow-ord = "58.0.0" +arrow-schema = "58.0.0" +arrow-select = "58.0.0" +arrow-cast = "58.0.0" async-trait = "0" -datafusion = { version = "52.1", default-features = false } -datafusion-catalog = "52.1" -datafusion-common = { version = "52.1", default-features = false } -datafusion-execution = "52.1" -datafusion-expr = "52.1" -datafusion-functions = "52.1" -datafusion-physical-plan = "52.1" -datafusion-physical-expr = "52.1" -datafusion-sql = "52.1" +datafusion = { version = "53.0.0", default-features = false } +datafusion-catalog = "53.0.0" +datafusion-common = { version = "53.0.0", default-features = false } +datafusion-execution = "53.0.0" +datafusion-expr = "53.0.0" +datafusion-functions = "53.0.0" +datafusion-physical-plan = "53.0.0" +datafusion-physical-expr = "53.0.0" +datafusion-sql = "53.0.0" env_logger = "0.11" half = { "version" = "2.7.1", default-features = false, features = [ "num-traits", @@ -56,7 +54,7 @@ 
half = { "version" = "2.7.1", default-features = false, features = [ futures = "0" log = "0.4" moka = { version = "0.12", features = ["future"] } -object_store = "0.12.0" +object_store = "0.13.2" pin-project = "1.0.7" rand = "0.9" snafu = "0.8" diff --git a/deny.toml b/deny.toml new file mode 100644 index 000000000..85231f920 --- /dev/null +++ b/deny.toml @@ -0,0 +1,172 @@ +# cargo-deny configuration for LanceDB. +# +# Run locally with `cargo deny check`. See +# https://embarkstudios.github.io/cargo-deny/ for the full reference. + +# The set of target triples we care about. cargo-deny will only consider +# dependencies that are used on at least one of these targets. Keeping this +# explicit avoids noise from platform-specific crates (e.g. wasm, android, +# ios) that we never actually ship. +[graph] +targets = [ + "x86_64-unknown-linux-gnu", + "aarch64-unknown-linux-gnu", + "x86_64-apple-darwin", + "aarch64-apple-darwin", + "x86_64-pc-windows-msvc", + "aarch64-pc-windows-msvc", +] +all-features = true + +[output] +feature-depth = 1 + +# --------------------------------------------------------------------------- +# Advisories: security vulnerabilities and yanked crates. +# --------------------------------------------------------------------------- +[advisories] +version = 2 +# Fail the check if any crate in the lockfile has been yanked from crates.io. +# Yanked crates are a signal the author retracted the release (often due to +# bugs or security issues) and should not be depended on. +yanked = "deny" +# Advisory IDs we have explicitly reviewed and chosen to accept. Every +# entry must include a rationale and, where possible, an upstream issue +# pointing to a fix. Revisit this list whenever dependencies are updated. +ignore = [ + # rsa: Marvin Attack timing side-channel in PKCS#1 v1.5 decryption. + # Reached only through opendal → reqsign → rsa. We do not use RSA + # decryption in LanceDB ourselves; this is dormant in the signing path. 
+ # No fixed release exists upstream as of this writing. + # https://rustsec.org/advisories/RUSTSEC-2023-0071 + { id = "RUSTSEC-2023-0071", reason = "rsa crate via opendal/reqsign; no fixed upstream release" }, + + # instant: unmaintained. Pulled in via backoff → instant. Upstream + # recommends switching to `web-time`; fix has to come from backoff. + # https://rustsec.org/advisories/RUSTSEC-2024-0384 + { id = "RUSTSEC-2024-0384", reason = "transitive via backoff; waiting on backoff replacement" }, + + # paste: unmaintained (author archived the repo). Used transitively by + # datafusion and the arrow ecosystem; widespread, no drop-in replacement. + # https://rustsec.org/advisories/RUSTSEC-2024-0436 + { id = "RUSTSEC-2024-0436", reason = "transitive via datafusion; awaiting ecosystem migration" }, + + # tantivy: segfault on malformed input due to missing bounds check. + # Pulled in via lance for full-text search. We only feed tantivy + # documents we construct ourselves, not attacker-controlled bytes. + # Tracked for a lance dependency bump. + # https://rustsec.org/advisories/RUSTSEC-2025-0003 + { id = "RUSTSEC-2025-0003", reason = "tantivy via lance; inputs are internally produced, not user-supplied bytes" }, + + # backoff: unmaintained. Reached only via async-openai. Replacement + # requires async-openai to migrate (or us to drop async-openai). + # https://rustsec.org/advisories/RUSTSEC-2025-0012 + { id = "RUSTSEC-2025-0012", reason = "transitive via async-openai; waiting on upstream migration" }, + + # number_prefix: unmaintained. Transitive via hf-hub → indicatif. + # No security impact, just maintenance status. + # https://rustsec.org/advisories/RUSTSEC-2025-0119 + { id = "RUSTSEC-2025-0119", reason = "transitive via hf-hub/indicatif; cosmetic formatting crate" }, + + # rustls-pemfile: unmaintained. Reached from two separate chains: + # rustls-native-certs 0.6 (via hyper-rustls 0.24) and object_store 0.12 (TODO: the workspace now pins object_store 0.13.2; re-verify that rustls-pemfile is still in Cargo.lock and that this ignore is still needed).
+ # Both upstream dependencies need to move before we can drop it. + # https://rustsec.org/advisories/RUSTSEC-2025-0134 + { id = "RUSTSEC-2025-0134", reason = "transitive via rustls-native-certs/object_store; waiting on upstream migration" }, + + # rustls-webpki 0.101.7 (old major line): name-constraint checks for + # URI / wildcard names. Pulled in only via the legacy rustls 0.21 chain + # from aws-smithy-http-client. The 0.103 line we actively use is patched. + # Clearing the 0.101 copy requires the aws-sdk chain to migrate off + # rustls 0.21. + # https://rustsec.org/advisories/RUSTSEC-2026-0098 + # https://rustsec.org/advisories/RUSTSEC-2026-0099 + { id = "RUSTSEC-2026-0098", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" }, + { id = "RUSTSEC-2026-0099", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" }, + + # rustls-webpki 0.101.7: reachable panic in CRL parsing. Same legacy + # rustls 0.21 chain from aws-smithy-http-client as above. The 0.103 line + # we actively use is upgraded to 0.103.13 which contains the fix. + # https://rustsec.org/advisories/RUSTSEC-2026-0104 + { id = "RUSTSEC-2026-0104", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" }, +] + +# --------------------------------------------------------------------------- +# Licenses: only allow licenses we've reviewed as compatible with Apache-2.0. +# --------------------------------------------------------------------------- +[licenses] +version = 2 +# SPDX identifiers for licenses that are compatible with our Apache-2.0 +# distribution. Additions require legal review. +allow = [ + "Apache-2.0", + "Apache-2.0 WITH LLVM-exception", + "MIT", + "BSD-2-Clause", + "BSD-3-Clause", + "ISC", + "Unicode-3.0", + "Unicode-DFS-2016", + "Zlib", + "CC0-1.0", + "MPL-2.0", + "BSL-1.0", + "OpenSSL", + # 0BSD ("BSD Zero Clause") is effectively public domain — no attribution + # required. 
Pulled in by `mock_instant`. + "0BSD", + # bzip2-1.0.6 is the permissive upstream bzip2 license (BSD-like). Pulled + # in by `libbz2-rs-sys`, the pure-Rust bzip2 implementation. + "bzip2-1.0.6", + # CDLA-Permissive-2.0 is a permissive data license used by `webpki-roots` + # for the Mozilla CA root bundle. Data-only, distribution-compatible. + "CDLA-Permissive-2.0", +] +confidence-threshold = 0.8 +# Crates whose license cannot be determined from Cargo metadata but whose +# license we've manually confirmed from upstream. Keep this list minimal. +[[licenses.clarify]] +# polars-arrow-format omits the `license` field in its Cargo.toml, but the +# upstream repo (pola-rs/polars-arrow-format) is dual-licensed Apache-2.0 OR +# MIT. See https://github.com/pola-rs/polars-arrow-format/blob/main/LICENSE +crate = "polars-arrow-format" +expression = "Apache-2.0 OR MIT" +license-files = [] + +# --------------------------------------------------------------------------- +# Bans: disallow specific crates and flag dependency hygiene issues. +# --------------------------------------------------------------------------- +[bans] +# Warn (not deny) on duplicate versions of the same crate. In a large +# workspace like this one, duplicates are common and often unavoidable +# transitively. We surface them to discourage growth, but don't fail CI. +multiple-versions = "warn" +# Wildcard version requirements (`foo = "*"`) are a footgun — they let any +# future release in without review. Ban them outright. +wildcards = "deny" +# Internal workspace crates reference each other via `path = "..."`, which +# cargo-deny sees as a wildcard version. That's fine for private workspace +# members (not published to crates.io), so allow it specifically for paths. +allow-wildcard-paths = true +# Crates that are explicitly banned: any match fails the check (none yet). +deny = [] +# Crates to skip when checking for duplicate versions. +skip = [] +# Similar to `skip`, but also skips the entire transitive subtree.
+skip-tree = [] + +# --------------------------------------------------------------------------- +# Sources: restrict where crates can come from. +# --------------------------------------------------------------------------- +[sources] +# Deny any registry other than the ones explicitly listed below. +unknown-registry = "deny" +# Deny any git dependency whose repository URL isn't in the allow-list below. This +# prevents accidental pulls from arbitrary forks. +unknown-git = "deny" +allow-registry = ["https://github.com/rust-lang/crates.io-index"] +# Lance is developed in a sibling repo and pulled as a git dependency until +# releases are cut to crates.io. Allow that specific repository. +allow-git = [ + "https://github.com/lance-format/lance", +] diff --git a/docs/src/java/java.md b/docs/src/java/java.md index f56804eae..364d268fb 100644 --- a/docs/src/java/java.md +++ b/docs/src/java/java.md @@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`: com.lancedb lancedb-core - 0.28.0-beta.9 + 0.28.0-beta.11 ``` diff --git a/docs/src/js/interfaces/ConnectionOptions.md b/docs/src/js/interfaces/ConnectionOptions.md index d617e8a19..1ad0e127a 100644 --- a/docs/src/js/interfaces/ConnectionOptions.md +++ b/docs/src/js/interfaces/ConnectionOptions.md @@ -41,6 +41,29 @@ for testing purposes. *** +### manifestEnabled? + +```ts +optional manifestEnabled: boolean; +``` + +(For LanceDB OSS only): use directory namespace manifests as the source +of truth for table metadata. Existing directory-listed root tables are +migrated into the manifest on access. + +*** + +### namespaceClientProperties? + +```ts +optional namespaceClientProperties: Record; +``` + +(For LanceDB OSS only): extra properties for the backing namespace +client used by manifest-enabled native connections. + +*** + ### readConsistencyInterval?
```ts diff --git a/java/lancedb-core/pom.xml b/java/lancedb-core/pom.xml index 7b29c682d..8fc29d6e7 100644 --- a/java/lancedb-core/pom.xml +++ b/java/lancedb-core/pom.xml @@ -8,7 +8,7 @@ com.lancedb lancedb-parent - 0.28.0-beta.9 + 0.28.0-beta.11 ../pom.xml diff --git a/java/pom.xml b/java/pom.xml index 3684452bd..00f1df078 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -6,7 +6,7 @@ com.lancedb lancedb-parent - 0.28.0-beta.9 + 0.28.0-beta.11 pom ${project.artifactId} LanceDB Java SDK Parent POM @@ -28,7 +28,7 @@ UTF-8 15.0.0 - 6.0.0-beta.1 + 7.0.0-beta.7 false 2.30.0 1.7 diff --git a/nodejs/Cargo.toml b/nodejs/Cargo.toml index 627e7b256..c73121607 100644 --- a/nodejs/Cargo.toml +++ b/nodejs/Cargo.toml @@ -1,7 +1,8 @@ [package] name = "lancedb-nodejs" edition.workspace = true -version = "0.28.0-beta.9" +version = "0.28.0-beta.11" +publish = false license.workspace = true description.workspace = true repository.workspace = true @@ -15,7 +16,7 @@ crate-type = ["cdylib"] async-trait.workspace = true arrow-ipc.workspace = true arrow-array.workspace = true -arrow-buffer = "57.2" +arrow-buffer = "58.0.0" half.workspace = true arrow-schema.workspace = true env_logger.workspace = true @@ -31,8 +32,8 @@ lzma-sys = { version = "0.1", features = ["static"] } log.workspace = true # Pin to resolve build failures; update periodically for security patches. 
-aws-lc-sys = "=0.38.0" -aws-lc-rs = "=1.16.1" +aws-lc-sys = "=0.40.0" +aws-lc-rs = "=1.16.3" [build-dependencies] napi-build = "2.3.1" diff --git a/nodejs/npm/darwin-arm64/package.json b/nodejs/npm/darwin-arm64/package.json index d8bec57fd..5eab89996 100644 --- a/nodejs/npm/darwin-arm64/package.json +++ b/nodejs/npm/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-darwin-arm64", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.11", "os": ["darwin"], "cpu": ["arm64"], "main": "lancedb.darwin-arm64.node", diff --git a/nodejs/npm/linux-arm64-gnu/package.json b/nodejs/npm/linux-arm64-gnu/package.json index 4873e1c6e..0624f62ca 100644 --- a/nodejs/npm/linux-arm64-gnu/package.json +++ b/nodejs/npm/linux-arm64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-arm64-gnu", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.11", "os": ["linux"], "cpu": ["arm64"], "main": "lancedb.linux-arm64-gnu.node", diff --git a/nodejs/npm/linux-arm64-musl/package.json b/nodejs/npm/linux-arm64-musl/package.json index b41857807..9b5bba426 100644 --- a/nodejs/npm/linux-arm64-musl/package.json +++ b/nodejs/npm/linux-arm64-musl/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-arm64-musl", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.11", "os": ["linux"], "cpu": ["arm64"], "main": "lancedb.linux-arm64-musl.node", diff --git a/nodejs/npm/linux-x64-gnu/package.json b/nodejs/npm/linux-x64-gnu/package.json index bd765ba07..4b8f2dfb3 100644 --- a/nodejs/npm/linux-x64-gnu/package.json +++ b/nodejs/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-x64-gnu", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.11", "os": ["linux"], "cpu": ["x64"], "main": "lancedb.linux-x64-gnu.node", diff --git a/nodejs/npm/linux-x64-musl/package.json b/nodejs/npm/linux-x64-musl/package.json index 48e8f6721..a4c120c50 100644 --- a/nodejs/npm/linux-x64-musl/package.json +++ 
b/nodejs/npm/linux-x64-musl/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-x64-musl", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.11", "os": ["linux"], "cpu": ["x64"], "main": "lancedb.linux-x64-musl.node", diff --git a/nodejs/npm/win32-arm64-msvc/package.json b/nodejs/npm/win32-arm64-msvc/package.json index 481324a8c..c315922ab 100644 --- a/nodejs/npm/win32-arm64-msvc/package.json +++ b/nodejs/npm/win32-arm64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-win32-arm64-msvc", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.11", "os": [ "win32" ], diff --git a/nodejs/npm/win32-x64-msvc/package.json b/nodejs/npm/win32-x64-msvc/package.json index 3cb420e8f..8d450d50e 100644 --- a/nodejs/npm/win32-x64-msvc/package.json +++ b/nodejs/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-win32-x64-msvc", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.11", "os": ["win32"], "cpu": ["x64"], "main": "lancedb.win32-x64-msvc.node", diff --git a/nodejs/package-lock.json b/nodejs/package-lock.json index aae49f278..19e7fcb55 100644 --- a/nodejs/package-lock.json +++ b/nodejs/package-lock.json @@ -1,12 +1,12 @@ { "name": "@lancedb/lancedb", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.11", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@lancedb/lancedb", - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.11", "cpu": [ "x64", "arm64" diff --git a/nodejs/package.json b/nodejs/package.json index 48aa86146..8c8b7a8d2 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -11,7 +11,7 @@ "ann" ], "private": false, - "version": "0.28.0-beta.9", + "version": "0.28.0-beta.11", "main": "dist/index.js", "exports": { ".": "./dist/index.js", @@ -75,7 +75,6 @@ "build:debug": "napi build --platform --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir lancedb", "postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/", "build:release": "napi build 
--platform --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir dist", - "postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/", "build": "npm run build:debug && npm run tsc", "build-release": "npm run build:release && npm run tsc", "tsc": "tsc -b", diff --git a/nodejs/src/connection.rs b/nodejs/src/connection.rs index 19b2a5440..09be9465f 100644 --- a/nodejs/src/connection.rs +++ b/nodejs/src/connection.rs @@ -67,6 +67,12 @@ impl Connection { builder = builder.storage_option(key, value); } } + if let Some(manifest_enabled) = options.manifest_enabled { + builder = builder.manifest_enabled(manifest_enabled); + } + if let Some(namespace_client_properties) = options.namespace_client_properties { + builder = builder.namespace_client_properties(namespace_client_properties); + } // Create client config, optionally with header provider let client_config = options.client_config.unwrap_or_default(); diff --git a/nodejs/src/lib.rs b/nodejs/src/lib.rs index 055a6a3d3..87bc97ce7 100644 --- a/nodejs/src/lib.rs +++ b/nodejs/src/lib.rs @@ -37,6 +37,13 @@ pub struct ConnectionOptions { /// /// The available options are described at https://docs.lancedb.com/storage/ pub storage_options: Option>, + /// (For LanceDB OSS only): use directory namespace manifests as the source + /// of truth for table metadata. Existing directory-listed root tables are + /// migrated into the manifest on access. + pub manifest_enabled: Option, + /// (For LanceDB OSS only): extra properties for the backing namespace + /// client used by manifest-enabled native connections. + pub namespace_client_properties: Option>, /// (For LanceDB OSS only): the session to use for this connection. Holds /// shared caches and other session-specific state. 
pub session: Option, diff --git a/python/.bumpversion.toml b/python/.bumpversion.toml index 2dc1443bb..fbc8e545c 100644 --- a/python/.bumpversion.toml +++ b/python/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.31.0-beta.9" +current_version = "0.31.0-beta.11" parse = """(?x) (?P0|[1-9]\\d*)\\. (?P0|[1-9]\\d*)\\. diff --git a/python/Cargo.toml b/python/Cargo.toml index 481f4ebfe..fce27e65a 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,6 +1,7 @@ [package] name = "lancedb-python" -version = "0.31.0-beta.9" +version = "0.31.0-beta.11" +publish = false edition.workspace = true description = "Python bindings for LanceDB" license.workspace = true @@ -14,7 +15,7 @@ name = "_lancedb" crate-type = ["cdylib"] [dependencies] -arrow = { version = "57.2", features = ["pyarrow"] } +arrow = { version = "58.0.0", features = ["pyarrow"] } async-trait = "0.1" bytes = "1" lancedb = { path = "../rust/lancedb", default-features = false } @@ -24,8 +25,8 @@ lance-namespace-impls.workspace = true lance-io.workspace = true env_logger.workspace = true log.workspace = true -pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] } -pyo3-async-runtimes = { version = "0.26", features = [ +pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39"] } +pyo3-async-runtimes = { version = "0.28", features = [ "attributes", "tokio-runtime", ] } @@ -34,10 +35,11 @@ futures.workspace = true serde = "1" serde_json = "1" snafu.workspace = true -tokio = { version = "1.40", features = ["sync"] } +tokio = { version = "1.40", features = ["sync", "rt-multi-thread"] } +libc = "0.2" [build-dependencies] -pyo3-build-config = { version = "0.26", features = [ +pyo3-build-config = { version = "0.28", features = [ "extension-module", "abi3-py39", ] } diff --git a/python/python/lancedb/__init__.py b/python/python/lancedb/__init__.py index ebf292b05..efeed258f 100644 --- a/python/python/lancedb/__init__.py +++ b/python/python/lancedb/__init__.py @@ 
-7,7 +7,6 @@ import os from concurrent.futures import ThreadPoolExecutor from datetime import timedelta from typing import Dict, Optional, Union, Any, List -import warnings __version__ = importlib.metadata.version("lancedb") @@ -73,6 +72,7 @@ def connect( client_config: Union[ClientConfig, Dict[str, Any], None] = None, storage_options: Optional[Dict[str, str]] = None, session: Optional[Session] = None, + manifest_enabled: bool = False, namespace_client_impl: Optional[str] = None, namespace_client_properties: Optional[Dict[str, str]] = None, namespace_client_pushdown_operations: Optional[List[str]] = None, @@ -111,6 +111,10 @@ def connect( storage_options: dict, optional Additional options for the storage backend. See available options at + manifest_enabled : bool, default False + When true for local/native connections, use directory namespace + manifests as the source of truth for table metadata. Existing + directory-listed root tables are migrated into the manifest on access. session: Session, optional (For LanceDB OSS only) A session to use for this connection. Sessions allow you to configure @@ -158,11 +162,11 @@ def connect( conn : DBConnection A connection to a LanceDB database. 
""" - if namespace_client_impl is not None or namespace_client_properties is not None: - if namespace_client_impl is None or namespace_client_properties is None: + if namespace_client_impl is not None: + if namespace_client_properties is None: raise ValueError( - "Both namespace_client_impl and " - "namespace_client_properties must be provided" + "namespace_client_properties must be provided when " + "namespace_client_impl is set" ) if kwargs: raise ValueError(f"Unknown keyword arguments: {kwargs}") @@ -175,6 +179,12 @@ def connect( namespace_client_pushdown_operations=namespace_client_pushdown_operations, ) + if namespace_client_properties is not None and not manifest_enabled: + raise ValueError( + "namespace_client_impl must be provided when using " + "namespace_client_properties unless manifest_enabled=True" + ) + if namespace_client_pushdown_operations is not None: raise ValueError( "namespace_client_pushdown_operations is only valid when " @@ -212,6 +222,8 @@ def connect( read_consistency_interval=read_consistency_interval, storage_options=storage_options, session=session, + manifest_enabled=manifest_enabled, + namespace_client_properties=namespace_client_properties, ) @@ -289,6 +301,8 @@ def deserialize_conn( parsed["uri"], read_consistency_interval=rci, storage_options=storage_options, + manifest_enabled=parsed.get("manifest_enabled", False), + namespace_client_properties=parsed.get("namespace_client_properties"), ) else: raise ValueError(f"Unknown connection_type: {connection_type}") @@ -304,6 +318,8 @@ async def connect_async( client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None, storage_options: Optional[Dict[str, str]] = None, session: Optional[Session] = None, + manifest_enabled: bool = False, + namespace_client_properties: Optional[Dict[str, str]] = None, ) -> AsyncConnection: """Connect to a LanceDB database. 
@@ -343,6 +359,13 @@ async def connect_async( cache sizes for index and metadata caches, which can significantly impact memory use and performance. They can also be re-used across multiple connections to share the same cache state. + manifest_enabled : bool, default False + When true for local/native connections, use directory namespace + manifests as the source of truth for table metadata. Existing + directory-listed root tables are migrated into the manifest on access. + namespace_client_properties : dict, optional + Additional directory namespace client properties to use with + ``manifest_enabled=True``. Examples -------- @@ -385,6 +408,8 @@ async def connect_async( client_config, storage_options, session, + manifest_enabled, + namespace_client_properties, ) ) @@ -412,13 +437,3 @@ __all__ = [ "Table", "__version__", ] - - -def __warn_on_fork(): - warnings.warn( - "lance is not fork-safe. If you are using multiprocessing, use spawn instead.", - ) - - -if hasattr(os, "register_at_fork"): - os.register_at_fork(before=__warn_on_fork) # type: ignore[attr-defined] diff --git a/python/python/lancedb/_lancedb.pyi b/python/python/lancedb/_lancedb.pyi index 76c08041b..b33f89e40 100644 --- a/python/python/lancedb/_lancedb.pyi +++ b/python/python/lancedb/_lancedb.pyi @@ -242,6 +242,8 @@ async def connect( client_config: Optional[Union[ClientConfig, Dict[str, Any]]], storage_options: Optional[Dict[str, str]], session: Optional[Session], + manifest_enabled: bool = False, + namespace_client_properties: Optional[Dict[str, str]] = None, ) -> Connection: ... class RecordBatchStream: @@ -440,7 +442,7 @@ class AsyncPermutationBuilder: async def execute(self) -> Table: ... def async_permutation_builder( - table: Table, dest_table_name: str + table: Table, ) -> AsyncPermutationBuilder: ... def fts_query_to_json(query: Any) -> str: ... 
diff --git a/python/python/lancedb/background_loop.py b/python/python/lancedb/background_loop.py index d132dd82d..b39da229d 100644 --- a/python/python/lancedb/background_loop.py +++ b/python/python/lancedb/background_loop.py @@ -2,7 +2,9 @@ # SPDX-FileCopyrightText: Copyright The LanceDB Authors import asyncio +import os import threading +import warnings class BackgroundEventLoop: @@ -13,6 +15,9 @@ class BackgroundEventLoop: """ def __init__(self): + self._start() + + def _start(self): self.loop = asyncio.new_event_loop() self.thread = threading.Thread( target=self.loop.run_forever, @@ -31,3 +36,30 @@ class BackgroundEventLoop: LOOP = BackgroundEventLoop() + +_FORK_WARNED = False + + +def _reset_after_fork(): + # Threads do not survive fork(), so the asyncio loop in LOOP.thread is + # dead in the child. Re-initialize the singleton in place so existing + # `from .background_loop import LOOP` references in other modules see + # the new state. The Rust-side tokio runtime is reset analogously by a + # pthread_atfork hook installed in the _lancedb extension. + LOOP._start() + global _FORK_WARNED + if not _FORK_WARNED: + _FORK_WARNED = True + warnings.warn( + "lancedb fork support is experimental: the internal async " + "runtime has been reset in the forked child, but a small chance " + "of deadlock remains if other state was mid-operation at fork " + "time. 
The 'forkserver' or 'spawn' multiprocessing start method " + "is likely a safer alternative.", + RuntimeWarning, + stacklevel=2, + ) + + +if hasattr(os, "register_at_fork"): + os.register_at_fork(after_in_child=_reset_after_fork) diff --git a/python/python/lancedb/db.py b/python/python/lancedb/db.py index b07d409eb..276116db7 100644 --- a/python/python/lancedb/db.py +++ b/python/python/lancedb/db.py @@ -590,8 +590,13 @@ class LanceDBConnection(DBConnection): read_consistency_interval: Optional[timedelta] = None, storage_options: Optional[Dict[str, str]] = None, session: Optional[Session] = None, + manifest_enabled: bool = False, + namespace_client_properties: Optional[Dict[str, str]] = None, _inner: Optional[LanceDbConnection] = None, ): + self.storage_options = storage_options + self._manifest_enabled = manifest_enabled + self._namespace_client_properties = namespace_client_properties if _inner is not None: self._conn = _inner self._cached_namespace_client = None @@ -633,6 +638,8 @@ class LanceDBConnection(DBConnection): None, storage_options, session, + manifest_enabled, + namespace_client_properties, ) # TODO: It would be nice if we didn't store self.storage_options but it is @@ -640,7 +647,6 @@ class LanceDBConnection(DBConnection): # work because some paths like LanceDBConnection.from_inner will lose the # storage_options. Also, this class really shouldn't be holding any state # beyond _conn. 
- self.storage_options = storage_options self._conn = AsyncConnection(LOOP.run(do_connect())) self._cached_namespace_client: Optional[LanceNamespace] = None @@ -677,6 +683,8 @@ class LanceDBConnection(DBConnection): "connection_type": "local", "uri": self.uri, "storage_options": self.storage_options, + "manifest_enabled": self._manifest_enabled, + "namespace_client_properties": self._namespace_client_properties, "read_consistency_interval_seconds": ( rci.total_seconds() if rci else None ), diff --git a/python/python/lancedb/permutation.py b/python/python/lancedb/permutation.py index 724a0fd25..91532f0a7 100644 --- a/python/python/lancedb/permutation.py +++ b/python/python/lancedb/permutation.py @@ -1,11 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright The LanceDB Authors -from deprecation import deprecated -from lancedb import AsyncConnection, DBConnection -import pyarrow as pa +import copy import json +from deprecation import deprecated +import pyarrow as pa + from ._lancedb import async_permutation_builder, PermutationReader from .table import LanceTable from .background_loop import LOOP @@ -36,10 +37,7 @@ class PermutationBuilder: be referenced by name in the future. If names are not provided then they can only be referenced by their ordinal index. There is no requirement to name every split. - By default, the permutation will be stored in memory and will be lost when the - program exits. To persist the permutation (for very large datasets or to share - the permutation across multiple workers) use the [persist](#persist) method to - create a permanent table. + The permutation is stored in memory and will be lost when the program exits. 
""" def __init__(self, table: LanceTable): @@ -51,15 +49,6 @@ class PermutationBuilder: """ self._async = async_permutation_builder(table) - def persist( - self, database: Union[DBConnection, AsyncConnection], table_name: str - ) -> "PermutationBuilder": - """ - Persist the permutation to the given database. - """ - self._async.persist(database, table_name) - return self - def split_random( self, *, @@ -380,20 +369,44 @@ class Permutation: def __init__( self, - reader: PermutationReader, + base_table: LanceTable, + permutation_table: Optional[LanceTable], + split: int, selection: dict[str, str], batch_size: int, transform_fn: Callable[pa.RecordBatch, Any], + offset: Optional[int] = None, + limit: Optional[int] = None, + connection_factory: Optional[Callable[[str], LanceTable]] = None, + _reader: Optional[PermutationReader] = None, ): """ Internal constructor. Use [from_tables](#from_tables) instead. """ - assert reader is not None, "reader is required" + assert base_table is not None, "base_table is required" assert selection is not None, "selection is required" - self.reader = reader + self.base_table = base_table + self.permutation_table = permutation_table + self.split = split self.selection = selection self.transform_fn = transform_fn self.batch_size = batch_size + self.offset = offset + self.limit = limit + self.connection_factory = connection_factory + if _reader is None: + _reader = LOOP.run(self._build_reader()) + self.reader: PermutationReader = _reader + + async def _build_reader(self) -> PermutationReader: + reader = await PermutationReader.from_tables( + self.base_table, self.permutation_table, self.split + ) + if self.offset is not None: + reader = await reader.with_offset(self.offset) + if self.limit is not None: + reader = await reader.with_limit(self.limit) + return reader def _with_selection(self, selection: dict[str, str]) -> "Permutation": """ @@ -402,21 +415,97 @@ class Permutation: Does not validation of the selection and it replaces it 
entirely. This is not intended for public use. """ - return Permutation(self.reader, selection, self.batch_size, self.transform_fn) - - def _with_reader(self, reader: PermutationReader) -> "Permutation": - """ - Creates a new permutation with the given reader - - This is an internal method and should not be used directly. - """ - return Permutation(reader, self.selection, self.batch_size, self.transform_fn) + new = copy.copy(self) + new.selection = selection + return new def with_batch_size(self, batch_size: int) -> "Permutation": """ Creates a new permutation with the given batch size """ - return Permutation(self.reader, self.selection, batch_size, self.transform_fn) + new = copy.copy(self) + new.batch_size = batch_size + return new + + def with_connection_factory( + self, connection_factory: Callable[[str], LanceTable] + ) -> "Permutation": + """ + Creates a new permutation that will use ``connection_factory`` to reopen + the base table when this permutation is unpickled in a worker process. + + The factory is a callable that takes a single argument — the base table + name — and returns a [LanceTable]. It must be picklable; it is + pickled with the permutation and called in the worker to recover the base + table. In practice, picklable callables are top-level (module-level) + functions, ``functools.partial`` of such functions, or instances of + picklable classes implementing ``__call__``. Lambdas and closures over + local variables don't pickle with the default protocol. + + Setting a factory is necessary when the URI alone is not enough to + re-open the connection — most importantly for LanceDB Cloud (``db://``) + connections, where ``api_key`` and ``region`` aren't recoverable from + the connection object after construction. + + For local file or cloud-storage paths the factory is optional: if not + set, ``__getstate__`` falls back to capturing + ``(uri, storage_options, namespace_path)`` and re-opening via + ``lancedb.connect(uri, storage_options=...)``.
+ + Examples + -------- + Basic native (file-system path), parameterized via ``functools.partial``:: + + import functools, lancedb + from lancedb.permutation import Permutation + + def open_native_table(uri: str, table_name: str): + return lancedb.connect(uri).open_table(table_name) + + factory = functools.partial(open_native_table, "/data/lance_db") + permutation = Permutation.identity( + factory("training") + ).with_connection_factory(factory) + + Native via :func:`lancedb.connect_namespace` (e.g. a directory- or + REST-backed namespace client). The factory takes the + implementation name and properties dict as partial-bound args so + the worker can rebuild the same namespace connection:: + + def open_via_namespace( + impl: str, properties: dict[str, str], table_name: str, + ): + return lancedb.connect_namespace(impl, properties).open_table( + table_name, + ) + + factory = functools.partial( + open_via_namespace, + "dir", + {"root": "/data/lance_db"}, + ) + + LanceDB Cloud, reading credentials from env vars at worker startup + so secrets aren't pickled into the dataset:: + + import os, lancedb + + def open_remote_table(table_name: str): + db = lancedb.connect( + "db://my-database", + api_key=os.environ["LANCEDB_API_KEY"], + region=os.environ.get("LANCEDB_REGION", "us-east-1"), + ) + return db.open_table(table_name) + + permutation = Permutation.identity( + open_remote_table("training") + ).with_connection_factory(open_remote_table) + """ + assert connection_factory is not None, "connection_factory is required" + new = copy.copy(self) + new.connection_factory = connection_factory + return new @classmethod def identity(cls, table: LanceTable) -> "Permutation": @@ -489,11 +578,126 @@ class Permutation: schema = await reader.output_schema(None) initial_selection = {name: name for name in schema.names} return cls( - reader, initial_selection, DEFAULT_BATCH_SIZE, Transforms.arrow2python + base_table, + permutation_table, + split, + initial_selection, + 
DEFAULT_BATCH_SIZE, + Transforms.arrow2python, + _reader=reader, ) return LOOP.run(do_from_tables()) + def __getstate__(self) -> dict[str, Any]: + """Build a picklable state dict for this permutation. + + The base table is captured either via a user-supplied + ``connection_factory`` (see [with_connection_factory]) or, as a + fallback, by introspecting ``(uri, storage_options, namespace_path)`` + on the connection. The permutation table — always an in-memory + LanceDB table — is captured as a pyarrow Table (which pickles via + Arrow IPC natively). The reader is dropped from the wire format; + ``__setstate__`` rebuilds it from the restored tables. + """ + permutation_data: Optional[pa.Table] = None + if self.permutation_table is not None: + permutation_data = self.permutation_table.to_arrow() + + common = { + "base_table_name": self.base_table.name, + "permutation_data": permutation_data, + "split": self.split, + "selection": self.selection, + "batch_size": self.batch_size, + "transform_fn": self.transform_fn, + "offset": self.offset, + "limit": self.limit, + "connection_factory": self.connection_factory, + } + + if self.connection_factory is not None: + # The factory carries enough state to recover the base table on + # its own; we don't need to capture the URI / storage options / + # namespace from the existing connection. + return common + + # URI-introspection fallback: only viable for native (OSS) connections + # where (uri, storage_options) is enough to reopen. Remote / cloud + # connections don't expose recoverable api_key / region — those users + # must call with_connection_factory(). + try: + base_uri = self.base_table._conn.uri + storage_options = self.base_table._conn.storage_options + except AttributeError as e: + raise ValueError( + "Cannot pickle this Permutation: the base table's connection " + "does not expose a uri/storage_options, which usually means it " + "is a remote (LanceDB Cloud) connection. Call " + "Permutation.with_connection_factory(...) 
first to provide a " + "picklable callable that re-opens the base table from a worker " + "process." + ) from e + + if base_uri.startswith("memory://"): + # In-memory base tables don't exist in any worker process by + # default, so dump the entire base table into the pickle. This + # can be expensive for large datasets — users with large + # in-memory base tables should either persist them or set a + # connection_factory. + return { + **common, + "base_table_data": self.base_table.to_arrow(), + } + + return { + **common, + "base_table_uri": base_uri, + "base_table_namespace": self.base_table._namespace_path, + "base_table_storage_options": storage_options, + } + + def __setstate__(self, state: dict[str, Any]) -> None: + from . import connect + + connection_factory = state["connection_factory"] + if connection_factory is not None: + base_table = connection_factory(state["base_table_name"]) + elif "base_table_data" in state: + # In-memory base table inlined into the pickle; rebuild the same + # way we rebuild the in-memory permutation table. 
+ mem_db = connect("memory://") + base_table = mem_db.create_table( + state["base_table_name"], state["base_table_data"] + ) + else: + base_db = connect( + state["base_table_uri"], + storage_options=state["base_table_storage_options"], + ) + base_table = base_db.open_table( + state["base_table_name"], + namespace_path=state["base_table_namespace"] or None, + ) + + permutation_table: Optional[LanceTable] = None + if state["permutation_data"] is not None: + mem_db = connect("memory://") + permutation_table = mem_db.create_table( + "permutation", state["permutation_data"] + ) + + self.base_table = base_table + self.permutation_table = permutation_table + self.split = state["split"] + self.selection = state["selection"] + self.batch_size = state["batch_size"] + self.transform_fn = state["transform_fn"] + self.offset = state["offset"] + self.limit = state["limit"] + self.connection_factory = connection_factory + self.reader = LOOP.run(self._build_reader()) + @property def schema(self) -> pa.Schema: async def do_output_schema(): @@ -760,7 +964,9 @@ class Permutation: for expensive operations such as image decoding. 
""" assert transform is not None, "transform is required" - return Permutation(self.reader, self.selection, self.batch_size, transform) + new = copy.copy(self) + new.transform_fn = transform + return new def __getitem__(self, index: int) -> Any: """ @@ -795,12 +1001,10 @@ class Permutation: """ Skip the first `skip` rows of the permutation """ - - async def do_with_skip(): - reader = await self.reader.with_offset(skip) - return self._with_reader(reader) - - return LOOP.run(do_with_skip()) + new = copy.copy(self) + new.offset = skip + new.reader = LOOP.run(new._build_reader()) + return new @deprecated(details="Use with_take instead") def take(self, limit: int) -> "Permutation": @@ -818,12 +1022,10 @@ class Permutation: """ Limit the permutation to `limit` rows (following any `skip`) """ - - async def do_with_take(): - reader = await self.reader.with_limit(limit) - return self._with_reader(reader) - - return LOOP.run(do_with_take()) + new = copy.copy(self) + new.limit = limit + new.reader = LOOP.run(new._build_reader()) + return new @deprecated(details="Use with_repeat instead") def repeat(self, times: int) -> "Permutation": diff --git a/python/python/lancedb/query.py b/python/python/lancedb/query.py index b796fc40c..0a9473a0a 100644 --- a/python/python/lancedb/query.py +++ b/python/python/lancedb/query.py @@ -1643,7 +1643,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder): def _validate_query(self, query, vector=None, text=None): if query is not None and (vector is not None or text is not None): raise ValueError( - "You can either provide a string query in search() method" + "You can either provide a string query in search() method " "or set `vector()` and `text()` explicitly for hybrid search." "But not both." 
) diff --git a/python/python/tests/test_permutation.py b/python/python/tests/test_permutation.py index bb92ba0ba..96d77f9d1 100644 --- a/python/python/tests/test_permutation.py +++ b/python/python/tests/test_permutation.py @@ -9,21 +9,6 @@ from lancedb import DBConnection, Table, connect from lancedb.permutation import Permutation, Permutations, permutation_builder -def test_permutation_persistence(tmp_path): - db = connect(tmp_path) - tbl = db.create_table("test_table", pa.table({"x": range(100), "y": range(100)})) - - permutation_tbl = ( - permutation_builder(tbl).shuffle().persist(db, "test_permutation").execute() - ) - assert permutation_tbl.count_rows() == 100 - - re_open = db.open_table("test_permutation") - assert re_open.count_rows() == 100 - - assert permutation_tbl.to_arrow() == re_open.to_arrow() - - def test_split_random_ratios(mem_db): """Test random splitting with ratios.""" tbl = mem_db.create_table( diff --git a/python/python/tests/test_remote_db.py b/python/python/tests/test_remote_db.py index 0dd880cc0..a499275c5 100644 --- a/python/python/tests/test_remote_db.py +++ b/python/python/tests/test_remote_db.py @@ -6,6 +6,8 @@ import contextlib from datetime import timedelta import http.server import json +import multiprocessing as mp +import sys import threading import time from unittest.mock import MagicMock, patch @@ -1230,3 +1232,82 @@ def test_background_loop_cancellation(exception): with pytest.raises(exception): loop.run(None) mock_future.cancel.assert_called_once() + + +def _remote_fork_child(port: int, queue) -> None: + # Build a fresh Connection in the child so we exercise the at-fork-child + # tokio runtime reset rather than relying on an inherited reqwest client. 
+ db = lancedb.connect( + "db://dev", + api_key="fake", + host_override=f"http://localhost:{port}", + client_config={ + "retry_config": {"retries": 0}, + "timeout_config": {"connect_timeout": 2, "read_timeout": 2}, + }, + ) + queue.put(db.table_names()) + + +@pytest.mark.skipif( + sys.platform != "linux", + reason=( + "fork() is unavailable on Windows and unsafe on macOS " + "(Apple frameworks/TLS are not fork-safe)" + ), +) +def test_remote_connection_after_fork(): + """A freshly-built remote Connection in a forked child should not hang. + + The pyo3-async-runtimes tokio runtime would otherwise be inherited from + the parent with dead worker threads; the at-fork-child handler in our + runtime module rebuilds it on first use in the child. + """ + + def handler(request): + request.send_response(200) + request.send_header("Content-Type", "application/json") + request.end_headers() + request.wfile.write(b'{"tables": []}') + + server = http.server.HTTPServer(("localhost", 0), make_mock_http_handler(handler)) + port = server.server_address[1] + server_thread = threading.Thread(target=server.serve_forever) + server_thread.start() + try: + # Hit the server in the parent first so the runtime + LOOP are warm + # before fork; a fresh child must still succeed. 
+ parent_db = lancedb.connect( + "db://dev", + api_key="fake", + host_override=f"http://localhost:{port}", + client_config={ + "retry_config": {"retries": 0}, + "timeout_config": {"connect_timeout": 2, "read_timeout": 2}, + }, + ) + assert parent_db.table_names() == [] + + ctx = mp.get_context("fork") + queue = ctx.Queue() + proc = ctx.Process(target=_remote_fork_child, args=(port, queue)) + proc.start() + proc.join(timeout=15) + + if proc.is_alive(): + proc.terminate() + proc.join(timeout=5) + if proc.is_alive(): + proc.kill() + proc.join() + pytest.fail("Remote connection hung after fork") + + assert proc.exitcode == 0, f"child exited with code {proc.exitcode}" + assert not queue.empty(), "child produced no result" + assert queue.get() == [] + + # Parent connection must still be usable after the child returned. + assert parent_db.table_names() == [] + finally: + server.shutdown() + server_thread.join() diff --git a/python/python/tests/test_torch.py b/python/python/tests/test_torch.py index ef1c5e73b..d17e60bbd 100644 --- a/python/python/tests/test_torch.py +++ b/python/python/tests/test_torch.py @@ -1,14 +1,29 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright The LanceDB Authors +import functools +import multiprocessing as mp +import pickle +import sys + +import lancedb import pyarrow as pa import pytest +from lancedb.permutation import Permutation, Permutations, permutation_builder from lancedb.util import tbl_to_tensor -from lancedb.permutation import Permutation torch = pytest.importorskip("torch") +def _open_native_table(uri: str, table_name: str): + """Top-level connection factory used by the explicit-factory pickle test. + + Defined at module scope so that pickle can resolve it by name in the + worker / unpickling process. 
+ """ + return lancedb.connect(uri).open_table(table_name) + + def test_table_dataloader(mem_db): table = mem_db.create_table("test_table", pa.table({"a": range(1000)})) dataloader = torch.utils.data.DataLoader( @@ -40,3 +55,156 @@ def test_permutation_dataloader(mem_db): for batch in dataloader: assert batch.size(0) == 1 assert batch.size(1) == 10 + + +def test_permutation_is_picklable(tmp_db): + """A Permutation must be picklable so it can be used with PyTorch's + DataLoader when num_workers > 0 (which uses multiprocessing and pickles + the dataset to pass it to worker processes).""" + table = tmp_db.create_table("test_table", pa.table({"a": range(1000)})) + permutation = Permutation.identity(table) + + pickled = pickle.dumps(permutation) + restored = pickle.loads(pickled) + + assert len(restored) == 1000 + rows = restored.__getitems__([0, 1, 2]) + assert rows == [{"a": 0}, {"a": 1}, {"a": 2}] + + +def test_permutation_with_memory_base_is_picklable(mem_db): + """An in-memory base table is inlined into the pickle as Arrow IPC bytes + and rebuilt on the other side as an in-memory LanceTable, so the + Permutation round-trips even though the original database can't be + reopened across processes.""" + table = mem_db.create_table("test_table", pa.table({"a": range(50)})) + permutation = Permutation.identity(table) + + restored = pickle.loads(pickle.dumps(permutation)) + + assert len(restored) == 50 + assert restored.__getitems__([0, 10, 49]) == [{"a": 0}, {"a": 10}, {"a": 49}] + + +def test_permutation_dataloader_multiprocessing(tmp_db): + """Using a Permutation with a PyTorch DataLoader that has num_workers > 0 + must work end-to-end. 
Each worker process gets a pickled copy of the + dataset and reads batches from it.""" + table = tmp_db.create_table("test_table", pa.table({"a": range(1000)})) + permutation = Permutation.identity(table) + + dataloader = torch.utils.data.DataLoader( + permutation, + batch_size=10, + shuffle=True, + num_workers=2, + multiprocessing_context="spawn", + ) + seen = 0 + for batch in dataloader: + assert batch["a"].size(0) == 10 + seen += batch["a"].size(0) + assert seen == 1000 + + +def test_permutation_pickle_with_connection_factory(tmp_path): + """When the user provides a connection_factory, pickling should round-trip + through that factory rather than introspecting the connection URI. Useful + for remote / cloud connections where the URI alone isn't reopenable.""" + db = lancedb.connect(tmp_path) + db.create_table("test_table", pa.table({"a": range(50)})) + + factory = functools.partial(_open_native_table, str(tmp_path)) + permutation = Permutation.identity(factory("test_table")).with_connection_factory( + factory + ) + + restored = pickle.loads(pickle.dumps(permutation)) + + assert len(restored) == 50 + # The factory survives pickling and is what powered base-table reopen. 
+ assert restored.connection_factory is not None + assert restored.connection_factory.func is _open_native_table + assert restored.__getitems__([0, 1, 2]) == [{"a": 0}, {"a": 1}, {"a": 2}] + + +def test_permutation_with_builder_is_picklable(tmp_db): + """A Permutation built from a non-identity permutation table must round-trip + through pickle while preserving the row order defined by the permutation.""" + table = tmp_db.create_table("test_table", pa.table({"a": range(100)})) + perm_tbl = ( + permutation_builder(table) + .split_random(ratios=[0.8, 0.2], seed=42, split_names=["train", "test"]) + .shuffle(seed=42) + .execute() + ) + permutations = Permutations(table, perm_tbl) + permutation = permutations["train"] + + indices = list(range(len(permutation))) + expected = permutation.__getitems__(indices) + + restored = pickle.loads(pickle.dumps(permutation)) + + assert len(restored) == len(permutation) + assert restored.__getitems__(indices) == expected + + +def _multiworker_dataloader_target(db_uri: str, result_queue): + import lancedb + from lancedb.permutation import Permutation + + db = lancedb.connect(db_uri) + table = db.open_table("test_table") + permutation = Permutation.identity(table) + + dataloader = torch.utils.data.DataLoader( + permutation, + batch_size=10, + num_workers=2, + multiprocessing_context="fork", + ) + count = 0 + for batch in dataloader: + assert batch["a"].size(0) == 10 + count += 1 + result_queue.put(count) + + +@pytest.mark.skipif( + sys.platform != "linux", + reason=( + "fork() is unavailable on Windows and unsafe on macOS " + "(Apple frameworks/TLS are not fork-safe)" + ), +) +def test_permutation_dataloader_fork_workers(tmp_path): + """A Permutation used by a fork-based DataLoader should not hang. + + PyTorch's DataLoader uses fork-based multiprocessing by default on Linux. + LanceDB drives async work through a background asyncio thread that does + not survive a fork, so any LOOP.run() in a worker blocks forever. 
+ """ + import lancedb + + db_uri = str(tmp_path / "db") + db = lancedb.connect(db_uri) + db.create_table("test_table", pa.table({"a": list(range(1000))})) + + ctx = mp.get_context("spawn") + queue = ctx.Queue() + proc = ctx.Process(target=_multiworker_dataloader_target, args=(db_uri, queue)) + proc.start() + proc.join(timeout=30) + + if proc.is_alive(): + proc.terminate() + proc.join(timeout=5) + if proc.is_alive(): + proc.kill() + proc.join() + pytest.fail("Permutation hung when iterated in a fork-based DataLoader worker") + + assert proc.exitcode == 0, f"child exited with code {proc.exitcode}" + assert not queue.empty(), "child produced no batches" + assert queue.get() == 100 diff --git a/python/src/arrow.rs b/python/src/arrow.rs index fd3a05964..f0b4bceed 100644 --- a/python/src/arrow.rs +++ b/python/src/arrow.rs @@ -3,6 +3,8 @@ use std::sync::Arc; +use crate::error::PythonErrorExt; +use crate::runtime::future_into_py; use arrow::{ datatypes::SchemaRef, pyarrow::{IntoPyArrow, ToPyArrow}, @@ -12,9 +14,6 @@ use lancedb::arrow::SendableRecordBatchStream; use pyo3::{ Bound, Py, PyAny, PyRef, PyResult, Python, exceptions::PyStopAsyncIteration, pyclass, pymethods, }; -use pyo3_async_runtimes::tokio::future_into_py; - -use crate::error::PythonErrorExt; #[pyclass] pub struct RecordBatchStream { diff --git a/python/src/connection.rs b/python/src/connection.rs index 9c67f38c7..703b44424 100644 --- a/python/src/connection.rs +++ b/python/src/connection.rs @@ -7,6 +7,12 @@ use std::{ time::Duration, }; +use crate::{ + error::PythonErrorExt, + namespace::{create_namespace_storage_options_provider, extract_namespace_arc}, + runtime::future_into_py, + table::Table, +}; use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::FromPyArrow}; use lancedb::{ connection::Connection as LanceConnection, @@ -20,13 +26,6 @@ use pyo3::{ pyclass, pyfunction, pymethods, types::{PyDict, PyDictMethods}, }; -use pyo3_async_runtimes::tokio::future_into_py; - -use crate::{ - 
error::PythonErrorExt, - namespace::{create_namespace_storage_options_provider, extract_namespace_arc}, - table::Table, -}; #[pyclass] pub struct Connection { @@ -525,7 +524,7 @@ impl Connection { } #[pyfunction] -#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))] +#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None, manifest_enabled=false, namespace_client_properties=None))] #[allow(clippy::too_many_arguments)] pub fn connect( py: Python<'_>, @@ -537,6 +536,8 @@ pub fn connect( client_config: Option, storage_options: Option>, session: Option, + manifest_enabled: bool, + namespace_client_properties: Option>, ) -> PyResult> { future_into_py(py, async move { let mut builder = lancedb::connect(&uri); @@ -556,6 +557,12 @@ pub fn connect( if let Some(storage_options) = storage_options { builder = builder.storage_options(storage_options); } + if manifest_enabled { + builder = builder.manifest_enabled(true); + } + if let Some(namespace_client_properties) = namespace_client_properties { + builder = builder.namespace_client_properties(namespace_client_properties); + } #[cfg(feature = "remote")] if let Some(client_config) = client_config { builder = builder.client_config(client_config.into()); diff --git a/python/src/expr.rs b/python/src/expr.rs index e12c7d0a8..7d29fcd05 100644 --- a/python/src/expr.rs +++ b/python/src/expr.rs @@ -17,7 +17,7 @@ use pyo3::{Bound, PyAny, PyResult, exceptions::PyValueError, prelude::*, pyfunct /// [`expr_lit`] and combined with the methods on this struct. On the Python /// side a thin wrapper class (`lancedb.expr.Expr`) delegates to these methods /// and adds Python operator overloads. 
-#[pyclass(name = "PyExpr")] +#[pyclass(name = "PyExpr", from_py_object)] #[derive(Clone)] pub struct PyExpr(pub DfExpr); diff --git a/python/src/header.rs b/python/src/header.rs index 13338f4ec..85ad14358 100644 --- a/python/src/header.rs +++ b/python/src/header.rs @@ -33,7 +33,7 @@ impl PyHeaderProvider { Ok(headers_py) => { // Convert Python dict to Rust HashMap let bound_headers = headers_py.bind(py); - let dict: &Bound = bound_headers.downcast().map_err(|e| { + let dict: &Bound = bound_headers.cast().map_err(|e| { format!("HeaderProvider.get_headers must return a dict: {}", e) })?; diff --git a/python/src/index.rs b/python/src/index.rs index 602b5e420..ce90280b0 100644 --- a/python/src/index.rs +++ b/python/src/index.rs @@ -13,7 +13,7 @@ use pyo3::{ Bound, FromPyObject, PyAny, PyResult, Python, exceptions::{PyKeyError, PyValueError}, intern, pyclass, pymethods, - types::PyAnyMethods, + types::{PyAnyMethods, PyString}, }; use crate::util::parse_distance_type; @@ -22,7 +22,7 @@ pub fn class_name(ob: &'_ Bound<'_, PyAny>) -> PyResult { let full_name = ob .getattr(intern!(ob.py(), "__class__"))? 
.getattr(intern!(ob.py(), "__name__"))?; - let full_name = full_name.downcast()?.to_string_lossy(); + let full_name = full_name.cast::()?.to_string_lossy(); match full_name.rsplit_once('.') { Some((_, name)) => Ok(name.to_string()), diff --git a/python/src/lib.rs b/python/src/lib.rs index 7dd52bdc2..d0e933dba 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -28,6 +28,7 @@ pub mod index; pub mod namespace; pub mod permutation; pub mod query; +pub mod runtime; pub mod session; pub mod table; pub mod util; diff --git a/python/src/namespace.rs b/python/src/namespace.rs index e96c667a6..bb0e4d6d3 100644 --- a/python/src/namespace.rs +++ b/python/src/namespace.rs @@ -183,7 +183,7 @@ async fn call_py_method_primitive( ) -> lance_core::Result where Req: serde::Serialize + Send + 'static, - Resp: for<'py> pyo3::FromPyObject<'py> + Send + 'static, + Resp: for<'a, 'py> pyo3::FromPyObject<'a, 'py> + Send + 'static, { let request_json = serde_json::to_string(&request).map_err(|e| { lance_core::Error::io(format!( @@ -203,7 +203,7 @@ where // Call the Python method let result = py_namespace.call_method1(py, method_name, (request_arg,))?; - let value: Resp = result.extract(py)?; + let value: Resp = result.extract(py).map_err(Into::into)?; Ok::<_, PyErr>(value) }) }) diff --git a/python/src/permutation.rs b/python/src/permutation.rs index 21b8c9c47..75e1fe1b7 100644 --- a/python/src/permutation.rs +++ b/python/src/permutation.rs @@ -4,7 +4,7 @@ use std::sync::{Arc, Mutex}; use crate::{ - arrow::RecordBatchStream, connection::Connection, error::PythonErrorExt, table::Table, + arrow::RecordBatchStream, error::PythonErrorExt, runtime::future_into_py, table::Table, }; use arrow::pyarrow::{PyArrowType, ToPyArrow}; use lancedb::{ @@ -21,16 +21,15 @@ use pyo3::{ pyclass, pymethods, types::{PyAnyMethods, PyDict, PyDictMethods, PyType}, }; -use pyo3_async_runtimes::tokio::future_into_py; fn table_from_py<'a>(table: Bound<'a, PyAny>) -> PyResult> { if table.hasattr("_inner")? 
{ - Ok(table.getattr("_inner")?.downcast_into::()?) + Ok(table.getattr("_inner")?.cast_into::
()?) } else if table.hasattr("_table")? { Ok(table .getattr("_table")? .getattr("_inner")? - .downcast_into::
()?) + .cast_into::
()?) } else { Err(PyRuntimeError::new_err( "Provided table does not appear to be a Table or RemoteTable instance", @@ -80,24 +79,6 @@ impl PyAsyncPermutationBuilder { #[pymethods] impl PyAsyncPermutationBuilder { - #[pyo3(signature = (database, table_name))] - pub fn persist( - slf: PyRefMut<'_, Self>, - database: Bound<'_, PyAny>, - table_name: String, - ) -> PyResult { - let conn = if database.hasattr("_conn")? { - database - .getattr("_conn")? - .getattr("_inner")? - .downcast_into::()? - } else { - database.getattr("_inner")?.downcast_into::()? - }; - let database = conn.borrow().database()?; - slf.modify(|builder| builder.persist(database, table_name)) - } - #[pyo3(signature = (*, ratios=None, counts=None, fixed=None, seed=None, split_names=None))] pub fn split_random( slf: PyRefMut<'_, Self>, @@ -243,7 +224,7 @@ impl PyPermutationReader { let Some(selection) = selection else { return Ok(Select::All); }; - let selection = selection.downcast_into::()?; + let selection = selection.cast_into::()?; let selection = selection .iter() .map(|(key, value)| { diff --git a/python/src/query.rs b/python/src/query.rs index 98876739b..1dc4f08db 100644 --- a/python/src/query.rs +++ b/python/src/query.rs @@ -4,6 +4,11 @@ use std::sync::Arc; use std::time::Duration; +use crate::expr::PyExpr; +use crate::runtime::future_into_py; +use crate::util::parse_distance_type; +use crate::{arrow::RecordBatchStream, util::PyLanceDB}; +use crate::{error::PythonErrorExt, index::class_name}; use arrow::array::Array; use arrow::array::ArrayData; use arrow::array::make_array; @@ -33,19 +38,16 @@ use pyo3::pyfunction; use pyo3::pymethods; use pyo3::types::PyList; use pyo3::types::{PyDict, PyString}; -use pyo3::{FromPyObject, exceptions::PyRuntimeError}; +use pyo3::{Borrowed, FromPyObject, exceptions::PyRuntimeError}; use pyo3::{PyErr, pyclass}; use pyo3::{exceptions::PyValueError, intern}; -use pyo3_async_runtimes::tokio::future_into_py; -use crate::expr::PyExpr; -use 
crate::util::parse_distance_type; -use crate::{arrow::RecordBatchStream, util::PyLanceDB}; -use crate::{error::PythonErrorExt, index::class_name}; +impl<'a, 'py> FromPyObject<'a, 'py> for PyLanceDB { + type Error = PyErr; -impl FromPyObject<'_> for PyLanceDB { - fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult { - match class_name(ob)?.as_str() { + fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult { + let ob = ob.to_owned(); + match class_name(&ob)?.as_str() { "MatchQuery" => { let query = ob.getattr("query")?.extract()?; let column = ob.getattr("column")?.extract()?; @@ -424,7 +426,7 @@ impl Query { "Query text is required for nearest_to_text", ))?; - let query = if let Ok(query_text) = fts_query.downcast::() { + let query = if let Ok(query_text) = fts_query.cast::() { let mut query_text = query_text.to_string(); let columns = query .get_item("columns")? @@ -606,7 +608,7 @@ impl TakeQuery { } } -#[pyclass] +#[pyclass(from_py_object)] #[derive(Clone)] pub struct FTSQuery { inner: LanceDbQuery, @@ -735,7 +737,7 @@ impl FTSQuery { } } -#[pyclass] +#[pyclass(from_py_object)] #[derive(Clone)] pub struct VectorQuery { inner: LanceDbVectorQuery, diff --git a/python/src/runtime.rs b/python/src/runtime.rs new file mode 100644 index 000000000..39ebfdaa8 --- /dev/null +++ b/python/src/runtime.rs @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The LanceDB Authors + +//! Fork-safe wrapper around tokio + pyo3-async-runtimes. +//! +//! `pyo3_async_runtimes::tokio` keeps its multi-threaded runtime in a +//! `OnceLock` that can never be replaced. Tokio's worker threads do not +//! survive `fork()`, so once a child inherits a "frozen" runtime, every +//! `future_into_py` call hangs forever. +//! +//! We sidestep the global by routing every future through our own +//! [`LanceRuntime`] (a [`pyo3_async_runtimes::generic::Runtime`] impl) backed +//! by an [`AtomicPtr`] to a tokio runtime that we own. A `pthread_atfork` +//! 
child handler nulls the pointer; the next `spawn` rebuilds the runtime in +//! the child. This mirrors the pattern used in the Lance Python bindings. + +use std::future::Future; +use std::pin::Pin; +use std::sync::atomic::{AtomicBool, AtomicPtr, Ordering}; + +use pyo3::{Bound, PyAny, PyResult, Python, conversion::IntoPyObject}; +use pyo3_async_runtimes::{ + TaskLocals, + generic::{ContextExt, JoinError, Runtime}, +}; +use tokio::{runtime, task}; + +static RUNTIME: AtomicPtr = AtomicPtr::new(std::ptr::null_mut()); +static RUNTIME_INSTALLING: AtomicBool = AtomicBool::new(false); +static ATFORK_INSTALLED: AtomicBool = AtomicBool::new(false); + +fn create_runtime() -> runtime::Runtime { + runtime::Builder::new_multi_thread() + .enable_all() + .thread_name("lancedb-tokio-worker") + .build() + .expect("Failed to build tokio runtime") +} + +fn get_runtime() -> &'static runtime::Runtime { + loop { + let ptr = RUNTIME.load(Ordering::SeqCst); + if !ptr.is_null() { + return unsafe { &*ptr }; + } + if !RUNTIME_INSTALLING.fetch_or(true, Ordering::SeqCst) { + break; + } + std::thread::yield_now(); + } + if !ATFORK_INSTALLED.fetch_or(true, Ordering::SeqCst) { + install_atfork(); + } + let new_ptr = Box::into_raw(Box::new(create_runtime())); + RUNTIME.store(new_ptr, Ordering::SeqCst); + unsafe { &*new_ptr } +} + +/// Runs in async-signal context after `fork()` in the child. We can only +/// touch atomics here; we deliberately leak the previous runtime because +/// dropping a tokio `Runtime` would try to join its (now-dead) worker +/// threads and hang. +extern "C" fn atfork_child() { + RUNTIME.store(std::ptr::null_mut(), Ordering::SeqCst); + RUNTIME_INSTALLING.store(false, Ordering::SeqCst); +} + +#[cfg(not(windows))] +fn install_atfork() { + unsafe { libc::pthread_atfork(None, None, Some(atfork_child)) }; +} + +#[cfg(windows)] +fn install_atfork() {} + +/// Marker type implementing [`Runtime`] over our fork-safe runtime slot. 
+pub struct LanceRuntime; + +/// Newtype wrapper around `tokio::task::JoinError` so we can implement the +/// foreign [`JoinError`] trait without violating orphan rules. +pub struct LanceJoinError(task::JoinError); + +impl JoinError for LanceJoinError { + fn is_panic(&self) -> bool { + self.0.is_panic() + } + fn into_panic(self) -> Box { + self.0.into_panic() + } +} + +impl Runtime for LanceRuntime { + type JoinError = LanceJoinError; + type JoinHandle = Pin> + Send>>; + + fn spawn(fut: F) -> Self::JoinHandle + where + F: Future + Send + 'static, + { + let handle = get_runtime().spawn(fut); + Box::pin(async move { handle.await.map_err(LanceJoinError) }) + } + + fn spawn_blocking(f: F) -> Self::JoinHandle + where + F: FnOnce() + Send + 'static, + { + let handle = get_runtime().spawn_blocking(f); + Box::pin(async move { handle.await.map_err(LanceJoinError) }) + } +} + +tokio::task_local! { + static TASK_LOCALS: std::cell::OnceCell; +} + +impl ContextExt for LanceRuntime { + fn scope(locals: TaskLocals, fut: F) -> Pin + Send>> + where + F: Future + Send + 'static, + { + let cell = std::cell::OnceCell::new(); + cell.set(locals).unwrap(); + Box::pin(TASK_LOCALS.scope(cell, fut)) + } + + fn get_task_locals() -> Option { + TASK_LOCALS + .try_with(|c| c.get().cloned()) + .unwrap_or_default() + } +} + +/// Drop-in replacement for `pyo3_async_runtimes::tokio::future_into_py` that +/// uses our fork-safe runtime. +pub fn future_into_py(py: Python<'_>, fut: F) -> PyResult> +where + F: Future> + Send + 'static, + T: for<'py> IntoPyObject<'py> + Send + 'static, +{ + pyo3_async_runtimes::generic::future_into_py::(py, fut) +} diff --git a/python/src/session.rs b/python/src/session.rs index 2433114b6..891e61e44 100644 --- a/python/src/session.rs +++ b/python/src/session.rs @@ -11,7 +11,7 @@ use pyo3::{PyResult, pyclass, pymethods}; /// Sessions allow you to configure cache sizes for index and metadata caches, /// which can significantly impact memory use and performance. 
They can /// also be re-used across multiple connections to share the same cache state. -#[pyclass] +#[pyclass(from_py_object)] #[derive(Clone)] pub struct Session { pub(crate) inner: Arc, diff --git a/python/src/table.rs b/python/src/table.rs index d44b6c1fd..9ac5af807 100644 --- a/python/src/table.rs +++ b/python/src/table.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright The LanceDB Authors use std::{collections::HashMap, sync::Arc}; +use crate::runtime::future_into_py; use crate::{ connection::Connection, error::PythonErrorExt, @@ -24,12 +25,11 @@ use pyo3::{ pyclass, pymethods, types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods}, }; -use pyo3_async_runtimes::tokio::future_into_py; mod scannable; /// Statistics about a compaction operation. -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct CompactionStats { /// The number of fragments removed @@ -43,7 +43,7 @@ pub struct CompactionStats { } /// Statistics about a cleanup operation -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct RemovalStats { /// The number of bytes removed @@ -53,7 +53,7 @@ pub struct RemovalStats { } /// Statistics about an optimize operation -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct OptimizeStats { /// Statistics about the compaction operation @@ -62,7 +62,7 @@ pub struct OptimizeStats { pub prune: RemovalStats, } -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct UpdateResult { pub rows_updated: u64, @@ -88,7 +88,7 @@ impl From for UpdateResult { } } -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct AddResult { pub version: u64, @@ -109,7 +109,7 @@ impl From for AddResult { } } -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct DeleteResult { pub num_deleted_rows: u64, @@ -135,7 +135,7 @@ impl From for DeleteResult { } 
} -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct MergeResult { pub version: u64, @@ -171,7 +171,7 @@ impl From for MergeResult { } } -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct AddColumnsResult { pub version: u64, @@ -192,7 +192,7 @@ impl From for AddColumnsResult { } } -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct AlterColumnsResult { pub version: u64, @@ -213,7 +213,7 @@ impl From for AlterColumnsResult { } } -#[pyclass(get_all)] +#[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct DropColumnsResult { pub version: u64, diff --git a/python/src/table/scannable.rs b/python/src/table/scannable.rs index 5d02ca024..faf2c7fae 100644 --- a/python/src/table/scannable.rs +++ b/python/src/table/scannable.rs @@ -126,8 +126,11 @@ impl Scannable for PyScannable { } } -impl<'py> FromPyObject<'py> for PyScannable { - fn extract_bound(ob: &pyo3::Bound<'py, PyAny>) -> pyo3::PyResult { +impl<'a, 'py> FromPyObject<'a, 'py> for PyScannable { + type Error = pyo3::PyErr; + + fn extract(ob: pyo3::Borrowed<'a, 'py, PyAny>) -> pyo3::PyResult { + let ob = ob.to_owned(); // Convert from Scannable dataclass. 
let schema: PyArrowType = ob.getattr("schema")?.extract()?; let schema = Arc::new(schema.0); diff --git a/rust/lancedb/Cargo.toml b/rust/lancedb/Cargo.toml index 4913a420c..b05302b9b 100644 --- a/rust/lancedb/Cargo.toml +++ b/rust/lancedb/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lancedb" -version = "0.28.0-beta.9" +version = "0.28.0-beta.11" edition.workspace = true description = "LanceDB: A serverless, low-latency vector database for AI applications" license.workspace = true @@ -108,10 +108,20 @@ test-log = "0.2" [features] default = [] -aws = ["lance/aws", "lance-io/aws", "lance-namespace-impls/dir-aws"] +aws = [ + "lance/aws", + "lance-io/aws", + "lance-namespace-impls/dir-aws", + "object_store/aws", +] oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"] gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"] -azure = ["lance/azure", "lance-io/azure", "lance-namespace-impls/dir-azure"] +azure = [ + "lance/azure", + "lance-io/azure", + "lance-namespace-impls/dir-azure", + "lance-namespace-impls/credential-vendor-azure", +] huggingface = [ "lance/huggingface", "lance-io/huggingface", diff --git a/rust/lancedb/src/connection.rs b/rust/lancedb/src/connection.rs index 9e0d3ea3f..8034c2a53 100644 --- a/rust/lancedb/src/connection.rs +++ b/rust/lancedb/src/connection.rs @@ -590,6 +590,15 @@ pub struct ConnectRequest { /// storage options. pub namespace_client_properties: HashMap, + /// Use directory namespace manifests as the source of truth for native + /// LanceDB table metadata. + /// + /// When enabled for a local/native connection, LanceDB returns a + /// namespace-backed database directly. Directory listing fallback remains + /// enabled for migration, and directory-listing-to-manifest migration is + /// forced on. + pub manifest_enabled: bool, + /// The interval at which to check for updates from other processes. /// /// If None, then consistency is not checked. 
For performance @@ -630,6 +639,7 @@ impl ConnectBuilder { read_consistency_interval: None, options: HashMap::new(), namespace_client_properties: HashMap::new(), + manifest_enabled: false, session: None, }, embedding_registry: None, @@ -791,6 +801,17 @@ impl ConnectBuilder { self } + /// Enable or disable manifest-backed directory namespace mode for local + /// native connections. + /// + /// When enabled, the connection uses the directory namespace database + /// directly for all table operations and forces + /// `dir_listing_to_manifest_migration_enabled=true`. + pub fn manifest_enabled(mut self, enabled: bool) -> Self { + self.request.manifest_enabled = enabled; + self + } + /// The interval at which to check for updates from other processes. This /// only affects LanceDB OSS. /// @@ -886,6 +907,16 @@ impl ConnectBuilder { pub async fn execute(self) -> Result { if self.request.uri.starts_with("db") { self.execute_remote() + } else if self.request.manifest_enabled { + let internal = Arc::new( + ListingDatabase::connect_manifest_enabled_namespace_database(&self.request).await?, + ); + Ok(Connection { + internal, + embedding_registry: self + .embedding_registry + .unwrap_or_else(|| Arc::new(MemoryRegistry::new())), + }) } else { let internal = Arc::new(ListingDatabase::connect_with_options(&self.request).await?); Ok(Connection { @@ -1132,6 +1163,9 @@ mod tests { use lance_testing::datagen::{BatchGenerator, IncrementingInt32}; use tempfile::tempdir; + use crate::database::listing::{ListingDatabaseOptions, OPT_NEW_TABLE_V2_MANIFEST_PATHS}; + use crate::database::namespace::LanceNamespaceDatabase; + use crate::table::NativeTable; use crate::test_utils::connection::new_test_connection; use super::*; @@ -1204,6 +1238,147 @@ mod tests { ); } + #[tokio::test] + async fn test_connect_with_manifest_enabled_uses_directory_namespace() { + let tmp_dir = tempdir().unwrap(); + let uri = tmp_dir.path().to_str().unwrap(); + + let db = connect(uri) + .manifest_enabled(true) + 
.storage_option("timeout", "30s") + .namespace_client_property("manifest_enabled", "false") + .namespace_client_property("dir_listing_to_manifest_migration_enabled", "false") + .execute() + .await + .unwrap(); + + assert!( + db.database() + .as_any() + .downcast_ref::() + .is_some() + ); + assert_eq!(db.uri(), uri); + + let (ns_impl, properties) = db.namespace_client_config().await.unwrap(); + assert_eq!(ns_impl, "dir"); + assert_eq!(properties.get("root"), Some(&uri.to_string())); + assert_eq!( + properties.get("manifest_enabled"), + Some(&"true".to_string()) + ); + assert_eq!( + properties.get("dir_listing_to_manifest_migration_enabled"), + Some(&"true".to_string()) + ); + assert_eq!(properties.get("storage.timeout"), Some(&"30s".to_string())); + } + + #[tokio::test] + async fn test_manifest_enabled_rejects_commit_engine_uri() { + let Err(err) = connect("s3+ddb://bucket/db?ddbTableName=manifest") + .manifest_enabled(true) + .execute() + .await + else { + panic!("expected manifest-enabled s3+ddb connection to fail"); + }; + assert!( + matches!(err, Error::NotSupported { message } if message.contains("commit engine URI schemes")) + ); + + let Err(err) = connect("s3://bucket/db?engine=ddb&ddbTableName=manifest") + .manifest_enabled(true) + .execute() + .await + else { + panic!("expected manifest-enabled engine query connection to fail"); + }; + assert!( + matches!(err, Error::NotSupported { message } if message.contains("commit engine")) + ); + } + + #[tokio::test] + async fn test_manifest_enabled_connection_migrates_root_listing_table() { + let tmp_dir = tempdir().unwrap(); + let uri = tmp_dir.path().to_str().unwrap(); + let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)])); + + connect(uri) + .execute() + .await + .unwrap() + .create_empty_table("legacy", schema) + .execute() + .await + .unwrap(); + + let db = connect(uri).manifest_enabled(true).execute().await.unwrap(); + let tables = db.table_names().execute().await.unwrap(); + 
assert_eq!(tables, vec!["legacy".to_string()]); + db.open_table("legacy").execute().await.unwrap(); + } + + #[tokio::test] + async fn test_manifest_enabled_preserves_new_table_options() { + let tmp_dir = tempdir().unwrap(); + let uri = tmp_dir.path().to_str().unwrap(); + let options = ListingDatabaseOptions::builder() + .enable_v2_manifest_paths(true) + .build(); + let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)])); + + let table = connect(uri) + .manifest_enabled(true) + .database_options(&options) + .execute() + .await + .unwrap() + .create_empty_table("v1_manifest", schema) + .storage_option(OPT_NEW_TABLE_V2_MANIFEST_PATHS, "false") + .execute() + .await + .unwrap(); + + let native_table = table + .base_table() + .as_any() + .downcast_ref::() + .unwrap(); + assert!(!native_table.uses_v2_manifest_paths().await.unwrap()); + } + + #[tokio::test] + async fn test_manifest_enabled_vend_input_storage_options() { + let tmp_dir = tempdir().unwrap(); + let uri = tmp_dir.path().to_str().unwrap(); + let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)])); + + let table = connect(uri) + .manifest_enabled(true) + .storage_option("test_storage_option", "test_value") + .namespace_client_property("vend_input_storage_options", "true") + .namespace_client_property( + "vend_input_storage_options_refresh_interval_millis", + "60000", + ) + .execute() + .await + .unwrap() + .create_empty_table("vended", schema) + .execute() + .await + .unwrap(); + + let storage_options = table.latest_storage_options().await.unwrap().unwrap(); + assert_eq!( + storage_options.get("test_storage_option"), + Some(&"test_value".to_string()) + ); + assert!(storage_options.contains_key("expires_at_millis")); + } + #[tokio::test] async fn test_table_names() { let tc = new_test_connection().await.unwrap(); diff --git a/rust/lancedb/src/database/listing.rs b/rust/lancedb/src/database/listing.rs index 02884bb63..7b7657bf3 100644 --- 
a/rust/lancedb/src/database/listing.rs +++ b/rust/lancedb/src/database/listing.rs @@ -285,7 +285,7 @@ const MIRRORED_STORE: &str = "mirroredStore"; /// A connection to LanceDB impl ListingDatabase { - fn build_namespace_client_properties( + pub(crate) fn build_namespace_client_properties( uri: &str, storage_options: &HashMap, namespace_client_properties: HashMap, @@ -298,6 +298,24 @@ impl ListingDatabase { properties } + pub(crate) fn build_manifest_enabled_namespace_client_properties( + uri: &str, + storage_options: &HashMap, + namespace_client_properties: HashMap, + ) -> HashMap { + let mut properties = Self::build_namespace_client_properties( + uri, + storage_options, + namespace_client_properties, + ); + properties.insert("manifest_enabled".to_string(), "true".to_string()); + properties.insert( + "dir_listing_to_manifest_migration_enabled".to_string(), + "true".to_string(), + ); + properties + } + async fn connect_namespace_database( uri: &str, storage_options: HashMap, @@ -323,6 +341,119 @@ impl ListingDatabase { )) } + async fn prepare_namespace_root( + uri: &str, + storage_options: &HashMap, + session: Arc, + ) -> Result { + match url::Url::parse(uri) { + Ok(url) if url.scheme().len() == 1 && cfg!(windows) => { + let (object_store, _) = ObjectStore::from_uri_and_params( + session.store_registry(), + uri, + &ObjectStoreParams::default(), + ) + .await?; + if object_store.is_local() { + Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?; + } + Ok(uri.to_string()) + } + Ok(mut url) => { + if url.scheme().contains('+') { + return Err(Error::NotSupported { + message: "commit engine URI schemes are not supported for manifest-enabled namespace connections".to_string(), + }); + } + + for (key, value) in url.query_pairs() { + if key == ENGINE { + return Err(Error::NotSupported { + message: format!( + "commit engine '{}' is not supported for manifest-enabled namespace connections", + value + ), + }); + } else if key == MIRRORED_STORE { + return 
Err(Error::NotSupported { + message: "mirrored store is not supported for manifest-enabled namespace connections" + .to_string(), + }); + } + } + + url.set_query(None); + let plain_uri = url.to_string(); + + let os_params = ObjectStoreParams { + storage_options_accessor: if storage_options.is_empty() { + None + } else { + Some(Arc::new(StorageOptionsAccessor::with_static_options( + storage_options.clone(), + ))) + }, + ..Default::default() + }; + let (object_store, _) = ObjectStore::from_uri_and_params( + session.store_registry(), + &plain_uri, + &os_params, + ) + .await?; + if object_store.is_local() { + Self::try_create_dir(&plain_uri).context(CreateDirSnafu { + path: plain_uri.clone(), + })?; + } + + Ok(plain_uri) + } + Err(_) => { + let (object_store, _) = ObjectStore::from_uri_and_params( + session.store_registry(), + uri, + &ObjectStoreParams::default(), + ) + .await?; + if object_store.is_local() { + Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?; + } + Ok(uri.to_string()) + } + } + } + + pub(crate) async fn connect_manifest_enabled_namespace_database( + request: &ConnectRequest, + ) -> Result { + let options = ListingDatabaseOptions::parse_from_map(&request.options)?; + let session = request + .session + .clone() + .unwrap_or_else(|| Arc::new(lance::session::Session::default())); + let namespace_root = + Self::prepare_namespace_root(&request.uri, &options.storage_options, session.clone()) + .await?; + let ns_properties = Self::build_manifest_enabled_namespace_client_properties( + &namespace_root, + &options.storage_options, + request.namespace_client_properties.clone(), + ); + + LanceNamespaceDatabase::connect_with_new_table_config( + "dir", + ns_properties, + options.storage_options, + request.read_consistency_interval, + Some(session), + HashSet::new(), + options.new_table_config, + ) + .await + .map(|db| db.with_uri(request.uri.clone())) + } + /// Connect to a listing database /// /// The URI should be a path to a directory where the 
tables are stored. @@ -374,8 +505,15 @@ impl ListingDatabase { // Filter out the commit store query param -- it's a lancedb param url.query_pairs_mut().clear(); url.query_pairs_mut().extend_pairs(filtered_querys); - // Take a copy of the query string so we can propagate it to lance - let query_string = url.query().map(|s| s.to_string()); + // Take a copy of the query string so we can propagate it to lance. + // `query_pairs_mut()` leaves the URL with `Some("")` even when no + // pairs survive (or none existed in the first place), so an empty + // string here must be treated the same as "no query" — otherwise + // every table URI ends up with a trailing `?`, which makes downstream + // sub-paths (e.g. MemWAL gen paths) re-parse as path= + + // query=, causing Lance to find the base table dataset + // when looking up the sub-path. + let query_string = url.query().filter(|q| !q.is_empty()).map(|s| s.to_string()); // clear the query string so we can use the url as the base uri // use .set_query(None) instead of .set_query("") because the latter // will add a trailing '?' 
to the url @@ -584,7 +722,7 @@ impl ListingDatabase { let commit_handler = commit_handler_from_url(&uri, &Some(object_store_params)).await?; for name in names { let dir_name = format!("{}.{}", name, LANCE_EXTENSION); - let full_path = self.base_path.child(dir_name.clone()); + let full_path = self.base_path.clone().join(dir_name.clone()); commit_handler.delete(&full_path).await?; @@ -690,15 +828,12 @@ impl ListingDatabase { store_params.storage_options_accessor = Some(Arc::new(accessor)); } - write_params.data_storage_version = self - .new_table_config - .data_storage_version - .or(storage_version_override); + write_params.data_storage_version = storage_version_override + .or(write_params.data_storage_version) + .or(self.new_table_config.data_storage_version); - if let Some(enable_v2_manifest_paths) = self - .new_table_config - .enable_v2_manifest_paths - .or(v2_manifest_override) + if let Some(enable_v2_manifest_paths) = + v2_manifest_override.or(self.new_table_config.enable_v2_manifest_paths) { write_params.enable_v2_manifest_paths = enable_v2_manifest_paths; } @@ -1158,6 +1293,7 @@ mod tests { client_config: Default::default(), options: Default::default(), namespace_client_properties: Default::default(), + manifest_enabled: false, read_consistency_interval: None, session: None, }; @@ -1292,6 +1428,7 @@ mod tests { client_config: Default::default(), options: options.clone(), namespace_client_properties: Default::default(), + manifest_enabled: false, read_consistency_interval: None, session: None, }; @@ -1827,6 +1964,7 @@ mod tests { client_config: Default::default(), options, namespace_client_properties: Default::default(), + manifest_enabled: false, read_consistency_interval: None, session: None, }; @@ -1933,6 +2071,7 @@ mod tests { client_config: Default::default(), options, namespace_client_properties: Default::default(), + manifest_enabled: false, read_consistency_interval: None, session: None, }; @@ -2005,6 +2144,7 @@ mod tests { client_config: 
Default::default(), options, namespace_client_properties: Default::default(), + manifest_enabled: false, read_consistency_interval: None, session: None, }; @@ -2080,6 +2220,133 @@ mod tests { assert_eq!(uri, expected); } + /// Regression: connecting via a URL-style URI (which goes through + /// `url::Url::parse` and the `query_pairs_mut()` path) must not + /// append a trailing `?` to per-table URIs when the input URI has + /// no query string. + /// + /// Earlier, `query_pairs_mut().clear()` left the URL with + /// `query=Some("")`, which then propagated as a trailing `?` on + /// every table URI. Sub-path lookups against that URI (e.g. MemWAL + /// `/_mem_wal//_gen_`) re-parsed as + /// `path=` + `query=/_mem_wal/...`, causing + /// `Dataset::write` to find the base table dataset and falsely + /// report `Dataset already exists`. + /// Mirrors the URL-mutation step from + /// [`ListingDatabase::connect_with_options`] so we can assert the + /// fix without going through filesystem setup (which is awkward + /// across platforms — see the `file://` test below). + fn capture_query_like_connect(input_uri: &str) -> Option { + let mut url = url::Url::parse(input_uri).unwrap(); + let mut filtered_querys = Vec::new(); + for (key, value) in url.query_pairs() { + if key == ENGINE || key == MIRRORED_STORE { + continue; + } + filtered_querys.push((key.to_string(), value.to_string())); + } + url.query_pairs_mut().clear(); + url.query_pairs_mut().extend_pairs(filtered_querys); + url.query().filter(|q| !q.is_empty()).map(|s| s.to_string()) + } + + #[test] + fn test_capture_query_treats_empty_as_none() { + // No query at all. With the bug, `query_pairs_mut()` left the + // URL with `query=Some("")` and we used to propagate that. + assert_eq!( + capture_query_like_connect("s3://bucket/prefix/"), + None, + "empty query after mutation must be treated as no query" + ); + + // Real query is propagated. 
+ assert_eq!( + capture_query_like_connect("s3://bucket/prefix/?foo=bar"), + Some("foo=bar".to_string()) + ); + + // lancedb-internal `engine=` is stripped; nothing remains, so + // query_string is None — not Some(""). + assert_eq!( + capture_query_like_connect(&format!("s3://bucket/prefix/?{}=mem", ENGINE)), + None + ); + + // Mixed: drop `engine=`, keep the rest. + let captured = + capture_query_like_connect(&format!("s3://bucket/prefix/?{}=mem&foo=bar", ENGINE)); + assert_eq!(captured.as_deref(), Some("foo=bar")); + } + + /// Regression: connecting via a URL-style URI (which goes through + /// `url::Url::parse` and the `query_pairs_mut()` path) must not + /// append a trailing `?` to per-table URIs when the input URI has + /// no query string. Sub-path lookups against such a URI (e.g. + /// MemWAL `/_mem_wal//_gen_`) re-parse + /// as `path=` + `query=/_mem_wal/...`, causing + /// `Dataset::write` to find the base table dataset and falsely + /// report `Dataset already exists`. + /// + /// Skipped on Windows: `try_create_dir` does not understand + /// `file:///C:/…` paths so `connect_with_options` fails before + /// even reaching the URL-mutation logic. The pure URL-mutation + /// invariant is covered by + /// `test_capture_query_treats_empty_as_none` above, which runs + /// on all platforms. 
+ #[cfg(not(windows))] + #[tokio::test] + async fn test_table_uri_url_path_has_no_trailing_question_mark() { + let tempdir = tempdir().unwrap(); + let uri = format!("file://{}", tempdir.path().to_str().unwrap()); + + let request = ConnectRequest { + uri: uri.clone(), + #[cfg(feature = "remote")] + client_config: Default::default(), + options: Default::default(), + namespace_client_properties: Default::default(), + manifest_enabled: false, + read_consistency_interval: None, + session: None, + }; + let db = ListingDatabase::connect_with_options(&request) + .await + .unwrap(); + + assert_eq!( + db.query_string, None, + "no input query → no captured query_string" + ); + + let table_uri = db.table_uri("test").unwrap(); + assert!( + !table_uri.ends_with('?'), + "table_uri must not have a trailing `?`: {}", + table_uri + ); + assert_eq!(table_uri, format!("{}/test.lance", uri)); + + // A real query string should still be propagated. + let with_query = format!("{}?foo=bar", uri); + let request_with_query = ConnectRequest { + uri: with_query, + #[cfg(feature = "remote")] + client_config: Default::default(), + options: Default::default(), + namespace_client_properties: Default::default(), + manifest_enabled: false, + read_consistency_interval: None, + session: None, + }; + let db_with_query = ListingDatabase::connect_with_options(&request_with_query) + .await + .unwrap(); + assert_eq!(db_with_query.query_string.as_deref(), Some("foo=bar")); + let table_uri = db_with_query.table_uri("test").unwrap(); + assert_eq!(table_uri, format!("{}/test.lance?foo=bar", uri)); + } + #[tokio::test] async fn test_namespace_client() { let (_tempdir, db) = setup_database().await; @@ -2202,6 +2469,7 @@ mod tests { client_config: Default::default(), options: Default::default(), namespace_client_properties, + manifest_enabled: false, read_consistency_interval: None, session: None, }; diff --git a/rust/lancedb/src/database/namespace.rs b/rust/lancedb/src/database/namespace.rs index 
19dc1f174..de18f8db8 100644 --- a/rust/lancedb/src/database/namespace.rs +++ b/rust/lancedb/src/database/namespace.rs @@ -24,6 +24,10 @@ use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler; use crate::connection::NamespaceClientPushdownOperation; use crate::database::ReadConsistency; +use crate::database::listing::{ + NewTableConfig, OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS, OPT_NEW_TABLE_STORAGE_VERSION, + OPT_NEW_TABLE_V2_MANIFEST_PATHS, +}; use crate::error::{Error, Result}; use crate::table::NativeTable; use lance::dataset::WriteMode; @@ -50,6 +54,8 @@ pub struct LanceNamespaceDatabase { ns_impl: String, // Namespace properties used to construct the namespace client ns_properties: HashMap, + // Options for tables created by this connection + new_table_config: NewTableConfig, } impl LanceNamespaceDatabase { @@ -71,9 +77,15 @@ impl LanceNamespaceDatabase { pushdown_operations: namespace_client_pushdown_operations, ns_impl: namespace_client_impl, ns_properties: namespace_client_properties, + new_table_config: NewTableConfig::default(), } } + pub(crate) fn with_uri(mut self, uri: impl Into) -> Self { + self.uri = uri.into(); + self + } + pub async fn connect( ns_impl: &str, ns_properties: HashMap, @@ -81,6 +93,27 @@ impl LanceNamespaceDatabase { read_consistency_interval: Option, session: Option>, pushdown_operations: HashSet, + ) -> Result { + Self::connect_with_new_table_config( + ns_impl, + ns_properties, + storage_options, + read_consistency_interval, + session, + pushdown_operations, + NewTableConfig::default(), + ) + .await + } + + pub(crate) async fn connect_with_new_table_config( + ns_impl: &str, + ns_properties: HashMap, + storage_options: HashMap, + read_consistency_interval: Option, + session: Option>, + pushdown_operations: HashSet, + new_table_config: NewTableConfig, ) -> Result { let mut builder = ConnectBuilder::new(ns_impl); for (key, value) in ns_properties.clone() { @@ -102,8 +135,79 @@ impl LanceNamespaceDatabase { 
pushdown_operations, ns_impl: ns_impl.to_string(), ns_properties, + new_table_config, }) } + + fn extract_storage_overrides( + &self, + request: &DbCreateTableRequest, + ) -> Result<( + Option, + Option, + Option, + )> { + let storage_options = request + .write_options + .lance_write_params + .as_ref() + .and_then(|p| p.store_params.as_ref()) + .and_then(|sp| sp.storage_options()); + + let storage_version_override = storage_options + .and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION)) + .map(|s| s.parse::()) + .transpose()?; + + let v2_manifest_override = storage_options + .and_then(|opts| opts.get(OPT_NEW_TABLE_V2_MANIFEST_PATHS)) + .map(|s| s.parse::()) + .transpose() + .map_err(|_| Error::InvalidInput { + message: "enable_v2_manifest_paths must be a boolean".to_string(), + })?; + + let stable_row_ids_override = storage_options + .and_then(|opts| opts.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS)) + .map(|s| s.parse::()) + .transpose() + .map_err(|_| Error::InvalidInput { + message: "enable_stable_row_ids must be a boolean".to_string(), + })?; + + Ok(( + storage_version_override, + v2_manifest_override, + stable_row_ids_override, + )) + } + + fn apply_new_table_config( + &self, + params: &mut lance::dataset::WriteParams, + request: &DbCreateTableRequest, + ) -> Result<()> { + let (storage_version_override, v2_manifest_override, stable_row_ids_override) = + self.extract_storage_overrides(request)?; + + params.data_storage_version = storage_version_override + .or(params.data_storage_version) + .or(self.new_table_config.data_storage_version); + + if let Some(enable_v2_manifest_paths) = + v2_manifest_override.or(self.new_table_config.enable_v2_manifest_paths) + { + params.enable_v2_manifest_paths = enable_v2_manifest_paths; + } + + if let Some(enable_stable_row_ids) = + stable_row_ids_override.or(self.new_table_config.enable_stable_row_ids) + { + params.enable_stable_row_ids = enable_stable_row_ids; + } + + Ok(()) + } } impl std::fmt::Debug for 
LanceNamespaceDatabase { @@ -299,7 +403,12 @@ impl Database for LanceNamespaceDatabase { }; // Build write params with storage options and commit handler - let mut params = request.write_options.lance_write_params.unwrap_or_default(); + let mut params = request + .write_options + .lance_write_params + .clone() + .unwrap_or_default(); + self.apply_new_table_config(&mut params, &request)?; if matches!(request.mode, CreateTableMode::Overwrite) { params.mode = WriteMode::Overwrite; diff --git a/rust/lancedb/src/io/object_store.rs b/rust/lancedb/src/io/object_store.rs index d4739291a..d27357b82 100644 --- a/rust/lancedb/src/io/object_store.rs +++ b/rust/lancedb/src/io/object_store.rs @@ -5,11 +5,12 @@ use std::{fmt::Formatter, sync::Arc}; -use futures::{TryFutureExt, stream::BoxStream}; +use futures::{StreamExt, TryFutureExt, stream::BoxStream}; use lance::io::WrappingObjectStore; use object_store::{ - Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, - PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart, path::Path, + CopyOptions, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, + ObjectStore, ObjectStoreExt, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, + UploadPart, path::Path, }; use async_trait::async_trait; @@ -93,20 +94,6 @@ impl ObjectStore for MirroringObjectStore { self.primary.get_opts(location, options).await } - async fn head(&self, location: &Path) -> Result { - self.primary.head(location).await - } - - async fn delete(&self, location: &Path) -> Result<()> { - if !location.primary_only() { - match self.secondary.delete(location).await { - Err(Error::NotFound { .. 
}) | Ok(_) => {} - Err(e) => return Err(e), - } - } - self.primary.delete(location).await - } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { self.primary.list(prefix) } @@ -115,21 +102,40 @@ impl ObjectStore for MirroringObjectStore { self.primary.list_with_delimiter(prefix).await } - async fn copy(&self, from: &Path, to: &Path) -> Result<()> { - if to.primary_only() { - self.primary.copy(from, to).await - } else { - self.secondary.copy(from, to).await?; - self.primary.copy(from, to).await?; - Ok(()) - } + fn delete_stream( + &self, + locations: BoxStream<'static, Result>, + ) -> BoxStream<'static, Result> { + let primary = self.primary.clone(); + let secondary = self.secondary.clone(); + locations + .map(move |location| { + let primary = primary.clone(); + let secondary = secondary.clone(); + async move { + let location = location?; + if !location.primary_only() { + match secondary.delete(&location).await { + Err(Error::NotFound { .. }) | Ok(_) => {} + Err(e) => return Err(e), + } + } + primary.delete(&location).await?; + Ok(location) + } + }) + .buffered(10) + .boxed() } - async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { - if !to.primary_only() { - self.secondary.copy(from, to).await?; + async fn copy_opts(&self, from: &Path, to: &Path, options: CopyOptions) -> Result<()> { + if to.primary_only() { + self.primary.copy_opts(from, to, options).await + } else { + self.secondary.copy_opts(from, to, options.clone()).await?; + self.primary.copy_opts(from, to, options).await?; + Ok(()) } - self.primary.copy_if_not_exists(from, to).await } } diff --git a/rust/lancedb/src/io/object_store/io_tracking.rs b/rust/lancedb/src/io/object_store/io_tracking.rs index 20f0a020a..bd4f8f54a 100644 --- a/rust/lancedb/src/io/object_store/io_tracking.rs +++ b/rust/lancedb/src/io/object_store/io_tracking.rs @@ -10,9 +10,9 @@ use bytes::Bytes; use futures::stream::BoxStream; use lance::io::WrappingObjectStore; use object_store::{ - 
GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, - PutMultipartOptions, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart, - path::Path, + CopyOptions, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, + PutMultipartOptions, PutOptions, PutPayload, PutResult, RenameOptions, Result as OSResult, + UploadPart, path::Path, }; #[derive(Debug, Default)] @@ -81,11 +81,6 @@ impl IoTrackingStore { #[async_trait::async_trait] #[deny(clippy::missing_trait_methods)] impl ObjectStore for IoTrackingStore { - async fn put(&self, location: &Path, bytes: PutPayload) -> OSResult { - self.record_write(bytes.content_length() as u64); - self.target.put(location, bytes).await - } - async fn put_opts( &self, location: &Path, @@ -96,14 +91,6 @@ impl ObjectStore for IoTrackingStore { self.target.put_opts(location, bytes, opts).await } - async fn put_multipart(&self, location: &Path) -> OSResult> { - let target = self.target.put_multipart(location).await?; - Ok(Box::new(IoTrackingMultipartUpload { - target, - stats: self.stats.clone(), - })) - } - async fn put_multipart_opts( &self, location: &Path, @@ -116,15 +103,6 @@ impl ObjectStore for IoTrackingStore { })) } - async fn get(&self, location: &Path) -> OSResult { - let result = self.target.get(location).await; - if let Ok(result) = &result { - let num_bytes = result.range.end - result.range.start; - self.record_read(num_bytes); - } - result - } - async fn get_opts(&self, location: &Path, options: GetOptions) -> OSResult { let result = self.target.get_opts(location, options).await; if let Ok(result) = &result { @@ -134,14 +112,6 @@ impl ObjectStore for IoTrackingStore { result } - async fn get_range(&self, location: &Path, range: std::ops::Range) -> OSResult { - let result = self.target.get_range(location, range).await; - if let Ok(result) = &result { - self.record_read(result.len() as u64); - } - result - } - async fn get_ranges( &self, location: &Path, @@ -154,20 
+124,11 @@ impl ObjectStore for IoTrackingStore { result } - async fn head(&self, location: &Path) -> OSResult { - self.record_read(0); - self.target.head(location).await - } - - async fn delete(&self, location: &Path) -> OSResult<()> { + fn delete_stream( + &self, + locations: BoxStream<'static, OSResult>, + ) -> BoxStream<'static, OSResult> { self.record_write(0); - self.target.delete(location).await - } - - fn delete_stream<'a>( - &'a self, - locations: BoxStream<'a, OSResult>, - ) -> BoxStream<'a, OSResult> { self.target.delete_stream(locations) } @@ -190,24 +151,14 @@ impl ObjectStore for IoTrackingStore { self.target.list_with_delimiter(prefix).await } - async fn copy(&self, from: &Path, to: &Path) -> OSResult<()> { + async fn copy_opts(&self, from: &Path, to: &Path, options: CopyOptions) -> OSResult<()> { self.record_write(0); - self.target.copy(from, to).await + self.target.copy_opts(from, to, options).await } - async fn rename(&self, from: &Path, to: &Path) -> OSResult<()> { + async fn rename_opts(&self, from: &Path, to: &Path, options: RenameOptions) -> OSResult<()> { self.record_write(0); - self.target.rename(from, to).await - } - - async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> OSResult<()> { - self.record_write(0); - self.target.rename_if_not_exists(from, to).await - } - - async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> OSResult<()> { - self.record_write(0); - self.target.copy_if_not_exists(from, to).await + self.target.rename_opts(from, to, options).await } } diff --git a/rust/lancedb/src/remote/client.rs b/rust/lancedb/src/remote/client.rs index b50ca2206..7fd5c6497 100644 --- a/rust/lancedb/src/remote/client.rs +++ b/rust/lancedb/src/remote/client.rs @@ -16,7 +16,7 @@ use crate::remote::retry::{ResolvedRetryConfig, RetryCounter}; const REQUEST_ID_HEADER: HeaderName = HeaderName::from_static("x-request-id"); /// Configuration for TLS/mTLS settings. 
-#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug)] pub struct TlsConfig { /// Path to the client certificate file (PEM format) pub cert_file: Option, @@ -24,10 +24,22 @@ pub struct TlsConfig { pub key_file: Option, /// Path to the CA certificate file for server verification (PEM format) pub ssl_ca_cert: Option, - /// Whether to verify the hostname in the server's certificate + /// Whether to verify the hostname in the server's certificate. + /// Defaults to `true`. pub assert_hostname: bool, } +impl Default for TlsConfig { + fn default() -> Self { + Self { + cert_file: None, + key_file: None, + ssl_ca_cert: None, + assert_hostname: true, + } + } +} + /// Trait for providing custom headers for each request #[async_trait::async_trait] pub trait HeaderProvider: Send + Sync + std::fmt::Debug { @@ -926,7 +938,7 @@ mod tests { assert!(config.cert_file.is_none()); assert!(config.key_file.is_none()); assert!(config.ssl_ca_cert.is_none()); - assert!(!config.assert_hostname); + assert!(config.assert_hostname); } #[test] diff --git a/rust/lancedb/src/remote/table/insert.rs b/rust/lancedb/src/remote/table/insert.rs index 8aec28609..49ebb2015 100644 --- a/rust/lancedb/src/remote/table/insert.rs +++ b/rust/lancedb/src/remote/table/insert.rs @@ -43,7 +43,7 @@ pub struct RemoteInsertExec { client: RestfulLanceDbClient, input: Arc, overwrite: bool, - properties: PlanProperties, + properties: Arc, add_result: Arc>>, metrics: ExecutionPlanMetricsSet, upload_id: Option, @@ -118,7 +118,7 @@ impl RemoteInsertExec { client, input, overwrite, - properties, + properties: Arc::new(properties), add_result: Arc::new(Mutex::new(None)), metrics: ExecutionPlanMetricsSet::new(), upload_id, @@ -232,7 +232,7 @@ impl ExecutionPlan for RemoteInsertExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lancedb/src/table/datafusion.rs b/rust/lancedb/src/table/datafusion.rs index bd93dd05d..d8956acb2 100644 --- 
a/rust/lancedb/src/table/datafusion.rs +++ b/rust/lancedb/src/table/datafusion.rs @@ -39,21 +39,26 @@ use lance_index::scalar::FullTextSearchQuery; struct MetadataEraserExec { input: Arc, schema: Arc, - properties: PlanProperties, + properties: Arc, } impl MetadataEraserExec { fn compute_properties_from_input( input: &Arc, schema: &Arc, - ) -> PlanProperties { + ) -> Arc { let input_properties = input.properties(); let eq_properties = input_properties .eq_properties .clone() .with_new_schema(schema.clone()) .unwrap(); - input_properties.clone().with_eq_properties(eq_properties) + Arc::new( + input_properties + .as_ref() + .clone() + .with_eq_properties(eq_properties), + ) } fn new(input: Arc) -> Self { @@ -87,7 +92,7 @@ impl ExecutionPlan for MetadataEraserExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lancedb/src/table/datafusion/insert.rs b/rust/lancedb/src/table/datafusion/insert.rs index 51be4abb8..f2cf21f13 100644 --- a/rust/lancedb/src/table/datafusion/insert.rs +++ b/rust/lancedb/src/table/datafusion/insert.rs @@ -81,7 +81,7 @@ pub struct InsertExec { dataset: Arc, input: Arc, write_params: WriteParams, - properties: PlanProperties, + properties: Arc, partial_transactions: Arc>>, metrics: ExecutionPlanMetricsSet, } @@ -107,7 +107,7 @@ impl InsertExec { dataset, input, write_params, - properties, + properties: Arc::new(properties), partial_transactions: Arc::new(Mutex::new(Vec::with_capacity(num_partitions))), metrics: ExecutionPlanMetricsSet::new(), } @@ -136,7 +136,7 @@ impl ExecutionPlan for InsertExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties } diff --git a/rust/lancedb/src/table/datafusion/scannable_exec.rs b/rust/lancedb/src/table/datafusion/scannable_exec.rs index a55b6e13f..8a7177480 100644 --- a/rust/lancedb/src/table/datafusion/scannable_exec.rs +++ b/rust/lancedb/src/table/datafusion/scannable_exec.rs @@ 
-20,7 +20,7 @@ pub(crate) struct ScannableExec { // We don't require Scannable to be Sync, so we wrap it in a Mutex to allow safe concurrent access. source: Mutex>, num_rows: Option, - properties: PlanProperties, + properties: Arc, tracker: Option>, } @@ -49,7 +49,7 @@ impl ScannableExec { Self { source, num_rows, - properties, + properties: Arc::new(properties), tracker, } } @@ -70,7 +70,7 @@ impl ExecutionPlan for ScannableExec { self } - fn properties(&self) -> &PlanProperties { + fn properties(&self) -> &Arc { &self.properties }