diff --git a/.bumpversion.toml b/.bumpversion.toml index 7fc6165a5..e05a16962 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.27.0-beta.5" +current_version = "0.27.2" parse = """(?x) (?P0|[1-9]\\d*)\\. (?P0|[1-9]\\d*)\\. diff --git a/.github/workflows/build_linux_wheel/action.yml b/.github/workflows/build_linux_wheel/action.yml index f13dd7017..a2842cb1a 100644 --- a/.github/workflows/build_linux_wheel/action.yml +++ b/.github/workflows/build_linux_wheel/action.yml @@ -23,8 +23,10 @@ runs: steps: - name: CONFIRM ARM BUILD shell: bash + env: + ARM_BUILD: ${{ inputs.arm-build }} run: | - echo "ARM BUILD: ${{ inputs.arm-build }}" + echo "ARM BUILD: $ARM_BUILD" - name: Build x86_64 Manylinux wheel if: ${{ inputs.arm-build == 'false' }} uses: PyO3/maturin-action@v1 diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 19911d3f2..b988fedd3 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -207,14 +207,14 @@ jobs: - name: Downgrade dependencies # These packages have newer requirements for MSRV run: | - cargo update -p aws-sdk-bedrockruntime --precise 1.64.0 - cargo update -p aws-sdk-dynamodb --precise 1.55.0 - cargo update -p aws-config --precise 1.5.10 - cargo update -p aws-sdk-kms --precise 1.51.0 - cargo update -p aws-sdk-s3 --precise 1.65.0 - cargo update -p aws-sdk-sso --precise 1.50.0 - cargo update -p aws-sdk-ssooidc --precise 1.51.0 - cargo update -p aws-sdk-sts --precise 1.51.0 + cargo update -p aws-sdk-bedrockruntime --precise 1.77.0 + cargo update -p aws-sdk-dynamodb --precise 1.68.0 + cargo update -p aws-config --precise 1.6.0 + cargo update -p aws-sdk-kms --precise 1.63.0 + cargo update -p aws-sdk-s3 --precise 1.79.0 + cargo update -p aws-sdk-sso --precise 1.62.0 + cargo update -p aws-sdk-ssooidc --precise 1.63.0 + cargo update -p aws-sdk-sts --precise 1.63.0 cargo update -p home --precise 0.5.9 - name: cargo +${{ matrix.msrv }} check env: diff --git a/Cargo.lock b/Cargo.lock index 19619af51..2321227c4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,7 +27,7 @@ checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "const-random", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "version_check", "zerocopy", @@ -35,9 +35,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] @@ -74,9 +74,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.21" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" dependencies = [ "anstyle", "anstyle-parse", @@ -89,59 +89,62 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" -version = "0.2.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.10" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] -name = "arbitrary" -version = "1.4.2" +name = "ar_archive_writer" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" dependencies = [ - "derive_arbitrary", + "object", ] [[package]] name = "arc-swap" -version = "1.7.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6" +dependencies = [ + "rustversion", +] [[package]] name = "argminmax" @@ -172,9 +175,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a2b10dcb159faf30d3f81f6d56c1211a5bea2ca424eabe477648a44b993320e" +checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" dependencies = [ "arrow-arith", "arrow-array", @@ -194,9 +197,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "288015089e7931843c80ed4032c5274f02b37bcb720c4a42096d50b390e70372" +checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" dependencies = [ "arrow-array", "arrow-buffer", @@ -208,9 +211,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65ca404ea6191e06bf30956394173337fa9c35f445bd447fe6c21ab944e1a23c" +checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" dependencies = [ "ahash", "arrow-buffer", @@ -227,9 +230,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36356383099be0151dacc4245309895f16ba7917d79bdb71a7148659c9206c56" +checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" dependencies = [ "bytes", "half", @@ -239,9 +242,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8e372ed52bd4ee88cc1e6c3859aa7ecea204158ac640b10e187936e7e87074" +checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" dependencies = [ "arrow-array", "arrow-buffer", @@ -261,9 +264,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e4100b729fe656f2e4fb32bc5884f14acf9118d4ad532b7b33c1132e4dce896" +checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" dependencies = [ "arrow-array", "arrow-cast", @@ -276,9 +279,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf87f4ff5fc13290aa47e499a8b669a82c5977c6a1fedce22c7f542c1fd5a597" +checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" dependencies = [ "arrow-buffer", "arrow-schema", @@ -289,9 +292,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3ca63edd2073fcb42ba112f8ae165df1de935627ead6e203d07c99445f2081" +checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" dependencies = [ "arrow-array", "arrow-buffer", @@ -299,15 +302,15 @@ dependencies = [ "arrow-schema", "arrow-select", "flatbuffers", - "lz4_flex 0.12.0", + "lz4_flex 0.12.1", "zstd", ] [[package]] name = "arrow-json" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a36b2332559d3310ebe3e173f75b29989b4412df4029a26a30cc3f7da0869297" +checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" dependencies = [ "arrow-array", "arrow-buffer", @@ -329,9 +332,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c4e0530272ca755d6814218dffd04425c5b7854b87fa741d5ff848bf50aa39" +checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" dependencies = [ "arrow-array", "arrow-buffer", @@ -342,9 +345,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f45c7989cb70214b2f362eaa10266d15e1a433692f2ea1514018be3aace679f4" +checksum = "d18c442b4c266aaf3d7f7dd40fd7ae058cef7f113b00ff0cd8256e1e218ec544" dependencies = [ "arrow-array", "arrow-data", @@ -354,9 +357,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b07f52788744cc71c4628567ad834cadbaeb9f09026ff1d7a4120f69edf7abd3" +checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" dependencies = [ "arrow-array", "arrow-buffer", @@ -367,20 +370,20 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bb63203e8e0e54b288d0d8043ca8fa1013820822a27692ef1b78a977d879f2c" +checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" dependencies = [ - "bitflags 2.9.4", + "bitflags", "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c96d8a1c180b44ecf2e66c9a2f2bbcb8b1b6f14e165ce46ac8bde211a363411b" +checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" dependencies = [ "ahash", "arrow-array", @@ -392,9 +395,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8ad6a81add9d3ea30bf8374ee8329992c7fd246ffd8b7e2f48a3cea5aa0cc9a" +checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" dependencies = [ "arrow-array", "arrow-buffer", @@ -421,13 +424,12 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.19" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" dependencies = [ - "flate2", - "futures-core", - "memchr", + "compression-codecs", + "compression-core", "pin-project-lite", "tokio", ] @@ -443,9 +445,9 @@ dependencies = [ [[package]] name = "async-lock" -version = "3.4.1" +version = "3.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd03604047cee9b6ce9de9f70c6cd540a0520c813cbd49bae61f33ab80ed1dc" +checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" dependencies = [ "event-listener", "event-listener-strategy", @@ -485,7 +487,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -496,7 +498,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -556,7 +558,7 @@ dependencies = [ "bytes", "fastrand", "hex", - "http 1.3.1", + "http 1.4.0", "ring", "time", "tokio", @@ -619,7 +621,7 @@ dependencies = [ "bytes-utils", "fastrand", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "http-body 1.0.1", "percent-encoding", @@ -721,7 +723,7 @@ dependencies = [ "hex", "hmac", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "lru", "percent-encoding", @@ -815,7 +817,7 @@ dependencies = [ "hex", "hmac", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "p256", "percent-encoding", "ring", @@ -839,9 +841,9 @@ dependencies = [ [[package]] name = "aws-smithy-checksums" -version = "0.63.9" +version = "0.63.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "165d8583d8d906e2fb5511d29201d447cc710864f075debcdd9c31c265412806" +checksum = "23374b9170cbbcc6f5df8dc5ebb9b6c5c28a3c8f599f0e8b8b10eb6f4a5c6e74" dependencies = [ "aws-smithy-http 0.62.6", "aws-smithy-types", @@ -882,7 +884,7 @@ dependencies = [ "futures-core", "futures-util", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "percent-encoding", "pin-project-lite", @@ -902,7 +904,7 @@ dependencies = [ "bytes-utils", "futures-core", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "percent-encoding", @@ -921,18 +923,18 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "h2 0.3.27", - "h2 0.4.12", + "h2 0.4.13", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "hyper 0.14.32", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-rustls 0.24.2", "hyper-rustls 0.27.7", "hyper-util", "pin-project-lite", "rustls 0.21.12", - "rustls 0.23.31", + "rustls 0.23.37", "rustls-native-certs", "rustls-pki-types", "tokio", @@ -984,7 +986,7 @@ dependencies = [ "bytes", "fastrand", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "http-body 1.0.1", "http-body-util", @@ -1004,7 +1006,7 @@ dependencies = [ "aws-smithy-types", "bytes", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "pin-project-lite", "tokio", "tracing", @@ -1022,7 +1024,7 @@ dependencies = [ "bytes-utils", "futures-core", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "http-body 1.0.1", "http-body-util", @@ -1070,10 +1072,10 @@ dependencies = [ "axum-core", "bytes", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-util", "itoa", "matchit", @@ -1103,7 +1105,7 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "mime", @@ -1122,7 +1124,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" dependencies = [ "futures-core", - "getrandom 0.2.16", + "getrandom 0.2.17", "instant", "pin-project-lite", "rand 0.8.5", @@ -1176,15 +1178,15 @@ dependencies = [ [[package]] name = "base64ct" -version = "1.8.0" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" [[package]] name = "bigdecimal" -version = "0.4.8" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" dependencies = [ "autocfg", "libm", @@ -1195,36 +1197,30 @@ dependencies = [ [[package]] name = "bit-set" -version = "0.5.3" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" dependencies = [ "bit-vec", ] [[package]] name = "bit-vec" -version = "0.6.3" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] name = "bitflags" -version = "1.3.2" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] name = "bitpacking" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c1d3e2bfd8d06048a179f7b17afc3188effa10385e7b00dc65af6aae732ea92" +checksum = "96a7139abd3d9cebf8cd6f920a389cf3dc9576172e32f4563f188cae3c3eb019" dependencies = [ "crunchy", ] @@ -1252,15 +1248,16 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.2" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", + "cpufeatures", ] [[package]] @@ -1283,9 +1280,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.8.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f44aa969f86ffb99e5c2d51f393ec9ed6e9fe2f47b609c917b0071f129854d29" +checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" dependencies = [ "bon-macros", "rustversion", @@ -1293,17 +1290,17 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.8.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e78cd86b6a6515d87392332fd63c4950ed3e50eab54275259a5f59f3666f90" +checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" dependencies = [ - "darling 0.21.3", + "darling 0.23.0", "ident_case", "prettyplease", "proc-macro2", "quote", "rustversion", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -1350,28 +1347,28 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.0" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "bytemuck" -version = "1.23.1" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" dependencies = [ "bytemuck_derive", ] [[package]] name = "bytemuck_derive" -version = "1.8.1" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fa76293b4f7bb636ab88fd78228235b5248b4d05cc589aed610f954af5d7c7a" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -1398,46 +1395,48 @@ dependencies = [ [[package]] name = "candle-core" -version = "0.9.1" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9f51e2ecf6efe9737af8f993433c839f956d2b6ed4fd2dd4a7c6d8b0fa667ff" +checksum = "c15b675b80d994b2eadb20a4bbe434eabeb454eac3ee5e2b4cf6f147ee9be091" dependencies = [ "byteorder", - "gemm 0.17.1", + "float8", + "gemm", "half", - "memmap2 0.9.8", + "libm", + "memmap2 0.9.10", "num-traits", "num_cpus", "rand 0.9.2", "rand_distr 0.5.1", "rayon", "safetensors", - "thiserror 1.0.69", - "ug", - "yoke 0.7.5", + "thiserror 2.0.18", + "yoke", "zip", ] [[package]] name = "candle-nn" -version = "0.9.1" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1980d53280c8f9e2c6cbe1785855d7ff8010208b46e21252b978badf13ad69d" +checksum = "3045fa9e7aef8567d209a27d56b692f60b96f4d0569f4c3011f8ca6715c65e03" dependencies = [ "candle-core", "half", + "libc", "num-traits", "rayon", "safetensors", "serde", - "thiserror 1.0.69", + "thiserror 2.0.18", ] [[package]] name = "candle-transformers" -version = "0.9.1" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "186cb80045dbe47e0b387ea6d3e906f02fb3056297080d9922984c90e90a72b0" +checksum = "b538ec4aa807c416a2ddd3621044888f188827862e2a6fcacba4738e89795d01" dependencies = [ "byteorder", "candle-core", @@ -1463,9 +1462,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.40" +version = "1.2.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d05d92f4b1fd76aad469d46cdd858ca761576082cd37df81416691e50199fb" +checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" dependencies = [ "find-msvc-tools", "jobserver", @@ -1481,9 +1480,9 @@ checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "cfg_aliases" @@ -1493,16 +1492,16 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", "js-sys", "num-traits", "serde", "wasm-bindgen", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -1549,32 +1548,47 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.54" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" dependencies = [ "cc", ] [[package]] name = "colorchoice" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "comfy-table" -version = "7.1.2" +version = "7.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" dependencies = [ - "crossterm 0.27.0", - "crossterm 0.28.1", - "strum", - "strum_macros 0.26.4", + "crossterm", + "unicode-segmentation", "unicode-width", ] +[[package]] +name = "compression-codecs" +version = "0.4.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" +dependencies = [ + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -1618,16 +1632,16 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "once_cell", "tiny-keccak", ] [[package]] name = "constant_time_eq" -version = "0.3.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" [[package]] name = "convert_case" @@ -1690,15 +1704,14 @@ checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" [[package]] name = "crc-fast" -version = "1.3.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bf62af4cc77d8fe1c22dde4e721d87f2f54056139d8c412e1366b740305f56f" +checksum = "2fd92aca2c6001b1bf5ba0ff84ee74ec8501b52bbef0cac80bf25a6c1d87a83d" dependencies = [ "crc", "digest", - "libc", - "rand 0.9.2", - "regex", + "rustversion", + "spin 0.10.0", ] [[package]] @@ -1774,28 +1787,18 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crossterm" -version = "0.27.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" dependencies = [ - "bitflags 2.9.4", + "bitflags", "crossterm_winapi", - "libc", + "document-features", "parking_lot", + "rustix 1.1.4", "winapi", ] -[[package]] -name = "crossterm" -version = "0.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" -dependencies = [ - "bitflags 2.9.4", - "parking_lot", - "rustix 0.38.44", -] - [[package]] name = "crossterm_winapi" version = "0.9.1" @@ -1835,9 +1838,9 @@ dependencies = [ [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", @@ -1845,21 +1848,21 @@ dependencies = [ [[package]] name = "csv" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" dependencies = [ "csv-core", "itoa", "ryu", - "serde", + "serde_core", ] [[package]] name = "csv-core" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" dependencies = [ "memchr", ] @@ -1892,12 +1895,12 @@ dependencies = [ [[package]] name = "darling" -version = "0.21.3" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" dependencies = [ - "darling_core 0.21.3", - "darling_macro 0.21.3", + "darling_core 0.23.0", + "darling_macro 0.23.0", ] [[package]] @@ -1911,21 +1914,20 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "darling_core" -version = "0.21.3" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" dependencies = [ - "fnv", "ident_case", "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -1936,18 +1938,18 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "darling_macro" -version = "0.21.3" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ - "darling_core 0.21.3", + "darling_core 0.23.0", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -1966,9 +1968,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d12ee9fdc6cdb5898c7691bb994f0ba606c4acc93a2258d78bb9f26ff8158bb3" +checksum = "43c18ba387f9c05ac1f3be32a73f8f3cc6c1cfc43e5d4b7a8e5b0d3a5eb48dc7" dependencies = [ "arrow", "arrow-schema", @@ -2015,9 +2017,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "462dc9ef45e5d688aeaae49a7e310587e81b6016b9d03bace5626ad0043e5a9e" +checksum = "3c75a4ce672b27fb8423810efb92a3600027717a1664d06a2c307eeeabcec694" dependencies = [ "arrow", "async-trait", @@ -2040,9 +2042,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b96dbf1d728fc321817b744eb5080cdd75312faa6980b338817f68f3caa4208" +checksum = "2c8b9a3795ffb46bf4957a34c67d89a67558b311ae455c8d4295ff2115eeea50" dependencies = [ "arrow", "async-trait", @@ -2063,9 +2065,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3237a6ff0d2149af4631290074289cae548c9863c885d821315d54c6673a074a" +checksum = "205dc1e20441973f470e6b7ef87626a3b9187970e5106058fef1b713047f770c" dependencies = [ "ahash", "arrow", @@ -2085,9 +2087,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70b5e34026af55a1bfccb1ef0a763cf1f64e77c696ffcf5a128a278c31236528" +checksum = "8cf5880c02ff6f5f11fb5bc19211789fb32fd3c53d79b7d6cb2b12e401312ba0" dependencies = [ "futures", "log", @@ -2096,9 +2098,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b2a6be734cc3785e18bbf2a7f2b22537f6b9fb960d79617775a51568c281842" +checksum = "bc614d6e709450e29b7b032a42c1bdb705f166a6b2edef7bed7c7897eb905499" dependencies = [ "arrow", "async-trait", @@ -2125,9 +2127,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1739b9b07c9236389e09c74f770e88aff7055250774e9def7d3f4f56b3dcc7be" +checksum = "6e497d5fc48dac7ce86f6b4fb09a3a494385774af301ff20ec91aebfae9b05b4" dependencies = [ "arrow", "arrow-ipc", @@ -2149,9 +2151,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61c73bc54b518bbba7c7650299d07d58730293cfba4356f6f428cc94c20b7600" +checksum = "0dfc250cad940d0327ca2e9109dc98830892d17a3d6b2ca11d68570e872cf379" dependencies = [ "arrow", "async-trait", @@ -2172,9 +2174,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37812c8494c698c4d889374ecfabbff780f1f26d9ec095dd1bddfc2a8ca12559" +checksum = "c91e9677ed62833b0e8129dec0d1a8f3c9bb7590bd6dd714a43e4c3b663e4aa0" dependencies = [ "arrow", "async-trait", @@ -2194,15 +2196,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c825f969126bc2ef6a6a02d94b3c07abff871acf4d6dd759ce1255edb7923ce" +checksum = "3e13e5fe3447baa0584b61ee8644086e007e1ef6e58f4be48bc8a72417854729" [[package]] name = "datafusion-execution" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa03ef05a2c2f90dd6c743e3e111078e322f4b395d20d4b4d431a245d79521ae" +checksum = "48a6cc03e34899a54546b229235f7b192634c8e832f78a267f0989b18216c56d" dependencies = [ "arrow", "async-trait", @@ -2221,9 +2223,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef33934c1f98ee695cc51192cc5f9ed3a8febee84fdbcd9131bf9d3a9a78276f" +checksum = "ee3315d87eca7a7df58e52a1fb43b4c4171b545fd30ffc3102945c162a9f6ddb" dependencies = [ "arrow", "async-trait", @@ -2243,9 +2245,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "000c98206e3dd47d2939a94b6c67af4bfa6732dd668ac4fafdbde408fd9134ea" +checksum = "98c6d83feae0753799f933a2c47dfd15980c6947960cb95ed60f5c1f885548b3" dependencies = [ "arrow", "datafusion-common", @@ -2256,9 +2258,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "379b01418ab95ca947014066248c22139fe9af9289354de10b445bd000d5d276" +checksum = "49b82962015cc3db4d7662459c9f7fcda0591b5edacb8af1cf3bc3031f274800" dependencies = [ "arrow", "arrow-buffer", @@ -2287,9 +2289,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd00d5454ba4c3f8ebbd04bd6a6a9dc7ced7c56d883f70f2076c188be8459e4c" +checksum = "4e42c227d9e55a6c8041785d4a8a117e4de531033d480aae10984247ac62e27e" dependencies = [ "ahash", "arrow", @@ -2308,9 +2310,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aec06b380729a87210a4e11f555ec2d729a328142253f8d557b87593622ecc9f" +checksum = "cead3cfed825b0b688700f4338d281cd7857e4907775a5b9554c083edd5f3f95" dependencies = [ "ahash", "arrow", @@ -2321,9 +2323,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "904f48d45e0f1eb7d0eb5c0f80f2b5c6046a85454364a6b16a2e0b46f62e7dff" +checksum = "62ea99612970aebab8cf864d02eb3d296bbab7f4881e1023d282b57fe431b201" dependencies = [ "arrow", "arrow-ord", @@ -2344,9 +2346,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9a0d20e2b887e11bee24f7734d780a2588b925796ac741c3118dd06d5aa77f0" +checksum = "d83dbf3ab8b9af6f209b068825a7adbd3b88bf276f2a1ec14ba09567b97f5674" dependencies = [ "arrow", "async-trait", @@ -2360,9 +2362,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3414b0a07e39b6979fe3a69c7aa79a9f1369f1d5c8e52146e66058be1b285ee" +checksum = "732edabe07496e2fc5a1e57a284d7a36edcea445a2821119770a0dea624b472c" dependencies = [ "arrow", "datafusion-common", @@ -2378,9 +2380,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bf2feae63cd4754e31add64ce75cae07d015bce4bb41cd09872f93add32523a" +checksum = "e0c6e30e09700799bd52adce8c377ab03dda96e73a623e4803a31ad94fe7ce14" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2388,20 +2390,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4fe888aeb6a095c4bcbe8ac1874c4b9a4c7ffa2ba849db7922683ba20875aaf" +checksum = "402f2a8ed70fb99a18f71580a1fe338604222a3d32ddeac6e72c5b34feea2d4d" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "datafusion-optimizer" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a6527c063ae305c11be397a86d8193936f4b84d137fe40bd706dfc178cf733c" +checksum = "99f32edb8ba12f08138f86c09b80fae3d4a320551262fa06b91d8a8cb3065a5b" dependencies = [ "arrow", "chrono", @@ -2418,9 +2420,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bb028323dd4efd049dd8a78d78fe81b2b969447b39c51424167f973ac5811d9" +checksum = "987c5e29e96186589301b42e25aa7d11bbe319a73eb02ef8d755edc55b5b89fc" dependencies = [ "ahash", "arrow", @@ -2441,9 +2443,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78fe0826aef7eab6b4b61533d811234a7a9e5e458331ebbf94152a51fc8ab433" +checksum = "1de89d0afa08b6686697bd8a6bac4ba2cd44c7003356e1bce6114d5a93f94b5c" dependencies = [ "arrow", "datafusion-common", @@ -2456,9 +2458,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfccd388620734c661bd8b7ca93c44cdd59fecc9b550eea416a78ffcbb29475f" +checksum = "602d1970c0fe87f1c3a36665d131fbfe1c4379d35f8fc5ec43a362229ad2954d" dependencies = [ "ahash", "arrow", @@ -2473,9 +2475,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bde5fa10e73259a03b705d5fddc136516814ab5f441b939525618a4070f5a059" +checksum = "b24d704b6385ebe27c756a12e5ba15684576d3b47aeca79cc9fb09480236dc32" dependencies = [ "arrow", "datafusion-common", @@ -2491,9 +2493,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e1098760fb29127c24cc9ade3277051dc73c9ed0ac0131bd7bcd742e0ad7470" +checksum = "c21d94141ea5043e98793f170798e9c1887095813b8291c5260599341e383a38" dependencies = [ "ahash", "arrow", @@ -2522,9 +2524,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64d0fef4201777b52951edec086c21a5b246f3c82621569ddb4a26f488bc38a9" +checksum = "1a68cce43d18c0dfac95cacd74e70565f7e2fb12b9ed41e2d312f0fa837626b1" dependencies = [ "arrow", "datafusion-common", @@ -2539,9 +2541,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f71f1e39e8f2acbf1c63b0e93756c2e970a64729dab70ac789587d6237c4fde0" +checksum = "6b4e1c40a0b1896aed4a4504145c2eb7fa9b9da13c2d04b40a4767a09f076199" dependencies = [ "async-trait", "datafusion-common", @@ -2553,9 +2555,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.1.0" +version = "52.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f44693cfcaeb7a9f12d71d1c576c3a6dc025a12cef209375fa2d16fb3b5670ee" +checksum = "2f1891e5b106d1d73c7fe403bd8a265d19c3977edc17f60808daf26c2fe65ffb" dependencies = [ "arrow", "bigdecimal", @@ -2611,25 +2613,14 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.4" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" dependencies = [ "powerfmt", "serde_core", ] -[[package]] -name = "derive_arbitrary" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.114", -] - [[package]] name = "derive_builder" version = "0.20.2" @@ -2648,7 +2639,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2658,7 +2649,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2702,7 +2693,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2714,6 +2705,15 @@ dependencies = [ "const-random", ] +[[package]] +name = "document-features" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" +dependencies = [ + "litrs", +] + [[package]] name = "downcast-rs" version = "2.0.2" @@ -2747,16 +2747,6 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" -[[package]] -name = "dyn-stack" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e53799688f5632f364f8fb387488dd05db9fe45db7011be066fc20e7027f8b" -dependencies = [ - "bytemuck", - "reborrow", -] - [[package]] name = "dyn-stack" version = "0.13.2" @@ -2769,9 +2759,9 @@ dependencies = [ [[package]] name = "dyn-stack-macros" -version = "0.1.0" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05dbec7076f432bb132db738df90d87a4f5789e99f59e7b1219a6b8ef61eaa68" +checksum = "e1d926b4d407d372f141f93bb444696142c29d32962ccbd3531117cf3aa0bfa9" [[package]] name = "ecdsa" @@ -2835,7 +2825,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2847,14 +2837,14 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "env_filter" -version = "0.1.3" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef" dependencies = [ "log", "regex", @@ -2862,9 +2852,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.8" +version = "0.11.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a" dependencies = [ "anstream", "anstyle", @@ -2944,9 +2934,9 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "fancy-regex" -version = "0.13.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" +checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8" dependencies = [ "bit-set", "regex-automata", @@ -2989,9 +2979,9 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.3" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0399f9d26e5191ce32c498bebd31e7a3ceabc2745f0ac54af3f335126c3f24b3" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "fixedbitset" @@ -3001,24 +2991,36 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "25.9.23" +version = "25.12.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" +checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" dependencies = [ - "bitflags 2.9.4", + "bitflags", "rustc_version", ] [[package]] name = "flate2" -version = "1.1.4" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", ] +[[package]] +name = "float8" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719a903cc23e4a89e87962c2a80fdb45cdaad0983a89bd150bb57b4c8571a7d5" +dependencies = [ + "half", + "num-traits", + "rand 0.9.2", + "rand_distr 0.5.1", +] + [[package]] name = "fnv" version = "1.0.7" @@ -3070,8 +3072,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2195cc7f87e84bd695586137de99605e7e9579b26ec5e01b82960ddb4d0922f2" dependencies = [ "arrow-array", "rand 0.9.2", @@ -3094,9 +3097,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -3109,9 +3112,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -3119,15 +3122,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -3136,32 +3139,32 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-timer" @@ -3171,9 +3174,9 @@ checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -3183,259 +3186,141 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] [[package]] name = "gemm" -version = "0.17.1" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ab24cc62135b40090e31a76a9b2766a501979f3070fa27f689c27ec04377d32" +checksum = "aa0673db364b12263d103b68337a68fbecc541d6f6b61ba72fe438654709eacb" dependencies = [ - "dyn-stack 0.10.0", - "gemm-c32 0.17.1", - "gemm-c64 0.17.1", - "gemm-common 0.17.1", - "gemm-f16 0.17.1", - "gemm-f32 0.17.1", - "gemm-f64 0.17.1", + "dyn-stack", + "gemm-c32", + "gemm-c64", + "gemm-common", + "gemm-f16", + "gemm-f32", + "gemm-f64", "num-complex", "num-traits", "paste", - "raw-cpuid 10.7.0", - "seq-macro", -] - -[[package]] -name = "gemm" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab96b703d31950f1aeddded248bc95543c9efc7ac9c4a21fda8703a83ee35451" -dependencies = [ - "dyn-stack 0.13.2", - "gemm-c32 0.18.2", - "gemm-c64 0.18.2", - "gemm-common 0.18.2", - "gemm-f16 0.18.2", - "gemm-f32 0.18.2", - "gemm-f64 0.18.2", - "num-complex", - "num-traits", - "paste", - "raw-cpuid 11.6.0", + "raw-cpuid", "seq-macro", ] [[package]] name = "gemm-c32" -version = "0.17.1" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9c030d0b983d1e34a546b86e08f600c11696fde16199f971cd46c12e67512c0" +checksum = "086936dbdcb99e37aad81d320f98f670e53c1e55a98bee70573e83f95beb128c" dependencies = [ - "dyn-stack 0.10.0", - "gemm-common 0.17.1", + "dyn-stack", + "gemm-common", "num-complex", "num-traits", "paste", - "raw-cpuid 10.7.0", - "seq-macro", -] - -[[package]] -name = "gemm-c32" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6db9fd9f40421d00eea9dd0770045a5603b8d684654816637732463f4073847" -dependencies = [ - "dyn-stack 0.13.2", - "gemm-common 0.18.2", - "num-complex", - "num-traits", - "paste", - "raw-cpuid 11.6.0", + "raw-cpuid", "seq-macro", ] [[package]] name = "gemm-c64" -version = "0.17.1" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbb5f2e79fefb9693d18e1066a557b4546cd334b226beadc68b11a8f9431852a" +checksum = "20c8aeeeec425959bda4d9827664029ba1501a90a0d1e6228e48bef741db3a3f" dependencies = [ - "dyn-stack 0.10.0", - "gemm-common 0.17.1", + "dyn-stack", + "gemm-common", "num-complex", "num-traits", "paste", - "raw-cpuid 10.7.0", - "seq-macro", -] - -[[package]] -name = "gemm-c64" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfcad8a3d35a43758330b635d02edad980c1e143dc2f21e6fd25f9e4eada8edf" -dependencies = [ - "dyn-stack 0.13.2", - "gemm-common 0.18.2", - "num-complex", - "num-traits", - "paste", - "raw-cpuid 11.6.0", + "raw-cpuid", "seq-macro", ] [[package]] name = "gemm-common" -version = "0.17.1" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2e7ea062c987abcd8db95db917b4ffb4ecdfd0668471d8dc54734fdff2354e8" +checksum = "88027625910cc9b1085aaaa1c4bc46bb3a36aad323452b33c25b5e4e7c8e2a3e" dependencies = [ "bytemuck", - "dyn-stack 0.10.0", - "half", - "num-complex", - "num-traits", - "once_cell", - "paste", - "pulp 0.18.22", - "raw-cpuid 10.7.0", - "rayon", - "seq-macro", - "sysctl 0.5.5", -] - -[[package]] -name = "gemm-common" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a352d4a69cbe938b9e2a9cb7a3a63b7e72f9349174a2752a558a8a563510d0f3" -dependencies = [ - "bytemuck", - "dyn-stack 0.13.2", + "dyn-stack", "half", "libm", "num-complex", "num-traits", "once_cell", "paste", - "pulp 0.21.5", - "raw-cpuid 11.6.0", + "pulp", + "raw-cpuid", "rayon", "seq-macro", - "sysctl 0.6.0", + "sysctl", ] [[package]] name = "gemm-f16" -version = "0.17.1" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ca4c06b9b11952071d317604acb332e924e817bd891bec8dfb494168c7cedd4" +checksum = "e3df7a55202e6cd6739d82ae3399c8e0c7e1402859b30e4cb780e61525d9486e" dependencies = [ - "dyn-stack 0.10.0", - "gemm-common 0.17.1", - "gemm-f32 0.17.1", + "dyn-stack", + "gemm-common", + "gemm-f32", "half", "num-complex", "num-traits", "paste", - "raw-cpuid 10.7.0", - "rayon", - "seq-macro", -] - -[[package]] -name = "gemm-f16" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff95ae3259432f3c3410eaa919033cd03791d81cebd18018393dc147952e109" -dependencies = [ - "dyn-stack 0.13.2", - "gemm-common 0.18.2", - "gemm-f32 0.18.2", - "half", - "num-complex", - "num-traits", - "paste", - "raw-cpuid 11.6.0", + "raw-cpuid", "rayon", "seq-macro", ] [[package]] name = "gemm-f32" -version = "0.17.1" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9a69f51aaefbd9cf12d18faf273d3e982d9d711f60775645ed5c8047b4ae113" +checksum = "02e0b8c9da1fbec6e3e3ab2ce6bc259ef18eb5f6f0d3e4edf54b75f9fd41a81c" dependencies = [ - "dyn-stack 0.10.0", - "gemm-common 0.17.1", + "dyn-stack", + "gemm-common", "num-complex", "num-traits", "paste", - "raw-cpuid 10.7.0", - "seq-macro", -] - -[[package]] -name = "gemm-f32" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc8d3d4385393304f407392f754cd2dc4b315d05063f62cf09f47b58de276864" -dependencies = [ - "dyn-stack 0.13.2", - "gemm-common 0.18.2", - "num-complex", - "num-traits", - "paste", - "raw-cpuid 11.6.0", + "raw-cpuid", "seq-macro", ] [[package]] name = "gemm-f64" -version = "0.17.1" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa397a48544fadf0b81ec8741e5c0fba0043008113f71f2034def1935645d2b0" +checksum = "056131e8f2a521bfab322f804ccd652520c79700d81209e9d9275bbdecaadc6a" dependencies = [ - "dyn-stack 0.10.0", - "gemm-common 0.17.1", + "dyn-stack", + "gemm-common", "num-complex", "num-traits", "paste", - "raw-cpuid 10.7.0", - "seq-macro", -] - -[[package]] -name = "gemm-f64" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35b2a4f76ce4b8b16eadc11ccf2e083252d8237c1b589558a49b0183545015bd" -dependencies = [ - "dyn-stack 0.13.2", - "gemm-common 0.18.2", - "num-complex", - "num-traits", - "paste", - "raw-cpuid 11.6.0", + "raw-cpuid", "seq-macro", ] [[package]] name = "generator" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2" +checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" dependencies = [ "cc", "cfg-if", "libc", "log", "rustversion", - "windows 0.61.3", + "windows-link", + "windows-result", ] [[package]] @@ -3450,40 +3335,40 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", - "wasi 0.14.7+wasi-0.2.4", + "r-efi 5.3.0", + "wasip2", "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 6.0.0", "wasip2", "wasip3", ] @@ -3538,16 +3423,16 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" dependencies = [ "atomic-waker", "bytes", "fnv", "futures-core", "futures-sink", - "http 1.3.1", + "http 1.4.0", "indexmap 2.13.0", "slab", "tokio", @@ -3607,6 +3492,8 @@ dependencies = [ "allocator-api2", "equivalent", "foldhash 0.2.0", + "serde", + "serde_core", ] [[package]] @@ -3641,7 +3528,7 @@ checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97" dependencies = [ "dirs", "futures", - "http 1.3.1", + "http 1.4.0", "indicatif", "libc", "log", @@ -3650,7 +3537,7 @@ dependencies = [ "reqwest", "serde", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "ureq", "windows-sys 0.60.2", @@ -3667,11 +3554,11 @@ dependencies = [ [[package]] name = "home" -version = "0.5.11" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3693,12 +3580,11 @@ dependencies = [ [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -3720,7 +3606,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.3.1", + "http 1.4.0", ] [[package]] @@ -3731,7 +3617,7 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "pin-project-lite", ] @@ -3780,16 +3666,16 @@ dependencies = [ [[package]] name = "hyper" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" dependencies = [ "atomic-waker", "bytes", "futures-channel", "futures-core", - "h2 0.4.12", - "http 1.3.1", + "h2 0.4.13", + "http 1.4.0", "http-body 1.0.1", "httparse", "httpdate", @@ -3822,37 +3708,36 @@ version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http 1.3.1", - "hyper 1.7.0", + "http 1.4.0", + "hyper 1.8.1", "hyper-util", - "rustls 0.23.31", + "rustls 0.23.37", "rustls-native-certs", "rustls-pki-types", "tokio", "tokio-rustls 0.26.4", "tower-service", - "webpki-roots 1.0.3", + "webpki-roots 1.0.6", ] [[package]] name = "hyper-util" -version = "0.1.17" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64 0.22.1", "bytes", "futures-channel", - "futures-core", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", - "hyper 1.7.0", + "hyper 1.8.1", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.0", + "socket2 0.6.3", "system-configuration", "tokio", "tower-service", @@ -3871,9 +3756,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.64" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -3881,7 +3766,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.61.2", + "windows-core 0.62.2", ] [[package]] @@ -3895,22 +3780,22 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" dependencies = [ "displaydoc", "potential_utf", - "yoke 0.8.0", + "yoke", "zerofrom", "zerovec", ] [[package]] name = "icu_locale_core" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" dependencies = [ "displaydoc", "litemap", @@ -3921,11 +3806,10 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" dependencies = [ - "displaydoc", "icu_collections", "icu_normalizer_data", "icu_properties", @@ -3936,44 +3820,40 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" [[package]] name = "icu_properties" -version = "2.0.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" dependencies = [ - "displaydoc", "icu_collections", "icu_locale_core", "icu_properties_data", "icu_provider", - "potential_utf", "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "2.0.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" [[package]] name = "icu_provider" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" dependencies = [ "displaydoc", "icu_locale_core", - "stable_deref_trait", - "tinystr", "writeable", - "yoke 0.8.0", + "yoke", "zerofrom", "zerotrie", "zerovec", @@ -4050,9 +3930,12 @@ dependencies = [ [[package]] name = "indoc" -version = "2.0.6" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] [[package]] name = "inout" @@ -4075,15 +3958,15 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.11.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.8" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +checksum = "d8e7418f59cc01c88316161279a7f665217ae316b388e58a0d10e29f54f1e5eb" dependencies = [ "memchr", "serde", @@ -4091,9 +3974,9 @@ dependencies = [ [[package]] name = "is_terminal_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itertools" @@ -4133,9 +4016,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "itoap" @@ -4145,35 +4028,35 @@ checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" [[package]] name = "jiff" -version = "0.2.15" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "jiff-tzdb-platform", "log", "portable-atomic", "portable-atomic-util", - "serde", - "windows-sys 0.59.0", + "serde_core", + "windows-sys 0.61.2", ] [[package]] name = "jiff-static" -version = "0.2.15" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "jiff-tzdb" -version = "0.1.4" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1283705eb0a21404d2bfd6eef2a7593d240bc42a0bdb39db0ad6fa2ec026524" +checksum = "c900ef84826f1338a557697dc8fc601df9ca9af4ac137c7fb61d4c6f2dfd3076" [[package]] name = "jiff-tzdb-platform" @@ -4190,15 +4073,15 @@ version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "libc", ] [[package]] name = "js-sys" -version = "0.3.81" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" dependencies = [ "once_cell", "wasm-bindgen", @@ -4206,9 +4089,9 @@ dependencies = [ [[package]] name = "jsonb" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a452366d21e8d3cbca680c41388e01d6a88739afef7877961946a6da409f9ccd" +checksum = "2a901f06163d352fbe41c3c2ff5e08b75330a003cc941e988fb501022f5421e6" dependencies = [ "byteorder", "ethnum", @@ -4241,8 +4124,9 @@ dependencies = [ [[package]] name = "lance" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efe6c3ddd79cdfd2b7e1c23cafae52806906bc40fbd97de9e8cf2f8c7a75fc04" dependencies = [ "arrow", "arrow-arith", @@ -4308,8 +4192,9 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d9f5d95bdda2a2b790f1fb8028b5b6dcf661abeb3133a8bca0f3d24b054af87" dependencies = [ "arrow-array", "arrow-buffer", @@ -4320,7 +4205,7 @@ dependencies = [ "arrow-select", "bytes", "futures", - "getrandom 0.2.16", + "getrandom 0.2.17", "half", "jsonb", "num-traits", @@ -4329,8 +4214,9 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f827d6ab9f8f337a9509d5ad66a12f3314db8713868260521c344ef6135eb4e4" dependencies = [ "arrayref", "paste", @@ -4339,8 +4225,9 @@ dependencies = [ [[package]] name = "lance-core" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f1e25df6a79bf72ee6bcde0851f19b1cd36c5848c1b7db83340882d3c9fdecb" dependencies = [ "arrow-array", "arrow-buffer", @@ -4377,8 +4264,9 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93146de8ae720cb90edef81c2f2d0a1b065fc2f23ecff2419546f389b0fa70a4" dependencies = [ "arrow", "arrow-array", @@ -4408,8 +4296,9 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccec8ce4d8e0a87a99c431dab2364398029f2ffb649c1a693c60c79e05ed30dd" dependencies = [ "arrow", "arrow-array", @@ -4427,8 +4316,9 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1aec0bbbac6bce829bc10f1ba066258126100596c375fb71908ecf11c2c2a5" dependencies = [ "arrow-arith", "arrow-array", @@ -4465,8 +4355,9 @@ dependencies = [ [[package]] name = "lance-file" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14a8c548804f5b17486dc2d3282356ed1957095a852780283bc401fdd69e9075" dependencies = [ "arrow-arith", "arrow-array", @@ -4498,8 +4389,9 @@ dependencies = [ [[package]] name = "lance-index" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2da212f0090ea59f79ac3686660f596520c167fe1cb5f408900cf71d215f0e03" dependencies = [ "arrow", "arrow-arith", @@ -4513,6 +4405,7 @@ dependencies = [ "bitpacking", "bitvec", "bytes", + "chrono", "crossbeam-queue", "datafusion", "datafusion-common", @@ -4562,8 +4455,9 @@ dependencies = [ [[package]] name = "lance-io" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d958eb4b56f03bbe0f5f85eb2b4e9657882812297b6f711f201ffc995f259f" dependencies = [ "arrow", "arrow-arith", @@ -4582,7 +4476,7 @@ dependencies = [ "chrono", "deepsize", "futures", - "http 1.3.1", + "http 1.4.0", "lance-arrow", "lance-core", "lance-namespace", @@ -4604,8 +4498,9 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0285b70da35def7ed95e150fae1d5308089554e1290470403ed3c50cb235bc5e" dependencies = [ "arrow-array", "arrow-buffer", @@ -4621,21 +4516,24 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f78e2a828b654e062a495462c6e3eb4fcf0e7e907d761b8f217fc09ccd3ceac" dependencies = [ "arrow", "async-trait", "bytes", "lance-core", "lance-namespace-reqwest-client", + "serde", "snafu 0.9.0", ] [[package]] name = "lance-namespace-impls" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2392314f3da38f00d166295e44244208a65ccfc256e274fa8631849fc3f4d94" dependencies = [ "arrow", "arrow-ipc", @@ -4666,9 +4564,9 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.5.2" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ad4c947349acd6e37e984eba0254588bd894e6128434338b9e6904e56fb4633" +checksum = "ee2e48de899e2931afb67fcddd0a08e439bf5d8b6ea2a2ed9cb8f4df669bd5cc" dependencies = [ "reqwest", "serde", @@ -4679,8 +4577,9 @@ dependencies = [ [[package]] name = "lance-table" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3df9c4adca3eb2074b3850432a9fb34248a3d90c3d6427d158b13ff9355664ee" dependencies = [ "arrow", "arrow-array", @@ -4719,8 +4618,9 @@ dependencies = [ [[package]] name = "lance-testing" -version = "3.0.0-rc.3" -source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ed7119bdd6983718387b4ac44af873a165262ca94f181b104cd6f97912eb3bf" dependencies = [ "arrow-array", "arrow-schema", @@ -4731,7 +4631,7 @@ dependencies = [ [[package]] name = "lancedb" -version = "0.27.0-beta.5" +version = "0.27.2" dependencies = [ "ahash", "anyhow", @@ -4768,7 +4668,7 @@ dependencies = [ "futures", "half", "hf-hub", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "lance", "lance-arrow", @@ -4813,9 +4713,10 @@ dependencies = [ [[package]] name = "lancedb-nodejs" -version = "0.27.0-beta.5" +version = "0.27.2" dependencies = [ "arrow-array", + "arrow-buffer", "arrow-ipc", "arrow-schema", "async-trait", @@ -4823,6 +4724,7 @@ dependencies = [ "aws-lc-sys", "env_logger", "futures", + "half", "lancedb", "log", "lzma-sys", @@ -4833,7 +4735,7 @@ dependencies = [ [[package]] name = "lancedb-python" -version = "0.30.0-beta.5" +version = "0.30.2" dependencies = [ "arrow", "async-trait", @@ -4845,6 +4747,7 @@ dependencies = [ "lance-namespace", "lance-namespace-impls", "lancedb", + "log", "pin-project", "pyo3", "pyo3-async-runtimes", @@ -4861,7 +4764,7 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" dependencies = [ - "spin", + "spin 0.9.8", ] [[package]] @@ -4935,19 +4838,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.180" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" - -[[package]] -name = "libloading" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" -dependencies = [ - "cfg-if", - "windows-link 0.2.1", -] +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libloading" @@ -4956,22 +4849,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" dependencies = [ "cfg-if", - "windows-link 0.2.1", + "windows-link", ] [[package]] name = "libm" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" -version = "0.1.10" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" +checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08" dependencies = [ - "bitflags 2.9.4", "libc", ] @@ -4983,15 +4875,21 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + +[[package]] +name = "litrs" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" [[package]] name = "lock_api" @@ -5004,9 +4902,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "loom" @@ -5057,15 +4955,15 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" [[package]] name = "lz4_flex" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" +checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" dependencies = [ "twox-hash", ] @@ -5146,9 +5044,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "memmap2" @@ -5161,9 +5059,9 @@ dependencies = [ [[package]] name = "memmap2" -version = "0.9.8" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" dependencies = [ "libc", "stable_deref_trait", @@ -5212,13 +5110,13 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.4" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi 0.11.1+wasi-snapshot-preview1", - "windows-sys 0.59.0", + "wasi", + "windows-sys 0.61.2", ] [[package]] @@ -5229,9 +5127,9 @@ checksum = "dce6dd36094cac388f119d2e9dc82dc730ef91c32a6222170d630e5414b956e6" [[package]] name = "moka" -version = "0.12.11" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8261cd88c312e0004c1d51baad2980c66528dfdb2bee62003e643a4d8f86b077" +checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" dependencies = [ "async-lock", "crossbeam-channel", @@ -5242,7 +5140,6 @@ dependencies = [ "futures-util", "parking_lot", "portable-atomic", - "rustc_version", "smallvec", "tagptr", "uuid", @@ -5267,7 +5164,7 @@ checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -5310,7 +5207,7 @@ version = "3.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6944d0bf100571cd6e1a98a316cdca262deb6fccf8d93f5ae1502ca3fc88bd3" dependencies = [ - "bitflags 2.9.4", + "bitflags", "ctor", "futures", "napi-build", @@ -5337,7 +5234,7 @@ dependencies = [ "napi-derive-backend", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -5350,7 +5247,7 @@ dependencies = [ "proc-macro2", "quote", "semver", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -5359,7 +5256,7 @@ version = "3.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eb602b84d7c1edae45e50bbf1374696548f36ae179dfa667f577e384bb90c2b" dependencies = [ - "libloading 0.9.0", + "libloading", ] [[package]] @@ -5413,34 +5310,20 @@ dependencies = [ [[package]] name = "ntapi" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" dependencies = [ "winapi", ] [[package]] name = "nu-ansi-term" -version = "0.50.1" +version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.52.0", -] - -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", + "windows-sys 0.61.2", ] [[package]] @@ -5481,9 +5364,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" [[package]] name = "num-integer" @@ -5505,17 +5388,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -5536,28 +5408,6 @@ dependencies = [ "libc", ] -[[package]] -name = "num_enum" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a" -dependencies = [ - "num_enum_derive", - "rustversion", -] - -[[package]] -name = "num_enum_derive" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d" -dependencies = [ - "proc-macro-crate", - "proc-macro2", - "quote", - "syn 2.0.114", -] - [[package]] name = "number_prefix" version = "0.4.0" @@ -5565,10 +5415,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] -name = "object_store" -version = "0.12.4" +name = "object" +version = "0.37.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + +[[package]] +name = "object_store" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" dependencies = [ "async-trait", "base64 0.22.1", @@ -5576,16 +5435,16 @@ dependencies = [ "chrono", "form_urlencoded", "futures", - "http 1.3.1", + "http 1.4.0", "http-body-util", "httparse", "humantime", - "hyper 1.7.0", + "hyper 1.8.1", "itertools 0.14.0", "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.38.3", + "quick-xml 0.38.4", "rand 0.9.2", "reqwest", "ring", @@ -5593,7 +5452,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "url", @@ -5620,15 +5479,15 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "once_cell_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "oneshot" @@ -5642,7 +5501,7 @@ version = "6.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" dependencies = [ - "bitflags 2.9.4", + "bitflags", "libc", "once_cell", "onig_sys", @@ -5670,14 +5529,14 @@ dependencies = [ "bytes", "crc32c", "futures", - "getrandom 0.2.16", - "http 1.3.1", + "getrandom 0.2.17", + "http 1.4.0", "http-body 1.0.1", "jiff", "log", "md-5", "percent-encoding", - "quick-xml 0.38.3", + "quick-xml 0.38.4", "reqsign", "reqwest", "serde", @@ -5690,9 +5549,9 @@ dependencies = [ [[package]] name = "openssl-probe" -version = "0.1.6" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "option-ext" @@ -5771,7 +5630,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -5819,12 +5678,12 @@ dependencies = [ [[package]] name = "pem" -version = "3.0.5" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38af38e8470ac9dee3ce1bae1af9c1671fffc44ddfd8bd1d0a3445bf349a8ef3" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" dependencies = [ "base64 0.22.1", - "serde", + "serde_core", ] [[package]] @@ -5918,29 +5777,29 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "pin-project-lite" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "pin-utils" @@ -6017,7 +5876,7 @@ version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ea21b858b16b9c0e17a12db2800d11aa5b4bd182be6b3022eb537bbfc1f2db5" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "polars-arrow", "polars-core", "polars-error", @@ -6048,7 +5907,7 @@ dependencies = [ "ethnum", "fast-float", "foreign_vec", - "getrandom 0.2.16", + "getrandom 0.2.17", "hashbrown 0.14.5", "itoa", "itoap", @@ -6099,7 +5958,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "465f70d3e96b6d0b1a43c358ba451286b8c8bd56696feff020d65702aa33e35c" dependencies = [ "ahash", - "bitflags 2.9.4", + "bitflags", "bytemuck", "chrono", "chrono-tz 0.8.6", @@ -6173,7 +6032,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89b2632b1af668e2058d5f8f916d8fbde3cac63d03ae29a705f598e41dcfeb7f" dependencies = [ "ahash", - "bitflags 2.9.4", + "bitflags", "glob", "once_cell", "polars-arrow", @@ -6352,7 +6211,7 @@ dependencies = [ "num-traits", "once_cell", "polars-error", - "raw-cpuid 11.6.0", + "raw-cpuid", "rayon", "smartstring", "stacker", @@ -6362,24 +6221,24 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.11.1" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.4" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" dependencies = [ "portable-atomic", ] [[package]] name = "potential_utf" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" dependencies = [ "zerovec", ] @@ -6406,23 +6265,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "proc-macro-crate" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" dependencies = [ "toml_edit", ] [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] @@ -6452,7 +6311,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.114", + "syn 2.0.117", "tempfile", ] @@ -6466,7 +6325,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -6480,39 +6339,37 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.27" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e66fcd288453b748497d8fb18bccc83a16b0518e3906d4b8df0a8d42d93dbb1c" +checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8" dependencies = [ + "ar_archive_writer", "cc", ] [[package]] name = "pulp" -version = "0.18.22" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0a01a0dc67cf4558d279f0c25b0962bd08fc6dec0137699eae304103e882fe6" -dependencies = [ - "bytemuck", - "libm", - "num-complex", - "reborrow", -] - -[[package]] -name = "pulp" -version = "0.21.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907" +checksum = "2e205bb30d5b916c55e584c22201771bcf2bad9aabd5d4127f38387140c38632" dependencies = [ "bytemuck", "cfg-if", "libm", "num-complex", + "paste", + "pulp-wasm-simd-flag", + "raw-cpuid", "reborrow", "version_check", ] +[[package]] +name = "pulp-wasm-simd-flag" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40e24eee682d89fb193496edf918a7f407d30175b2e785fe057e4392dfd182e0" + [[package]] name = "pyo3" version = "0.26.0" @@ -6552,7 +6409,7 @@ checksum = "c29bc5c673e36a8102d0b9179149c1bb59990d8db4f3ae58bd7dceccab90b951" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -6583,7 +6440,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -6596,7 +6453,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -6611,9 +6468,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.38.3" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89" +checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" dependencies = [ "memchr", "serde", @@ -6631,9 +6488,9 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.31", - "socket2 0.6.0", - "thiserror 2.0.17", + "rustls 0.23.37", + "socket2 0.6.3", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -6641,20 +6498,20 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.13" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ "bytes", - "getrandom 0.3.3", + "getrandom 0.3.4", "lru-slab", "rand 0.9.2", "ring", "rustc-hash", - "rustls 0.23.31", + "rustls 0.23.37", "rustls-pki-types", "slab", - "thiserror 2.0.17", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -6669,16 +6526,16 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.0", + "socket2 0.6.3", "tracing", "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.41" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -6689,6 +6546,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "radium" version = "0.7.0" @@ -6713,7 +6576,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -6733,7 +6596,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -6742,16 +6605,16 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", ] [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", ] [[package]] @@ -6780,7 +6643,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41" dependencies = [ - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -6812,18 +6675,9 @@ dependencies = [ [[package]] name = "rangemap" -version = "1.6.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93e7e49bb0bf967717f7bd674458b3d6b0c5f48ec7e3038166026a69fc22223" - -[[package]] -name = "raw-cpuid" -version = "10.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" -dependencies = [ - "bitflags 1.3.2", -] +checksum = "973443cf09a9c8656b574a866ab68dfa19f0867d0340648c7d2f6a71b8a8ea68" [[package]] name = "raw-cpuid" @@ -6831,7 +6685,7 @@ version = "11.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" dependencies = [ - "bitflags 2.9.4", + "bitflags", ] [[package]] @@ -6894,7 +6748,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -6903,7 +6757,7 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.9.4", + "bitflags", ] [[package]] @@ -6912,9 +6766,9 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "libredox", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -6934,14 +6788,14 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -6951,9 +6805,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -6962,15 +6816,15 @@ dependencies = [ [[package]] name = "regex-lite" -version = "0.1.7" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "943f41321c63ef1c92fd763bfe054d2668f7f225a5c29f0105903dc2fc04ba30" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" [[package]] name = "regex-syntax" -version = "0.8.6" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "relative-path" @@ -6989,11 +6843,11 @@ dependencies = [ "base64 0.22.1", "chrono", "form_urlencoded", - "getrandom 0.2.16", + "getrandom 0.2.17", "hex", "hmac", "home", - "http 1.3.1", + "http 1.4.0", "jsonwebtoken", "log", "once_cell", @@ -7012,21 +6866,20 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.24" +version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ - "async-compression", "base64 0.22.1", "bytes", "encoding_rs", "futures-core", "futures-util", - "h2 0.4.12", - "http 1.3.1", + "h2 0.4.13", + "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-rustls 0.27.7", "hyper-util", "js-sys", @@ -7036,7 +6889,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.31", + "rustls 0.23.37", "rustls-native-certs", "rustls-pki-types", "serde", @@ -7047,14 +6900,14 @@ dependencies = [ "tokio-rustls 0.26.4", "tokio-util", "tower", - "tower-http 0.6.6", + "tower-http 0.6.8", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.3", + "webpki-roots 1.0.6", ] [[package]] @@ -7092,7 +6945,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted 0.9.0", "windows-sys 0.52.0", @@ -7155,7 +7008,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.114", + "syn 2.0.117", "unicode-ident", ] @@ -7200,7 +7053,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.9.4", + "bitflags", "errno", "libc", "linux-raw-sys 0.4.15", @@ -7209,14 +7062,14 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags 2.9.4", + "bitflags", "errno", "libc", - "linux-raw-sys 0.11.0", + "linux-raw-sys 0.12.1", "windows-sys 0.61.2", ] @@ -7234,25 +7087,25 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.31" +version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ "aws-lc-rs", "log", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.4", + "rustls-webpki 0.103.10", "subtle", "zeroize", ] [[package]] name = "rustls-native-certs" -version = "0.8.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" dependencies = [ "openssl-probe", "rustls-pki-types", @@ -7271,9 +7124,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.12.0" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "web-time", "zeroize", @@ -7291,9 +7144,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.4" +version = "0.103.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ "aws-lc-rs", "ring", @@ -7309,16 +7162,17 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.20" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "safetensors" -version = "0.4.5" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44560c11236a6130a46ce36c836a62936dc81ebf8c36a37947423571be0e55b6" +checksum = "675656c1eabb620b921efea4f9199f97fc86e36dd6ffd1fbbe48d0f59a4987f5" dependencies = [ + "hashbrown 0.16.1", "serde", "serde_json", ] @@ -7343,9 +7197,9 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.28" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" dependencies = [ "windows-sys 0.61.2", ] @@ -7364,9 +7218,9 @@ dependencies = [ [[package]] name = "schemars" -version = "1.0.4" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" dependencies = [ "dyn-clone", "ref-cast", @@ -7433,11 +7287,11 @@ dependencies = [ [[package]] name = "security-framework" -version = "3.5.1" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ - "bitflags 2.9.4", + "bitflags", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -7446,9 +7300,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.15.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" dependencies = [ "core-foundation-sys", "libc", @@ -7493,20 +7347,20 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "serde_json" -version = "1.0.145" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -7537,7 +7391,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7554,9 +7408,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.15.0" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6093cd8c01b25262b84927e0f7151692158fab02d961e04c979d3903eba7ecc5" +checksum = "dd5414fad8e6907dbdd5bc441a50ae8d6e26151a03b1de04d89a5576de61d01f" dependencies = [ "base64 0.22.1", "chrono", @@ -7564,7 +7418,7 @@ dependencies = [ "indexmap 1.9.3", "indexmap 2.13.0", "schemars 0.9.0", - "schemars 1.0.4", + "schemars 1.2.1", "serde_core", "serde_json", "serde_with_macros", @@ -7573,14 +7427,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.15.0" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7e6c180db0816026a61afa1cff5344fb7ebded7e4d3062772179f2501481c27" +checksum = "d3db8978e608f1fe7357e211969fd9abdcae80bac1ba7a3369bb7eb6b404eb65" dependencies = [ - "darling 0.21.3", + "darling 0.23.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7622,10 +7476,11 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.6" +version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" dependencies = [ + "errno", "libc", ] @@ -7651,9 +7506,9 @@ dependencies = [ [[package]] name = "simd-adler32" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] name = "simdutf8" @@ -7663,36 +7518,36 @@ checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "simple_asn1" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" +checksum = "0d585997b0ac10be3c5ee635f1bab02d512760d14b7c468801ac8a01d9ae5f1d" dependencies = [ "num-bigint", "num-traits", - "thiserror 2.0.17", + "thiserror 2.0.18", "time", ] [[package]] name = "siphasher" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" [[package]] name = "sketches-ddsketch" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" +checksum = "0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b" dependencies = [ "serde", ] [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" @@ -7738,7 +7593,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7750,7 +7605,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7765,12 +7620,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.0" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -7790,6 +7645,12 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "spin" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" + [[package]] name = "spki" version = "0.6.0" @@ -7849,20 +7710,20 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "stable_deref_trait" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stacker" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" dependencies = [ "cc", "cfg-if", @@ -7935,7 +7796,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7948,7 +7809,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7970,9 +7831,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.114" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -7996,21 +7857,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", -] - -[[package]] -name = "sysctl" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" -dependencies = [ - "bitflags 2.9.4", - "byteorder", - "enum-as-inner", - "libc", - "thiserror 1.0.69", - "walkdir", + "syn 2.0.117", ] [[package]] @@ -8019,7 +7866,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc" dependencies = [ - "bitflags 2.9.4", + "bitflags", "byteorder", "enum-as-inner", "libc", @@ -8038,16 +7885,16 @@ dependencies = [ "libc", "ntapi", "once_cell", - "windows 0.52.0", + "windows", ] [[package]] name = "system-configuration" -version = "0.6.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ - "bitflags 2.9.4", + "bitflags", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -8093,9 +7940,9 @@ dependencies = [ "levenshtein_automata", "log", "lru", - "lz4_flex 0.11.5", + "lz4_flex 0.11.6", "measure_time", - "memmap2 0.9.8", + "memmap2 0.9.10", "once_cell", "oneshot", "rayon", @@ -8114,7 +7961,7 @@ dependencies = [ "tantivy-stacker", "tantivy-tokenizer-api", "tempfile", - "thiserror 2.0.17", + "thiserror 2.0.18", "time", "uuid", "winapi", @@ -8228,28 +8075,28 @@ checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" [[package]] name = "target-lexicon" -version = "0.13.3" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c" +checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" [[package]] name = "tempfile" -version = "3.23.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom 0.4.2", "once_cell", - "rustix 1.1.2", + "rustix 1.1.4", "windows-sys 0.61.2", ] [[package]] name = "test-log" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e33b98a582ea0be1168eba097538ee8dd4bbe0f2b01b22ac92ea30054e5be7b" +checksum = "37d53ac171c92a39e4769491c4b4dde7022c60042254b5fc044ae409d34a24d4" dependencies = [ "env_logger", "test-log-macros", @@ -8258,13 +8105,13 @@ dependencies = [ [[package]] name = "test-log-macros" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "451b374529930d7601b1eef8d32bc79ae870b6079b069401709c2a8bf9e75f36" +checksum = "be35209fd0781c5401458ab66e4f98accf63553e8fae7425503e92fdd319783b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -8278,11 +8125,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.17", + "thiserror-impl 2.0.18", ] [[package]] @@ -8293,18 +8140,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -8367,9 +8214,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" dependencies = [ "displaydoc", "zerovec", @@ -8377,9 +8224,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -8399,7 +8246,7 @@ dependencies = [ "aho-corasick", "derive_builder", "esaxx-rs", - "getrandom 0.2.16", + "getrandom 0.2.17", "indicatif", "itertools 0.12.1", "lazy_static", @@ -8424,9 +8271,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.48.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" dependencies = [ "bytes", "libc", @@ -8434,20 +8281,20 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.0", + "socket2 0.6.3", "tokio-macros", "windows-sys 0.61.2", ] [[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -8466,15 +8313,15 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls 0.23.31", + "rustls 0.23.37", "tokio", ] [[package]] name = "tokio-stream" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" dependencies = [ "futures-core", "pin-project-lite", @@ -8483,9 +8330,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.16" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -8496,18 +8343,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.2" +version = "1.1.0+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f1085dec27c2b6632b04c80b3bb1b4300d6495d1e129693bdda7d91e72eec1" +checksum = "97251a7c317e03ad83774a8752a7e81fb6067740609f75ea2b585b569a59198f" dependencies = [ "serde_core", ] [[package]] name = "toml_edit" -version = "0.23.6" +version = "0.25.8+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3effe7c0e86fdff4f69cdd2ccc1b96f933e24811c5441d44904e8683e27184b" +checksum = "16bff38f1d86c47f9ff0647e6838d7bb362522bdf44006c7068c2b1e606f1f3c" dependencies = [ "indexmap 2.13.0", "toml_datetime", @@ -8517,18 +8364,18 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.3" +version = "1.1.0+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cf893c33be71572e0e9aa6dd15e6677937abd686b066eac3f8cd3531688a627" +checksum = "2334f11ee363607eb04df9b8fc8a13ca1715a72ba8662a26ac285c98aabb4011" dependencies = [ "winnow", ] [[package]] name = "tower" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", @@ -8546,9 +8393,9 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" dependencies = [ - "bitflags 2.9.4", + "bitflags", "bytes", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "pin-project-lite", @@ -8559,17 +8406,22 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ - "bitflags 2.9.4", + "async-compression", + "bitflags", "bytes", + "futures-core", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", + "http-body-util", "iri-string", "pin-project-lite", + "tokio", + "tokio-util", "tower", "tower-layer", "tower-service", @@ -8589,9 +8441,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "log", "pin-project-lite", @@ -8601,20 +8453,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", "valuable", @@ -8633,9 +8485,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.20" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" dependencies = [ "matchers", "nu-ansi-term", @@ -8664,44 +8516,29 @@ dependencies = [ "rand 0.9.2", ] +[[package]] +name = "typed-path" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e28f89b80c87b8fb0cf04ab448d5dd0dd0ade2f8891bae878de66a75a28600e" + [[package]] name = "typenum" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" -[[package]] -name = "ug" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90b70b37e9074642bc5f60bb23247fd072a84314ca9e71cdf8527593406a0dd3" -dependencies = [ - "gemm 0.18.2", - "half", - "libloading 0.8.9", - "memmap2 0.9.8", - "num", - "num-traits", - "num_cpus", - "rayon", - "safetensors", - "serde", - "thiserror 1.0.69", - "tracing", - "yoke 0.7.5", -] - [[package]] name = "unicase" -version = "2.8.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-ident" -version = "1.0.19" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-normalization-alignments" @@ -8723,9 +8560,9 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "da36089a805484bcccfffe0739803392c8298778a2d2f09febf76fac5ad9025b" [[package]] name = "unicode-width" @@ -8773,7 +8610,7 @@ dependencies = [ "flate2", "log", "once_cell", - "rustls 0.23.31", + "rustls 0.23.37", "rustls-pki-types", "serde", "serde_json", @@ -8784,9 +8621,9 @@ dependencies = [ [[package]] name = "url" -version = "2.5.7" +version = "2.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" dependencies = [ "form_urlencoded", "idna", @@ -8820,11 +8657,11 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.21.0" +version = "1.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" +checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ - "getrandom 0.4.1", + "getrandom 0.4.2", "js-sys", "serde_core", "wasm-bindgen", @@ -8873,22 +8710,13 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.7+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" -dependencies = [ - "wasip2", -] - [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ - "wit-bindgen 0.46.0", + "wit-bindgen", ] [[package]] @@ -8897,14 +8725,14 @@ version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ - "wit-bindgen 0.51.0", + "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.104" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" dependencies = [ "cfg-if", "once_cell", @@ -8913,27 +8741,14 @@ dependencies = [ "wasm-bindgen-shared", ] -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.104" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn 2.0.114", - "wasm-bindgen-shared", -] - [[package]] name = "wasm-bindgen-futures" -version = "0.4.54" +version = "0.4.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" dependencies = [ "cfg-if", + "futures-util", "js-sys", "once_cell", "wasm-bindgen", @@ -8942,9 +8757,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.104" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -8952,22 +8767,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.104" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" dependencies = [ + "bumpalo", "proc-macro2", "quote", - "syn 2.0.114", - "wasm-bindgen-backend", + "syn 2.0.117", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.104" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" dependencies = [ "unicode-ident", ] @@ -9013,7 +8828,7 @@ version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ - "bitflags 2.9.4", + "bitflags", "hashbrown 0.15.5", "indexmap 2.13.0", "semver", @@ -9021,9 +8836,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.81" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" dependencies = [ "js-sys", "wasm-bindgen", @@ -9045,14 +8860,14 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.3", + "webpki-roots 1.0.6", ] [[package]] name = "webpki-roots" -version = "1.0.3" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b130c0d2d49f8b6889abc456e795e82525204f27c42cf767cf0d7734e089b8" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" dependencies = [ "rustls-pki-types", ] @@ -9098,28 +8913,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows" -version = "0.61.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" -dependencies = [ - "windows-collections", - "windows-core 0.61.2", - "windows-future", - "windows-link 0.1.3", - "windows-numerics", -] - -[[package]] -name = "windows-collections" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" -dependencies = [ - "windows-core 0.61.2", -] - [[package]] name = "windows-core" version = "0.52.0" @@ -9131,28 +8924,17 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.61.2" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", - "windows-link 0.1.3", + "windows-link", "windows-result", "windows-strings", ] -[[package]] -name = "windows-future" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" -dependencies = [ - "windows-core 0.61.2", - "windows-link 0.1.3", - "windows-threading", -] - [[package]] name = "windows-implement" version = "0.60.2" @@ -9161,7 +8943,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -9172,58 +8954,42 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] -[[package]] -name = "windows-link" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" - [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" -[[package]] -name = "windows-numerics" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" -dependencies = [ - "windows-core 0.61.2", - "windows-link 0.1.3", -] - [[package]] name = "windows-registry" -version = "0.5.3" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" dependencies = [ - "windows-link 0.1.3", + "windows-link", "windows-result", "windows-strings", ] [[package]] name = "windows-result" -version = "0.3.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] name = "windows-strings" -version = "0.4.2" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] @@ -9259,7 +9025,7 @@ version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -9284,7 +9050,7 @@ version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ - "windows-link 0.2.1", + "windows-link", "windows_aarch64_gnullvm 0.53.1", "windows_aarch64_msvc 0.53.1", "windows_i686_gnu 0.53.1", @@ -9295,15 +9061,6 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] -[[package]] -name = "windows-threading" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" -dependencies = [ - "windows-link 0.1.3", -] - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -9402,19 +9159,13 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "0.7.13" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +checksum = "a90e88e4667264a994d34e6d1ab2d26d398dcdca8b7f52bec8668957517fc7d8" dependencies = [ "memchr", ] -[[package]] -name = "wit-bindgen" -version = "0.46.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" - [[package]] name = "wit-bindgen" version = "0.51.0" @@ -9445,7 +9196,7 @@ dependencies = [ "heck 0.5.0", "indexmap 2.13.0", "prettyplease", - "syn 2.0.114", + "syn 2.0.117", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -9461,7 +9212,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -9473,7 +9224,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", - "bitflags 2.9.4", + "bitflags", "indexmap 2.13.0", "log", "serde", @@ -9505,9 +9256,9 @@ dependencies = [ [[package]] name = "writeable" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" [[package]] name = "wyz" @@ -9532,70 +9283,45 @@ checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" [[package]] name = "yoke" -version = "0.7.5" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" dependencies = [ - "serde", "stable_deref_trait", - "yoke-derive 0.7.5", - "zerofrom", -] - -[[package]] -name = "yoke" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive 0.8.0", + "yoke-derive", "zerofrom", ] [[package]] name = "yoke-derive" -version = "0.7.5" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", - "synstructure", -] - -[[package]] -name = "yoke-derive" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.114", + "syn 2.0.117", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.27" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.27" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -9615,7 +9341,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", "synstructure", ] @@ -9627,52 +9353,55 @@ checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" dependencies = [ "displaydoc", - "yoke 0.8.0", + "yoke", "zerofrom", ] [[package]] name = "zerovec" -version = "0.11.4" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" dependencies = [ - "yoke 0.8.0", + "yoke", "zerofrom", "zerovec-derive", ] [[package]] name = "zerovec-derive" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "zip" -version = "1.1.4" +version = "7.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cc23c04387f4da0374be4533ad1208cbb091d5c11d070dfef13676ad6497164" +checksum = "c42e33efc22a0650c311c2ef19115ce232583abbe80850bc8b66509ebef02de0" dependencies = [ - "arbitrary", "crc32fast", - "crossbeam-utils", - "displaydoc", "indexmap 2.13.0", - "num_enum", - "thiserror 1.0.69", + "memchr", + "typed-path", ] +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + [[package]] name = "zstd" version = "0.13.3" diff --git a/Cargo.toml b/Cargo.toml index ce0d03de1..be7726dfd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,20 +15,20 @@ categories = ["database-implementations"] rust-version = "1.91.0" [workspace.dependencies] -lance = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } -lance-core = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } -lance-datagen = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } -lance-file = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } -lance-io = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } -lance-index = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } -lance-linalg = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } -lance-namespace = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } -lance-namespace-impls = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } -lance-table = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } -lance-testing = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } -lance-datafusion = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } -lance-encoding = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } -lance-arrow = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" } +lance = { version = "=4.0.0", default-features = false } +lance-core = { version = "=4.0.0" } +lance-datagen = { version = "=4.0.0" } +lance-file = { version = "=4.0.0" } +lance-io = { version = "=4.0.0", default-features = false } +lance-index = { version = "=4.0.0" } +lance-linalg = { version = "=4.0.0" } +lance-namespace = { version = "=4.0.0" } +lance-namespace-impls = { version = "=4.0.0", default-features = false } +lance-table = { version = "=4.0.0" } +lance-testing = { version = "=4.0.0" } +lance-datafusion = { version = "=4.0.0" } +lance-encoding = { version = "=4.0.0" } +lance-arrow = { version = "=4.0.0" } ahash = "0.8" # Note that this one does not include pyarrow arrow = { version = "57.2", optional = false } diff --git a/ci/check_lance_release.py b/ci/check_lance_release.py index d1629425e..e906dd489 100755 --- a/ci/check_lance_release.py +++ b/ci/check_lance_release.py @@ -3,6 +3,7 @@ from __future__ import annotations import argparse +import functools import json import os import re @@ -26,6 +27,7 @@ SEMVER_RE = re.compile( ) +@functools.total_ordering @dataclass(frozen=True) class SemVer: major: int @@ -156,7 +158,9 @@ def read_current_version(repo_root: Path) -> str: def determine_latest_tag(tags: Iterable[TagInfo]) -> TagInfo: - return max(tags, key=lambda tag: tag.semver) + # Stable releases (no prerelease) are always preferred over pre-releases. + # Within each group, standard semver ordering applies. + return max(tags, key=lambda tag: (not tag.semver.prerelease, tag.semver)) def write_outputs(args: argparse.Namespace, payload: dict) -> None: diff --git a/docs/requirements.txt b/docs/requirements.txt index 60d3b5e3e..e5f3867cb 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,8 +1,8 @@ -mkdocs==1.5.3 +mkdocs==1.6.1 mkdocs-jupyter==0.24.1 -mkdocs-material==9.5.3 +mkdocs-material==9.6.23 mkdocs-autorefs>=0.5,<=1.0 -mkdocstrings[python]==0.25.2 +mkdocstrings[python]>=0.24,<1.0 griffe>=0.40,<1.0 mkdocs-render-swagger-plugin>=0.1.0 pydantic>=2.0,<3.0 diff --git a/docs/src/java/java.md b/docs/src/java/java.md index 31b22cdcf..48a574187 100644 --- a/docs/src/java/java.md +++ b/docs/src/java/java.md @@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`: com.lancedb lancedb-core - 0.27.0-beta.5 + 0.27.2 ``` diff --git a/docs/src/js/namespaces/embedding/classes/EmbeddingFunction.md b/docs/src/js/namespaces/embedding/classes/EmbeddingFunction.md index 66d6ee162..574a0d71f 100644 --- a/docs/src/js/namespaces/embedding/classes/EmbeddingFunction.md +++ b/docs/src/js/namespaces/embedding/classes/EmbeddingFunction.md @@ -52,7 +52,7 @@ new EmbeddingFunction(): EmbeddingFunction ### computeQueryEmbeddings() ```ts -computeQueryEmbeddings(data): Promise +computeQueryEmbeddings(data): Promise ``` Compute the embeddings for a single query @@ -63,7 +63,7 @@ Compute the embeddings for a single query #### Returns -`Promise`<`number`[] \| `Float32Array` \| `Float64Array`> +`Promise`<`number`[] \| `Uint8Array` \| `Float32Array` \| `Float64Array`> *** diff --git a/docs/src/js/namespaces/embedding/classes/TextEmbeddingFunction.md b/docs/src/js/namespaces/embedding/classes/TextEmbeddingFunction.md index 8aee4f44c..444c4c3f0 100644 --- a/docs/src/js/namespaces/embedding/classes/TextEmbeddingFunction.md +++ b/docs/src/js/namespaces/embedding/classes/TextEmbeddingFunction.md @@ -37,7 +37,7 @@ new TextEmbeddingFunction(): TextEmbeddingFunction ### computeQueryEmbeddings() ```ts -computeQueryEmbeddings(data): Promise +computeQueryEmbeddings(data): Promise ``` Compute the embeddings for a single query @@ -48,7 +48,7 @@ Compute the embeddings for a single query #### Returns -`Promise`<`number`[] \| `Float32Array` \| `Float64Array`> +`Promise`<`number`[] \| `Uint8Array` \| `Float32Array` \| `Float64Array`> #### Overrides diff --git a/docs/src/js/type-aliases/IntoVector.md b/docs/src/js/type-aliases/IntoVector.md index 813d3cdc8..bfab8a0d3 100644 --- a/docs/src/js/type-aliases/IntoVector.md +++ b/docs/src/js/type-aliases/IntoVector.md @@ -7,5 +7,10 @@ # Type Alias: IntoVector ```ts -type IntoVector: Float32Array | Float64Array | number[] | Promise; +type IntoVector: + | Float32Array + | Float64Array + | Uint8Array + | number[] + | Promise; ``` diff --git a/docs/src/python/python.md b/docs/src/python/python.md index 103099d8c..5f4236c83 100644 --- a/docs/src/python/python.md +++ b/docs/src/python/python.md @@ -36,6 +36,20 @@ is also an [asynchronous API client](#connections-asynchronous). ::: lancedb.table.Tags +## Expressions + +Type-safe expression builder for filters and projections. Use these instead +of raw SQL strings with [where][lancedb.query.LanceQueryBuilder.where] and +[select][lancedb.query.LanceQueryBuilder.select]. + +::: lancedb.expr.Expr + +::: lancedb.expr.col + +::: lancedb.expr.lit + +::: lancedb.expr.func + ## Querying (Synchronous) ::: lancedb.query.Query diff --git a/java/lancedb-core/pom.xml b/java/lancedb-core/pom.xml index 3269256aa..d37e67c30 100644 --- a/java/lancedb-core/pom.xml +++ b/java/lancedb-core/pom.xml @@ -8,7 +8,7 @@ com.lancedb lancedb-parent - 0.27.0-beta.5 + 0.27.2-final.0 ../pom.xml diff --git a/java/pom.xml b/java/pom.xml index cf5ab4142..15d491979 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -6,7 +6,7 @@ com.lancedb lancedb-parent - 0.27.0-beta.5 + 0.27.2-final.0 pom ${project.artifactId} LanceDB Java SDK Parent POM @@ -28,7 +28,7 @@ UTF-8 15.0.0 - 3.1.0-beta.2 + 3.0.1 false 2.30.0 1.7 diff --git a/nodejs/Cargo.toml b/nodejs/Cargo.toml index 9e9ab4e7d..2909c0fc4 100644 --- a/nodejs/Cargo.toml +++ b/nodejs/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "lancedb-nodejs" edition.workspace = true -version = "0.27.0-beta.5" +version = "0.27.2" license.workspace = true description.workspace = true repository.workspace = true @@ -15,6 +15,8 @@ crate-type = ["cdylib"] async-trait.workspace = true arrow-ipc.workspace = true arrow-array.workspace = true +arrow-buffer = "57.2" +half.workspace = true arrow-schema.workspace = true env_logger.workspace = true futures.workspace = true diff --git a/nodejs/__test__/vector_types.test.ts b/nodejs/__test__/vector_types.test.ts new file mode 100644 index 000000000..4ac524cc6 --- /dev/null +++ b/nodejs/__test__/vector_types.test.ts @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The LanceDB Authors + +import * as tmp from "tmp"; + +import { type Table, connect } from "../lancedb"; +import { + Field, + FixedSizeList, + Float32, + Int64, + Schema, + makeArrowTable, +} from "../lancedb/arrow"; + +describe("Vector query with different typed arrays", () => { + let tmpDir: tmp.DirResult; + + afterEach(() => { + tmpDir?.removeCallback(); + }); + + async function createFloat32Table(): Promise { + tmpDir = tmp.dirSync({ unsafeCleanup: true }); + const db = await connect(tmpDir.name); + const schema = new Schema([ + new Field("id", new Int64(), true), + new Field( + "vec", + new FixedSizeList(2, new Field("item", new Float32())), + true, + ), + ]); + const data = makeArrowTable( + [ + { id: 1n, vec: [1.0, 0.0] }, + { id: 2n, vec: [0.0, 1.0] }, + { id: 3n, vec: [1.0, 1.0] }, + ], + { schema }, + ); + return db.createTable("test_f32", data); + } + + it("should search with Float32Array (baseline)", async () => { + const table = await createFloat32Table(); + const results = await table + .query() + .nearestTo(new Float32Array([1.0, 0.0])) + .limit(1) + .toArray(); + + expect(results.length).toBe(1); + expect(Number(results[0].id)).toBe(1); + }); + + it("should search with number[] (backward compat)", async () => { + const table = await createFloat32Table(); + const results = await table + .query() + .nearestTo([1.0, 0.0]) + .limit(1) + .toArray(); + + expect(results.length).toBe(1); + expect(Number(results[0].id)).toBe(1); + }); + + it("should search with Float64Array via raw path", async () => { + const table = await createFloat32Table(); + const results = await table + .query() + .nearestTo(new Float64Array([1.0, 0.0])) + .limit(1) + .toArray(); + + expect(results.length).toBe(1); + expect(Number(results[0].id)).toBe(1); + }); + + it("should add multiple query vectors with Float64Array", async () => { + const table = await createFloat32Table(); + const results = await table + .query() + .nearestTo(new Float64Array([1.0, 0.0])) + .addQueryVector(new Float64Array([0.0, 1.0])) + .limit(2) + .toArray(); + + expect(results.length).toBeGreaterThanOrEqual(2); + }); + + // Float16Array is only available in Node 22+; not in TypeScript's standard lib yet + const float16ArrayCtor = (globalThis as unknown as Record) + .Float16Array as (new (values: number[]) => unknown) | undefined; + const hasFloat16 = float16ArrayCtor !== undefined; + const f16it = hasFloat16 ? it : it.skip; + + f16it("should search with Float16Array via raw path", async () => { + const table = await createFloat32Table(); + const results = await table + .query() + .nearestTo(new float16ArrayCtor!([1.0, 0.0]) as Float32Array) + .limit(1) + .toArray(); + + expect(results.length).toBe(1); + expect(Number(results[0].id)).toBe(1); + }); +}); diff --git a/nodejs/lancedb/arrow.ts b/nodejs/lancedb/arrow.ts index 7fff42f47..84f5ddf7b 100644 --- a/nodejs/lancedb/arrow.ts +++ b/nodejs/lancedb/arrow.ts @@ -117,8 +117,9 @@ export type TableLike = export type IntoVector = | Float32Array | Float64Array + | Uint8Array | number[] - | Promise; + | Promise; export type MultiVector = IntoVector[]; @@ -126,14 +127,48 @@ export function isMultiVector(value: unknown): value is MultiVector { return Array.isArray(value) && isIntoVector(value[0]); } +// Float16Array is not in TypeScript's standard lib yet; access dynamically +type Float16ArrayCtor = new ( + ...args: unknown[] +) => { buffer: ArrayBuffer; byteOffset: number; byteLength: number }; +const float16ArrayCtor = (globalThis as unknown as Record) + .Float16Array as Float16ArrayCtor | undefined; + export function isIntoVector(value: unknown): value is IntoVector { return ( value instanceof Float32Array || value instanceof Float64Array || + value instanceof Uint8Array || + (float16ArrayCtor !== undefined && value instanceof float16ArrayCtor) || (Array.isArray(value) && !Array.isArray(value[0])) ); } +/** + * Extract the underlying byte buffer and data type from a typed array + * for passing to the Rust NAPI layer without precision loss. + */ +export function extractVectorBuffer( + vector: Float32Array | Float64Array | Uint8Array, +): { data: Uint8Array; dtype: string } | null { + if (float16ArrayCtor !== undefined && vector instanceof float16ArrayCtor) { + return { + data: new Uint8Array(vector.buffer, vector.byteOffset, vector.byteLength), + dtype: "float16", + }; + } + if (vector instanceof Float64Array) { + return { + data: new Uint8Array(vector.buffer, vector.byteOffset, vector.byteLength), + dtype: "float64", + }; + } + if (vector instanceof Uint8Array && !(vector instanceof Float32Array)) { + return { data: vector, dtype: "uint8" }; + } + return null; +} + export function isArrowTable(value: object): value is TableLike { if (value instanceof ArrowTable) return true; return "schema" in value && "batches" in value; diff --git a/nodejs/lancedb/query.ts b/nodejs/lancedb/query.ts index a10191ed2..c077234ec 100644 --- a/nodejs/lancedb/query.ts +++ b/nodejs/lancedb/query.ts @@ -5,6 +5,7 @@ import { Table as ArrowTable, type IntoVector, RecordBatch, + extractVectorBuffer, fromBufferToRecordBatch, fromRecordBatchToBuffer, tableFromIPC, @@ -661,10 +662,8 @@ export class VectorQuery extends StandardQueryBase { const res = (async () => { try { const v = await vector; - const arr = Float32Array.from(v); - // // biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping - const value: any = this.addQueryVector(arr); + const value: any = this.addQueryVector(v); const inner = value.inner as | NativeVectorQuery | Promise; @@ -676,7 +675,12 @@ export class VectorQuery extends StandardQueryBase { return new VectorQuery(res); } else { super.doCall((inner) => { - inner.addQueryVector(Float32Array.from(vector)); + const raw = Array.isArray(vector) ? null : extractVectorBuffer(vector); + if (raw) { + inner.addQueryVectorRaw(raw.data, raw.dtype); + } else { + inner.addQueryVector(Float32Array.from(vector as number[])); + } }); return this; } @@ -765,14 +769,23 @@ export class Query extends StandardQueryBase { * a default `limit` of 10 will be used. @see {@link Query#limit} */ nearestTo(vector: IntoVector): VectorQuery { + const callNearestTo = ( + inner: NativeQuery, + resolved: Float32Array | Float64Array | Uint8Array | number[], + ): NativeVectorQuery => { + const raw = Array.isArray(resolved) + ? null + : extractVectorBuffer(resolved); + if (raw) { + return inner.nearestToRaw(raw.data, raw.dtype); + } + return inner.nearestTo(Float32Array.from(resolved as number[])); + }; + if (this.inner instanceof Promise) { const nativeQuery = this.inner.then(async (inner) => { - if (vector instanceof Promise) { - const arr = await vector.then((v) => Float32Array.from(v)); - return inner.nearestTo(arr); - } else { - return inner.nearestTo(Float32Array.from(vector)); - } + const resolved = vector instanceof Promise ? await vector : vector; + return callNearestTo(inner, resolved); }); return new VectorQuery(nativeQuery); } @@ -780,10 +793,8 @@ export class Query extends StandardQueryBase { const res = (async () => { try { const v = await vector; - const arr = Float32Array.from(v); - // // biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping - const value: any = this.nearestTo(arr); + const value: any = this.nearestTo(v); const inner = value.inner as | NativeVectorQuery | Promise; @@ -794,7 +805,7 @@ export class Query extends StandardQueryBase { })(); return new VectorQuery(res); } else { - const vectorQuery = this.inner.nearestTo(Float32Array.from(vector)); + const vectorQuery = callNearestTo(this.inner, vector); return new VectorQuery(vectorQuery); } } diff --git a/nodejs/npm/darwin-arm64/package.json b/nodejs/npm/darwin-arm64/package.json index aebbf92bc..f75ddf8e9 100644 --- a/nodejs/npm/darwin-arm64/package.json +++ b/nodejs/npm/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-darwin-arm64", - "version": "0.27.0-beta.5", + "version": "0.27.2", "os": ["darwin"], "cpu": ["arm64"], "main": "lancedb.darwin-arm64.node", diff --git a/nodejs/npm/linux-arm64-gnu/package.json b/nodejs/npm/linux-arm64-gnu/package.json index ebdab4894..34bd217ef 100644 --- a/nodejs/npm/linux-arm64-gnu/package.json +++ b/nodejs/npm/linux-arm64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-arm64-gnu", - "version": "0.27.0-beta.5", + "version": "0.27.2", "os": ["linux"], "cpu": ["arm64"], "main": "lancedb.linux-arm64-gnu.node", diff --git a/nodejs/npm/linux-arm64-musl/package.json b/nodejs/npm/linux-arm64-musl/package.json index 92308e129..7ee00572d 100644 --- a/nodejs/npm/linux-arm64-musl/package.json +++ b/nodejs/npm/linux-arm64-musl/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-arm64-musl", - "version": "0.27.0-beta.5", + "version": "0.27.2", "os": ["linux"], "cpu": ["arm64"], "main": "lancedb.linux-arm64-musl.node", diff --git a/nodejs/npm/linux-x64-gnu/package.json b/nodejs/npm/linux-x64-gnu/package.json index 40bc70f5e..aa80ee8b1 100644 --- a/nodejs/npm/linux-x64-gnu/package.json +++ b/nodejs/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-x64-gnu", - "version": "0.27.0-beta.5", + "version": "0.27.2", "os": ["linux"], "cpu": ["x64"], "main": "lancedb.linux-x64-gnu.node", diff --git a/nodejs/npm/linux-x64-musl/package.json b/nodejs/npm/linux-x64-musl/package.json index 8f4d391cc..344e0cb5c 100644 --- a/nodejs/npm/linux-x64-musl/package.json +++ b/nodejs/npm/linux-x64-musl/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-x64-musl", - "version": "0.27.0-beta.5", + "version": "0.27.2", "os": ["linux"], "cpu": ["x64"], "main": "lancedb.linux-x64-musl.node", diff --git a/nodejs/npm/win32-arm64-msvc/package.json b/nodejs/npm/win32-arm64-msvc/package.json index 925dfc7be..e5c188d13 100644 --- a/nodejs/npm/win32-arm64-msvc/package.json +++ b/nodejs/npm/win32-arm64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-win32-arm64-msvc", - "version": "0.27.0-beta.5", + "version": "0.27.2", "os": [ "win32" ], diff --git a/nodejs/npm/win32-x64-msvc/package.json b/nodejs/npm/win32-x64-msvc/package.json index bef2f107f..381e98b7b 100644 --- a/nodejs/npm/win32-x64-msvc/package.json +++ b/nodejs/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-win32-x64-msvc", - "version": "0.27.0-beta.5", + "version": "0.27.2", "os": ["win32"], "cpu": ["x64"], "main": "lancedb.win32-x64-msvc.node", diff --git a/nodejs/package-lock.json b/nodejs/package-lock.json index 874093e14..81fd12717 100644 --- a/nodejs/package-lock.json +++ b/nodejs/package-lock.json @@ -1,12 +1,12 @@ { "name": "@lancedb/lancedb", - "version": "0.27.0-beta.5", + "version": "0.27.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@lancedb/lancedb", - "version": "0.27.0-beta.5", + "version": "0.27.2", "cpu": [ "x64", "arm64" diff --git a/nodejs/package.json b/nodejs/package.json index 07e143612..3551f0473 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -11,7 +11,7 @@ "ann" ], "private": false, - "version": "0.27.0-beta.5", + "version": "0.27.2", "main": "dist/index.js", "exports": { ".": "./dist/index.js", diff --git a/nodejs/src/query.rs b/nodejs/src/query.rs index 4ad42f32f..4516385d5 100644 --- a/nodejs/src/query.rs +++ b/nodejs/src/query.rs @@ -3,6 +3,12 @@ use std::sync::Arc; +use arrow_array::{ + Array, Float16Array as ArrowFloat16Array, Float32Array as ArrowFloat32Array, + Float64Array as ArrowFloat64Array, UInt8Array as ArrowUInt8Array, +}; +use arrow_buffer::ScalarBuffer; +use half::f16; use lancedb::index::scalar::{ BooleanQuery, BoostQuery, FtsQuery, FullTextSearchQuery, MatchQuery, MultiMatchQuery, Occur, Operator, PhraseQuery, @@ -24,6 +30,33 @@ use crate::rerankers::RerankHybridCallbackArgs; use crate::rerankers::Reranker; use crate::util::{parse_distance_type, schema_to_buffer}; +fn bytes_to_arrow_array(data: Uint8Array, dtype: String) -> napi::Result> { + let buf = arrow_buffer::Buffer::from(data.to_vec()); + let num_bytes = buf.len(); + match dtype.as_str() { + "float16" => { + let scalar_buf = ScalarBuffer::::new(buf, 0, num_bytes / 2); + Ok(Arc::new(ArrowFloat16Array::new(scalar_buf, None))) + } + "float32" => { + let scalar_buf = ScalarBuffer::::new(buf, 0, num_bytes / 4); + Ok(Arc::new(ArrowFloat32Array::new(scalar_buf, None))) + } + "float64" => { + let scalar_buf = ScalarBuffer::::new(buf, 0, num_bytes / 8); + Ok(Arc::new(ArrowFloat64Array::new(scalar_buf, None))) + } + "uint8" => { + let scalar_buf = ScalarBuffer::::new(buf, 0, num_bytes); + Ok(Arc::new(ArrowUInt8Array::new(scalar_buf, None))) + } + _ => Err(napi::Error::from_reason(format!( + "Unsupported vector dtype: {}. Expected one of: float16, float32, float64, uint8", + dtype + ))), + } +} + #[napi] pub struct Query { inner: LanceDbQuery, @@ -78,6 +111,13 @@ impl Query { Ok(VectorQuery { inner }) } + #[napi] + pub fn nearest_to_raw(&mut self, data: Uint8Array, dtype: String) -> Result { + let array = bytes_to_arrow_array(data, dtype)?; + let inner = self.inner.clone().nearest_to(array).default_error()?; + Ok(VectorQuery { inner }) + } + #[napi] pub fn fast_search(&mut self) { self.inner = self.inner.clone().fast_search(); @@ -163,6 +203,13 @@ impl VectorQuery { Ok(()) } + #[napi] + pub fn add_query_vector_raw(&mut self, data: Uint8Array, dtype: String) -> Result<()> { + let array = bytes_to_arrow_array(data, dtype)?; + self.inner = self.inner.clone().add_query_vector(array).default_error()?; + Ok(()) + } + #[napi] pub fn distance_type(&mut self, distance_type: String) -> napi::Result<()> { let distance_type = parse_distance_type(distance_type)?; diff --git a/python/.bumpversion.toml b/python/.bumpversion.toml index 6a3d3c6dc..27986b5c4 100644 --- a/python/.bumpversion.toml +++ b/python/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.30.0-beta.5" +current_version = "0.30.2" parse = """(?x) (?P0|[1-9]\\d*)\\. (?P0|[1-9]\\d*)\\. diff --git a/python/.gitignore b/python/.gitignore index cd6b09fa4..93fc0935f 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -1,3 +1,5 @@ # Test data created by some example tests data/ _lancedb.pyd +# macOS debug symbols bundle generated during build +*.dSYM/ diff --git a/python/Cargo.toml b/python/Cargo.toml index a0903700f..02968c6d4 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lancedb-python" -version = "0.30.0-beta.5" +version = "0.30.2" edition.workspace = true description = "Python bindings for LanceDB" license.workspace = true @@ -23,6 +23,7 @@ lance-namespace.workspace = true lance-namespace-impls.workspace = true lance-io.workspace = true env_logger.workspace = true +log.workspace = true pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] } pyo3-async-runtimes = { version = "0.26", features = [ "attributes", diff --git a/python/python/lancedb/__init__.py b/python/python/lancedb/__init__.py index 2fee6bb8b..c705abb19 100644 --- a/python/python/lancedb/__init__.py +++ b/python/python/lancedb/__init__.py @@ -18,6 +18,7 @@ from .db import AsyncConnection, DBConnection, LanceDBConnection from .io import StorageOptionsProvider from .remote import ClientConfig from .remote.db import RemoteDBConnection +from .expr import Expr, col, lit, func from .schema import vector from .table import AsyncTable, Table from ._lancedb import Session @@ -271,6 +272,10 @@ __all__ = [ "AsyncConnection", "AsyncLanceNamespaceDBConnection", "AsyncTable", + "col", + "Expr", + "func", + "lit", "URI", "sanitize_uri", "vector", diff --git a/python/python/lancedb/_lancedb.pyi b/python/python/lancedb/_lancedb.pyi index c5b35c945..47422e8c6 100644 --- a/python/python/lancedb/_lancedb.pyi +++ b/python/python/lancedb/_lancedb.pyi @@ -27,6 +27,32 @@ from .remote import ClientConfig IvfHnswPq: type[HnswPq] = HnswPq IvfHnswSq: type[HnswSq] = HnswSq +class PyExpr: + """A type-safe DataFusion expression node (Rust-side handle).""" + + def eq(self, other: "PyExpr") -> "PyExpr": ... + def ne(self, other: "PyExpr") -> "PyExpr": ... + def lt(self, other: "PyExpr") -> "PyExpr": ... + def lte(self, other: "PyExpr") -> "PyExpr": ... + def gt(self, other: "PyExpr") -> "PyExpr": ... + def gte(self, other: "PyExpr") -> "PyExpr": ... + def and_(self, other: "PyExpr") -> "PyExpr": ... + def or_(self, other: "PyExpr") -> "PyExpr": ... + def not_(self) -> "PyExpr": ... + def add(self, other: "PyExpr") -> "PyExpr": ... + def sub(self, other: "PyExpr") -> "PyExpr": ... + def mul(self, other: "PyExpr") -> "PyExpr": ... + def div(self, other: "PyExpr") -> "PyExpr": ... + def lower(self) -> "PyExpr": ... + def upper(self) -> "PyExpr": ... + def contains(self, substr: "PyExpr") -> "PyExpr": ... + def cast(self, data_type: pa.DataType) -> "PyExpr": ... + def to_sql(self) -> str: ... + +def expr_col(name: str) -> PyExpr: ... +def expr_lit(value: Union[bool, int, float, str]) -> PyExpr: ... +def expr_func(name: str, args: List[PyExpr]) -> PyExpr: ... + class Session: def __init__( self, @@ -135,7 +161,10 @@ class Table: def close(self) -> None: ... async def schema(self) -> pa.Schema: ... async def add( - self, data: pa.RecordBatchReader, mode: Literal["append", "overwrite"] + self, + data: pa.RecordBatchReader, + mode: Literal["append", "overwrite"], + progress: Optional[Any] = None, ) -> AddResult: ... async def update( self, updates: Dict[str, str], where: Optional[str] @@ -222,7 +251,9 @@ class RecordBatchStream: class Query: def where(self, filter: str): ... - def select(self, columns: Tuple[str, str]): ... + def where_expr(self, expr: PyExpr): ... + def select(self, columns: List[Tuple[str, str]]): ... + def select_expr(self, columns: List[Tuple[str, PyExpr]]): ... def select_columns(self, columns: List[str]): ... def limit(self, limit: int): ... def offset(self, offset: int): ... @@ -248,7 +279,9 @@ class TakeQuery: class FTSQuery: def where(self, filter: str): ... - def select(self, columns: List[str]): ... + def where_expr(self, expr: PyExpr): ... + def select(self, columns: List[Tuple[str, str]]): ... + def select_expr(self, columns: List[Tuple[str, PyExpr]]): ... def limit(self, limit: int): ... def offset(self, offset: int): ... def fast_search(self): ... @@ -267,7 +300,9 @@ class VectorQuery: async def output_schema(self) -> pa.Schema: ... async def execute(self) -> RecordBatchStream: ... def where(self, filter: str): ... - def select(self, columns: List[str]): ... + def where_expr(self, expr: PyExpr): ... + def select(self, columns: List[Tuple[str, str]]): ... + def select_expr(self, columns: List[Tuple[str, PyExpr]]): ... def select_with_projection(self, columns: Tuple[str, str]): ... def limit(self, limit: int): ... def offset(self, offset: int): ... @@ -284,7 +319,9 @@ class VectorQuery: class HybridQuery: def where(self, filter: str): ... - def select(self, columns: List[str]): ... + def where_expr(self, expr: PyExpr): ... + def select(self, columns: List[Tuple[str, str]]): ... + def select_expr(self, columns: List[Tuple[str, PyExpr]]): ... def limit(self, limit: int): ... def offset(self, offset: int): ... def fast_search(self): ... diff --git a/python/python/lancedb/embeddings/utils.py b/python/python/lancedb/embeddings/utils.py index 1fefc78bf..189bbe53c 100644 --- a/python/python/lancedb/embeddings/utils.py +++ b/python/python/lancedb/embeddings/utils.py @@ -10,6 +10,7 @@ import sys import threading import time import urllib.error +import urllib.request import weakref import logging from functools import wraps diff --git a/python/python/lancedb/expr.py b/python/python/lancedb/expr.py new file mode 100644 index 000000000..5a568d66a --- /dev/null +++ b/python/python/lancedb/expr.py @@ -0,0 +1,298 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright The LanceDB Authors + +"""Type-safe expression builder for filters and projections. + +Instead of writing raw SQL strings you can build expressions with Python +operators:: + + from lancedb.expr import col, lit + + # filter: age > 18 AND status = 'active' + filt = (col("age") > lit(18)) & (col("status") == lit("active")) + + # projection: compute a derived column + proj = {"score": col("raw_score") * lit(1.5)} + + table.search().where(filt).select(proj).to_list() +""" + +from __future__ import annotations + +from typing import Union + +import pyarrow as pa + +from lancedb._lancedb import PyExpr, expr_col, expr_lit, expr_func + +__all__ = ["Expr", "col", "lit", "func"] + +_STR_TO_PA_TYPE: dict = { + "bool": pa.bool_(), + "boolean": pa.bool_(), + "int8": pa.int8(), + "int16": pa.int16(), + "int32": pa.int32(), + "int64": pa.int64(), + "uint8": pa.uint8(), + "uint16": pa.uint16(), + "uint32": pa.uint32(), + "uint64": pa.uint64(), + "float16": pa.float16(), + "float32": pa.float32(), + "float": pa.float32(), + "float64": pa.float64(), + "double": pa.float64(), + "string": pa.string(), + "utf8": pa.string(), + "str": pa.string(), + "large_string": pa.large_utf8(), + "large_utf8": pa.large_utf8(), + "date32": pa.date32(), + "date": pa.date32(), + "date64": pa.date64(), +} + + +def _coerce(value: "ExprLike") -> "Expr": + """Return *value* as an :class:`Expr`, wrapping plain Python values via + :func:`lit` if needed.""" + if isinstance(value, Expr): + return value + return lit(value) + + +# Type alias used in annotations. +ExprLike = Union["Expr", bool, int, float, str] + + +class Expr: + """A type-safe expression node. + + Construct instances with :func:`col` and :func:`lit`, then combine them + using Python operators or the named methods below. + + Examples + -------- + >>> from lancedb.expr import col, lit + >>> filt = (col("age") > lit(18)) & (col("name").lower() == lit("alice")) + >>> proj = {"double": col("x") * lit(2)} + """ + + # Make Expr unhashable so that == returns an Expr rather than being used + # for dict keys / set membership. + __hash__ = None # type: ignore[assignment] + + def __init__(self, inner: PyExpr) -> None: + self._inner = inner + + # ── comparisons ────────────────────────────────────────────────────────── + + def __eq__(self, other: ExprLike) -> "Expr": # type: ignore[override] + """Equal to (``col("x") == 1``).""" + return Expr(self._inner.eq(_coerce(other)._inner)) + + def __ne__(self, other: ExprLike) -> "Expr": # type: ignore[override] + """Not equal to (``col("x") != 1``).""" + return Expr(self._inner.ne(_coerce(other)._inner)) + + def __lt__(self, other: ExprLike) -> "Expr": + """Less than (``col("x") < 1``).""" + return Expr(self._inner.lt(_coerce(other)._inner)) + + def __le__(self, other: ExprLike) -> "Expr": + """Less than or equal to (``col("x") <= 1``).""" + return Expr(self._inner.lte(_coerce(other)._inner)) + + def __gt__(self, other: ExprLike) -> "Expr": + """Greater than (``col("x") > 1``).""" + return Expr(self._inner.gt(_coerce(other)._inner)) + + def __ge__(self, other: ExprLike) -> "Expr": + """Greater than or equal to (``col("x") >= 1``).""" + return Expr(self._inner.gte(_coerce(other)._inner)) + + # ── logical ────────────────────────────────────────────────────────────── + + def __and__(self, other: "Expr") -> "Expr": + """Logical AND (``expr_a & expr_b``).""" + return Expr(self._inner.and_(_coerce(other)._inner)) + + def __or__(self, other: "Expr") -> "Expr": + """Logical OR (``expr_a | expr_b``).""" + return Expr(self._inner.or_(_coerce(other)._inner)) + + def __invert__(self) -> "Expr": + """Logical NOT (``~expr``).""" + return Expr(self._inner.not_()) + + # ── arithmetic ─────────────────────────────────────────────────────────── + + def __add__(self, other: ExprLike) -> "Expr": + """Add (``col("x") + 1``).""" + return Expr(self._inner.add(_coerce(other)._inner)) + + def __radd__(self, other: ExprLike) -> "Expr": + """Right-hand add (``1 + col("x")``).""" + return Expr(_coerce(other)._inner.add(self._inner)) + + def __sub__(self, other: ExprLike) -> "Expr": + """Subtract (``col("x") - 1``).""" + return Expr(self._inner.sub(_coerce(other)._inner)) + + def __rsub__(self, other: ExprLike) -> "Expr": + """Right-hand subtract (``1 - col("x")``).""" + return Expr(_coerce(other)._inner.sub(self._inner)) + + def __mul__(self, other: ExprLike) -> "Expr": + """Multiply (``col("x") * 2``).""" + return Expr(self._inner.mul(_coerce(other)._inner)) + + def __rmul__(self, other: ExprLike) -> "Expr": + """Right-hand multiply (``2 * col("x")``).""" + return Expr(_coerce(other)._inner.mul(self._inner)) + + def __truediv__(self, other: ExprLike) -> "Expr": + """Divide (``col("x") / 2``).""" + return Expr(self._inner.div(_coerce(other)._inner)) + + def __rtruediv__(self, other: ExprLike) -> "Expr": + """Right-hand divide (``1 / col("x")``).""" + return Expr(_coerce(other)._inner.div(self._inner)) + + # ── string methods ─────────────────────────────────────────────────────── + + def lower(self) -> "Expr": + """Convert string column values to lowercase.""" + return Expr(self._inner.lower()) + + def upper(self) -> "Expr": + """Convert string column values to uppercase.""" + return Expr(self._inner.upper()) + + def contains(self, substr: "ExprLike") -> "Expr": + """Return True where the string contains *substr*.""" + return Expr(self._inner.contains(_coerce(substr)._inner)) + + # ── type cast ──────────────────────────────────────────────────────────── + + def cast(self, data_type: Union[str, "pa.DataType"]) -> "Expr": + """Cast values to *data_type*. + + Parameters + ---------- + data_type: + A PyArrow ``DataType`` (e.g. ``pa.int32()``) or one of the type + name strings: ``"bool"``, ``"int8"``, ``"int16"``, ``"int32"``, + ``"int64"``, ``"uint8"``–``"uint64"``, ``"float32"``, + ``"float64"``, ``"string"``, ``"date32"``, ``"date64"``. + """ + if isinstance(data_type, str): + try: + data_type = _STR_TO_PA_TYPE[data_type] + except KeyError: + raise ValueError( + f"unsupported data type: '{data_type}'. Supported: " + f"{', '.join(_STR_TO_PA_TYPE)}" + ) + return Expr(self._inner.cast(data_type)) + + # ── named comparison helpers (alternative to operators) ────────────────── + + def eq(self, other: ExprLike) -> "Expr": + """Equal to.""" + return self.__eq__(other) + + def ne(self, other: ExprLike) -> "Expr": + """Not equal to.""" + return self.__ne__(other) + + def lt(self, other: ExprLike) -> "Expr": + """Less than.""" + return self.__lt__(other) + + def lte(self, other: ExprLike) -> "Expr": + """Less than or equal to.""" + return self.__le__(other) + + def gt(self, other: ExprLike) -> "Expr": + """Greater than.""" + return self.__gt__(other) + + def gte(self, other: ExprLike) -> "Expr": + """Greater than or equal to.""" + return self.__ge__(other) + + def and_(self, other: "Expr") -> "Expr": + """Logical AND.""" + return self.__and__(other) + + def or_(self, other: "Expr") -> "Expr": + """Logical OR.""" + return self.__or__(other) + + # ── utilities ──────────────────────────────────────────────────────────── + + def to_sql(self) -> str: + """Render the expression as a SQL string (useful for debugging).""" + return self._inner.to_sql() + + def __repr__(self) -> str: + return f"Expr({self._inner.to_sql()})" + + +# ── free functions ──────────────────────────────────────────────────────────── + + +def col(name: str) -> Expr: + """Reference a table column by name. + + Parameters + ---------- + name: + The column name. + + Examples + -------- + >>> from lancedb.expr import col, lit + >>> col("age") > lit(18) + Expr((age > 18)) + """ + return Expr(expr_col(name)) + + +def lit(value: Union[bool, int, float, str]) -> Expr: + """Create a literal (constant) value expression. + + Parameters + ---------- + value: + A Python ``bool``, ``int``, ``float``, or ``str``. + + Examples + -------- + >>> from lancedb.expr import col, lit + >>> col("price") * lit(1.1) + Expr((price * 1.1)) + """ + return Expr(expr_lit(value)) + + +def func(name: str, *args: ExprLike) -> Expr: + """Call an arbitrary SQL function by name. + + Parameters + ---------- + name: + The SQL function name (e.g. ``"lower"``, ``"upper"``). + *args: + The function arguments as :class:`Expr` or plain Python literals. + + Examples + -------- + >>> from lancedb.expr import col, func + >>> func("lower", col("name")) + Expr(lower(name)) + """ + inner_args = [_coerce(a)._inner for a in args] + return Expr(expr_func(name, inner_args)) diff --git a/python/python/lancedb/query.py b/python/python/lancedb/query.py index 8d256e9a4..b5298505c 100644 --- a/python/python/lancedb/query.py +++ b/python/python/lancedb/query.py @@ -38,6 +38,7 @@ from .rerankers.base import Reranker from .rerankers.rrf import RRFReranker from .rerankers.util import check_reranker_result from .util import flatten_columns +from .expr import Expr from lancedb._lancedb import fts_query_to_json from typing_extensions import Annotated @@ -70,7 +71,7 @@ def ensure_vector_query( ) -> Union[List[float], List[List[float]], pa.Array, List[pa.Array]]: if isinstance(val, list): if len(val) == 0: - return ValueError("Vector query must be a non-empty list") + raise ValueError("Vector query must be a non-empty list") sample = val[0] else: if isinstance(val, float): @@ -83,7 +84,7 @@ def ensure_vector_query( return val if isinstance(sample, list): if len(sample) == 0: - return ValueError("Vector query must be a non-empty list") + raise ValueError("Vector query must be a non-empty list") if isinstance(sample[0], float): # val is list of list of floats return val @@ -449,8 +450,8 @@ class Query(pydantic.BaseModel): ensure_vector_query, ] = None - # sql filter to refine the query with - filter: Optional[str] = None + # sql filter or type-safe Expr to refine the query with + filter: Optional[Union[str, Expr]] = None # if True then apply the filter after vector search postfilter: Optional[bool] = None @@ -464,8 +465,8 @@ class Query(pydantic.BaseModel): # distance type to use for vector search distance_type: Optional[str] = None - # which columns to return in the results - columns: Optional[Union[List[str], Dict[str, str]]] = None + # which columns to return in the results (dict values may be str or Expr) + columns: Optional[Union[List[str], Dict[str, Union[str, Expr]]]] = None # minimum number of IVF partitions to search # @@ -856,14 +857,15 @@ class LanceQueryBuilder(ABC): self._offset = offset return self - def select(self, columns: Union[list[str], dict[str, str]]) -> Self: + def select(self, columns: Union[list[str], dict[str, Union[str, Expr]]]) -> Self: """Set the columns to return. Parameters ---------- - columns: list of str, or dict of str to str default None + columns: list of str, or dict of str to str or Expr List of column names to be fetched. - Or a dictionary of column names to SQL expressions. + Or a dictionary of column names to SQL expressions or + :class:`~lancedb.expr.Expr` objects. All columns are fetched if None or unspecified. Returns @@ -877,15 +879,15 @@ class LanceQueryBuilder(ABC): raise ValueError("columns must be a list or a dictionary") return self - def where(self, where: str, prefilter: bool = True) -> Self: + def where(self, where: Union[str, Expr], prefilter: bool = True) -> Self: """Set the where clause. Parameters ---------- - where: str - The where clause which is a valid SQL where clause. See - `Lance filter pushdown `_ - for valid SQL expressions. + where: str or :class:`~lancedb.expr.Expr` + The filter condition. Can be a SQL string or a type-safe + :class:`~lancedb.expr.Expr` built with :func:`~lancedb.expr.col` + and :func:`~lancedb.expr.lit`. prefilter: bool, default True If True, apply the filter before vector search, otherwise the filter is applied on the result of vector search. @@ -1355,15 +1357,17 @@ class LanceVectorQueryBuilder(LanceQueryBuilder): return result_set - def where(self, where: str, prefilter: bool = None) -> LanceVectorQueryBuilder: + def where( + self, where: Union[str, Expr], prefilter: bool = None + ) -> LanceVectorQueryBuilder: """Set the where clause. Parameters ---------- - where: str - The where clause which is a valid SQL where clause. See - `Lance filter pushdown `_ - for valid SQL expressions. + where: str or :class:`~lancedb.expr.Expr` + The filter condition. Can be a SQL string or a type-safe + :class:`~lancedb.expr.Expr` built with :func:`~lancedb.expr.col` + and :func:`~lancedb.expr.lit`. prefilter: bool, default True If True, apply the filter before vector search, otherwise the filter is applied on the result of vector search. @@ -2205,8 +2209,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder): self._vector_query.select(self._columns) self._fts_query.select(self._columns) if self._where: - self._vector_query.where(self._where, self._postfilter) - self._fts_query.where(self._where, self._postfilter) + self._vector_query.where(self._where, not self._postfilter) + self._fts_query.where(self._where, not self._postfilter) if self._with_row_id: self._vector_query.with_row_id(True) self._fts_query.with_row_id(True) @@ -2286,10 +2290,20 @@ class AsyncQueryBase(object): """ if isinstance(columns, list) and all(isinstance(c, str) for c in columns): self._inner.select_columns(columns) - elif isinstance(columns, dict) and all( - isinstance(k, str) and isinstance(v, str) for k, v in columns.items() - ): - self._inner.select(list(columns.items())) + elif isinstance(columns, dict) and all(isinstance(k, str) for k in columns): + if any(isinstance(v, Expr) for v in columns.values()): + # At least one value is an Expr — use the type-safe path. + from .expr import _coerce + + pairs = [(k, _coerce(v)._inner) for k, v in columns.items()] + self._inner.select_expr(pairs) + elif all(isinstance(v, str) for v in columns.values()): + self._inner.select(list(columns.items())) + else: + raise TypeError( + "dict values must be str or Expr, got " + + str({k: type(v) for k, v in columns.items()}) + ) else: raise TypeError("columns must be a list of column names or a dict") return self @@ -2529,11 +2543,13 @@ class AsyncStandardQuery(AsyncQueryBase): """ super().__init__(inner) - def where(self, predicate: str) -> Self: + def where(self, predicate: Union[str, Expr]) -> Self: """ Only return rows matching the given predicate - The predicate should be supplied as an SQL query string. + The predicate can be a SQL string or a type-safe + :class:`~lancedb.expr.Expr` built with :func:`~lancedb.expr.col` + and :func:`~lancedb.expr.lit`. Examples -------- @@ -2545,7 +2561,10 @@ class AsyncStandardQuery(AsyncQueryBase): Filtering performance can often be improved by creating a scalar index on the filter column(s). """ - self._inner.where(predicate) + if isinstance(predicate, Expr): + self._inner.where_expr(predicate._inner) + else: + self._inner.where(predicate) return self def limit(self, limit: int) -> Self: diff --git a/python/python/lancedb/remote/db.py b/python/python/lancedb/remote/db.py index 41ec1ba3c..decea4778 100644 --- a/python/python/lancedb/remote/db.py +++ b/python/python/lancedb/remote/db.py @@ -568,4 +568,4 @@ class RemoteDBConnection(DBConnection): async def close(self): """Close the connection to the database.""" - self._client.close() + self._conn.close() diff --git a/python/python/lancedb/remote/table.py b/python/python/lancedb/remote/table.py index 4dd5b428f..905e1481a 100644 --- a/python/python/lancedb/remote/table.py +++ b/python/python/lancedb/remote/table.py @@ -4,7 +4,7 @@ from datetime import timedelta import logging from functools import cached_property -from typing import Dict, Iterable, List, Optional, Union, Literal +from typing import Any, Callable, Dict, Iterable, List, Optional, Union, Literal import warnings from lancedb._lancedb import ( @@ -35,6 +35,7 @@ import pyarrow as pa from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME from lancedb.merge import LanceMergeInsertBuilder from lancedb.embeddings import EmbeddingFunctionRegistry +from lancedb.table import _normalize_progress from ..query import LanceVectorQueryBuilder, LanceQueryBuilder, LanceTakeQueryBuilder from ..table import AsyncTable, IndexStatistics, Query, Table, Tags @@ -308,6 +309,7 @@ class RemoteTable(Table): mode: str = "append", on_bad_vectors: str = "error", fill_value: float = 0.0, + progress: Optional[Union[bool, Callable, Any]] = None, ) -> AddResult: """Add more data to the [Table](Table). It has the same API signature as the OSS version. @@ -330,17 +332,29 @@ class RemoteTable(Table): One of "error", "drop", "fill". fill_value: float, default 0. The value to use when filling vectors. Only used if on_bad_vectors="fill". + progress: bool, callable, or tqdm-like, optional + A callback or tqdm-compatible progress bar. See + :meth:`Table.add` for details. Returns ------- AddResult An object containing the new version number of the table after adding data. """ - return LOOP.run( - self._table.add( - data, mode=mode, on_bad_vectors=on_bad_vectors, fill_value=fill_value + progress, owns = _normalize_progress(progress) + try: + return LOOP.run( + self._table.add( + data, + mode=mode, + on_bad_vectors=on_bad_vectors, + fill_value=fill_value, + progress=progress, + ) ) - ) + finally: + if owns: + progress.close() def search( self, diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py index 0f0acaea0..c1754adfa 100644 --- a/python/python/lancedb/table.py +++ b/python/python/lancedb/table.py @@ -14,6 +14,7 @@ from functools import cached_property from typing import ( TYPE_CHECKING, Any, + Callable, Dict, Iterable, List, @@ -277,7 +278,7 @@ def _sanitize_data( if metadata: new_metadata = target_schema.metadata or {} - new_metadata = new_metadata.update(metadata) + new_metadata.update(metadata) target_schema = target_schema.with_metadata(new_metadata) _validate_schema(target_schema) @@ -556,6 +557,21 @@ def _table_uri(base: str, table_name: str) -> str: return join_uri(base, f"{table_name}.lance") +def _normalize_progress(progress): + """Normalize a ``progress`` parameter for :meth:`Table.add`. + + Returns ``(progress_obj, owns)`` where *owns* is True when we created a + tqdm bar that the caller must close. + """ + if progress is True: + from tqdm.auto import tqdm + + return tqdm(unit=" rows"), True + if progress is False or progress is None: + return None, False + return progress, False + + class Table(ABC): """ A Table is a collection of Records in a LanceDB Database. @@ -974,6 +990,7 @@ class Table(ABC): mode: AddMode = "append", on_bad_vectors: OnBadVectorsType = "error", fill_value: float = 0.0, + progress: Optional[Union[bool, Callable, Any]] = None, ) -> AddResult: """Add more data to the [Table](Table). @@ -995,6 +1012,29 @@ class Table(ABC): One of "error", "drop", "fill". fill_value: float, default 0. The value to use when filling vectors. Only used if on_bad_vectors="fill". + progress: bool, callable, or tqdm-like, optional + Progress reporting during the add operation. Can be: + + - ``True`` to automatically create and display a tqdm progress + bar (requires ``tqdm`` to be installed):: + + table.add(data, progress=True) + + - A **callable** that receives a dict with keys ``output_rows``, + ``output_bytes``, ``total_rows``, ``elapsed_seconds``, + ``active_tasks``, ``total_tasks``, and ``done``:: + + def on_progress(p): + print(f"{p['output_rows']}/{p['total_rows']} rows, " + f"{p['active_tasks']}/{p['total_tasks']} workers") + table.add(data, progress=on_progress) + + - A **tqdm-compatible** progress bar whose ``total`` and + ``update()`` will be called automatically. The postfix shows + write throughput (MB/s) and active worker count:: + + with tqdm() as pbar: + table.add(data, progress=pbar) Returns ------- @@ -2492,6 +2532,7 @@ class LanceTable(Table): mode: AddMode = "append", on_bad_vectors: OnBadVectorsType = "error", fill_value: float = 0.0, + progress: Optional[Union[bool, Callable, Any]] = None, ) -> AddResult: """Add data to the table. If vector columns are missing and the table @@ -2510,17 +2551,29 @@ class LanceTable(Table): One of "error", "drop", "fill", "null". fill_value: float, default 0. The value to use when filling vectors. Only used if on_bad_vectors="fill". + progress: bool, callable, or tqdm-like, optional + A callback or tqdm-compatible progress bar. See + :meth:`Table.add` for details. Returns ------- int The number of vectors in the table. """ - return LOOP.run( - self._table.add( - data, mode=mode, on_bad_vectors=on_bad_vectors, fill_value=fill_value + progress, owns = _normalize_progress(progress) + try: + return LOOP.run( + self._table.add( + data, + mode=mode, + on_bad_vectors=on_bad_vectors, + fill_value=fill_value, + progress=progress, + ) ) - ) + finally: + if owns: + progress.close() def merge( self, @@ -3769,6 +3822,7 @@ class AsyncTable: mode: Optional[Literal["append", "overwrite"]] = "append", on_bad_vectors: Optional[OnBadVectorsType] = None, fill_value: Optional[float] = None, + progress: Optional[Union[bool, Callable, Any]] = None, ) -> AddResult: """Add more data to the [Table](Table). @@ -3790,6 +3844,9 @@ class AsyncTable: One of "error", "drop", "fill", "null". fill_value: float, default 0. The value to use when filling vectors. Only used if on_bad_vectors="fill". + progress: callable or tqdm-like, optional + A callback or tqdm-compatible progress bar. See + :meth:`Table.add` for details. """ schema = await self.schema() @@ -3800,7 +3857,13 @@ class AsyncTable: # _santitize_data is an old code path, but we will use it until the # new code path is ready. - if on_bad_vectors != "error" or ( + if mode == "overwrite": + # For overwrite, apply the same preprocessing as create_table + # so vector columns are inferred as FixedSizeList. + data, _ = sanitize_create_table( + data, None, on_bad_vectors=on_bad_vectors, fill_value=fill_value + ) + elif on_bad_vectors != "error" or ( schema.metadata is not None and b"embedding_functions" in schema.metadata ): data = _sanitize_data( @@ -3813,8 +3876,9 @@ class AsyncTable: ) _register_optional_converters() data = to_scannable(data) + progress, owns = _normalize_progress(progress) try: - return await self._inner.add(data, mode or "append") + return await self._inner.add(data, mode or "append", progress=progress) except RuntimeError as e: if "Cast error" in str(e): raise ValueError(e) @@ -3822,6 +3886,9 @@ class AsyncTable: raise ValueError(e) else: raise + finally: + if owns: + progress.close() def merge_insert(self, on: Union[str, Iterable[str]]) -> LanceMergeInsertBuilder: """ @@ -4144,7 +4211,7 @@ class AsyncTable: async_query = async_query.offset(query.offset) if query.columns: async_query = async_query.select(query.columns) - if query.filter: + if query.filter is not None: async_query = async_query.where(query.filter) if query.fast_search: async_query = async_query.fast_search() @@ -4751,7 +4818,16 @@ class IndexStatistics: num_indexed_rows: int num_unindexed_rows: int index_type: Literal[ - "IVF_PQ", "IVF_HNSW_PQ", "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP", "LABEL_LIST" + "IVF_FLAT", + "IVF_SQ", + "IVF_PQ", + "IVF_RQ", + "IVF_HNSW_SQ", + "IVF_HNSW_PQ", + "FTS", + "BTREE", + "BITMAP", + "LABEL_LIST", ] distance_type: Optional[Literal["l2", "cosine", "dot"]] = None num_indices: Optional[int] = None diff --git a/python/python/tests/test_embeddings.py b/python/python/tests/test_embeddings.py index c78b822f1..43f7b44ac 100644 --- a/python/python/tests/test_embeddings.py +++ b/python/python/tests/test_embeddings.py @@ -546,3 +546,24 @@ def test_openai_no_retry_on_401(mock_sleep): assert mock_func.call_count == 1 # Verify that sleep was never called (no retries) assert mock_sleep.call_count == 0 + + +def test_url_retrieve_downloads_image(): + """ + Embedding functions like open-clip, siglip, and jinaai use url_retrieve() + to download images from HTTP URLs. For example, open_clip._to_pil() calls: + + PIL_Image.open(io.BytesIO(url_retrieve(image))) + + Verify that url_retrieve() can download an image and open it as PIL Image, + matching the real usage pattern in embedding functions. + """ + import io + + Image = pytest.importorskip("PIL.Image") + from lancedb.embeddings.utils import url_retrieve + + image_url = "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg" + image_bytes = url_retrieve(image_url) + img = Image.open(io.BytesIO(image_bytes)) + assert img.size[0] > 0 and img.size[1] > 0 diff --git a/python/python/tests/test_hybrid_query.py b/python/python/tests/test_hybrid_query.py index bb6f2befc..a9d89c0f0 100644 --- a/python/python/tests/test_hybrid_query.py +++ b/python/python/tests/test_hybrid_query.py @@ -177,6 +177,60 @@ async def test_analyze_plan(table: AsyncTable): assert "metrics=" in res +@pytest.fixture +def table_with_id(tmpdir_factory) -> Table: + tmp_path = str(tmpdir_factory.mktemp("data")) + db = lancedb.connect(tmp_path) + data = pa.table( + { + "id": pa.array([1, 2, 3, 4], type=pa.int64()), + "text": pa.array(["a", "b", "cat", "dog"]), + "vector": pa.array( + [[0.1, 0.1], [2, 2], [-0.1, -0.1], [0.5, -0.5]], + type=pa.list_(pa.float32(), list_size=2), + ), + } + ) + table = db.create_table("test_with_id", data) + table.create_fts_index("text", with_position=False, use_tantivy=False) + return table + + +def test_hybrid_prefilter_explain_plan(table_with_id: Table): + """ + Verify that the prefilter logic is not inverted in LanceHybridQueryBuilder. + """ + plan_prefilter = ( + table_with_id.search(query_type="hybrid") + .vector([0.0, 0.0]) + .text("dog") + .where("id = 1", prefilter=True) + .limit(2) + .explain_plan(verbose=True) + ) + + plan_postfilter = ( + table_with_id.search(query_type="hybrid") + .vector([0.0, 0.0]) + .text("dog") + .where("id = 1", prefilter=False) + .limit(2) + .explain_plan(verbose=True) + ) + + # prefilter=True: filter is pushed into the LanceRead scan. + # The FTS sub-plan exposes this as "full_filter=id = Int64(1)" inside LanceRead. + assert "full_filter=id = Int64(1)" in plan_prefilter, ( + f"Should push the filter into the scan.\nPlan:\n{plan_prefilter}" + ) + + # prefilter=False: filter is applied as a separate FilterExec after the search. + # The filter must NOT be embedded in the scan. + assert "full_filter=id = Int64(1)" not in plan_postfilter, ( + f"Should NOT push the filter into the scan.\nPlan:\n{plan_postfilter}" + ) + + def test_normalize_scores(): cases = [ (pa.array([0.1, 0.4]), pa.array([0.0, 1.0])), diff --git a/python/python/tests/test_index.py b/python/python/tests/test_index.py index b4097a8f0..8dfa55a77 100644 --- a/python/python/tests/test_index.py +++ b/python/python/tests/test_index.py @@ -3,6 +3,7 @@ from datetime import timedelta import random +from typing import get_args, get_type_hints import pyarrow as pa import pytest @@ -22,6 +23,7 @@ from lancedb.index import ( HnswSq, FTS, ) +from lancedb.table import IndexStatistics @pytest_asyncio.fixture @@ -283,3 +285,23 @@ async def test_create_index_with_binary_vectors(binary_table: AsyncTable): for v in range(256): res = await binary_table.query().nearest_to([v] * 128).to_arrow() assert res["id"][0].as_py() == v + + +def test_index_statistics_index_type_lists_all_supported_values(): + expected_index_types = { + "IVF_FLAT", + "IVF_SQ", + "IVF_PQ", + "IVF_RQ", + "IVF_HNSW_SQ", + "IVF_HNSW_PQ", + "FTS", + "BTREE", + "BITMAP", + "LABEL_LIST", + } + + assert ( + set(get_args(get_type_hints(IndexStatistics)["index_type"])) + == expected_index_types + ) diff --git a/python/python/tests/test_namespace.py b/python/python/tests/test_namespace.py index 70bc046ba..4bd0a5dd5 100644 --- a/python/python/tests/test_namespace.py +++ b/python/python/tests/test_namespace.py @@ -8,6 +8,7 @@ import shutil import pytest import pyarrow as pa import lancedb +from lance_namespace.errors import NamespaceNotEmptyError, TableNotFoundError class TestNamespaceConnection: @@ -130,7 +131,7 @@ class TestNamespaceConnection: assert len(list(db.table_names(namespace=["test_ns"]))) == 0 # Should not be able to open dropped table - with pytest.raises(RuntimeError): + with pytest.raises(TableNotFoundError): db.open_table("table1", namespace=["test_ns"]) def test_create_table_with_schema(self): @@ -340,7 +341,7 @@ class TestNamespaceConnection: db.create_table("test_table", schema=schema, namespace=["test_namespace"]) # Try to drop namespace with tables - should fail - with pytest.raises(RuntimeError, match="is not empty"): + with pytest.raises(NamespaceNotEmptyError): db.drop_namespace(["test_namespace"]) # Drop table first diff --git a/python/python/tests/test_query.py b/python/python/tests/test_query.py index 2105a4dfa..a7153b010 100644 --- a/python/python/tests/test_query.py +++ b/python/python/tests/test_query.py @@ -30,6 +30,7 @@ from lancedb.query import ( PhraseQuery, Query, FullTextSearchQuery, + ensure_vector_query, ) from lancedb.rerankers.cross_encoder import CrossEncoderReranker from lancedb.table import AsyncTable, LanceTable @@ -1501,6 +1502,18 @@ def test_search_empty_table(mem_db): assert results == [] +def test_ensure_vector_query_empty_list(): + """Regression: ensure_vector_query used to return instead of raise ValueError.""" + with pytest.raises(ValueError, match="non-empty"): + ensure_vector_query([]) + + +def test_ensure_vector_query_nested_empty_list(): + """Regression: ensure_vector_query used to return instead of raise ValueError.""" + with pytest.raises(ValueError, match="non-empty"): + ensure_vector_query([[]]) + + def test_fast_search(tmp_path): db = lancedb.connect(tmp_path) diff --git a/python/python/tests/test_remote_db.py b/python/python/tests/test_remote_db.py index 628a21e1b..0dd880cc0 100644 --- a/python/python/tests/test_remote_db.py +++ b/python/python/tests/test_remote_db.py @@ -1201,6 +1201,18 @@ async def test_header_provider_overrides_static_headers(): await db.table_names() +def test_close(): + """Test that close() works without AttributeError.""" + import asyncio + + def handler(req): + req.send_response(200) + req.end_headers() + + with mock_lancedb_connection(handler) as db: + asyncio.run(db.close()) + + @pytest.mark.parametrize("exception", [KeyboardInterrupt, SystemExit, GeneratorExit]) def test_background_loop_cancellation(exception): """Test that BackgroundEventLoop.run() cancels the future on interrupt.""" diff --git a/python/python/tests/test_table.py b/python/python/tests/test_table.py index eaa1eb7af..7303fd827 100644 --- a/python/python/tests/test_table.py +++ b/python/python/tests/test_table.py @@ -527,6 +527,132 @@ async def test_add_async(mem_db_async: AsyncConnection): assert await table.count_rows() == 3 +def test_add_overwrite_infers_vector_schema(mem_db: DBConnection): + """Overwrite should infer vector columns the same way create_table does. + + Regression test for https://github.com/lancedb/lancedb/issues/3183 + """ + table = mem_db.create_table( + "test_overwrite_vec", + data=[ + {"vector": [1.0, 2.0, 3.0, 4.0], "item": "foo"}, + {"vector": [5.0, 6.0, 7.0, 8.0], "item": "bar"}, + ], + ) + # create_table infers vector as fixed_size_list + original_type = table.schema.field("vector").type + assert pa.types.is_fixed_size_list(original_type) + + # overwrite with plain Python lists (PyArrow infers list) + table.add( + [ + {"vector": [10.0, 20.0, 30.0, 40.0], "item": "baz"}, + ], + mode="overwrite", + ) + # overwrite should infer vector column the same way as create_table + new_type = table.schema.field("vector").type + assert pa.types.is_fixed_size_list(new_type), ( + f"Expected fixed_size_list after overwrite, got {new_type}" + ) + + +def test_add_progress_callback(mem_db: DBConnection): + table = mem_db.create_table( + "test", + data=[{"id": 1}, {"id": 2}], + ) + + updates = [] + table.add([{"id": 3}, {"id": 4}], progress=lambda p: updates.append(dict(p))) + + assert len(table) == 4 + # The done callback always fires, so we should always get at least one. + assert len(updates) >= 1, "expected at least one progress callback" + for p in updates: + assert "output_rows" in p + assert "output_bytes" in p + assert "total_rows" in p + assert "elapsed_seconds" in p + assert "active_tasks" in p + assert "total_tasks" in p + assert "done" in p + # The last callback should have done=True. + assert updates[-1]["done"] is True + + +def test_add_progress_tqdm_like(mem_db: DBConnection): + """Test that a tqdm-like object gets total set and update() called.""" + + class FakeBar: + def __init__(self): + self.total = None + self.n = 0 + self.postfix = None + + def update(self, n): + self.n += n + + def set_postfix_str(self, s): + self.postfix = s + + def refresh(self): + pass + + table = mem_db.create_table( + "test", + data=[{"id": 1}, {"id": 2}], + ) + + bar = FakeBar() + table.add([{"id": 3}, {"id": 4}], progress=bar) + + assert len(table) == 4 + # Postfix should contain throughput and worker count + if bar.postfix is not None: + assert "MB/s" in bar.postfix + assert "workers" in bar.postfix + + +def test_add_progress_bool(mem_db: DBConnection): + """Test that progress=True creates and closes a tqdm bar automatically.""" + table = mem_db.create_table( + "test", + data=[{"id": 1}, {"id": 2}], + ) + + table.add([{"id": 3}, {"id": 4}], progress=True) + assert len(table) == 4 + + # progress=False should be the same as None + table.add([{"id": 5}], progress=False) + assert len(table) == 5 + + +@pytest.mark.asyncio +async def test_add_progress_callback_async(mem_db_async: AsyncConnection): + """Progress callbacks work through the async path too.""" + table = await mem_db_async.create_table("test", data=[{"id": 1}, {"id": 2}]) + + updates = [] + await table.add([{"id": 3}, {"id": 4}], progress=lambda p: updates.append(dict(p))) + + assert await table.count_rows() == 4 + assert len(updates) >= 1 + assert updates[-1]["done"] is True + + +def test_add_progress_callback_error(mem_db: DBConnection): + """A failing callback must not prevent the write from succeeding.""" + table = mem_db.create_table("test", data=[{"id": 1}, {"id": 2}]) + + def bad_callback(p): + raise RuntimeError("boom") + + table.add([{"id": 3}, {"id": 4}], progress=bad_callback) + assert len(table) == 4 + + def test_polars(mem_db: DBConnection): data = { "vector": [[3.1, 4.1], [5.9, 26.5]], @@ -2047,3 +2173,33 @@ def test_table_uri(tmp_path): db = lancedb.connect(tmp_path) table = db.create_table("my_table", data=[{"x": 0}]) assert table.uri == str(tmp_path / "my_table.lance") + + +def test_sanitize_data_metadata_not_stripped(): + """Regression test: dict.update() returns None, so assigning its result + would silently replace metadata with None, causing with_metadata(None) + to strip all schema metadata from the target schema.""" + from lancedb.table import _sanitize_data + + schema = pa.schema( + [pa.field("x", pa.int64())], + metadata={b"existing_key": b"existing_value"}, + ) + batch = pa.record_batch([pa.array([1, 2, 3])], schema=schema) + + # Use a different field type so the reader and target schemas differ, + # forcing _cast_to_target_schema to rebuild the schema with the + # target's metadata (instead of taking the fast-path). + target_schema = pa.schema( + [pa.field("x", pa.int32())], + metadata={b"existing_key": b"existing_value"}, + ) + + reader = pa.RecordBatchReader.from_batches(schema, [batch]) + metadata = {b"new_key": b"new_value"} + result = _sanitize_data(reader, target_schema=target_schema, metadata=metadata) + + result_schema = result.schema + assert result_schema.metadata is not None + assert result_schema.metadata[b"existing_key"] == b"existing_value" + assert result_schema.metadata[b"new_key"] == b"new_value" diff --git a/python/src/expr.rs b/python/src/expr.rs new file mode 100644 index 000000000..e12c7d0a8 --- /dev/null +++ b/python/src/expr.rs @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The LanceDB Authors + +//! PyO3 bindings for the LanceDB expression builder API. +//! +//! This module exposes [`PyExpr`] and helper free functions so Python can +//! build type-safe filter / projection expressions that map directly to +//! DataFusion [`Expr`] nodes, bypassing SQL string parsing. + +use arrow::{datatypes::DataType, pyarrow::PyArrowType}; +use lancedb::expr::{DfExpr, col as ldb_col, contains, expr_cast, lit as df_lit, lower, upper}; +use pyo3::{Bound, PyAny, PyResult, exceptions::PyValueError, prelude::*, pyfunction}; + +/// A type-safe DataFusion expression. +/// +/// Instances are constructed via the free functions [`expr_col`] and +/// [`expr_lit`] and combined with the methods on this struct. On the Python +/// side a thin wrapper class (`lancedb.expr.Expr`) delegates to these methods +/// and adds Python operator overloads. +#[pyclass(name = "PyExpr")] +#[derive(Clone)] +pub struct PyExpr(pub DfExpr); + +#[pymethods] +impl PyExpr { + // ── comparisons ────────────────────────────────────────────────────────── + + fn eq(&self, other: &Self) -> Self { + Self(self.0.clone().eq(other.0.clone())) + } + + fn ne(&self, other: &Self) -> Self { + Self(self.0.clone().not_eq(other.0.clone())) + } + + fn lt(&self, other: &Self) -> Self { + Self(self.0.clone().lt(other.0.clone())) + } + + fn lte(&self, other: &Self) -> Self { + Self(self.0.clone().lt_eq(other.0.clone())) + } + + fn gt(&self, other: &Self) -> Self { + Self(self.0.clone().gt(other.0.clone())) + } + + fn gte(&self, other: &Self) -> Self { + Self(self.0.clone().gt_eq(other.0.clone())) + } + + // ── logical ────────────────────────────────────────────────────────────── + + fn and_(&self, other: &Self) -> Self { + Self(self.0.clone().and(other.0.clone())) + } + + fn or_(&self, other: &Self) -> Self { + Self(self.0.clone().or(other.0.clone())) + } + + fn not_(&self) -> Self { + use std::ops::Not; + Self(self.0.clone().not()) + } + + // ── arithmetic ─────────────────────────────────────────────────────────── + + fn add(&self, other: &Self) -> Self { + use std::ops::Add; + Self(self.0.clone().add(other.0.clone())) + } + + fn sub(&self, other: &Self) -> Self { + use std::ops::Sub; + Self(self.0.clone().sub(other.0.clone())) + } + + fn mul(&self, other: &Self) -> Self { + use std::ops::Mul; + Self(self.0.clone().mul(other.0.clone())) + } + + fn div(&self, other: &Self) -> Self { + use std::ops::Div; + Self(self.0.clone().div(other.0.clone())) + } + + // ── string functions ───────────────────────────────────────────────────── + + /// Convert string column to lowercase. + fn lower(&self) -> Self { + Self(lower(self.0.clone())) + } + + /// Convert string column to uppercase. + fn upper(&self) -> Self { + Self(upper(self.0.clone())) + } + + /// Test whether the string contains `substr`. + fn contains(&self, substr: &Self) -> Self { + Self(contains(self.0.clone(), substr.0.clone())) + } + + // ── type cast ──────────────────────────────────────────────────────────── + + /// Cast the expression to `data_type`. + /// + /// `data_type` must be a PyArrow `DataType` (e.g. `pa.int32()`). + /// On the Python side, `lancedb.expr.Expr.cast` also accepts type name + /// strings via `pa.lib.ensure_type` before forwarding here. + fn cast(&self, data_type: PyArrowType) -> Self { + Self(expr_cast(self.0.clone(), data_type.0)) + } + + // ── utilities ──────────────────────────────────────────────────────────── + + /// Render the expression as a SQL string (useful for debugging). + fn to_sql(&self) -> PyResult { + lancedb::expr::expr_to_sql_string(&self.0).map_err(|e| PyValueError::new_err(e.to_string())) + } + + fn __repr__(&self) -> PyResult { + let sql = + lancedb::expr::expr_to_sql_string(&self.0).unwrap_or_else(|_| "".to_string()); + Ok(format!("PyExpr({})", sql)) + } +} + +// ── free functions ──────────────────────────────────────────────────────────── + +/// Create a column reference expression. +/// +/// The column name is preserved exactly as given (case-sensitive), so +/// `col("firstName")` correctly references a field named `firstName`. +#[pyfunction] +pub fn expr_col(name: &str) -> PyExpr { + PyExpr(ldb_col(name)) +} + +/// Create a literal value expression. +/// +/// Supported Python types: `bool`, `int`, `float`, `str`. +#[pyfunction] +pub fn expr_lit(value: Bound<'_, PyAny>) -> PyResult { + // bool must be checked before int because bool is a subclass of int in Python + if let Ok(b) = value.extract::() { + return Ok(PyExpr(df_lit(b))); + } + if let Ok(i) = value.extract::() { + return Ok(PyExpr(df_lit(i))); + } + if let Ok(f) = value.extract::() { + return Ok(PyExpr(df_lit(f))); + } + if let Ok(s) = value.extract::() { + return Ok(PyExpr(df_lit(s))); + } + Err(PyValueError::new_err(format!( + "unsupported literal type: {}. Supported: bool, int, float, str", + value.get_type().name()? + ))) +} + +/// Call an arbitrary registered SQL function by name. +/// +/// See `lancedb::expr::func` for the list of supported function names. +#[pyfunction] +pub fn expr_func(name: &str, args: Vec) -> PyResult { + let df_args: Vec = args.into_iter().map(|e| e.0).collect(); + lancedb::expr::func(name, df_args) + .map(PyExpr) + .map_err(|e| PyValueError::new_err(e.to_string())) +} diff --git a/python/src/lib.rs b/python/src/lib.rs index a6ef13f12..d773c06c6 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -4,6 +4,7 @@ use arrow::RecordBatchStream; use connection::{Connection, connect}; use env_logger::Env; +use expr::{PyExpr, expr_col, expr_func, expr_lit}; use index::IndexConfig; use permutation::{PyAsyncPermutationBuilder, PyPermutationReader}; use pyo3::{ @@ -21,6 +22,7 @@ use table::{ pub mod arrow; pub mod connection; pub mod error; +pub mod expr; pub mod header; pub mod index; pub mod namespace; @@ -55,10 +57,14 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_function(wrap_pyfunction!(connect, m)?)?; m.add_function(wrap_pyfunction!(permutation::async_permutation_builder, m)?)?; m.add_function(wrap_pyfunction!(util::validate_table_name, m)?)?; m.add_function(wrap_pyfunction!(query::fts_query_to_json, m)?)?; + m.add_function(wrap_pyfunction!(expr_col, m)?)?; + m.add_function(wrap_pyfunction!(expr_lit, m)?)?; + m.add_function(wrap_pyfunction!(expr_func, m)?)?; m.add("__version__", env!("CARGO_PKG_VERSION"))?; Ok(()) } diff --git a/python/src/query.rs b/python/src/query.rs index 6a22f53a9..98876739b 100644 --- a/python/src/query.rs +++ b/python/src/query.rs @@ -35,12 +35,10 @@ use pyo3::types::PyList; use pyo3::types::{PyDict, PyString}; use pyo3::{FromPyObject, exceptions::PyRuntimeError}; use pyo3::{PyErr, pyclass}; -use pyo3::{ - exceptions::{PyNotImplementedError, PyValueError}, - intern, -}; +use pyo3::{exceptions::PyValueError, intern}; use pyo3_async_runtimes::tokio::future_into_py; +use crate::expr::PyExpr; use crate::util::parse_distance_type; use crate::{arrow::RecordBatchStream, util::PyLanceDB}; use crate::{error::PythonErrorExt, index::class_name}; @@ -344,9 +342,13 @@ impl<'py> IntoPyObject<'py> for PyQueryFilter { fn into_pyobject(self, py: pyo3::Python<'py>) -> PyResult { match self.0 { - QueryFilter::Datafusion(_) => Err(PyNotImplementedError::new_err( - "Datafusion filter has no conversion to Python", - )), + QueryFilter::Datafusion(expr) => { + // Serialize the DataFusion expression to a SQL string so that + // callers (e.g. remote tables) see the same format as Sql. + let sql = lancedb::expr::expr_to_sql_string(&expr) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + Ok(sql.into_pyobject(py)?.into_any()) + } QueryFilter::Sql(sql) => Ok(sql.into_pyobject(py)?.into_any()), QueryFilter::Substrait(substrait) => Ok(substrait.into_pyobject(py)?.into_any()), } @@ -370,10 +372,20 @@ impl Query { self.inner = self.inner.clone().only_if(predicate); } + pub fn where_expr(&mut self, expr: PyExpr) { + self.inner = self.inner.clone().only_if_expr(expr.0); + } + pub fn select(&mut self, columns: Vec<(String, String)>) { self.inner = self.inner.clone().select(Select::dynamic(&columns)); } + pub fn select_expr(&mut self, columns: Vec<(String, PyExpr)>) { + let pairs: Vec<(String, lancedb::expr::DfExpr)> = + columns.into_iter().map(|(name, e)| (name, e.0)).collect(); + self.inner = self.inner.clone().select(Select::Expr(pairs)); + } + pub fn select_columns(&mut self, columns: Vec) { self.inner = self.inner.clone().select(Select::columns(&columns)); } @@ -607,10 +619,20 @@ impl FTSQuery { self.inner = self.inner.clone().only_if(predicate); } + pub fn where_expr(&mut self, expr: PyExpr) { + self.inner = self.inner.clone().only_if_expr(expr.0); + } + pub fn select(&mut self, columns: Vec<(String, String)>) { self.inner = self.inner.clone().select(Select::dynamic(&columns)); } + pub fn select_expr(&mut self, columns: Vec<(String, PyExpr)>) { + let pairs: Vec<(String, lancedb::expr::DfExpr)> = + columns.into_iter().map(|(name, e)| (name, e.0)).collect(); + self.inner = self.inner.clone().select(Select::Expr(pairs)); + } + pub fn select_columns(&mut self, columns: Vec) { self.inner = self.inner.clone().select(Select::columns(&columns)); } @@ -725,6 +747,10 @@ impl VectorQuery { self.inner = self.inner.clone().only_if(predicate); } + pub fn where_expr(&mut self, expr: PyExpr) { + self.inner = self.inner.clone().only_if_expr(expr.0); + } + pub fn add_query_vector(&mut self, vector: Bound<'_, PyAny>) -> PyResult<()> { let data: ArrayData = ArrayData::from_pyarrow_bound(&vector)?; let array = make_array(data); @@ -736,6 +762,12 @@ impl VectorQuery { self.inner = self.inner.clone().select(Select::dynamic(&columns)); } + pub fn select_expr(&mut self, columns: Vec<(String, PyExpr)>) { + let pairs: Vec<(String, lancedb::expr::DfExpr)> = + columns.into_iter().map(|(name, e)| (name, e.0)).collect(); + self.inner = self.inner.clone().select(Select::Expr(pairs)); + } + pub fn select_columns(&mut self, columns: Vec) { self.inner = self.inner.clone().select(Select::columns(&columns)); } @@ -890,11 +922,21 @@ impl HybridQuery { self.inner_fts.r#where(predicate); } + pub fn where_expr(&mut self, expr: PyExpr) { + self.inner_vec.where_expr(expr.clone()); + self.inner_fts.where_expr(expr); + } + pub fn select(&mut self, columns: Vec<(String, String)>) { self.inner_vec.select(columns.clone()); self.inner_fts.select(columns); } + pub fn select_expr(&mut self, columns: Vec<(String, PyExpr)>) { + self.inner_vec.select_expr(columns.clone()); + self.inner_fts.select_expr(columns); + } + pub fn select_columns(&mut self, columns: Vec) { self.inner_vec.select_columns(columns.clone()); self.inner_fts.select_columns(columns); diff --git a/python/src/table.rs b/python/src/table.rs index 00015bba8..d44b6c1fd 100644 --- a/python/src/table.rs +++ b/python/src/table.rs @@ -19,7 +19,7 @@ use lancedb::table::{ Table as LanceDbTable, }; use pyo3::{ - Bound, FromPyObject, PyAny, PyRef, PyResult, Python, + Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python, exceptions::{PyKeyError, PyRuntimeError, PyValueError}, pyclass, pymethods, types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods}, @@ -299,10 +299,12 @@ impl Table { }) } + #[pyo3(signature = (data, mode, progress=None))] pub fn add<'a>( self_: PyRef<'a, Self>, data: PyScannable, mode: String, + progress: Option>, ) -> PyResult> { let mut op = self_.inner_ref()?.add(data); if mode == "append" { @@ -312,6 +314,81 @@ impl Table { } else { return Err(PyValueError::new_err(format!("Invalid mode: {}", mode))); } + if let Some(progress_obj) = progress { + let is_callable = Python::attach(|py| progress_obj.bind(py).is_callable()); + if is_callable { + // Callback: call with a dict of progress info. + op = op.progress(move |p| { + Python::attach(|py| { + let dict = PyDict::new(py); + if let Err(e) = dict + .set_item("output_rows", p.output_rows()) + .and_then(|_| dict.set_item("output_bytes", p.output_bytes())) + .and_then(|_| dict.set_item("total_rows", p.total_rows())) + .and_then(|_| { + dict.set_item("elapsed_seconds", p.elapsed().as_secs_f64()) + }) + .and_then(|_| dict.set_item("active_tasks", p.active_tasks())) + .and_then(|_| dict.set_item("total_tasks", p.total_tasks())) + .and_then(|_| dict.set_item("done", p.done())) + { + log::warn!("progress dict error: {e}"); + return; + } + if let Err(e) = progress_obj.call1(py, (dict,)) { + log::warn!("progress callback error: {e}"); + } + }); + }); + } else { + // tqdm-like: has update() method. + let mut last_rows: usize = 0; + let mut total_set = false; + op = op.progress(move |p| { + let current = p.output_rows(); + let prev = last_rows; + last_rows = current; + Python::attach(|py| { + if let Some(total) = p.total_rows() + && !total_set + { + if let Err(e) = progress_obj.setattr(py, "total", total) { + log::warn!("progress setattr error: {e}"); + } + total_set = true; + } + let delta = current.saturating_sub(prev); + if delta > 0 { + if let Err(e) = progress_obj.call_method1(py, "update", (delta,)) { + log::warn!("progress update error: {e}"); + } + // Show throughput and active workers in tqdm postfix. + let elapsed = p.elapsed().as_secs_f64(); + if elapsed > 0.0 { + let mb_per_sec = p.output_bytes() as f64 / elapsed / 1_000_000.0; + let postfix = format!( + "{:.1} MB/s | {}/{} workers", + mb_per_sec, + p.active_tasks(), + p.total_tasks() + ); + if let Err(e) = + progress_obj.call_method1(py, "set_postfix_str", (postfix,)) + { + log::warn!("progress set_postfix_str error: {e}"); + } + } + } + if p.done() { + // Force a final refresh so the bar shows completion. + if let Err(e) = progress_obj.call_method0(py, "refresh") { + log::warn!("progress refresh error: {e}"); + } + } + }); + }); + } + } future_into_py(self_.py(), async move { let result = op.execute().await.infer_error()?; diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py new file mode 100644 index 000000000..339aca323 --- /dev/null +++ b/python/tests/test_expr.py @@ -0,0 +1,387 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright The LanceDB Authors + +"""Tests for the type-safe expression builder API.""" + +import pytest +import pyarrow as pa +import lancedb +from lancedb.expr import Expr, col, lit, func + + +# ── unit tests for Expr construction ───────────────────────────────────────── + + +class TestExprConstruction: + def test_col_returns_expr(self): + e = col("age") + assert isinstance(e, Expr) + + def test_lit_int(self): + e = lit(42) + assert isinstance(e, Expr) + + def test_lit_float(self): + e = lit(3.14) + assert isinstance(e, Expr) + + def test_lit_str(self): + e = lit("hello") + assert isinstance(e, Expr) + + def test_lit_bool(self): + e = lit(True) + assert isinstance(e, Expr) + + def test_lit_unsupported_type_raises(self): + with pytest.raises(Exception): + lit([1, 2, 3]) + + def test_func(self): + e = func("lower", col("name")) + assert isinstance(e, Expr) + assert e.to_sql() == "lower(name)" + + def test_func_unknown_raises(self): + with pytest.raises(Exception): + func("not_a_real_function", col("x")) + + +class TestExprOperators: + def test_eq_operator(self): + e = col("x") == lit(1) + assert isinstance(e, Expr) + assert e.to_sql() == "(x = 1)" + + def test_ne_operator(self): + e = col("x") != lit(1) + assert isinstance(e, Expr) + assert e.to_sql() == "(x <> 1)" + + def test_lt_operator(self): + e = col("age") < lit(18) + assert isinstance(e, Expr) + assert e.to_sql() == "(age < 18)" + + def test_le_operator(self): + e = col("age") <= lit(18) + assert isinstance(e, Expr) + assert e.to_sql() == "(age <= 18)" + + def test_gt_operator(self): + e = col("age") > lit(18) + assert isinstance(e, Expr) + assert e.to_sql() == "(age > 18)" + + def test_ge_operator(self): + e = col("age") >= lit(18) + assert isinstance(e, Expr) + assert e.to_sql() == "(age >= 18)" + + def test_and_operator(self): + e = (col("age") > lit(18)) & (col("status") == lit("active")) + assert isinstance(e, Expr) + assert e.to_sql() == "((age > 18) AND (status = 'active'))" + + def test_or_operator(self): + e = (col("a") == lit(1)) | (col("b") == lit(2)) + assert isinstance(e, Expr) + assert e.to_sql() == "((a = 1) OR (b = 2))" + + def test_invert_operator(self): + e = ~(col("active") == lit(True)) + assert isinstance(e, Expr) + assert e.to_sql() == "NOT (active = true)" + + def test_add_operator(self): + e = col("x") + lit(1) + assert isinstance(e, Expr) + assert e.to_sql() == "(x + 1)" + + def test_sub_operator(self): + e = col("x") - lit(1) + assert isinstance(e, Expr) + assert e.to_sql() == "(x - 1)" + + def test_mul_operator(self): + e = col("price") * lit(1.1) + assert isinstance(e, Expr) + assert e.to_sql() == "(price * 1.1)" + + def test_div_operator(self): + e = col("total") / lit(2) + assert isinstance(e, Expr) + assert e.to_sql() == "(total / 2)" + + def test_radd(self): + e = lit(1) + col("x") + assert isinstance(e, Expr) + assert e.to_sql() == "(1 + x)" + + def test_rmul(self): + e = lit(2) * col("x") + assert isinstance(e, Expr) + assert e.to_sql() == "(2 * x)" + + def test_coerce_plain_int(self): + # Operators should auto-wrap plain Python values via lit() + e = col("age") > 18 + assert isinstance(e, Expr) + assert e.to_sql() == "(age > 18)" + + def test_coerce_plain_str(self): + e = col("name") == "alice" + assert isinstance(e, Expr) + assert e.to_sql() == "(name = 'alice')" + + +class TestExprStringMethods: + def test_lower(self): + e = col("name").lower() + assert isinstance(e, Expr) + assert e.to_sql() == "lower(name)" + + def test_upper(self): + e = col("name").upper() + assert isinstance(e, Expr) + assert e.to_sql() == "upper(name)" + + def test_contains(self): + e = col("text").contains(lit("hello")) + assert isinstance(e, Expr) + assert e.to_sql() == "contains(text, 'hello')" + + def test_contains_with_str_coerce(self): + e = col("text").contains("hello") + assert isinstance(e, Expr) + assert e.to_sql() == "contains(text, 'hello')" + + def test_chained_lower_eq(self): + e = col("name").lower() == lit("alice") + assert isinstance(e, Expr) + assert e.to_sql() == "(lower(name) = 'alice')" + + +class TestExprCast: + def test_cast_string(self): + e = col("id").cast("string") + assert isinstance(e, Expr) + assert e.to_sql() == "CAST(id AS VARCHAR)" + + def test_cast_int32(self): + e = col("score").cast("int32") + assert isinstance(e, Expr) + assert e.to_sql() == "CAST(score AS INTEGER)" + + def test_cast_float64(self): + e = col("val").cast("float64") + assert isinstance(e, Expr) + assert e.to_sql() == "CAST(val AS DOUBLE)" + + def test_cast_pyarrow_type(self): + e = col("score").cast(pa.int32()) + assert isinstance(e, Expr) + assert e.to_sql() == "CAST(score AS INTEGER)" + + def test_cast_pyarrow_float64(self): + e = col("val").cast(pa.float64()) + assert isinstance(e, Expr) + assert e.to_sql() == "CAST(val AS DOUBLE)" + + def test_cast_pyarrow_string(self): + e = col("id").cast(pa.string()) + assert isinstance(e, Expr) + assert e.to_sql() == "CAST(id AS VARCHAR)" + + def test_cast_pyarrow_and_string_equivalent(self): + # pa.int32() and "int32" should produce equivalent SQL + sql_str = col("x").cast("int32").to_sql() + sql_pa = col("x").cast(pa.int32()).to_sql() + assert sql_str == sql_pa + + +class TestExprNamedMethods: + def test_eq_method(self): + e = col("x").eq(lit(1)) + assert isinstance(e, Expr) + assert e.to_sql() == "(x = 1)" + + def test_gt_method(self): + e = col("x").gt(lit(0)) + assert isinstance(e, Expr) + assert e.to_sql() == "(x > 0)" + + def test_and_method(self): + e = col("x").gt(lit(0)).and_(col("y").lt(lit(10))) + assert isinstance(e, Expr) + assert e.to_sql() == "((x > 0) AND (y < 10))" + + def test_or_method(self): + e = col("x").eq(lit(1)).or_(col("x").eq(lit(2))) + assert isinstance(e, Expr) + assert e.to_sql() == "((x = 1) OR (x = 2))" + + +class TestExprRepr: + def test_repr(self): + e = col("age") > lit(18) + assert repr(e) == "Expr((age > 18))" + + def test_to_sql(self): + e = col("age") > 18 + assert e.to_sql() == "(age > 18)" + + def test_unhashable(self): + e = col("x") + with pytest.raises(TypeError): + {e: 1} + + +# ── integration tests: end-to-end query against a real table ───────────────── + + +@pytest.fixture +def simple_table(tmp_path): + db = lancedb.connect(str(tmp_path)) + data = pa.table( + { + "id": [1, 2, 3, 4, 5], + "name": ["Alice", "Bob", "Charlie", "alice", "BOB"], + "age": [25, 17, 30, 22, 15], + "score": [1.5, 2.0, 3.5, 4.0, 0.5], + } + ) + return db.create_table("test", data) + + +class TestExprFilter: + def test_simple_gt_filter(self, simple_table): + result = simple_table.search().where(col("age") > lit(20)).to_arrow() + assert result.num_rows == 3 # ages 25, 30, 22 + + def test_compound_and_filter(self, simple_table): + result = ( + simple_table.search() + .where((col("age") > lit(18)) & (col("score") > lit(2.0))) + .to_arrow() + ) + assert result.num_rows == 2 # (30, 3.5) and (22, 4.0) + + def test_string_equality_filter(self, simple_table): + result = simple_table.search().where(col("name") == lit("Bob")).to_arrow() + assert result.num_rows == 1 + + def test_or_filter(self, simple_table): + result = ( + simple_table.search() + .where((col("age") < lit(18)) | (col("age") > lit(28))) + .to_arrow() + ) + assert result.num_rows == 3 # ages 17, 30, 15 + + def test_coercion_no_lit(self, simple_table): + # Python values should be auto-coerced + result = simple_table.search().where(col("age") > 20).to_arrow() + assert result.num_rows == 3 + + def test_string_sql_still_works(self, simple_table): + # Backwards compatibility: plain strings still accepted + result = simple_table.search().where("age > 20").to_arrow() + assert result.num_rows == 3 + + +class TestExprProjection: + def test_select_with_expr(self, simple_table): + result = ( + simple_table.search() + .select({"double_score": col("score") * lit(2)}) + .to_arrow() + ) + assert "double_score" in result.schema.names + + def test_select_mixed_str_and_expr(self, simple_table): + result = ( + simple_table.search() + .select({"id": "id", "double_score": col("score") * lit(2)}) + .to_arrow() + ) + assert "id" in result.schema.names + assert "double_score" in result.schema.names + + def test_select_list_of_columns(self, simple_table): + # Plain list of str still works + result = simple_table.search().select(["id", "name"]).to_arrow() + assert result.schema.names == ["id", "name"] + + +# ── column name edge cases ──────────────────────────────────────────────────── + + +class TestColNaming: + """Unit tests verifying that col() preserves identifiers exactly. + + Identifiers that need quoting (camelCase, spaces, leading digits, unicode) + are wrapped in backticks to match the lance SQL parser's dialect. + """ + + def test_camel_case_preserved_in_sql(self): + # camelCase is quoted with backticks so the case round-trips correctly. + assert col("firstName").to_sql() == "`firstName`" + + def test_camel_case_in_expression(self): + assert (col("firstName") > lit(18)).to_sql() == "(`firstName` > 18)" + + def test_space_in_name_quoted(self): + assert col("first name").to_sql() == "`first name`" + + def test_space_in_expression(self): + assert (col("first name") == lit("A")).to_sql() == "(`first name` = 'A')" + + def test_leading_digit_quoted(self): + assert col("2fast").to_sql() == "`2fast`" + + def test_unicode_quoted(self): + assert col("名前").to_sql() == "`名前`" + + def test_snake_case_unquoted(self): + # Plain snake_case needs no quoting. + assert col("first_name").to_sql() == "first_name" + + +@pytest.fixture +def special_col_table(tmp_path): + db = lancedb.connect(str(tmp_path)) + data = pa.table( + { + "firstName": ["Alice", "Bob", "Charlie"], + "first name": ["A", "B", "C"], + "score": [10, 20, 30], + } + ) + return db.create_table("special", data) + + +class TestColNamingIntegration: + def test_camel_case_filter(self, special_col_table): + result = ( + special_col_table.search() + .where(col("firstName") == lit("Alice")) + .to_arrow() + ) + assert result.num_rows == 1 + assert result["firstName"][0].as_py() == "Alice" + + def test_space_in_col_filter(self, special_col_table): + result = ( + special_col_table.search().where(col("first name") == lit("B")).to_arrow() + ) + assert result.num_rows == 1 + + def test_camel_case_projection(self, special_col_table): + result = ( + special_col_table.search() + .select({"upper_name": col("firstName").upper()}) + .to_arrow() + ) + assert "upper_name" in result.schema.names + assert sorted(result["upper_name"].to_pylist()) == ["ALICE", "BOB", "CHARLIE"] diff --git a/rust/lancedb/Cargo.toml b/rust/lancedb/Cargo.toml index f8791c6ed..b2a93b226 100644 --- a/rust/lancedb/Cargo.toml +++ b/rust/lancedb/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lancedb" -version = "0.27.0-beta.5" +version = "0.27.2" edition.workspace = true description = "LanceDB: A serverless, low-latency vector database for AI applications" license.workspace = true diff --git a/rust/lancedb/src/connection.rs b/rust/lancedb/src/connection.rs index dc0d00919..df47972c6 100644 --- a/rust/lancedb/src/connection.rs +++ b/rust/lancedb/src/connection.rs @@ -599,11 +599,8 @@ pub struct ConnectBuilder { } #[cfg(feature = "remote")] -const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 3] = [ - ("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name"), - ("AZURE_CLIENT_ID", "azure_client_id"), - ("AZURE_TENANT_ID", "azure_tenant_id"), -]; +const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 1] = + [("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name")]; impl ConnectBuilder { /// Create a new [`ConnectOptions`] with the given database URI. diff --git a/rust/lancedb/src/dataloader/permutation/shuffle.rs b/rust/lancedb/src/dataloader/permutation/shuffle.rs index 05f98eb5a..7cd27e342 100644 --- a/rust/lancedb/src/dataloader/permutation/shuffle.rs +++ b/rust/lancedb/src/dataloader/permutation/shuffle.rs @@ -240,7 +240,7 @@ impl Shuffler { .await?; // Need to read the entire file in a single batch for in-memory shuffling let batch = reader.read_record_batch(0, reader.num_rows()).await?; - let mut rng = rng.lock().unwrap(); + let mut rng = rng.lock().unwrap_or_else(|e| e.into_inner()); Self::shuffle_batch(&batch, &mut rng, clump_size) } }) diff --git a/rust/lancedb/src/expr.rs b/rust/lancedb/src/expr.rs index e256724a0..4d700497f 100644 --- a/rust/lancedb/src/expr.rs +++ b/rust/lancedb/src/expr.rs @@ -27,7 +27,17 @@ use arrow_schema::DataType; use datafusion_expr::{Expr, ScalarUDF, expr_fn::cast}; use datafusion_functions::string::expr_fn as string_expr_fn; -pub use datafusion_expr::{col, lit}; +pub use datafusion_expr::lit; + +/// Create a column reference expression, preserving the name exactly as given. +/// +/// Unlike DataFusion's built-in [`col`][datafusion_expr::col], this function +/// does **not** normalise the identifier to lower-case, so +/// `col("firstName")` correctly references a field named `firstName`. +pub fn col(name: impl Into) -> DfExpr { + use datafusion_common::Column; + DfExpr::Column(Column::new_unqualified(name)) +} pub use datafusion_expr::Expr as DfExpr; diff --git a/rust/lancedb/src/expr/sql.rs b/rust/lancedb/src/expr/sql.rs index 21cd3ab41..f9cd81b50 100644 --- a/rust/lancedb/src/expr/sql.rs +++ b/rust/lancedb/src/expr/sql.rs @@ -2,11 +2,37 @@ // SPDX-FileCopyrightText: Copyright The LanceDB Authors use datafusion_expr::Expr; -use datafusion_sql::unparser; +use datafusion_sql::unparser::{self, dialect::Dialect}; + +/// Unparser dialect that matches the quoting style expected by the Lance SQL +/// parser. Lance uses backtick (`` ` ``) as the only delimited-identifier +/// quote character, so we must produce `` `firstName` `` rather than +/// `"firstName"` for identifiers that require quoting. +/// +/// We quote an identifier when it: +/// * is a SQL reserved word, OR +/// * contains characters outside `[a-zA-Z0-9_]`, OR +/// * starts with a digit, OR +/// * contains upper-case letters (unquoted identifiers are normalised to +/// lower-case by the SQL parser, which would break case-sensitive schemas). +struct LanceSqlDialect; + +impl Dialect for LanceSqlDialect { + fn identifier_quote_style(&self, identifier: &str) -> Option { + let needs_quote = identifier.chars().any(|c| c.is_ascii_uppercase()) + || !identifier + .chars() + .enumerate() + .all(|(i, c)| c == '_' || c.is_ascii_alphabetic() || (i > 0 && c.is_ascii_digit())); + if needs_quote { Some('`') } else { None } + } +} pub fn expr_to_sql_string(expr: &Expr) -> crate::Result { - let ast = unparser::expr_to_sql(expr).map_err(|e| crate::Error::InvalidInput { - message: format!("failed to serialize expression to SQL: {}", e), - })?; + let ast = unparser::Unparser::new(&LanceSqlDialect) + .expr_to_sql(expr) + .map_err(|e| crate::Error::InvalidInput { + message: format!("failed to serialize expression to SQL: {}", e), + })?; Ok(ast.to_string()) } diff --git a/rust/lancedb/src/io/object_store/io_tracking.rs b/rust/lancedb/src/io/object_store/io_tracking.rs index 882ef51fa..20f0a020a 100644 --- a/rust/lancedb/src/io/object_store/io_tracking.rs +++ b/rust/lancedb/src/io/object_store/io_tracking.rs @@ -66,13 +66,13 @@ impl IoTrackingStore { } fn record_read(&self, num_bytes: u64) { - let mut stats = self.stats.lock().unwrap(); + let mut stats = self.stats.lock().unwrap_or_else(|e| e.into_inner()); stats.read_iops += 1; stats.read_bytes += num_bytes; } fn record_write(&self, num_bytes: u64) { - let mut stats = self.stats.lock().unwrap(); + let mut stats = self.stats.lock().unwrap_or_else(|e| e.into_inner()); stats.write_iops += 1; stats.write_bytes += num_bytes; } @@ -229,10 +229,63 @@ impl MultipartUpload for IoTrackingMultipartUpload { fn put_part(&mut self, payload: PutPayload) -> UploadPart { { - let mut stats = self.stats.lock().unwrap(); + let mut stats = self.stats.lock().unwrap_or_else(|e| e.into_inner()); stats.write_iops += 1; stats.write_bytes += payload.content_length() as u64; } self.target.put_part(payload) } } + +#[cfg(test)] +mod tests { + use super::*; + + /// Helper: poison a Mutex by panicking while holding the lock. + fn poison_stats(stats: &Arc>) { + let stats_clone = stats.clone(); + let handle = std::thread::spawn(move || { + let _guard = stats_clone.lock().unwrap(); + panic!("intentional panic to poison stats mutex"); + }); + let _ = handle.join(); + assert!(stats.lock().is_err(), "mutex should be poisoned"); + } + + #[test] + fn test_record_read_recovers_from_poisoned_lock() { + let stats = Arc::new(Mutex::new(IoStats::default())); + let store = IoTrackingStore { + target: Arc::new(object_store::memory::InMemory::new()), + stats: stats.clone(), + }; + + poison_stats(&stats); + + // record_read should not panic + store.record_read(1024); + + // Verify the stats were updated despite poisoning + let s = stats.lock().unwrap_or_else(|e| e.into_inner()); + assert_eq!(s.read_iops, 1); + assert_eq!(s.read_bytes, 1024); + } + + #[test] + fn test_record_write_recovers_from_poisoned_lock() { + let stats = Arc::new(Mutex::new(IoStats::default())); + let store = IoTrackingStore { + target: Arc::new(object_store::memory::InMemory::new()), + stats: stats.clone(), + }; + + poison_stats(&stats); + + // record_write should not panic + store.record_write(2048); + + let s = stats.lock().unwrap_or_else(|e| e.into_inner()); + assert_eq!(s.write_iops, 1); + assert_eq!(s.write_bytes, 2048); + } +} diff --git a/rust/lancedb/src/query.rs b/rust/lancedb/src/query.rs index a1804a79c..8f60230b0 100644 --- a/rust/lancedb/src/query.rs +++ b/rust/lancedb/src/query.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use std::{future::Future, time::Duration}; use arrow::compute::concat_batches; -use arrow_array::{Array, Float16Array, Float32Array, Float64Array, make_array}; +use arrow_array::{Array, Float16Array, Float32Array, Float64Array, RecordBatch, make_array}; use arrow_schema::{DataType, SchemaRef}; use datafusion_expr::Expr; use datafusion_physical_plan::ExecutionPlan; @@ -17,15 +17,17 @@ use lance_datafusion::exec::execute_plan; use lance_index::scalar::FullTextSearchQuery; use lance_index::scalar::inverted::SCORE_COL; use lance_index::vector::DIST_COL; -use lance_io::stream::RecordBatchStreamAdapter; use crate::DistanceType; use crate::error::{Error, Result}; use crate::rerankers::rrf::RRFReranker; use crate::rerankers::{NormalizeMethod, Reranker, check_reranker_result}; use crate::table::BaseTable; -use crate::utils::TimeoutStream; -use crate::{arrow::SendableRecordBatchStream, table::AnyQuery}; +use crate::utils::{MaxBatchLengthStream, TimeoutStream}; +use crate::{ + arrow::{SendableRecordBatchStream, SimpleRecordBatchStream}, + table::AnyQuery, +}; mod hybrid; @@ -604,6 +606,14 @@ impl Default for QueryExecutionOptions { } } +impl QueryExecutionOptions { + fn without_output_batch_length_limit(&self) -> Self { + let mut options = self.clone(); + options.max_batch_length = 0; + options + } +} + /// A trait for a query object that can be executed to get results /// /// There are various kinds of queries but they all return results @@ -1180,6 +1190,8 @@ impl VectorQuery { &self, options: QueryExecutionOptions, ) -> Result { + let max_batch_length = options.max_batch_length as usize; + let internal_options = options.without_output_batch_length_limit(); // clone query and specify we want to include row IDs, which can be needed for reranking let mut fts_query = Query::new(self.parent.clone()); fts_query.request = self.request.base.clone(); @@ -1189,8 +1201,8 @@ impl VectorQuery { vector_query.request.base.full_text_search = None; let (fts_results, vec_results) = try_join!( - fts_query.execute_with_options(options.clone()), - vector_query.inner_execute_with_options(options) + fts_query.execute_with_options(internal_options.clone()), + vector_query.inner_execute_with_options(internal_options) )?; let (fts_results, vec_results) = try_join!( @@ -1245,9 +1257,7 @@ impl VectorQuery { results = results.drop_column(ROW_ID)?; } - Ok(SendableRecordBatchStream::from( - RecordBatchStreamAdapter::new(results.schema(), stream::iter([Ok(results)])), - )) + Ok(single_batch_stream(results, max_batch_length)) } async fn inner_execute_with_options( @@ -1256,6 +1266,7 @@ impl VectorQuery { ) -> Result { let plan = self.create_plan(options.clone()).await?; let inner = execute_plan(plan, Default::default())?; + let inner = MaxBatchLengthStream::new_boxed(inner, options.max_batch_length as usize); let inner = if let Some(timeout) = options.timeout { TimeoutStream::new_boxed(inner, timeout) } else { @@ -1265,6 +1276,25 @@ impl VectorQuery { } } +fn single_batch_stream(batch: RecordBatch, max_batch_length: usize) -> SendableRecordBatchStream { + let schema = batch.schema(); + if max_batch_length == 0 || batch.num_rows() <= max_batch_length { + return Box::pin(SimpleRecordBatchStream::new( + stream::iter([Ok(batch)]), + schema, + )); + } + + let mut batches = Vec::with_capacity(batch.num_rows().div_ceil(max_batch_length)); + let mut offset = 0; + while offset < batch.num_rows() { + let length = (batch.num_rows() - offset).min(max_batch_length); + batches.push(Ok(batch.slice(offset, length))); + offset += length; + } + Box::pin(SimpleRecordBatchStream::new(stream::iter(batches), schema)) +} + impl ExecutableQuery for VectorQuery { async fn create_plan(&self, options: QueryExecutionOptions) -> Result> { let query = AnyQuery::VectorQuery(self.request.clone()); @@ -1753,6 +1783,50 @@ mod tests { .unwrap() } + async fn make_large_vector_table(tmp_dir: &tempfile::TempDir, rows: usize) -> Table { + let dataset_path = tmp_dir.path().join("large_test.lance"); + let uri = dataset_path.to_str().unwrap(); + + let schema = Arc::new(ArrowSchema::new(vec![ + ArrowField::new("id", DataType::Utf8, false), + ArrowField::new( + "vector", + DataType::FixedSizeList( + Arc::new(ArrowField::new("item", DataType::Float32, true)), + 4, + ), + false, + ), + ])); + + let ids = StringArray::from_iter_values((0..rows).map(|i| format!("row-{i}"))); + let vectors = FixedSizeListArray::from_iter_primitive::( + (0..rows).map(|i| Some(vec![Some(i as f32), Some(1.0), Some(2.0), Some(3.0)])), + 4, + ); + let batch = + RecordBatch::try_new(schema.clone(), vec![Arc::new(ids), Arc::new(vectors)]).unwrap(); + + let conn = connect(uri).execute().await.unwrap(); + conn.create_table("my_table", vec![batch]) + .execute() + .await + .unwrap() + } + + async fn assert_stream_batches_at_most( + mut results: SendableRecordBatchStream, + max_batch_length: usize, + ) { + let mut saw_batch = false; + while let Some(batch) = results.next().await { + let batch = batch.unwrap(); + saw_batch = true; + assert!(batch.num_rows() <= max_batch_length); + } + assert!(saw_batch); + } + #[tokio::test] async fn test_execute_with_options() { let tmp_dir = tempdir().unwrap(); @@ -1772,6 +1846,83 @@ mod tests { } } + #[tokio::test] + async fn test_vector_query_execute_with_options_respects_max_batch_length() { + let tmp_dir = tempdir().unwrap(); + let table = make_large_vector_table(&tmp_dir, 10_000).await; + + let results = table + .query() + .nearest_to(vec![0.0, 1.0, 2.0, 3.0]) + .unwrap() + .limit(10_000) + .execute_with_options(QueryExecutionOptions { + max_batch_length: 100, + ..Default::default() + }) + .await + .unwrap(); + assert_stream_batches_at_most(results, 100).await; + } + + #[tokio::test] + async fn test_hybrid_query_execute_with_options_respects_max_batch_length() { + let tmp_dir = tempdir().unwrap(); + let dataset_path = tmp_dir.path(); + let conn = connect(dataset_path.to_str().unwrap()) + .execute() + .await + .unwrap(); + + let dims = 2; + let rows = 512; + let schema = Arc::new(ArrowSchema::new(vec![ + ArrowField::new("text", DataType::Utf8, false), + ArrowField::new( + "vector", + DataType::FixedSizeList( + Arc::new(ArrowField::new("item", DataType::Float32, true)), + dims, + ), + false, + ), + ])); + + let text = StringArray::from_iter_values((0..rows).map(|_| "match")); + let vectors = FixedSizeListArray::from_iter_primitive::( + (0..rows).map(|i| Some(vec![Some(i as f32), Some(0.0)])), + dims, + ); + let record_batch = + RecordBatch::try_new(schema.clone(), vec![Arc::new(text), Arc::new(vectors)]).unwrap(); + let table = conn + .create_table("my_table", record_batch) + .execute() + .await + .unwrap(); + + table + .create_index(&["text"], crate::index::Index::FTS(Default::default())) + .replace(true) + .execute() + .await + .unwrap(); + + let results = table + .query() + .full_text_search(FullTextSearchQuery::new("match".to_string())) + .limit(rows) + .nearest_to(&[0.0, 0.0]) + .unwrap() + .execute_with_options(QueryExecutionOptions { + max_batch_length: 100, + ..Default::default() + }) + .await + .unwrap(); + assert_stream_batches_at_most(results, 100).await; + } + #[tokio::test] async fn test_analyze_plan() { let tmp_dir = tempdir().unwrap(); diff --git a/rust/lancedb/src/remote/client.rs b/rust/lancedb/src/remote/client.rs index 11a73c72f..2b101f280 100644 --- a/rust/lancedb/src/remote/client.rs +++ b/rust/lancedb/src/remote/client.rs @@ -451,23 +451,13 @@ impl RestfulLanceDbClient { })?, ); } - // Map azure storage options to x-azure-* headers. - // The option key uses underscores (e.g. "azure_client_id") while the - // header uses hyphens (e.g. "x-azure-client-id"). - let azure_opts: [(&str, &str); 3] = [ - ("azure_storage_account_name", "x-azure-storage-account-name"), - ("azure_client_id", "x-azure-client-id"), - ("azure_tenant_id", "x-azure-tenant-id"), - ]; - for (opt_key, header_name) in azure_opts { - if let Some(v) = options.0.get(opt_key) { - headers.insert( - HeaderName::from_static(header_name), - HeaderValue::from_str(v).map_err(|_| Error::InvalidInput { - message: format!("non-ascii value '{}' for option '{}'", v, opt_key), - })?, - ); - } + if let Some(v) = options.0.get("azure_storage_account_name") { + headers.insert( + HeaderName::from_static("x-azure-storage-account-name"), + HeaderValue::from_str(v).map_err(|_| Error::InvalidInput { + message: format!("non-ascii storage account name '{}' provided", db_name), + })?, + ); } for (key, value) in &config.extra_headers { @@ -1101,34 +1091,4 @@ mod tests { _ => panic!("Expected Runtime error"), } } - - #[test] - fn test_default_headers_azure_opts() { - let mut opts = HashMap::new(); - opts.insert( - "azure_storage_account_name".to_string(), - "myaccount".to_string(), - ); - opts.insert("azure_client_id".to_string(), "my-client-id".to_string()); - opts.insert("azure_tenant_id".to_string(), "my-tenant-id".to_string()); - let remote_opts = RemoteOptions::new(opts); - - let headers = RestfulLanceDbClient::::default_headers( - "test-key", - "us-east-1", - "testdb", - false, - &remote_opts, - None, - &ClientConfig::default(), - ) - .unwrap(); - - assert_eq!( - headers.get("x-azure-storage-account-name").unwrap(), - "myaccount" - ); - assert_eq!(headers.get("x-azure-client-id").unwrap(), "my-client-id"); - assert_eq!(headers.get("x-azure-tenant-id").unwrap(), "my-tenant-id"); - } } diff --git a/rust/lancedb/src/remote/db.rs b/rust/lancedb/src/remote/db.rs index 7f88e56ab..84f556b4a 100644 --- a/rust/lancedb/src/remote/db.rs +++ b/rust/lancedb/src/remote/db.rs @@ -72,6 +72,10 @@ impl ServerVersion { pub fn support_structural_fts(&self) -> bool { self.0 >= semver::Version::new(0, 3, 0) } + + pub fn support_multipart_write(&self) -> bool { + self.0 >= semver::Version::new(0, 4, 0) + } } pub const OPT_REMOTE_PREFIX: &str = "remote_database_"; @@ -807,12 +811,7 @@ impl RemoteOptions { impl From for RemoteOptions { fn from(options: StorageOptions) -> Self { - let supported_opts = vec![ - "account_name", - "azure_storage_account_name", - "azure_client_id", - "azure_tenant_id", - ]; + let supported_opts = vec!["account_name", "azure_storage_account_name"]; let mut filtered = HashMap::new(); for opt in supported_opts { if let Some(v) = options.0.get(opt) { diff --git a/rust/lancedb/src/remote/table.rs b/rust/lancedb/src/remote/table.rs index 6d094d71f..315c71463 100644 --- a/rust/lancedb/src/remote/table.rs +++ b/rust/lancedb/src/remote/table.rs @@ -5,11 +5,13 @@ pub mod insert; use self::insert::RemoteInsertExec; use crate::expr::expr_to_sql_string; +use crate::table::write_progress::FinishOnDrop; use super::ARROW_STREAM_CONTENT_TYPE; use super::client::RequestResultExt; use super::client::{HttpSend, RestfulLanceDbClient, Sender}; use super::db::ServerVersion; +use crate::data::scannable::{PeekedScannable, Scannable, estimate_write_partitions}; use crate::index::Index; use crate::index::IndexStatistics; use crate::index::waiter::wait_for_index; @@ -23,7 +25,7 @@ use crate::table::MergeResult; use crate::table::Tags; use crate::table::UpdateResult; use crate::table::query::create_multi_vector_plan; -use crate::table::{AnyQuery, Filter, TableStatistics}; +use crate::table::{AnyQuery, Filter, PreprocessingOutput, TableStatistics}; use crate::utils::background_cache::BackgroundCache; use crate::utils::{supported_btree_data_type, supported_vector_data_type}; use crate::{DistanceType, Error}; @@ -43,7 +45,7 @@ use async_trait::async_trait; use datafusion_common::DataFusionError; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::{ExecutionPlan, RecordBatchStream, SendableRecordBatchStream}; -use futures::TryStreamExt; +use futures::{StreamExt, TryStreamExt}; use http::header::CONTENT_TYPE; use http::{HeaderName, StatusCode}; use lance::arrow::json::{JsonDataType, JsonSchema}; @@ -614,6 +616,66 @@ impl RemoteTable { Ok(bodies) } + async fn create_multipart_write(&self) -> Result { + let request = self.client.post(&format!( + "/v1/table/{}/multipart_write/create", + self.identifier + )); + let (request_id, response) = self.send(request, true).await?; + let response = self.check_table_response(&request_id, response).await?; + let body = response.text().await.err_to_http(request_id.clone())?; + let parsed: serde_json::Value = serde_json::from_str(&body).map_err(|e| Error::Http { + source: format!("Failed to parse multipart create response: {}", e).into(), + request_id, + status_code: None, + })?; + parsed["upload_id"] + .as_str() + .map(|s| s.to_string()) + .ok_or_else(|| Error::Http { + source: "Missing upload_id in multipart create response".into(), + request_id: String::new(), + status_code: None, + }) + } + + async fn complete_multipart_write(&self, upload_id: &str) -> Result { + let request = self + .client + .post(&format!( + "/v1/table/{}/multipart_write/complete", + self.identifier + )) + .query(&[("upload_id", upload_id)]); + let (request_id, response) = self.send(request, true).await?; + let response = self.check_table_response(&request_id, response).await?; + let body = response.text().await.err_to_http(request_id.clone())?; + let parsed: serde_json::Value = serde_json::from_str(&body).map_err(|e| Error::Http { + source: format!("Failed to parse multipart complete response: {}", e).into(), + request_id, + status_code: None, + })?; + let version = parsed["version"].as_u64().ok_or_else(|| Error::Http { + source: "Missing version in multipart complete response".into(), + request_id: String::new(), + status_code: None, + })?; + Ok(AddResult { version }) + } + + async fn abort_multipart_write(&self, upload_id: &str) -> Result<()> { + let request = self + .client + .post(&format!( + "/v1/table/{}/multipart_write/abort", + self.identifier + )) + .query(&[("upload_id", upload_id)]); + let (request_id, response) = self.send(request, true).await?; + self.check_table_response(&request_id, response).await?; + Ok(()) + } + async fn check_mutable(&self) -> Result<()> { let read_guard = self.version.read().await; match *read_guard { @@ -817,6 +879,19 @@ mod test_utils { } pub fn new_mock_with_config(name: String, handler: F, config: ClientConfig) -> Self + where + F: Fn(reqwest::Request) -> http::Response + Send + Sync + 'static, + T: Into, + { + Self::new_mock_with_version_and_config(name, handler, None, config) + } + + pub fn new_mock_with_version_and_config( + name: String, + handler: F, + version: Option, + config: ClientConfig, + ) -> Self where F: Fn(reqwest::Request) -> http::Response + Send + Sync + 'static, T: Into, @@ -827,7 +902,7 @@ mod test_utils { name: name.clone(), namespace: vec![], identifier: name, - server_version: ServerVersion::default(), + server_version: version.map(ServerVersion).unwrap_or_default(), version: RwLock::new(None), location: RwLock::new(None), schema_cache: BackgroundCache::new(SCHEMA_CACHE_TTL, SCHEMA_CACHE_REFRESH_WINDOW), @@ -836,6 +911,197 @@ mod test_utils { } } +impl RemoteTable { + fn is_retryable_write_error(&self, err: &Error) -> bool { + match err { + Error::Http { + source, + status_code, + .. + } => { + // Don't retry read errors (is_body/is_decode): the + // server may have committed the write already, and + // without an idempotency key we'd duplicate data. + source + .downcast_ref::() + .is_some_and(|e| e.is_connect()) + || status_code.is_some_and(|s| self.client.retry_config.statuses.contains(&s)) + } + // send_with_retry exhausted its internal retries on a retryable + // status. The outer loop can still retry the whole operation with + // a fresh session. + Error::Retry { status_code, .. } => { + status_code.is_some_and(|s| self.client.retry_config.statuses.contains(&s)) + } + _ => false, + } + } + + async fn add_single_partition(&self, output: PreprocessingOutput) -> Result { + use crate::remote::retry::RetryCounter; + + let _guard = output.tracker.as_ref().map(|t| t.track_task()); + + let mut insert: Arc = Arc::new(RemoteInsertExec::new( + self.name.clone(), + self.identifier.clone(), + self.client.clone(), + output.plan, + output.overwrite, + output.tracker.clone(), + )); + + let mut retry_counter = + RetryCounter::new(&self.client.retry_config, uuid::Uuid::new_v4().to_string()); + + loop { + let stream = execute_plan(insert.clone(), Default::default())?; + let result: Result> = stream.try_collect().await.map_err(Error::from); + + match result { + Ok(_) => { + let add_result = insert + .as_any() + .downcast_ref::>() + .and_then(|i| i.add_result()) + .unwrap_or(AddResult { version: 0 }); + + if output.overwrite { + self.invalidate_schema_cache(); + } + + return Ok(add_result); + } + Err(err) if output.rescannable && self.is_retryable_write_error(&err) => { + retry_counter.increment_from_error(err)?; + tokio::time::sleep(retry_counter.next_sleep_time()).await; + insert = insert.reset_state()?; + continue; + } + Err(err) => return Err(err), + } + } + } + + async fn add_multipart( + &self, + output: PreprocessingOutput, + num_partitions: usize, + ) -> Result { + use crate::remote::retry::RetryCounter; + + let mut retry_counter = + RetryCounter::new(&self.client.retry_config, uuid::Uuid::new_v4().to_string()); + + loop { + let upload_id = self.create_multipart_write().await?; + + let result = self + .execute_multipart_inserts(&upload_id, &output, num_partitions) + .await; + + match result { + Ok(()) => match self.complete_multipart_write(&upload_id).await { + Ok(result) => { + if output.overwrite { + self.invalidate_schema_cache(); + } + return Ok(result); + } + Err(e) => { + if let Err(abort_err) = self.abort_multipart_write(&upload_id).await { + log::warn!( + "Failed to abort multipart write {}: {}", + upload_id, + abort_err + ); + } + if output.rescannable && self.is_retryable_write_error(&e) { + retry_counter.increment_from_error(e)?; + tokio::time::sleep(retry_counter.next_sleep_time()).await; + continue; + } + return Err(e); + } + }, + Err(e) => { + if let Err(abort_err) = self.abort_multipart_write(&upload_id).await { + log::warn!( + "Failed to abort multipart write {}: {}", + upload_id, + abort_err + ); + } + if output.rescannable && self.is_retryable_write_error(&e) { + retry_counter.increment_from_error(e)?; + tokio::time::sleep(retry_counter.next_sleep_time()).await; + continue; + } + return Err(e); + } + } + } + } + + async fn execute_multipart_inserts( + &self, + upload_id: &str, + output: &PreprocessingOutput, + num_partitions: usize, + ) -> Result<()> { + debug_assert!( + output.rescannable, + "multipart inserts require rescannable input for retry support" + ); + + let plan = Arc::new( + datafusion_physical_plan::repartition::RepartitionExec::try_new( + output.plan.clone(), + datafusion_physical_plan::Partitioning::RoundRobinBatch(num_partitions), + )?, + ) as Arc; + + let insert = Arc::new(RemoteInsertExec::new_multipart( + self.name.clone(), + self.identifier.clone(), + self.client.clone(), + plan, + output.overwrite, + upload_id.to_string(), + output.tracker.clone(), + )); + + let task_ctx = Arc::new(datafusion_execution::TaskContext::default()); + let tracker = output.tracker.clone(); + let mut join_set = tokio::task::JoinSet::new(); + for partition in 0..num_partitions { + let exec = insert.clone(); + let ctx = task_ctx.clone(); + let tracker = tracker.clone(); + join_set.spawn(async move { + let _guard = tracker.as_ref().map(|t| t.track_task()); + let mut stream = exec + .execute(partition, ctx) + .map_err(|e| -> Error { e.into() })?; + while let Some(batch) = stream.next().await { + batch.map_err(|e| -> Error { e.into() })?; + } + Ok::<_, Error>(()) + }); + } + + // JoinSet aborts all remaining tasks when dropped, so if we return + // early on error the orphaned tasks are automatically cancelled. + while let Some(result) = join_set.join_next().await { + result.map_err(|e| Error::Runtime { + message: format!("Insert task panicked: {}", e), + })??; + } + + Ok(()) + } +} + #[async_trait] impl BaseTable for RemoteTable { fn as_any(&self) -> &dyn std::any::Any { @@ -986,74 +1252,49 @@ impl BaseTable for RemoteTable { status_code: None, }) } - async fn add(&self, add: AddDataBuilder) -> Result { - use crate::remote::retry::RetryCounter; - + async fn add(&self, mut add: AddDataBuilder) -> Result { self.check_mutable().await?; let table_schema = self.schema().await?; let table_def = TableDefinition::try_from_rich_schema(table_schema.clone())?; + + let num_partitions = if let Some(parallelism) = add.write_parallelism { + if parallelism > 1 && self.server_version.support_multipart_write() { + parallelism + } else { + 1 + } + } else if self.server_version.support_multipart_write() { + // Peek at the first batch to estimate write partitions, same as NativeTable. + let mut peeked = PeekedScannable::new(add.data); + let n = if let Some(first_batch) = peeked.peek().await { + let max_partitions = lance_core::utils::tokio::get_num_compute_intensive_cpus(); + estimate_write_partitions( + first_batch.get_array_memory_size(), + first_batch.num_rows(), + peeked.num_rows(), + max_partitions, + ) + } else { + 1 + }; + add.data = Box::new(peeked); + n + } else { + 1 + }; + let output = add.into_plan(&table_schema, &table_def)?; - let mut insert: Arc = Arc::new(RemoteInsertExec::new( - self.name.clone(), - self.identifier.clone(), - self.client.clone(), - output.plan, - output.overwrite, - )); + if let Some(ref t) = output.tracker { + t.set_total_tasks(num_partitions); + } + let _finish = FinishOnDrop(output.tracker.clone()); - let mut retry_counter = - RetryCounter::new(&self.client.retry_config, uuid::Uuid::new_v4().to_string()); - - loop { - let stream = execute_plan(insert.clone(), Default::default())?; - let result: Result> = stream.try_collect().await.map_err(Error::from); - - match result { - Ok(_) => { - let add_result = insert - .as_any() - .downcast_ref::>() - .and_then(|i| i.add_result()) - .unwrap_or(AddResult { version: 0 }); - - if output.overwrite { - self.invalidate_schema_cache(); - } - - return Ok(add_result); - } - Err(err) if output.rescannable => { - let retryable = match &err { - Error::Http { - source, - status_code, - .. - } => { - // Don't retry read errors (is_body/is_decode): the - // server may have committed the write already, and - // without an idempotency key we'd duplicate data. - source - .downcast_ref::() - .is_some_and(|e| e.is_connect()) - || status_code - .is_some_and(|s| self.client.retry_config.statuses.contains(&s)) - } - _ => false, - }; - - if retryable { - retry_counter.increment_from_error(err)?; - tokio::time::sleep(retry_counter.next_sleep_time()).await; - insert = insert.reset_state()?; - continue; - } - - return Err(err); - } - Err(err) => return Err(err), - } + if num_partitions > 1 { + self.add_multipart(output, num_partitions).await + } else { + self.add_single_partition(output).await } } @@ -1756,6 +1997,7 @@ impl BaseTable for RemoteTable { self.client.clone(), input, overwrite, + None, ))) } } @@ -1815,6 +2057,7 @@ mod tests { use super::*; + use crate::remote::client::{ClientConfig, RetryConfig}; use crate::table::AddDataMode; use arrow::{array::AsArray, compute::concat_batches, datatypes::Int32Type}; @@ -4872,4 +5115,587 @@ mod tests { assert_eq!(data.len(), 1); assert_eq!(data[0].as_ref().unwrap(), &expected_data); } + + fn schema_json() -> &'static str { + r#"{"fields": [{"name": "id", "type": {"type": "int32"}, "nullable": true}]}"# + } + + fn simple_describe_response() -> http::Response { + http::Response::builder() + .status(200) + .body(format!(r#"{{"version": 1, "schema": {}}}"#, schema_json())) + .unwrap() + } + + #[tokio::test] + async fn test_multipart_write_happy_path() { + use std::sync::Mutex; + + let create_count = Arc::new(AtomicUsize::new(0)); + let insert_count = Arc::new(AtomicUsize::new(0)); + let complete_count = Arc::new(AtomicUsize::new(0)); + let abort_count = Arc::new(AtomicUsize::new(0)); + let upload_ids = Arc::new(Mutex::new(Vec::::new())); + + let create_count_c = create_count.clone(); + let insert_count_c = insert_count.clone(); + let complete_count_c = complete_count.clone(); + let abort_count_c = abort_count.clone(); + let upload_ids_c = upload_ids.clone(); + + let table = Table::new_with_handler_version( + "my_table", + semver::Version::new(0, 4, 0), + move |request| { + let path = request.url().path(); + let query = request.url().query().unwrap_or(""); + + if path == "/v1/table/my_table/describe/" { + return simple_describe_response(); + } + + if path == "/v1/table/my_table/multipart_write/create" { + create_count_c.fetch_add(1, Ordering::SeqCst); + return http::Response::builder() + .status(200) + .body(r#"{"upload_id": "test-upload-123"}"#.to_string()) + .unwrap(); + } + + if path == "/v1/table/my_table/insert/" { + insert_count_c.fetch_add(1, Ordering::SeqCst); + let uid = url::form_urlencoded::parse(query.as_bytes()) + .find(|(k, _)| k == "upload_id") + .map(|(_, v)| v.to_string()); + upload_ids_c + .lock() + .unwrap() + .push(uid.expect("missing upload_id on insert")); + return http::Response::builder() + .status(200) + .body(r#"{"version": 1}"#.to_string()) + .unwrap(); + } + + if path == "/v1/table/my_table/multipart_write/complete" { + complete_count_c.fetch_add(1, Ordering::SeqCst); + let uid = url::form_urlencoded::parse(query.as_bytes()) + .find(|(k, _)| k == "upload_id") + .map(|(_, v)| v.to_string()); + upload_ids_c + .lock() + .unwrap() + .push(uid.expect("missing upload_id on complete")); + return http::Response::builder() + .status(200) + .body(r#"{"version": 5}"#.to_string()) + .unwrap(); + } + + if path == "/v1/table/my_table/multipart_write/abort" { + abort_count_c.fetch_add(1, Ordering::SeqCst); + return http::Response::builder() + .status(200) + .body(String::new()) + .unwrap(); + } + + panic!("Unexpected request path: {}", path); + }, + ); + + let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap(); + let result = table + .add(vec![batch]) + .write_parallelism(2) + .execute() + .await + .unwrap(); + + assert_eq!(result.version, 5); + assert_eq!(create_count.load(Ordering::SeqCst), 1); + assert!( + insert_count.load(Ordering::SeqCst) > 1, + "Expected multiple insert calls, got {}", + insert_count.load(Ordering::SeqCst) + ); + assert_eq!(complete_count.load(Ordering::SeqCst), 1); + assert_eq!(abort_count.load(Ordering::SeqCst), 0); + + let ids = upload_ids.lock().unwrap(); + assert!( + ids.iter().all(|id| id == "test-upload-123"), + "All requests should use the same upload_id, got: {:?}", + *ids + ); + } + + #[tokio::test] + async fn test_multipart_write_progress() { + let callback_count = Arc::new(AtomicUsize::new(0)); + let max_active = Arc::new(AtomicUsize::new(0)); + let last_total_tasks = Arc::new(AtomicUsize::new(0)); + let seen_done = Arc::new(std::sync::Mutex::new(false)); + + let cb_count = callback_count.clone(); + let cb_active = max_active.clone(); + let cb_total = last_total_tasks.clone(); + let cb_done = seen_done.clone(); + + let table = Table::new_with_handler_version( + "my_table", + semver::Version::new(0, 4, 0), + move |request| { + let path = request.url().path(); + + if path == "/v1/table/my_table/describe/" { + return simple_describe_response(); + } + if path == "/v1/table/my_table/multipart_write/create" { + return http::Response::builder() + .status(200) + .body(r#"{"upload_id": "prog-upload"}"#.to_string()) + .unwrap(); + } + if path == "/v1/table/my_table/insert/" { + return http::Response::builder() + .status(200) + .body(r#"{"version": 1}"#.to_string()) + .unwrap(); + } + if path == "/v1/table/my_table/multipart_write/complete" { + return http::Response::builder() + .status(200) + .body(r#"{"version": 3}"#.to_string()) + .unwrap(); + } + panic!("Unexpected request path: {}", path); + }, + ); + + let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap(); + table + .add(vec![batch]) + .write_parallelism(2) + .progress(move |p| { + cb_count.fetch_add(1, Ordering::SeqCst); + cb_active.fetch_max(p.active_tasks(), Ordering::SeqCst); + cb_total.store(p.total_tasks(), Ordering::SeqCst); + if p.done() { + *cb_done.lock().unwrap() = true; + } + }) + .execute() + .await + .unwrap(); + + assert!( + callback_count.load(Ordering::SeqCst) >= 1, + "expected at least one progress callback" + ); + assert!(*seen_done.lock().unwrap(), "must see done=true"); + assert_eq!(last_total_tasks.load(Ordering::SeqCst), 2); + assert!( + max_active.load(Ordering::SeqCst) >= 1, + "expected at least one active task" + ); + } + + #[tokio::test] + async fn test_multipart_write_fallback_old_server() { + let insert_count = Arc::new(AtomicUsize::new(0)); + let create_count = Arc::new(AtomicUsize::new(0)); + + let insert_count_c = insert_count.clone(); + let create_count_c = create_count.clone(); + + // Server version 0.3.0 does not support multipart writes + let table = Table::new_with_handler_version( + "my_table", + semver::Version::new(0, 3, 0), + move |request| { + let path = request.url().path(); + + if path == "/v1/table/my_table/describe/" { + return simple_describe_response(); + } + + if path.contains("multipart_write") { + create_count_c.fetch_add(1, Ordering::SeqCst); + panic!("Should not call multipart write endpoints on old server"); + } + + if path == "/v1/table/my_table/insert/" { + let query = request.url().query().unwrap_or(""); + assert!( + !query.contains("upload_id"), + "Should not have upload_id for old server" + ); + insert_count_c.fetch_add(1, Ordering::SeqCst); + return http::Response::builder() + .status(200) + .body(r#"{"version": 2}"#.to_string()) + .unwrap(); + } + + panic!("Unexpected request path: {}", path); + }, + ); + + let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap(); + let result = table + .add(vec![batch]) + .write_parallelism(2) + .execute() + .await + .unwrap(); + + assert_eq!(result.version, 2); + assert_eq!(create_count.load(Ordering::SeqCst), 0); + assert_eq!(insert_count.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn test_multipart_write_small_data_single_partition() { + let insert_count = Arc::new(AtomicUsize::new(0)); + let create_count = Arc::new(AtomicUsize::new(0)); + + let insert_count_c = insert_count.clone(); + let create_count_c = create_count.clone(); + + let table = Table::new_with_handler_version( + "my_table", + semver::Version::new(0, 4, 0), + move |request| { + let path = request.url().path(); + + if path == "/v1/table/my_table/describe/" { + return simple_describe_response(); + } + + if path.contains("multipart_write") { + create_count_c.fetch_add(1, Ordering::SeqCst); + panic!("Should not call multipart write endpoints for small data"); + } + + if path == "/v1/table/my_table/insert/" { + let query = request.url().query().unwrap_or(""); + assert!( + !query.contains("upload_id"), + "Should not have upload_id for small data" + ); + insert_count_c.fetch_add(1, Ordering::SeqCst); + return http::Response::builder() + .status(200) + .body(r#"{"version": 2}"#.to_string()) + .unwrap(); + } + + panic!("Unexpected request path: {}", path); + }, + ); + + // Small data: only 3 rows + let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap(); + let result = table.add(vec![batch]).execute().await.unwrap(); + + assert_eq!(result.version, 2); + assert_eq!(create_count.load(Ordering::SeqCst), 0); + assert_eq!(insert_count.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn test_multipart_write_abort_on_insert_failure() { + let create_count = Arc::new(AtomicUsize::new(0)); + let insert_count = Arc::new(AtomicUsize::new(0)); + let complete_count = Arc::new(AtomicUsize::new(0)); + let abort_count = Arc::new(AtomicUsize::new(0)); + + let create_count_c = create_count.clone(); + let insert_count_c = insert_count.clone(); + let complete_count_c = complete_count.clone(); + let abort_count_c = abort_count.clone(); + + let table = Table::new_with_handler_version( + "my_table", + semver::Version::new(0, 4, 0), + move |request| { + let path = request.url().path(); + + if path == "/v1/table/my_table/describe/" { + return simple_describe_response(); + } + + if path == "/v1/table/my_table/multipart_write/create" { + create_count_c.fetch_add(1, Ordering::SeqCst); + return http::Response::builder() + .status(200) + .body(r#"{"upload_id": "test-upload-456"}"#.to_string()) + .unwrap(); + } + + if path == "/v1/table/my_table/insert/" { + let count = insert_count_c.fetch_add(1, Ordering::SeqCst); + // Fail on the first insert with non-retryable status + if count == 0 { + return http::Response::builder() + .status(400) + .body("Bad Request".to_string()) + .unwrap(); + } + return http::Response::builder() + .status(200) + .body(r#"{"version": 1}"#.to_string()) + .unwrap(); + } + + if path == "/v1/table/my_table/multipart_write/complete" { + complete_count_c.fetch_add(1, Ordering::SeqCst); + return http::Response::builder() + .status(200) + .body(r#"{"version": 5}"#.to_string()) + .unwrap(); + } + + if path == "/v1/table/my_table/multipart_write/abort" { + abort_count_c.fetch_add(1, Ordering::SeqCst); + return http::Response::builder() + .status(200) + .body(String::new()) + .unwrap(); + } + + panic!("Unexpected request path: {}", path); + }, + ); + + let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap(); + let result = table.add(vec![batch]).write_parallelism(2).execute().await; + + assert!(result.is_err()); + assert_eq!(create_count.load(Ordering::SeqCst), 1); + assert_eq!(complete_count.load(Ordering::SeqCst), 0); + assert_eq!(abort_count.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn test_multipart_write_abort_on_complete_failure() { + let abort_count = Arc::new(AtomicUsize::new(0)); + let abort_count_c = abort_count.clone(); + + let table = Table::new_with_handler_version( + "my_table", + semver::Version::new(0, 4, 0), + move |request| { + let path = request.url().path(); + + if path == "/v1/table/my_table/describe/" { + return simple_describe_response(); + } + + if path == "/v1/table/my_table/multipart_write/create" { + return http::Response::builder() + .status(200) + .body(r#"{"upload_id": "test-upload-789"}"#.to_string()) + .unwrap(); + } + + if path == "/v1/table/my_table/insert/" { + return http::Response::builder() + .status(200) + .body(r#"{"version": 1}"#.to_string()) + .unwrap(); + } + + if path == "/v1/table/my_table/multipart_write/complete" { + return http::Response::builder() + .status(400) + .body("Bad Request".to_string()) + .unwrap(); + } + + if path == "/v1/table/my_table/multipart_write/abort" { + abort_count_c.fetch_add(1, Ordering::SeqCst); + return http::Response::builder() + .status(200) + .body(String::new()) + .unwrap(); + } + + panic!("Unexpected request path: {}", path); + }, + ); + + let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap(); + let result = table.add(vec![batch]).write_parallelism(2).execute().await; + + assert!(result.is_err()); + assert_eq!(abort_count.load(Ordering::SeqCst), 1); + } + + fn retry_config_no_backoff() -> ClientConfig { + ClientConfig { + retry_config: RetryConfig { + retries: Some(3), + connect_retries: Some(3), + read_retries: Some(3), + backoff_factor: Some(0.0), + backoff_jitter: Some(0.0), + statuses: Some(vec![502, 503]), + }, + ..Default::default() + } + } + + #[tokio::test] + async fn test_multipart_write_retry_on_partition_failure() { + // All inserts for the first upload session return 503 (retryable). + // After exhausting internal retries, the outer loop retries with a + // new session and succeeds. + let create_count = Arc::new(AtomicUsize::new(0)); + let complete_count = Arc::new(AtomicUsize::new(0)); + let abort_count = Arc::new(AtomicUsize::new(0)); + + let create_count_c = create_count.clone(); + let complete_count_c = complete_count.clone(); + let abort_count_c = abort_count.clone(); + + let table = Table::new_with_handler_version_and_config( + "my_table", + semver::Version::new(0, 4, 0), + move |request| { + let path = request.url().path(); + let query = request.url().query().unwrap_or(""); + + if path == "/v1/table/my_table/describe/" { + return simple_describe_response(); + } + + if path == "/v1/table/my_table/multipart_write/create" { + let n = create_count_c.fetch_add(1, Ordering::SeqCst); + let body = format!(r#"{{"upload_id": "upload-{}"}}"#, n + 1); + return http::Response::builder().status(200).body(body).unwrap(); + } + + if path == "/v1/table/my_table/insert/" { + // Fail all inserts for the first session + if query.contains("upload_id=upload-1") { + return http::Response::builder() + .status(503) + .body("Service Unavailable".to_string()) + .unwrap(); + } + return http::Response::builder() + .status(200) + .body(r#"{"version": 1}"#.to_string()) + .unwrap(); + } + + if path == "/v1/table/my_table/multipart_write/complete" { + complete_count_c.fetch_add(1, Ordering::SeqCst); + return http::Response::builder() + .status(200) + .body(r#"{"version": 7}"#.to_string()) + .unwrap(); + } + + if path == "/v1/table/my_table/multipart_write/abort" { + abort_count_c.fetch_add(1, Ordering::SeqCst); + return http::Response::builder() + .status(200) + .body(String::new()) + .unwrap(); + } + + panic!("Unexpected request path: {}", path); + }, + retry_config_no_backoff(), + ); + + let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap(); + let result = table + .add(vec![batch]) + .write_parallelism(2) + .execute() + .await + .unwrap(); + + assert_eq!(result.version, 7); + assert_eq!(create_count.load(Ordering::SeqCst), 2); + assert_eq!(abort_count.load(Ordering::SeqCst), 1); + assert_eq!(complete_count.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn test_multipart_write_retry_on_complete_failure() { + // Complete returns 503 for the first session, succeeds for the second. + let create_count = Arc::new(AtomicUsize::new(0)); + let abort_count = Arc::new(AtomicUsize::new(0)); + + let create_count_c = create_count.clone(); + let abort_count_c = abort_count.clone(); + + let table = Table::new_with_handler_version_and_config( + "my_table", + semver::Version::new(0, 4, 0), + move |request| { + let path = request.url().path(); + let query = request.url().query().unwrap_or(""); + + if path == "/v1/table/my_table/describe/" { + return simple_describe_response(); + } + + if path == "/v1/table/my_table/multipart_write/create" { + let n = create_count_c.fetch_add(1, Ordering::SeqCst); + let body = format!(r#"{{"upload_id": "upload-{}"}}"#, n + 1); + return http::Response::builder().status(200).body(body).unwrap(); + } + + if path == "/v1/table/my_table/insert/" { + return http::Response::builder() + .status(200) + .body(r#"{"version": 1}"#.to_string()) + .unwrap(); + } + + if path == "/v1/table/my_table/multipart_write/complete" { + // Fail complete for first session + if query.contains("upload_id=upload-1") { + return http::Response::builder() + .status(503) + .body("Service Unavailable".to_string()) + .unwrap(); + } + return http::Response::builder() + .status(200) + .body(r#"{"version": 9}"#.to_string()) + .unwrap(); + } + + if path == "/v1/table/my_table/multipart_write/abort" { + abort_count_c.fetch_add(1, Ordering::SeqCst); + return http::Response::builder() + .status(200) + .body(String::new()) + .unwrap(); + } + + panic!("Unexpected request path: {}", path); + }, + retry_config_no_backoff(), + ); + + let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap(); + let result = table + .add(vec![batch]) + .write_parallelism(2) + .execute() + .await + .unwrap(); + + assert_eq!(result.version, 9); + assert_eq!(create_count.load(Ordering::SeqCst), 2); + assert_eq!(abort_count.load(Ordering::SeqCst), 1); + } } diff --git a/rust/lancedb/src/remote/table/insert.rs b/rust/lancedb/src/remote/table/insert.rs index c8637281e..8aec28609 100644 --- a/rust/lancedb/src/remote/table/insert.rs +++ b/rust/lancedb/src/remote/table/insert.rs @@ -11,10 +11,14 @@ use arrow_ipc::CompressionType; use datafusion_common::{DataFusionError, Result as DataFusionResult}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr::EquivalenceProperties; +use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; -use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties}; +use datafusion_physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties, +}; use futures::StreamExt; use http::header::CONTENT_TYPE; +use lance::io::exec::utils::InstrumentedRecordBatchStreamAdapter; use crate::Error; use crate::remote::ARROW_STREAM_CONTENT_TYPE; @@ -22,13 +26,16 @@ use crate::remote::client::{HttpSend, RestfulLanceDbClient, Sender}; use crate::remote::table::RemoteTable; use crate::table::AddResult; use crate::table::datafusion::insert::COUNT_SCHEMA; +use crate::table::write_progress::WriteProgressTracker; /// ExecutionPlan for inserting data into a remote LanceDB table. /// -/// This plan: -/// 1. Requires single partition (no parallel remote inserts yet) -/// 2. Streams data as Arrow IPC to `/v1/table/{id}/insert/` endpoint -/// 3. Stores AddResult for retrieval after execution +/// Streams data as Arrow IPC to `/v1/table/{id}/insert/` endpoint. +/// +/// When `upload_id` is set, inserts are staged as part of a multipart write +/// session and the plan supports multiple partitions for parallel uploads. +/// Without `upload_id`, the plan requires a single partition and commits +/// immediately. #[derive(Debug)] pub struct RemoteInsertExec { table_name: String, @@ -38,21 +45,69 @@ pub struct RemoteInsertExec { overwrite: bool, properties: PlanProperties, add_result: Arc>>, + metrics: ExecutionPlanMetricsSet, + upload_id: Option, + tracker: Option>, } impl RemoteInsertExec { - /// Create a new RemoteInsertExec. + /// Create a new single-partition RemoteInsertExec. pub fn new( table_name: String, identifier: String, client: RestfulLanceDbClient, input: Arc, overwrite: bool, + tracker: Option>, ) -> Self { + Self::new_inner( + table_name, identifier, client, input, overwrite, None, tracker, + ) + } + + /// Create a multi-partition RemoteInsertExec for use with multipart writes. + /// + /// Each partition's insert is staged under the given `upload_id` without + /// committing. The caller is responsible for calling the complete (or abort) + /// endpoint after all partitions finish. + pub fn new_multipart( + table_name: String, + identifier: String, + client: RestfulLanceDbClient, + input: Arc, + overwrite: bool, + upload_id: String, + tracker: Option>, + ) -> Self { + Self::new_inner( + table_name, + identifier, + client, + input, + overwrite, + Some(upload_id), + tracker, + ) + } + + fn new_inner( + table_name: String, + identifier: String, + client: RestfulLanceDbClient, + input: Arc, + overwrite: bool, + upload_id: Option, + tracker: Option>, + ) -> Self { + let num_partitions = if upload_id.is_some() { + input.output_partitioning().partition_count() + } else { + 1 + }; let schema = COUNT_SCHEMA.clone(); let properties = PlanProperties::new( EquivalenceProperties::new(schema), - datafusion_physical_plan::Partitioning::UnknownPartitioning(1), + datafusion_physical_plan::Partitioning::UnknownPartitioning(num_partitions), datafusion_physical_plan::execution_plan::EmissionType::Final, datafusion_physical_plan::execution_plan::Boundedness::Bounded, ); @@ -65,6 +120,9 @@ impl RemoteInsertExec { overwrite, properties, add_result: Arc::new(Mutex::new(None)), + metrics: ExecutionPlanMetricsSet::new(), + upload_id, + tracker, } } @@ -72,7 +130,10 @@ impl RemoteInsertExec { // TODO: this will be used when we wire this up to Table::add(). #[allow(dead_code)] pub fn add_result(&self) -> Option { - self.add_result.lock().unwrap().clone() + self.add_result + .lock() + .unwrap_or_else(|e| e.into_inner()) + .clone() } /// Stream the input into an HTTP body as an Arrow IPC stream, capturing any @@ -83,6 +144,7 @@ impl RemoteInsertExec { fn stream_as_http_body( data: SendableRecordBatchStream, error_tx: tokio::sync::oneshot::Sender, + tracker: Option>, ) -> DataFusionResult { let options = arrow_ipc::writer::IpcWriteOptions::default() .try_with_compression(Some(CompressionType::LZ4_FRAME))?; @@ -94,37 +156,46 @@ impl RemoteInsertExec { let stream = futures::stream::try_unfold( (data, writer, Some(error_tx), false), - move |(mut data, mut writer, error_tx, finished)| async move { - if finished { - return Ok(None); - } - match data.next().await { - Some(Ok(batch)) => { - writer - .write(&batch) - .map_err(|e| std::io::Error::other(e.to_string()))?; - let buffer = std::mem::take(writer.get_mut()); - Ok(Some((buffer, (data, writer, error_tx, false)))) + move |(mut data, mut writer, error_tx, finished)| { + let tracker = tracker.clone(); + async move { + if finished { + return Ok(None); } - Some(Err(e)) => { - // Send the original error through the channel before - // returning a generic error to reqwest. - if let Some(tx) = error_tx { - let _ = tx.send(e); + match data.next().await { + Some(Ok(batch)) => { + writer + .write(&batch) + .map_err(|e| std::io::Error::other(e.to_string()))?; + let buffer = std::mem::take(writer.get_mut()); + if let Some(ref t) = tracker { + t.record_bytes(buffer.len()); + } + Ok(Some((buffer, (data, writer, error_tx, false)))) } - Err(std::io::Error::other( - "input stream error (see error channel)", - )) - } - None => { - writer - .finish() - .map_err(|e| std::io::Error::other(e.to_string()))?; - let buffer = std::mem::take(writer.get_mut()); - if buffer.is_empty() { - Ok(None) - } else { - Ok(Some((buffer, (data, writer, None, true)))) + Some(Err(e)) => { + // Send the original error through the channel before + // returning a generic error to reqwest. + if let Some(tx) = error_tx { + let _ = tx.send(e); + } + Err(std::io::Error::other( + "input stream error (see error channel)", + )) + } + None => { + writer + .finish() + .map_err(|e| std::io::Error::other(e.to_string()))?; + let buffer = std::mem::take(writer.get_mut()); + if buffer.is_empty() { + Ok(None) + } else { + if let Some(ref t) = tracker { + t.record_bytes(buffer.len()); + } + Ok(Some((buffer, (data, writer, None, true)))) + } } } } @@ -174,8 +245,11 @@ impl ExecutionPlan for RemoteInsertExec { } fn required_input_distribution(&self) -> Vec { - // Until we have a separate commit endpoint, we need to do all inserts in a single partition - vec![datafusion_physical_plan::Distribution::SinglePartition] + if self.upload_id.is_some() { + vec![datafusion_physical_plan::Distribution::UnspecifiedDistribution] + } else { + vec![datafusion_physical_plan::Distribution::SinglePartition] + } } fn benefits_from_input_partitioning(&self) -> Vec { @@ -191,12 +265,14 @@ impl ExecutionPlan for RemoteInsertExec { "RemoteInsertExec requires exactly one child".to_string(), )); } - Ok(Arc::new(Self::new( + Ok(Arc::new(Self::new_inner( self.table_name.clone(), self.identifier.clone(), self.client.clone(), children[0].clone(), self.overwrite, + self.upload_id.clone(), + self.tracker.clone(), ))) } @@ -205,18 +281,29 @@ impl ExecutionPlan for RemoteInsertExec { partition: usize, context: Arc, ) -> DataFusionResult { - if partition != 0 { + if self.upload_id.is_none() && partition != 0 { return Err(DataFusionError::Internal( - "RemoteInsertExec only supports single partition execution".to_string(), + "RemoteInsertExec only supports single partition execution without upload_id" + .to_string(), )); } - let input_stream = self.input.execute(0, context)?; + let input_stream = self.input.execute(partition, context)?; + let input_schema = input_stream.schema(); + let input_stream: SendableRecordBatchStream = + Box::pin(InstrumentedRecordBatchStreamAdapter::new( + input_schema, + input_stream, + partition, + &self.metrics, + )); let client = self.client.clone(); let identifier = self.identifier.clone(); let overwrite = self.overwrite; let add_result = self.add_result.clone(); let table_name = self.table_name.clone(); + let upload_id = self.upload_id.clone(); + let tracker = self.tracker.clone(); let stream = futures::stream::once(async move { let mut request = client @@ -226,9 +313,12 @@ impl ExecutionPlan for RemoteInsertExec { if overwrite { request = request.query(&[("mode", "overwrite")]); } + if let Some(ref uid) = upload_id { + request = request.query(&[("upload_id", uid.as_str())]); + } let (error_tx, mut error_rx) = tokio::sync::oneshot::channel(); - let body = Self::stream_as_http_body(input_stream, error_tx)?; + let body = Self::stream_as_http_body(input_stream, error_tx, tracker)?; let request = request.body(body); let result: DataFusionResult<(String, _)> = async { @@ -262,32 +352,43 @@ impl ExecutionPlan for RemoteInsertExec { let (request_id, response) = result?; - let body_text = response.text().await.map_err(|e| { - DataFusionError::External(Box::new(Error::Http { - source: Box::new(e), - request_id: request_id.clone(), - status_code: None, - })) - })?; - - let parsed_result = if body_text.trim().is_empty() { - // Backward compatible with old servers - AddResult { version: 0 } - } else { - serde_json::from_str(&body_text).map_err(|e| { + // For multipart writes, the staging response is not the final + // version. Only parse AddResult for non-multipart inserts. + if upload_id.is_none() { + let body_text = response.text().await.map_err(|e| { DataFusionError::External(Box::new(Error::Http { - source: format!("Failed to parse add response: {}", e).into(), + source: Box::new(e), request_id: request_id.clone(), status_code: None, })) - })? - }; + })?; + + let parsed_result = if body_text.trim().is_empty() { + // Backward compatible with old servers + AddResult { version: 0 } + } else { + serde_json::from_str(&body_text).map_err(|e| { + DataFusionError::External(Box::new(Error::Http { + source: format!("Failed to parse add response: {}", e).into(), + request_id: request_id.clone(), + status_code: None, + })) + })? + }; - { let mut res_lock = add_result.lock().map_err(|_| { DataFusionError::Execution("Failed to acquire lock for add_result".to_string()) })?; *res_lock = Some(parsed_result); + } else { + // We don't use the body in this case, but we should still consume it. + let _ = response.bytes().await.map_err(|e| { + DataFusionError::External(Box::new(Error::Http { + source: Box::new(e), + request_id: request_id.clone(), + status_code: None, + })) + })?; } // Return a single batch with count 0 (actual count is tracked in add_result) @@ -301,6 +402,10 @@ impl ExecutionPlan for RemoteInsertExec { stream, ))) } + + fn metrics(&self) -> Option { + Some(self.metrics.clone_inner()) + } } #[cfg(test)] diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index db0636a1c..7eac7463a 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -74,7 +74,10 @@ pub mod optimize; pub mod query; pub mod schema_evolution; pub mod update; +pub mod write_progress; use crate::index::waiter::wait_for_index; +#[cfg(feature = "remote")] +pub(crate) use add_data::PreprocessingOutput; pub use add_data::{AddDataBuilder, AddDataMode, AddResult, NaNVectorBehavior}; pub use chrono::Duration; pub use delete::DeleteResult; @@ -440,6 +443,34 @@ mod test_utils { embedding_registry: Arc::new(MemoryRegistry::new()), } } + + pub fn new_with_handler_version_and_config( + name: impl Into, + version: semver::Version, + handler: impl Fn(reqwest::Request) -> http::Response + Clone + Send + Sync + 'static, + config: crate::remote::ClientConfig, + ) -> Self + where + T: Into, + { + let inner = Arc::new( + crate::remote::table::RemoteTable::new_mock_with_version_and_config( + name.into(), + handler.clone(), + Some(version), + config.clone(), + ), + ); + let database = Arc::new(crate::remote::db::RemoteDatabase::new_mock_with_config( + handler, config, + )); + Self { + inner, + database: Some(database), + // Registry is unused. + embedding_registry: Arc::new(MemoryRegistry::new()), + } + } } } @@ -2198,21 +2229,26 @@ impl BaseTable for NativeTable { let table_schema = Schema::from(&ds.schema().clone()); - // Peek at the first batch to estimate a good partition count for - // write parallelism. - let mut peeked = PeekedScannable::new(add.data); - let num_partitions = if let Some(first_batch) = peeked.peek().await { - let max_partitions = lance_core::utils::tokio::get_num_compute_intensive_cpus(); - estimate_write_partitions( - first_batch.get_array_memory_size(), - first_batch.num_rows(), - peeked.num_rows(), - max_partitions, - ) + let num_partitions = if let Some(parallelism) = add.write_parallelism { + parallelism } else { - 1 + // Peek at the first batch to estimate a good partition count for + // write parallelism. + let mut peeked = PeekedScannable::new(add.data); + let n = if let Some(first_batch) = peeked.peek().await { + let max_partitions = lance_core::utils::tokio::get_num_compute_intensive_cpus(); + estimate_write_partitions( + first_batch.get_array_memory_size(), + first_batch.num_rows(), + peeked.num_rows(), + max_partitions, + ) + } else { + 1 + }; + add.data = Box::new(peeked); + n }; - add.data = Box::new(peeked); let output = add.into_plan(&table_schema, &table_def)?; @@ -2241,13 +2277,21 @@ impl BaseTable for NativeTable { let insert_exec = Arc::new(InsertExec::new(ds_wrapper.clone(), ds, plan, lance_params)); + let tracker_for_tasks = output.tracker.clone(); + if let Some(ref t) = tracker_for_tasks { + t.set_total_tasks(num_partitions); + } + let _finish = write_progress::FinishOnDrop(output.tracker); + // Execute all partitions in parallel. let task_ctx = Arc::new(TaskContext::default()); let handles = FuturesUnordered::new(); for partition in 0..num_partitions { let exec = insert_exec.clone(); let ctx = task_ctx.clone(); + let tracker = tracker_for_tasks.clone(); handles.push(tokio::spawn(async move { + let _guard = tracker.as_ref().map(|t| t.track_task()); let mut stream = exec .execute(partition, ctx) .map_err(|e| -> Error { e.into() })?; diff --git a/rust/lancedb/src/table/add_data.rs b/rust/lancedb/src/table/add_data.rs index 5921c54ea..1c4b4bdf3 100644 --- a/rust/lancedb/src/table/add_data.rs +++ b/rust/lancedb/src/table/add_data.rs @@ -13,6 +13,9 @@ use crate::embeddings::EmbeddingRegistry; use crate::table::datafusion::cast::cast_to_table_schema; use crate::table::datafusion::reject_nan::reject_nan_vectors; use crate::table::datafusion::scannable_exec::ScannableExec; +use crate::table::write_progress::ProgressCallback; +use crate::table::write_progress::WriteProgress; +use crate::table::write_progress::WriteProgressTracker; use crate::{Error, Result}; use super::{BaseTable, TableDefinition, WriteOptions}; @@ -52,6 +55,8 @@ pub struct AddDataBuilder { pub(crate) write_options: WriteOptions, pub(crate) on_nan_vectors: NaNVectorBehavior, pub(crate) embedding_registry: Option>, + pub(crate) progress_callback: Option, + pub(crate) write_parallelism: Option, } impl std::fmt::Debug for AddDataBuilder { @@ -77,6 +82,8 @@ impl AddDataBuilder { write_options: WriteOptions::default(), on_nan_vectors: NaNVectorBehavior::default(), embedding_registry, + progress_callback: None, + write_parallelism: None, } } @@ -101,7 +108,43 @@ impl AddDataBuilder { self } + /// Set a callback to receive progress updates during the add operation. + /// + /// The callback is invoked once per batch written, and once more with + /// [`WriteProgress::done`] set to `true` when the write completes. + /// + /// ``` + /// # use lancedb::Table; + /// # async fn example(table: &Table) -> Result<(), Box> { + /// let batch = arrow_array::record_batch!(("id", Int32, [1, 2, 3])).unwrap(); + /// table.add(batch) + /// .progress(|p| println!("{}/{:?} rows", p.output_rows(), p.total_rows())) + /// .execute() + /// .await?; + /// # Ok(()) + /// # } + /// ``` + pub fn progress(mut self, callback: impl FnMut(&WriteProgress) + Send + 'static) -> Self { + self.progress_callback = Some(Arc::new(std::sync::Mutex::new(callback))); + self + } + + /// Set the number of parallel write streams. + /// + /// By default, the number of streams is estimated from the data size. + /// Setting this to `1` disables parallel writes. + pub fn write_parallelism(mut self, parallelism: usize) -> Self { + self.write_parallelism = Some(parallelism); + self + } + pub async fn execute(self) -> Result { + if self.write_parallelism.map(|p| p == 0).unwrap_or(false) { + return Err(Error::InvalidInput { + message: "write_parallelism must be greater than 0".to_string(), + }); + } + self.parent.clone().add(self).await } @@ -130,8 +173,11 @@ impl AddDataBuilder { scannable_with_embeddings(self.data, table_def, self.embedding_registry.as_ref())?; let rescannable = self.data.rescannable(); + let tracker = self + .progress_callback + .map(|cb| Arc::new(WriteProgressTracker::new(cb, self.data.num_rows()))); let plan: Arc = - Arc::new(ScannableExec::new(self.data)); + Arc::new(ScannableExec::new(self.data, tracker.clone())); // Skip casting when overwriting — the input schema replaces the table schema. let plan = if overwrite { plan @@ -149,6 +195,7 @@ impl AddDataBuilder { rescannable, write_options: self.write_options, mode: self.mode, + tracker, }) } } @@ -161,6 +208,7 @@ pub struct PreprocessingOutput { pub rescannable: bool, pub write_options: WriteOptions, pub mode: AddDataMode, + pub tracker: Option>, } /// Check that the input schema is valid for insert. diff --git a/rust/lancedb/src/table/datafusion/insert.rs b/rust/lancedb/src/table/datafusion/insert.rs index 4c3d66195..51be4abb8 100644 --- a/rust/lancedb/src/table/datafusion/insert.rs +++ b/rust/lancedb/src/table/datafusion/insert.rs @@ -12,13 +12,16 @@ use datafusion_common::{DataFusionError, Result as DataFusionResult}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; +use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties, }; +use futures::TryStreamExt; use lance::Dataset; use lance::dataset::transaction::{Operation, Transaction}; use lance::dataset::{CommitBuilder, InsertBuilder, WriteParams}; +use lance::io::exec::utils::InstrumentedRecordBatchStreamAdapter; use lance_table::format::Fragment; use crate::table::dataset::DatasetConsistencyWrapper; @@ -80,6 +83,7 @@ pub struct InsertExec { write_params: WriteParams, properties: PlanProperties, partial_transactions: Arc>>, + metrics: ExecutionPlanMetricsSet, } impl InsertExec { @@ -105,6 +109,7 @@ impl InsertExec { write_params, properties, partial_transactions: Arc::new(Mutex::new(Vec::with_capacity(num_partitions))), + metrics: ExecutionPlanMetricsSet::new(), } } } @@ -176,6 +181,19 @@ impl ExecutionPlan for InsertExec { let total_partitions = self.input.output_partitioning().partition_count(); let ds_wrapper = self.ds_wrapper.clone(); + let output_bytes = MetricBuilder::new(&self.metrics).output_bytes(partition); + let input_schema = input_stream.schema(); + let input_stream: SendableRecordBatchStream = + Box::pin(InstrumentedRecordBatchStreamAdapter::new( + input_schema, + input_stream.map_ok(move |batch| { + output_bytes.add(batch.get_array_memory_size()); + batch + }), + partition, + &self.metrics, + )); + let stream = futures::stream::once(async move { let transaction = InsertBuilder::new(dataset.clone()) .with_params(&write_params) @@ -186,7 +204,9 @@ impl ExecutionPlan for InsertExec { let to_commit = { // Don't hold the lock over an await point. - let mut txns = partial_transactions.lock().unwrap(); + let mut txns = partial_transactions + .lock() + .unwrap_or_else(|e| e.into_inner()); txns.push(transaction); if txns.len() == total_partitions { Some(std::mem::take(&mut *txns)) @@ -215,6 +235,10 @@ impl ExecutionPlan for InsertExec { stream, ))) } + + fn metrics(&self) -> Option { + Some(self.metrics.clone_inner()) + } } #[cfg(test)] diff --git a/rust/lancedb/src/table/datafusion/scannable_exec.rs b/rust/lancedb/src/table/datafusion/scannable_exec.rs index eb128ac18..a55b6e13f 100644 --- a/rust/lancedb/src/table/datafusion/scannable_exec.rs +++ b/rust/lancedb/src/table/datafusion/scannable_exec.rs @@ -7,17 +7,21 @@ use std::sync::{Arc, Mutex}; use datafusion_common::{DataFusionError, Result as DFResult, Statistics, stats::Precision}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; +use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, execution_plan::EmissionType, }; +use futures::TryStreamExt; +use crate::table::write_progress::WriteProgressTracker; use crate::{arrow::SendableRecordBatchStreamExt, data::scannable::Scannable}; -pub struct ScannableExec { - // We don't require Scannable to by Sync, so we wrap it in a Mutex to allow safe concurrent access. +pub(crate) struct ScannableExec { + // We don't require Scannable to be Sync, so we wrap it in a Mutex to allow safe concurrent access. source: Mutex>, num_rows: Option, properties: PlanProperties, + tracker: Option>, } impl std::fmt::Debug for ScannableExec { @@ -30,7 +34,7 @@ impl std::fmt::Debug for ScannableExec { } impl ScannableExec { - pub fn new(source: Box) -> Self { + pub fn new(source: Box, tracker: Option>) -> Self { let schema = source.schema(); let eq_properties = EquivalenceProperties::new(schema); let properties = PlanProperties::new( @@ -46,6 +50,7 @@ impl ScannableExec { source, num_rows, properties, + tracker, } } } @@ -102,7 +107,18 @@ impl ExecutionPlan for ScannableExec { Err(poison) => poison.into_inner().scan_as_stream(), }; - Ok(stream.into_df_stream()) + let tracker = self.tracker.clone(); + let stream = stream.into_df_stream().map_ok(move |batch| { + if let Some(ref t) = tracker { + t.record_batch(batch.num_rows(), batch.get_array_memory_size()); + } + batch + }); + + Ok(Box::pin(RecordBatchStreamAdapter::new( + self.schema(), + stream, + ))) } fn partition_statistics(&self, _partition: Option) -> DFResult { diff --git a/rust/lancedb/src/table/dataset.rs b/rust/lancedb/src/table/dataset.rs index 89fcf55dd..54c4ba691 100644 --- a/rust/lancedb/src/table/dataset.rs +++ b/rust/lancedb/src/table/dataset.rs @@ -82,7 +82,7 @@ impl DatasetConsistencyWrapper { /// pinned dataset regardless of consistency mode. pub async fn get(&self) -> Result> { { - let state = self.state.lock().unwrap(); + let state = self.state.lock()?; if state.pinned_version.is_some() { return Ok(state.dataset.clone()); } @@ -101,7 +101,7 @@ impl DatasetConsistencyWrapper { } ConsistencyMode::Strong => refresh_latest(self.state.clone()).await, ConsistencyMode::Lazy => { - let state = self.state.lock().unwrap(); + let state = self.state.lock()?; Ok(state.dataset.clone()) } } @@ -116,7 +116,7 @@ impl DatasetConsistencyWrapper { /// concurrent [`as_time_travel`](Self::as_time_travel) call), the update /// is silently ignored — the write already committed to storage. pub fn update(&self, dataset: Dataset) { - let mut state = self.state.lock().unwrap(); + let mut state = self.state.lock().unwrap_or_else(|e| e.into_inner()); if state.pinned_version.is_some() { // A concurrent as_time_travel() beat us here. The write succeeded // in storage, but since we're now pinned we don't advance the @@ -139,7 +139,7 @@ impl DatasetConsistencyWrapper { /// Check that the dataset is in a mutable mode (Latest). pub fn ensure_mutable(&self) -> Result<()> { - let state = self.state.lock().unwrap(); + let state = self.state.lock()?; if state.pinned_version.is_some() { Err(crate::Error::InvalidInput { message: "table cannot be modified when a specific version is checked out" @@ -152,13 +152,16 @@ impl DatasetConsistencyWrapper { /// Returns the version, if in time travel mode, or None otherwise. pub fn time_travel_version(&self) -> Option { - self.state.lock().unwrap().pinned_version + self.state + .lock() + .unwrap_or_else(|e| e.into_inner()) + .pinned_version } /// Convert into a wrapper in latest version mode. pub async fn as_latest(&self) -> Result<()> { let dataset = { - let state = self.state.lock().unwrap(); + let state = self.state.lock()?; if state.pinned_version.is_none() { return Ok(()); } @@ -168,7 +171,7 @@ impl DatasetConsistencyWrapper { let latest_version = dataset.latest_version_id().await?; let new_dataset = dataset.checkout_version(latest_version).await?; - let mut state = self.state.lock().unwrap(); + let mut state = self.state.lock()?; if state.pinned_version.is_some() { state.dataset = Arc::new(new_dataset); state.pinned_version = None; @@ -184,7 +187,7 @@ impl DatasetConsistencyWrapper { let target_ref = target_version.into(); let (should_checkout, dataset) = { - let state = self.state.lock().unwrap(); + let state = self.state.lock()?; let should = match state.pinned_version { None => true, Some(version) => match &target_ref { @@ -204,7 +207,7 @@ impl DatasetConsistencyWrapper { let new_dataset = dataset.checkout_version(target_ref).await?; let version_value = new_dataset.version().version; - let mut state = self.state.lock().unwrap(); + let mut state = self.state.lock()?; state.dataset = Arc::new(new_dataset); state.pinned_version = Some(version_value); Ok(()) @@ -212,7 +215,7 @@ impl DatasetConsistencyWrapper { pub async fn reload(&self) -> Result<()> { let (dataset, pinned_version) = { - let state = self.state.lock().unwrap(); + let state = self.state.lock()?; (state.dataset.clone(), state.pinned_version) }; @@ -230,7 +233,7 @@ impl DatasetConsistencyWrapper { let new_dataset = dataset.checkout_version(version).await?; - let mut state = self.state.lock().unwrap(); + let mut state = self.state.lock()?; if state.pinned_version == Some(version) { state.dataset = Arc::new(new_dataset); } @@ -242,14 +245,14 @@ impl DatasetConsistencyWrapper { } async fn refresh_latest(state: Arc>) -> Result> { - let dataset = { state.lock().unwrap().dataset.clone() }; + let dataset = { state.lock()?.dataset.clone() }; let mut ds = (*dataset).clone(); ds.checkout_latest().await?; let new_arc = Arc::new(ds); { - let mut state = state.lock().unwrap(); + let mut state = state.lock()?; if state.pinned_version.is_none() && new_arc.manifest().version >= state.dataset.manifest().version { @@ -612,4 +615,108 @@ mod tests { let s = io_stats.incremental_stats(); assert_eq!(s.read_iops, 0, "step 5, elapsed={:?}", start.elapsed()); } + + /// Helper: poison the mutex inside a DatasetConsistencyWrapper. + fn poison_state(wrapper: &DatasetConsistencyWrapper) { + let state = wrapper.state.clone(); + let handle = std::thread::spawn(move || { + let _guard = state.lock().unwrap(); + panic!("intentional panic to poison mutex"); + }); + let _ = handle.join(); // join collects the panic + assert!(wrapper.state.lock().is_err(), "mutex should be poisoned"); + } + + #[tokio::test] + async fn test_get_returns_error_on_poisoned_lock() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let ds = create_test_dataset(uri).await; + + let wrapper = DatasetConsistencyWrapper::new_latest(ds, None); + poison_state(&wrapper); + + // get() should return Err, not panic + let result = wrapper.get().await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_ensure_mutable_returns_error_on_poisoned_lock() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let ds = create_test_dataset(uri).await; + + let wrapper = DatasetConsistencyWrapper::new_latest(ds, None); + poison_state(&wrapper); + + let result = wrapper.ensure_mutable(); + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_update_recovers_from_poisoned_lock() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let ds = create_test_dataset(uri).await; + let ds_v2 = append_to_dataset(uri).await; + + let wrapper = DatasetConsistencyWrapper::new_latest(ds, None); + poison_state(&wrapper); + + // update() returns (), should not panic + wrapper.update(ds_v2); + } + + #[tokio::test] + async fn test_time_travel_version_recovers_from_poisoned_lock() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let ds = create_test_dataset(uri).await; + + let wrapper = DatasetConsistencyWrapper::new_latest(ds, None); + poison_state(&wrapper); + + // Should not panic, returns whatever was in the mutex + let _version = wrapper.time_travel_version(); + } + + #[tokio::test] + async fn test_as_latest_returns_error_on_poisoned_lock() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let ds = create_test_dataset(uri).await; + + let wrapper = DatasetConsistencyWrapper::new_latest(ds, None); + poison_state(&wrapper); + + let result = wrapper.as_latest().await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_as_time_travel_returns_error_on_poisoned_lock() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let ds = create_test_dataset(uri).await; + + let wrapper = DatasetConsistencyWrapper::new_latest(ds, None); + poison_state(&wrapper); + + let result = wrapper.as_time_travel(1u64).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_reload_returns_error_on_poisoned_lock() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let ds = create_test_dataset(uri).await; + + let wrapper = DatasetConsistencyWrapper::new_latest(ds, None); + poison_state(&wrapper); + + let result = wrapper.reload().await; + assert!(result.is_err()); + } } diff --git a/rust/lancedb/src/table/query.rs b/rust/lancedb/src/table/query.rs index abce6d325..6cbcf4e19 100644 --- a/rust/lancedb/src/table/query.rs +++ b/rust/lancedb/src/table/query.rs @@ -9,7 +9,7 @@ use crate::expr::expr_to_sql_string; use crate::query::{ DEFAULT_TOP_K, QueryExecutionOptions, QueryFilter, QueryRequest, Select, VectorQueryRequest, }; -use crate::utils::{TimeoutStream, default_vector_column}; +use crate::utils::{MaxBatchLengthStream, TimeoutStream, default_vector_column}; use arrow::array::{AsArray, FixedSizeListBuilder, Float32Builder}; use arrow::datatypes::{Float32Type, UInt8Type}; use arrow_array::Array; @@ -66,6 +66,7 @@ async fn execute_generic_query( ) -> Result { let plan = create_plan(table, query, options.clone()).await?; let inner = execute_plan(plan, Default::default())?; + let inner = MaxBatchLengthStream::new_boxed(inner, options.max_batch_length as usize); let inner = if let Some(timeout) = options.timeout { TimeoutStream::new_boxed(inner, timeout) } else { @@ -200,7 +201,9 @@ pub async fn create_plan( scanner.with_row_id(); } - scanner.batch_size(options.max_batch_length as usize); + if options.max_batch_length > 0 { + scanner.batch_size(options.max_batch_length as usize); + } if query.base.fast_search { scanner.fast_search(); diff --git a/rust/lancedb/src/table/write_progress.rs b/rust/lancedb/src/table/write_progress.rs new file mode 100644 index 000000000..7a5c30008 --- /dev/null +++ b/rust/lancedb/src/table/write_progress.rs @@ -0,0 +1,431 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The LanceDB Authors + +//! Progress monitoring for write operations. +//! +//! You can add a callback to process progress in [`crate::table::AddDataBuilder::progress`]. +//! [`WriteProgress`] is the struct passed to the callback. + +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; + +/// Progress snapshot for a write operation. +#[derive(Debug, Clone)] +pub struct WriteProgress { + // These are private and only accessible via getters, to make it easy to add + // new fields without breaking existing callbacks. + elapsed: Duration, + output_rows: usize, + output_bytes: usize, + total_rows: Option, + active_tasks: usize, + total_tasks: usize, + done: bool, +} + +impl WriteProgress { + /// Wall-clock time since monitoring started. + pub fn elapsed(&self) -> Duration { + self.elapsed + } + + /// Number of rows written so far. + pub fn output_rows(&self) -> usize { + self.output_rows + } + + /// Number of bytes written so far. + pub fn output_bytes(&self) -> usize { + self.output_bytes + } + + /// Total rows expected. + /// + /// Populated when the input source reports a row count (e.g. a + /// [`arrow_array::RecordBatch`]). Always `Some` when [`WriteProgress::done`] + /// is `true` — falling back to the actual number of rows written. + pub fn total_rows(&self) -> Option { + self.total_rows + } + + /// Number of parallel write tasks currently in flight. + pub fn active_tasks(&self) -> usize { + self.active_tasks + } + + /// Total number of parallel write tasks (i.e. the write parallelism). + pub fn total_tasks(&self) -> usize { + self.total_tasks + } + + /// Whether the write operation has completed. + /// + /// The final callback always has `done = true`. Callers can use this to + /// finalize progress bars or perform cleanup. + pub fn done(&self) -> bool { + self.done + } +} + +/// Callback type for progress updates. +/// +/// Callbacks are serialized by the tracker and are never invoked reentrantly, +/// so `FnMut` is safe to use here. +pub type ProgressCallback = Arc>; + +/// Tracks progress of a write operation and invokes a [`ProgressCallback`]. +/// +/// Call [`WriteProgressTracker::record_batch`] for each batch written. +/// Call [`WriteProgressTracker::finish`] once after all data is written. +/// +/// The callback is never invoked reentrantly: all state updates and callback +/// invocations are serialized behind a single lock. +impl std::fmt::Debug for WriteProgressTracker { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("WriteProgressTracker") + .field("total_rows", &self.total_rows) + .finish() + } +} + +pub(crate) struct WriteProgressTracker { + rows_and_bytes: std::sync::Mutex<(usize, usize)>, + /// Wire bytes tracked separately by the insert layer. When set (> 0), + /// this takes precedence over the in-memory bytes from `rows_and_bytes`. + wire_bytes: AtomicUsize, + active_tasks: Arc, + total_tasks: AtomicUsize, + start: Instant, + /// Known total rows from the input source, if available. + total_rows: Option, + callback: ProgressCallback, +} + +impl WriteProgressTracker { + pub fn new(callback: ProgressCallback, total_rows: Option) -> Self { + Self { + rows_and_bytes: std::sync::Mutex::new((0, 0)), + wire_bytes: AtomicUsize::new(0), + active_tasks: Arc::new(AtomicUsize::new(0)), + total_tasks: AtomicUsize::new(1), + start: Instant::now(), + total_rows, + callback, + } + } + + /// Set the total number of parallel write tasks (the write parallelism). + pub fn set_total_tasks(&self, n: usize) { + self.total_tasks.store(n, Ordering::Relaxed); + } + + /// Increment the active task count. Returns a guard that decrements on drop. + pub fn track_task(&self) -> ActiveTaskGuard { + self.active_tasks.fetch_add(1, Ordering::Relaxed); + ActiveTaskGuard(self.active_tasks.clone()) + } + + /// Record a batch of rows passing through the scan node. + pub fn record_batch(&self, rows: usize, bytes: usize) { + // Lock order: callback first, then rows_and_bytes. This is the only + // order used anywhere, so deadlocks cannot occur. + let mut cb = self.callback.lock().unwrap_or_else(|e| e.into_inner()); + let mut guard = self + .rows_and_bytes + .lock() + .unwrap_or_else(|e| e.into_inner()); + guard.0 += rows; + guard.1 += bytes; + let progress = self.snapshot(guard.0, guard.1, false); + drop(guard); + cb(&progress); + } + + /// Record wire bytes from the insert layer (e.g. IPC-encoded bytes for + /// remote writes). When wire bytes are recorded, they take precedence over + /// the in-memory Arrow bytes tracked by [`record_batch`]. + pub fn record_bytes(&self, bytes: usize) { + self.wire_bytes.fetch_add(bytes, Ordering::Relaxed); + } + + /// Emit the final progress callback indicating the write is complete. + /// + /// `total_rows` is always `Some` on the final callback: it uses the known + /// total if available, or falls back to the number of rows actually written. + pub fn finish(&self) { + let mut cb = self.callback.lock().unwrap_or_else(|e| e.into_inner()); + let guard = self + .rows_and_bytes + .lock() + .unwrap_or_else(|e| e.into_inner()); + let mut snap = self.snapshot(guard.0, guard.1, true); + snap.total_rows = Some(self.total_rows.unwrap_or(guard.0)); + drop(guard); + cb(&snap); + } + + fn snapshot(&self, rows: usize, in_memory_bytes: usize, done: bool) -> WriteProgress { + let wire = self.wire_bytes.load(Ordering::Relaxed); + // Prefer wire bytes (actual I/O size) when the insert layer is + // tracking them; fall back to in-memory Arrow size otherwise. + // TODO: for local writes, track actual bytes written by Lance + // instead of using in-memory Arrow size as a proxy. + let output_bytes = if wire > 0 { wire } else { in_memory_bytes }; + WriteProgress { + elapsed: self.start.elapsed(), + output_rows: rows, + output_bytes, + total_rows: self.total_rows, + active_tasks: self.active_tasks.load(Ordering::Relaxed), + total_tasks: self.total_tasks.load(Ordering::Relaxed), + done, + } + } +} + +/// RAII guard that decrements the active task count when dropped. +pub(crate) struct ActiveTaskGuard(Arc); + +impl Drop for ActiveTaskGuard { + fn drop(&mut self) { + self.0.fetch_sub(1, Ordering::Relaxed); + } +} + +/// RAII guard that calls [`WriteProgressTracker::finish`] on drop. +/// +/// This ensures the final `done=true` callback fires even if the write +/// errors or the future is cancelled. +pub(crate) struct FinishOnDrop(pub Option>); + +impl Drop for FinishOnDrop { + fn drop(&mut self) { + if let Some(t) = self.0.take() { + t.finish(); + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; + + use arrow_array::record_batch; + + use crate::connect; + + #[tokio::test] + async fn test_progress_monitor_fires_callback() { + let db = connect("memory://").execute().await.unwrap(); + + let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap(); + let table = db + .create_table("progress_test", batch) + .execute() + .await + .unwrap(); + + let callback_count = Arc::new(AtomicUsize::new(0)); + let last_rows = Arc::new(AtomicUsize::new(0)); + let max_active = Arc::new(AtomicUsize::new(0)); + let last_total_tasks = Arc::new(AtomicUsize::new(0)); + let cb_count = callback_count.clone(); + let cb_rows = last_rows.clone(); + let cb_active = max_active.clone(); + let cb_total_tasks = last_total_tasks.clone(); + + let new_data = record_batch!(("id", Int32, [4, 5, 6])).unwrap(); + table + .add(new_data) + .progress(move |p| { + cb_count.fetch_add(1, Ordering::SeqCst); + cb_rows.store(p.output_rows(), Ordering::SeqCst); + cb_active.fetch_max(p.active_tasks(), Ordering::SeqCst); + cb_total_tasks.store(p.total_tasks(), Ordering::SeqCst); + }) + .execute() + .await + .unwrap(); + + assert_eq!(table.count_rows(None).await.unwrap(), 6); + assert!(callback_count.load(Ordering::SeqCst) >= 1); + // Progress tracks the newly inserted rows, not the total table size. + assert_eq!(last_rows.load(Ordering::SeqCst), 3); + // At least one callback should have seen an active task. + assert!(max_active.load(Ordering::SeqCst) >= 1); + // total_tasks should reflect the write parallelism. + assert!(last_total_tasks.load(Ordering::SeqCst) >= 1); + } + + #[tokio::test] + async fn test_progress_done_fires_at_end() { + let db = connect("memory://").execute().await.unwrap(); + let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap(); + let table = db + .create_table("progress_done", batch) + .execute() + .await + .unwrap(); + + let seen_done = Arc::new(std::sync::Mutex::new(Vec::::new())); + let seen = seen_done.clone(); + + let new_data = record_batch!(("id", Int32, [4, 5, 6])).unwrap(); + table + .add(new_data) + .progress(move |p| { + seen.lock().unwrap().push(p.done()); + }) + .execute() + .await + .unwrap(); + + let done_flags = seen_done.lock().unwrap(); + assert!(!done_flags.is_empty(), "at least one callback must fire"); + // Only the last callback should have done=true. + let last = *done_flags.last().unwrap(); + assert!(last, "last callback must have done=true"); + // All earlier callbacks should have done=false. + for &d in done_flags.iter().rev().skip(1) { + assert!(!d, "non-final callbacks must have done=false"); + } + } + + #[tokio::test] + async fn test_progress_total_rows_known() { + let db = connect("memory://").execute().await.unwrap(); + + let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap(); + let table = db + .create_table("total_known", batch) + .execute() + .await + .unwrap(); + + let seen_total = Arc::new(std::sync::Mutex::new(Vec::new())); + let seen = seen_total.clone(); + + // RecordBatch implements Scannable with num_rows() -> Some(3) + let new_data = record_batch!(("id", Int32, [4, 5, 6])).unwrap(); + table + .add(new_data) + .progress(move |p| { + seen.lock().unwrap().push(p.total_rows()); + }) + .execute() + .await + .unwrap(); + + let totals = seen_total.lock().unwrap(); + // All callbacks (including done) should have total_rows = Some(3) + assert!( + totals.contains(&Some(3)), + "expected total_rows=Some(3) in at least one callback, got: {:?}", + *totals + ); + } + + #[tokio::test] + async fn test_progress_total_rows_unknown() { + use arrow_array::RecordBatchIterator; + + let db = connect("memory://").execute().await.unwrap(); + + let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap(); + let table = db + .create_table("total_unknown", batch) + .execute() + .await + .unwrap(); + + let seen_total = Arc::new(std::sync::Mutex::new(Vec::new())); + let seen = seen_total.clone(); + + // RecordBatchReader does not provide num_rows, so total_rows should be + // None in intermediate callbacks but always Some on the done callback. + let schema = arrow_schema::Schema::new(vec![arrow_schema::Field::new( + "id", + arrow_schema::DataType::Int32, + false, + )]); + let new_data: Box = + Box::new(RecordBatchIterator::new( + vec![Ok(record_batch!(("id", Int32, [4, 5, 6])).unwrap())], + Arc::new(schema), + )); + table + .add(new_data) + .progress(move |p| { + seen.lock().unwrap().push((p.total_rows(), p.done())); + }) + .execute() + .await + .unwrap(); + + let entries = seen_total.lock().unwrap(); + assert!(!entries.is_empty(), "at least one callback must fire"); + for (total, done) in entries.iter() { + if *done { + assert!( + total.is_some(), + "done callback must have total_rows set, got: {:?}", + total + ); + } else { + assert_eq!( + *total, None, + "intermediate callback must have total_rows=None, got: {:?}", + total + ); + } + } + } + + #[test] + fn test_record_batch_recovers_from_poisoned_callback_lock() { + use super::{ProgressCallback, WriteProgressTracker}; + use std::sync::Mutex; + + let callback: ProgressCallback = Arc::new(Mutex::new(|_: &super::WriteProgress| {})); + + // Poison the callback mutex + let cb_clone = callback.clone(); + let handle = std::thread::spawn(move || { + let _guard = cb_clone.lock().unwrap(); + panic!("intentional panic to poison callback mutex"); + }); + let _ = handle.join(); + assert!( + callback.lock().is_err(), + "callback mutex should be poisoned" + ); + + let tracker = WriteProgressTracker::new(callback, Some(100)); + + // record_batch should not panic + tracker.record_batch(10, 1024); + } + + #[test] + fn test_finish_recovers_from_poisoned_callback_lock() { + use super::{ProgressCallback, WriteProgressTracker}; + use std::sync::Mutex; + + let callback: ProgressCallback = Arc::new(Mutex::new(|_: &super::WriteProgress| {})); + + // Poison the callback mutex + let cb_clone = callback.clone(); + let handle = std::thread::spawn(move || { + let _guard = cb_clone.lock().unwrap(); + panic!("intentional panic to poison callback mutex"); + }); + let _ = handle.join(); + + let tracker = WriteProgressTracker::new(callback, Some(100)); + + // finish should not panic + tracker.finish(); + } +} diff --git a/rust/lancedb/src/utils/background_cache.rs b/rust/lancedb/src/utils/background_cache.rs index 211630556..851f495f4 100644 --- a/rust/lancedb/src/utils/background_cache.rs +++ b/rust/lancedb/src/utils/background_cache.rs @@ -122,7 +122,7 @@ where /// This is a cheap synchronous check useful as a fast path before /// constructing a fetch closure for [`get()`](Self::get). pub fn try_get(&self) -> Option { - let cache = self.inner.lock().unwrap(); + let cache = self.inner.lock().unwrap_or_else(|e| e.into_inner()); cache.state.fresh_value(self.ttl, self.refresh_window) } @@ -138,7 +138,7 @@ where { // Fast path: check if cache is fresh { - let cache = self.inner.lock().unwrap(); + let cache = self.inner.lock().unwrap_or_else(|e| e.into_inner()); if let Some(value) = cache.state.fresh_value(self.ttl, self.refresh_window) { return Ok(value); } @@ -147,7 +147,7 @@ where // Slow path let mut fetch = Some(fetch); let action = { - let mut cache = self.inner.lock().unwrap(); + let mut cache = self.inner.lock().unwrap_or_else(|e| e.into_inner()); self.determine_action(&mut cache, &mut fetch) }; @@ -161,7 +161,7 @@ where /// /// This avoids a blocking fetch on the first [`get()`](Self::get) call. pub fn seed(&self, value: V) { - let mut cache = self.inner.lock().unwrap(); + let mut cache = self.inner.lock().unwrap_or_else(|e| e.into_inner()); cache.state = State::Current(value, clock::now()); } @@ -170,7 +170,7 @@ where /// Any in-flight background fetch from before this call will not update the /// cache (the generation counter prevents stale writes). pub fn invalidate(&self) { - let mut cache = self.inner.lock().unwrap(); + let mut cache = self.inner.lock().unwrap_or_else(|e| e.into_inner()); cache.state = State::Empty; cache.generation += 1; } @@ -267,7 +267,7 @@ where let fut_for_spawn = shared.clone(); tokio::spawn(async move { let result = fut_for_spawn.await; - let mut cache = inner.lock().unwrap(); + let mut cache = inner.lock().unwrap_or_else(|e| e.into_inner()); // Only update if no invalidation has happened since we started if cache.generation != generation { return; @@ -590,4 +590,67 @@ mod tests { let v = cache.get(ok_fetcher(count.clone(), "fresh")).await.unwrap(); assert_eq!(v, "fresh"); } + + /// Helper: poison the inner mutex of a BackgroundCache. + fn poison_cache(cache: &BackgroundCache) { + let inner = cache.inner.clone(); + let handle = std::thread::spawn(move || { + let _guard = inner.lock().unwrap(); + panic!("intentional panic to poison mutex"); + }); + let _ = handle.join(); + assert!(cache.inner.lock().is_err(), "mutex should be poisoned"); + } + + #[tokio::test] + async fn test_try_get_recovers_from_poisoned_lock() { + let cache = new_cache(); + let count = Arc::new(AtomicUsize::new(0)); + + // Seed a value first + cache.get(ok_fetcher(count.clone(), "hello")).await.unwrap(); + cache.get(ok_fetcher(count.clone(), "hello")).await.unwrap(); // peek + + poison_cache(&cache); + + // try_get() should not panic — it recovers via unwrap_or_else + let result = cache.try_get(); + // The value may or may not be fresh depending on timing, but it must not panic + let _ = result; + } + + #[tokio::test] + async fn test_get_recovers_from_poisoned_lock() { + let cache = new_cache(); + let count = Arc::new(AtomicUsize::new(0)); + + poison_cache(&cache); + + // get() should not panic — it recovers and can still fetch + let result = cache.get(ok_fetcher(count.clone(), "recovered")).await; + assert!(result.is_ok()); + assert_eq!(result.unwrap(), "recovered"); + } + + #[tokio::test] + async fn test_seed_recovers_from_poisoned_lock() { + let cache = new_cache(); + poison_cache(&cache); + + // seed() should not panic + cache.seed("seeded".to_string()); + } + + #[tokio::test] + async fn test_invalidate_recovers_from_poisoned_lock() { + let cache = new_cache(); + let count = Arc::new(AtomicUsize::new(0)); + + cache.get(ok_fetcher(count.clone(), "hello")).await.unwrap(); + + poison_cache(&cache); + + // invalidate() should not panic + cache.invalidate(); + } } diff --git a/rust/lancedb/src/utils/mod.rs b/rust/lancedb/src/utils/mod.rs index ffed533f6..0af8623b4 100644 --- a/rust/lancedb/src/utils/mod.rs +++ b/rust/lancedb/src/utils/mod.rs @@ -335,6 +335,85 @@ impl Stream for TimeoutStream { } } +/// A `Stream` wrapper that slices oversized batches to enforce a maximum batch length. +pub struct MaxBatchLengthStream { + inner: SendableRecordBatchStream, + max_batch_length: Option, + buffered_batch: Option, + buffered_offset: usize, +} + +impl MaxBatchLengthStream { + pub fn new(inner: SendableRecordBatchStream, max_batch_length: usize) -> Self { + Self { + inner, + max_batch_length: (max_batch_length > 0).then_some(max_batch_length), + buffered_batch: None, + buffered_offset: 0, + } + } + + pub fn new_boxed( + inner: SendableRecordBatchStream, + max_batch_length: usize, + ) -> SendableRecordBatchStream { + if max_batch_length == 0 { + inner + } else { + Box::pin(Self::new(inner, max_batch_length)) + } + } +} + +impl RecordBatchStream for MaxBatchLengthStream { + fn schema(&self) -> SchemaRef { + self.inner.schema() + } +} + +impl Stream for MaxBatchLengthStream { + type Item = DataFusionResult; + + fn poll_next( + mut self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + loop { + let Some(max_batch_length) = self.max_batch_length else { + return Pin::new(&mut self.inner).poll_next(cx); + }; + + if let Some(batch) = self.buffered_batch.clone() { + if self.buffered_offset < batch.num_rows() { + let remaining = batch.num_rows() - self.buffered_offset; + let length = remaining.min(max_batch_length); + let sliced = batch.slice(self.buffered_offset, length); + self.buffered_offset += length; + if self.buffered_offset >= batch.num_rows() { + self.buffered_batch = None; + self.buffered_offset = 0; + } + return std::task::Poll::Ready(Some(Ok(sliced))); + } + + self.buffered_batch = None; + self.buffered_offset = 0; + } + + match Pin::new(&mut self.inner).poll_next(cx) { + std::task::Poll::Ready(Some(Ok(batch))) => { + if batch.num_rows() <= max_batch_length { + return std::task::Poll::Ready(Some(Ok(batch))); + } + self.buffered_batch = Some(batch); + self.buffered_offset = 0; + } + other => return other, + } + } + } +} + #[cfg(test)] mod tests { use arrow_array::Int32Array; @@ -470,7 +549,7 @@ mod tests { assert_eq!(string_to_datatype(string), Some(expected)); } - fn sample_batch() -> RecordBatch { + fn sample_batch(num_rows: i32) -> RecordBatch { let schema = Arc::new(Schema::new(vec![Field::new( "col1", DataType::Int32, @@ -478,14 +557,14 @@ mod tests { )])); RecordBatch::try_new( schema.clone(), - vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + vec![Arc::new(Int32Array::from_iter_values(0..num_rows))], ) .unwrap() } #[tokio::test] async fn test_timeout_stream() { - let batch = sample_batch(); + let batch = sample_batch(3); let schema = batch.schema(); let mock_stream = stream::iter(vec![Ok(batch.clone()), Ok(batch.clone())]); @@ -515,7 +594,7 @@ mod tests { #[tokio::test] async fn test_timeout_stream_zero_duration() { - let batch = sample_batch(); + let batch = sample_batch(3); let schema = batch.schema(); let mock_stream = stream::iter(vec![Ok(batch.clone()), Ok(batch.clone())]); @@ -534,7 +613,7 @@ mod tests { #[tokio::test] async fn test_timeout_stream_completes_normally() { - let batch = sample_batch(); + let batch = sample_batch(3); let schema = batch.schema(); let mock_stream = stream::iter(vec![Ok(batch.clone()), Ok(batch.clone())]); @@ -552,4 +631,35 @@ mod tests { // Stream should be empty now assert!(timeout_stream.next().await.is_none()); } + + async fn collect_batch_sizes( + stream: SendableRecordBatchStream, + max_batch_length: usize, + ) -> Vec { + let mut sliced_stream = MaxBatchLengthStream::new(stream, max_batch_length); + sliced_stream + .by_ref() + .map(|batch| batch.unwrap().num_rows()) + .collect::>() + .await + } + + #[tokio::test] + async fn test_max_batch_length_stream_behaviors() { + let schema = sample_batch(7).schema(); + let mock_stream = stream::iter(vec![Ok(sample_batch(2)), Ok(sample_batch(7))]); + + let sendable_stream: SendableRecordBatchStream = + Box::pin(RecordBatchStreamAdapter::new(schema.clone(), mock_stream)); + assert_eq!( + collect_batch_sizes(sendable_stream, 3).await, + vec![2, 3, 3, 1] + ); + + let sendable_stream: SendableRecordBatchStream = Box::pin(RecordBatchStreamAdapter::new( + schema, + stream::iter(vec![Ok(sample_batch(2)), Ok(sample_batch(7))]), + )); + assert_eq!(collect_batch_sizes(sendable_stream, 0).await, vec![2, 7]); + } }