Compare commits

..

1 Commits

Author SHA1 Message Date
lancedb automation
d1d7151824 chore: update lance dependency to v1.0.3-rc.1 2026-01-21 08:51:12 +00:00
45 changed files with 537 additions and 588 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.24.0"
current_version = "0.23.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -75,13 +75,6 @@ jobs:
VERSION="${VERSION#v}"
BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
# Use "chore" for beta/rc versions, "feat" for stable releases
if [[ "${VERSION}" == *beta* ]] || [[ "${VERSION}" == *rc* ]]; then
COMMIT_TYPE="chore"
else
COMMIT_TYPE="feat"
fi
cat <<EOF >/tmp/codex-prompt.txt
You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.
@@ -91,10 +84,10 @@ jobs:
3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
6. Stage all relevant files with "git add -A". Commit using the message "${COMMIT_TYPE}: update lance dependency to v${VERSION}".
6. Stage all relevant files with "git add -A". Commit using the message "chore: update lance dependency to v${VERSION}".
7. Push the branch to origin. If the branch already exists, force-push your changes.
8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
9. Create a pull request targeting "main" with title "${COMMIT_TYPE}: update lance dependency to v${VERSION}". First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'EOF' > /tmp/pr-body.md). The body should summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). Then run "gh pr create --body-file /tmp/pr-body.md".
9. Create a pull request targeting "main" with title "chore: update lance dependency to v${VERSION}". In the body, summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}).
10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
Constraints:

View File

@@ -48,8 +48,6 @@ jobs:
run: cargo fmt --all -- --check
- name: Run clippy
run: cargo clippy --profile ci --workspace --tests --all-features -- -D warnings
- name: Run clippy (without remote feature)
run: cargo clippy --profile ci --workspace --tests -- -D warnings
build-no-lock:
runs-on: ubuntu-24.04
@@ -183,7 +181,7 @@ jobs:
runs-on: ubuntu-24.04
strategy:
matrix:
msrv: ["1.88.0"] # This should match up with rust-version in Cargo.toml
msrv: ["1.78.0"] # This should match up with rust-version in Cargo.toml
env:
# Need up-to-date compilers for kernels
CC: clang-18
@@ -214,6 +212,4 @@ jobs:
cargo update -p aws-sdk-sts --precise 1.51.0
cargo update -p home --precise 0.5.9
- name: cargo +${{ matrix.msrv }} check
env:
RUSTUP_TOOLCHAIN: ${{ matrix.msrv }}
run: cargo check --profile ci --workspace --tests --benches --all-features

259
Cargo.lock generated
View File

@@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
version = 3
[[package]]
name = "adler2"
@@ -320,7 +320,7 @@ dependencies = [
"arrow-schema",
"chrono",
"half",
"indexmap 2.11.4",
"indexmap 2.12.0",
"lexical-core",
"memchr",
"num",
@@ -491,7 +491,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -502,7 +502,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -1193,7 +1193,7 @@ dependencies = [
"regex",
"rustc-hash 1.1.0",
"shlex",
"syn 2.0.106",
"syn 2.0.114",
"which",
]
@@ -1307,7 +1307,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -1375,7 +1375,7 @@ checksum = "3fa76293b4f7bb636ab88fd78228235b5248b4d05cc589aed610f954af5d7c7a"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -1913,7 +1913,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501"
dependencies = [
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -1947,7 +1947,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -1961,7 +1961,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -1972,7 +1972,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
"darling_core 0.20.11",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -1983,7 +1983,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81"
dependencies = [
"darling_core 0.21.3",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -2002,9 +2002,9 @@ dependencies = [
[[package]]
name = "datafusion"
version = "50.2.0"
version = "50.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc6759cf9ef57c5c469e4027ac4b4cfa746e06a0f5472c2b922b6a403c2a64c4"
checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a"
dependencies = [
"arrow",
"arrow-ipc",
@@ -2117,7 +2117,7 @@ dependencies = [
"chrono",
"half",
"hashbrown 0.14.5",
"indexmap 2.11.4",
"indexmap 2.12.0",
"libc",
"log",
"object_store",
@@ -2301,7 +2301,7 @@ dependencies = [
"datafusion-functions-aggregate-common",
"datafusion-functions-window-common",
"datafusion-physical-expr-common",
"indexmap 2.11.4",
"indexmap 2.12.0",
"paste",
"recursive",
"serde_json",
@@ -2316,7 +2316,7 @@ checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6"
dependencies = [
"arrow",
"datafusion-common",
"indexmap 2.11.4",
"indexmap 2.12.0",
"itertools 0.14.0",
"paste",
]
@@ -2458,7 +2458,7 @@ checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730"
dependencies = [
"datafusion-expr",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -2473,7 +2473,7 @@ dependencies = [
"datafusion-expr",
"datafusion-expr-common",
"datafusion-physical-expr",
"indexmap 2.11.4",
"indexmap 2.12.0",
"itertools 0.14.0",
"log",
"recursive",
@@ -2496,7 +2496,7 @@ dependencies = [
"datafusion-physical-expr-common",
"half",
"hashbrown 0.14.5",
"indexmap 2.11.4",
"indexmap 2.12.0",
"itertools 0.14.0",
"log",
"parking_lot",
@@ -2576,7 +2576,7 @@ dependencies = [
"futures",
"half",
"hashbrown 0.14.5",
"indexmap 2.11.4",
"indexmap 2.12.0",
"itertools 0.14.0",
"log",
"parking_lot",
@@ -2636,7 +2636,7 @@ dependencies = [
"bigdecimal",
"datafusion-common",
"datafusion-expr",
"indexmap 2.11.4",
"indexmap 2.12.0",
"log",
"recursive",
"regex",
@@ -2702,7 +2702,7 @@ checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -2723,7 +2723,7 @@ dependencies = [
"darling 0.20.11",
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -2733,7 +2733,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
dependencies = [
"derive_builder_core",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -2777,7 +2777,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -2905,7 +2905,7 @@ dependencies = [
"heck 0.5.0",
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -2917,7 +2917,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -3141,8 +3141,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -3219,7 +3219,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -3709,7 +3709,7 @@ dependencies = [
"futures-sink",
"futures-util",
"http 0.2.12",
"indexmap 2.11.4",
"indexmap 2.12.0",
"slab",
"tokio",
"tokio-util",
@@ -3728,7 +3728,7 @@ dependencies = [
"futures-core",
"futures-sink",
"http 1.3.1",
"indexmap 2.11.4",
"indexmap 2.12.0",
"slab",
"tokio",
"tokio-util",
@@ -3737,9 +3737,9 @@ dependencies = [
[[package]]
name = "half"
version = "2.6.0"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"
checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
dependencies = [
"bytemuck",
"cfg-if",
@@ -3747,6 +3747,7 @@ dependencies = [
"num-traits",
"rand 0.9.2",
"rand_distr 0.5.1",
"zerocopy",
]
[[package]]
@@ -4256,9 +4257,9 @@ dependencies = [
[[package]]
name = "indexmap"
version = "2.11.4"
version = "2.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5"
checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f"
dependencies = [
"equivalent",
"hashbrown 0.16.0",
@@ -4403,7 +4404,7 @@ checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -4478,8 +4479,8 @@ dependencies = [
[[package]]
name = "lance"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow",
"arrow-arith",
@@ -4544,8 +4545,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4563,8 +4564,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrayref",
"paste",
@@ -4573,8 +4574,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4610,8 +4611,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow",
"arrow-array",
@@ -4641,8 +4642,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow",
"arrow-array",
@@ -4659,8 +4660,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4697,8 +4698,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4730,8 +4731,8 @@ dependencies = [
[[package]]
name = "lance-geo"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"datafusion",
"geo-types",
@@ -4742,8 +4743,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow",
"arrow-arith",
@@ -4804,8 +4805,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow",
"arrow-arith",
@@ -4845,8 +4846,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4862,8 +4863,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow",
"async-trait",
@@ -4875,8 +4876,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4919,8 +4920,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow",
"arrow-array",
@@ -4959,8 +4960,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
version = "1.0.3-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3-rc.1#695a77bbe5abe1695d98d4d397acc23c456c4217"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4971,7 +4972,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.24.0"
version = "0.23.1"
dependencies = [
"ahash",
"anyhow",
@@ -5050,7 +5051,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.24.0"
version = "0.23.1"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -5070,7 +5071,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.27.0"
version = "0.26.1"
dependencies = [
"arrow",
"async-trait",
@@ -5174,9 +5175,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7"
[[package]]
name = "libc"
version = "0.2.176"
version = "0.2.180"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174"
checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
[[package]]
name = "libloading"
@@ -5206,9 +5207,9 @@ dependencies = [
[[package]]
name = "libz-rs-sys"
version = "0.5.2"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "840db8cf39d9ec4dd794376f38acc40d0fc65eec2a8f484f7fd375b84602becd"
checksum = "c10501e7805cee23da17c7790e59df2870c0d4043ec6d03f67d31e2b53e77415"
dependencies = [
"zlib-rs",
]
@@ -5499,7 +5500,7 @@ checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -5567,7 +5568,7 @@ dependencies = [
"napi-derive-backend",
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -5582,7 +5583,7 @@ dependencies = [
"quote",
"regex",
"semver",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -5782,7 +5783,7 @@ dependencies = [
"proc-macro-crate",
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -6128,7 +6129,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
dependencies = [
"fixedbitset",
"indexmap 2.11.4",
"indexmap 2.12.0",
]
[[package]]
@@ -6139,7 +6140,7 @@ checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455"
dependencies = [
"fixedbitset",
"hashbrown 0.15.5",
"indexmap 2.11.4",
"indexmap 2.12.0",
"serde",
]
@@ -6216,7 +6217,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -6389,7 +6390,7 @@ dependencies = [
"comfy-table",
"either",
"hashbrown 0.14.5",
"indexmap 2.11.4",
"indexmap 2.12.0",
"num-traits",
"once_cell",
"polars-arrow",
@@ -6487,7 +6488,7 @@ dependencies = [
"either",
"hashbrown 0.14.5",
"hex",
"indexmap 2.11.4",
"indexmap 2.12.0",
"memchr",
"num-traits",
"polars-arrow",
@@ -6631,7 +6632,7 @@ dependencies = [
"ahash",
"bytemuck",
"hashbrown 0.14.5",
"indexmap 2.11.4",
"indexmap 2.12.0",
"num-traits",
"once_cell",
"polars-error",
@@ -6689,7 +6690,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
dependencies = [
"proc-macro2",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -6736,7 +6737,7 @@ dependencies = [
"prost",
"prost-types",
"regex",
"syn 2.0.106",
"syn 2.0.114",
"tempfile",
]
@@ -6750,7 +6751,7 @@ dependencies = [
"itertools 0.14.0",
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -6836,7 +6837,7 @@ checksum = "ca31e43a0f205f2960208938135e37e579e61e10b36b4e7f49b0e8f60fab5b83"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -6868,7 +6869,7 @@ dependencies = [
"proc-macro2",
"pyo3-macros-backend",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -6881,7 +6882,7 @@ dependencies = [
"proc-macro2",
"pyo3-build-config",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -7179,7 +7180,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b"
dependencies = [
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -7219,14 +7220,14 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
name = "regex"
version = "1.11.3"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
@@ -7236,9 +7237,9 @@ dependencies = [
[[package]]
name = "regex-automata"
version = "0.4.11"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
@@ -7457,7 +7458,7 @@ dependencies = [
"regex",
"relative-path",
"rustc_version",
"syn 2.0.106",
"syn 2.0.114",
"unicode-ident",
]
@@ -7835,7 +7836,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -7879,7 +7880,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -7904,7 +7905,7 @@ dependencies = [
"chrono",
"hex",
"indexmap 1.9.3",
"indexmap 2.11.4",
"indexmap 2.12.0",
"schemars 0.9.0",
"schemars 1.0.4",
"serde_core",
@@ -7922,7 +7923,7 @@ dependencies = [
"darling 0.21.3",
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -8080,7 +8081,7 @@ dependencies = [
"heck 0.5.0",
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -8198,7 +8199,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -8284,7 +8285,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -8297,7 +8298,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -8319,9 +8320,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.106"
version = "2.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a"
dependencies = [
"proc-macro2",
"quote",
@@ -8345,7 +8346,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -8613,7 +8614,7 @@ checksum = "451b374529930d7601b1eef8d32bc79ae870b6079b069401709c2a8bf9e75f36"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -8642,7 +8643,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -8653,7 +8654,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -8807,7 +8808,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -8869,7 +8870,7 @@ version = "0.23.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3effe7c0e86fdff4f69cdd2ccc1b96f933e24811c5441d44904e8683e27184b"
dependencies = [
"indexmap 2.11.4",
"indexmap 2.12.0",
"toml_datetime",
"toml_parser",
"winnow",
@@ -8967,7 +8968,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -9268,7 +9269,7 @@ dependencies = [
"log",
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
"wasm-bindgen-shared",
]
@@ -9303,7 +9304,7 @@ checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@@ -9484,7 +9485,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -9495,7 +9496,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -9831,7 +9832,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
"synstructure",
]
@@ -9843,7 +9844,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
"synstructure",
]
@@ -9864,7 +9865,7 @@ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -9884,7 +9885,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
"synstructure",
]
@@ -9924,7 +9925,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"syn 2.0.114",
]
[[package]]
@@ -9937,16 +9938,16 @@ dependencies = [
"crc32fast",
"crossbeam-utils",
"displaydoc",
"indexmap 2.11.4",
"indexmap 2.12.0",
"num_enum",
"thiserror 1.0.69",
]
[[package]]
name = "zlib-rs"
version = "0.5.2"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f06ae92f42f5e5c42443fd094f245eb656abf56dd7cce9b8b263236565e00f2"
checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3"
[[package]]
name = "zstd"

View File

@@ -12,42 +12,42 @@ repository = "https://github.com/lancedb/lancedb"
description = "Serverless, low-latency vector database for AI applications"
keywords = ["lancedb", "lance", "database", "vector", "search"]
categories = ["database-implementations"]
rust-version = "1.88.0"
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=1.0.3-rc.1", default-features = false, "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=1.0.3-rc.1", default-features = false, "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=1.0.3-rc.1", default-features = false, "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "56.2", optional = false }
arrow-array = "56.2"
arrow-data = "56.2"
arrow-ipc = "56.2"
arrow-ord = "56.2"
arrow-schema = "56.2"
arrow-select = "56.2"
arrow-cast = "56.2"
arrow = { version = "56.1", optional = false }
arrow-array = "56.1"
arrow-data = "56.1"
arrow-ipc = "56.1"
arrow-ord = "56.1"
arrow-schema = "56.1"
arrow-select = "56.1"
arrow-cast = "56.1"
async-trait = "0"
datafusion = { version = "50.1", default-features = false }
datafusion-catalog = "50.1"
datafusion-common = { version = "50.1", default-features = false }
datafusion-execution = "50.1"
datafusion-expr = "50.1"
datafusion-physical-plan = "50.1"
datafusion = { version = "50.0.0", default-features = false }
datafusion-catalog = "50.0.0"
datafusion-common = { version = "50.0.0", default-features = false }
datafusion-execution = "50.0.0"
datafusion-expr = "50.0.0"
datafusion-physical-plan = "50.0.0"
env_logger = "0.11"
half = { "version" = "2.6.0", default-features = false, features = [
half = { "version" = "2.7.1", default-features = false, features = [
"num-traits",
] }
futures = "0"
@@ -59,7 +59,7 @@ rand = "0.9"
snafu = "0.8"
url = "2"
num-traits = "0.2"
regex = "1.10"
regex = "1.12"
lazy_static = "1"
semver = "1.0.25"
chrono = "0.4"

View File

@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId>
<version>0.24.0</version>
<version>0.23.1</version>
</dependency>
```

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.24.0-final.0</version>
<version>0.23.1-final.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.24.0-final.0</version>
<version>0.23.1-final.0</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.24.0"
version = "0.23.1"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.24.0",
"version": "0.23.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.24.0",
"version": "0.23.1",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.24.0",
"version": "0.23.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.24.0",
"version": "0.23.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.24.0",
"version": "0.23.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.24.0",
"version": "0.23.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.24.0",
"version": "0.23.1",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.24.0",
"version": "0.23.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.24.0",
"version": "0.23.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.24.0",
"version": "0.23.1",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.24.0",
"version": "0.23.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.27.1"
current_version = "0.26.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,28 +1,28 @@
[package]
name = "lancedb-python"
version = "0.27.1"
version = "0.26.1"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true
repository.workspace = true
keywords.workspace = true
categories.workspace = true
rust-version = "1.88.0"
rust-version = "1.75.0"
[lib]
name = "_lancedb"
crate-type = ["cdylib"]
[dependencies]
arrow = { version = "56.2", features = ["pyarrow"] }
arrow = { version = "56.1", features = ["pyarrow"] }
async-trait = "0.1"
lancedb = { path = "../rust/lancedb", default-features = false }
lance-core.workspace = true
lance-namespace.workspace = true
lance-io.workspace = true
env_logger.workspace = true
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.25", features = [
pyo3 = { version = "0.25.1", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.25.0", features = [
"attributes",
"tokio-runtime",
] }
@@ -32,7 +32,7 @@ snafu.workspace = true
tokio = { version = "1.40", features = ["sync"] }
[build-dependencies]
pyo3-build-config = { version = "0.25", features = [
pyo3-build-config = { version = "0.25.0", features = [
"extension-module",
"abi3-py39",
] }

View File

@@ -961,22 +961,27 @@ class LanceQueryBuilder(ABC):
>>> query = [100, 100]
>>> plan = table.search(query).analyze_plan()
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
AnalyzeExec verbose=true, metrics=[], cumulative_cpu=...
TracedExec, metrics=[], cumulative_cpu=...
ProjectionExec: expr=[...], metrics=[...], cumulative_cpu=...
GlobalLimitExec: skip=0, fetch=10, metrics=[...], cumulative_cpu=...
FilterExec: _distance@2 IS NOT NULL,
metrics=[output_rows=..., elapsed_compute=...], cumulative_cpu=...
SortExec: TopK(fetch=10), expr=[...],
AnalyzeExec verbose=true, elapsed=..., metrics=...
TracedExec, elapsed=..., metrics=...
ProjectionExec: elapsed=..., expr=[...],
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
GlobalLimitExec: elapsed=..., skip=0, fetch=10,
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
FilterExec: elapsed=..., _distance@2 IS NOT NULL, metrics=[...]
SortExec: elapsed=..., TopK(fetch=10), expr=[...],
preserve_partitioning=[...],
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...],
cumulative_cpu=...
KNNVectorDistance: metric=l2,
metrics=[output_rows=..., elapsed_compute=..., output_batches=...],
cumulative_cpu=...
LanceRead: uri=..., projection=[vector], ...
metrics=[output_rows=..., elapsed_compute=...,
bytes_read=..., iops=..., requests=...], cumulative_cpu=...
metrics=[output_rows=..., elapsed_compute=...,
output_bytes=..., row_replacements=...]
KNNVectorDistance: elapsed=..., metric=l2,
metrics=[output_rows=..., elapsed_compute=...,
output_bytes=..., output_batches=...]
LanceRead: elapsed=..., uri=..., projection=[vector],
num_fragments=..., range_before=None, range_after=None,
row_id=true, row_addr=false,
full_filter=--, refine_filter=--,
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...,
fragments_scanned=..., ranges_scanned=1, rows_scanned=1,
bytes_read=..., iops=..., requests=..., task_wait_time=...]
Returns
-------

View File

@@ -2,27 +2,12 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
from datetime import timedelta
from lancedb.db import AsyncConnection, DBConnection
import lancedb
import pytest
import pytest_asyncio
def pandas_string_type():
"""Return the PyArrow string type that pandas uses for string columns.
pandas 3.0+ uses large_string for string columns, pandas 2.x uses string.
"""
import pandas as pd
import pyarrow as pa
version = tuple(int(x) for x in pd.__version__.split(".")[:2])
if version >= (3, 0):
return pa.large_utf8()
return pa.utf8()
# Use an in-memory database for most tests.
@pytest.fixture
def mem_db() -> DBConnection:

View File

@@ -268,8 +268,6 @@ async def test_create_table_from_iterator_async(mem_db_async: lancedb.AsyncConne
def test_create_exist_ok(tmp_db: lancedb.DBConnection):
from conftest import pandas_string_type
data = pd.DataFrame(
{
"vector": [[3.1, 4.1], [5.9, 26.5]],
@@ -288,11 +286,10 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
assert tbl.schema == tbl2.schema
assert len(tbl) == len(tbl2)
# pandas 3.0+ uses large_string, pandas 2.x uses string
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("item", pandas_string_type()),
pa.field("item", pa.utf8()),
pa.field("price", pa.float64()),
]
)
@@ -302,7 +299,7 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
bad_schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("item", pandas_string_type()),
pa.field("item", pa.utf8()),
pa.field("price", pa.float64()),
pa.field("extra", pa.float32()),
]
@@ -368,8 +365,6 @@ async def test_create_mode_async(tmp_db_async: lancedb.AsyncConnection):
@pytest.mark.asyncio
async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
from conftest import pandas_string_type
data = pd.DataFrame(
{
"vector": [[3.1, 4.1], [5.9, 26.5]],
@@ -387,11 +382,10 @@ async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
assert tbl.name == tbl2.name
assert await tbl.schema() == await tbl2.schema()
# pandas 3.0+ uses large_string, pandas 2.x uses string
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("item", pandas_string_type()),
pa.field("item", pa.utf8()),
pa.field("price", pa.float64()),
]
)
@@ -601,8 +595,6 @@ def test_open_table_sync(tmp_db: lancedb.DBConnection):
@pytest.mark.asyncio
async def test_open_table(tmp_path):
from conftest import pandas_string_type
db = await lancedb.connect_async(tmp_path)
data = pd.DataFrame(
{
@@ -622,11 +614,10 @@ async def test_open_table(tmp_path):
)
is not None
)
# pandas 3.0+ uses large_string, pandas 2.x uses string
assert await tbl.schema() == pa.schema(
{
"vector": pa.list_(pa.float32(), list_size=2),
"item": pandas_string_type(),
"item": pa.utf8(),
"price": pa.float64(),
}
)

View File

@@ -26,8 +26,6 @@ import pytest
from lance_namespace import (
CreateEmptyTableRequest,
CreateEmptyTableResponse,
DeclareTableRequest,
DeclareTableResponse,
DescribeTableRequest,
DescribeTableResponse,
LanceNamespace,
@@ -162,19 +160,6 @@ class TrackingNamespace(LanceNamespace):
return modified
def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
"""Track declare_table calls and inject rotating credentials."""
with self.lock:
self.create_call_count += 1
count = self.create_call_count
response = self.inner.declare_table(request)
response.storage_options = self._modify_storage_options(
response.storage_options, count
)
return response
def create_empty_table(
self, request: CreateEmptyTableRequest
) -> CreateEmptyTableResponse:

View File

@@ -528,19 +528,12 @@ def test_sanitize_data(
else:
expected_schema = schema
else:
from conftest import pandas_string_type
# polars uses large_string, pandas 3.0+ uses large_string, others use string
if isinstance(data, pl.DataFrame):
text_type = pa.large_utf8()
elif isinstance(data, pd.DataFrame):
text_type = pandas_string_type()
else:
text_type = pa.string()
expected_schema = pa.schema(
{
"id": pa.int64(),
"text": text_type,
"text": pa.large_utf8()
if isinstance(data, pl.DataFrame)
else pa.string(),
"vector": pa.list_(pa.float32(), 10),
}
)

View File

@@ -10,8 +10,7 @@ use arrow::{
use futures::stream::StreamExt;
use lancedb::arrow::SendableRecordBatchStream;
use pyo3::{
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult,
Python,
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -36,7 +35,7 @@ impl RecordBatchStream {
#[pymethods]
impl RecordBatchStream {
#[getter]
pub fn schema(&self, py: Python) -> PyResult<PyObject> {
pub fn schema(&self, py: Python) -> PyResult<Py<PyAny>> {
(*self.schema).clone().into_pyarrow(py)
}
@@ -53,7 +52,11 @@ impl RecordBatchStream {
.next()
.await
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
Python::with_gil(|py| inner_next.infer_error()?.to_pyarrow(py))
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
inner_next.infer_error()?.to_pyarrow(py)
})?;
Ok(py_obj)
})
}
}

View File

@@ -12,7 +12,7 @@ use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyclass, pyfunction, pymethods,
types::{PyDict, PyDictMethods},
Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -114,7 +114,7 @@ impl Connection {
data: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
storage_options_provider: Option<Py<PyAny>>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -152,7 +152,7 @@ impl Connection {
schema: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
storage_options_provider: Option<Py<PyAny>>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -187,7 +187,7 @@ impl Connection {
name: String,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
storage_options_provider: Option<Py<PyAny>>,
index_cache_size: Option<u32>,
location: Option<String>,
) -> PyResult<Bound<'_, PyAny>> {
@@ -307,6 +307,7 @@ impl Connection {
..Default::default()
};
let response = inner.list_namespaces(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("namespaces", response.namespaces)?;
@@ -327,8 +328,7 @@ impl Connection {
let py = self_.py();
future_into_py(py, async move {
use lance_namespace::models::CreateNamespaceRequest;
// Mode is now a string field
let mode_str = mode.and_then(|m| match m.to_lowercase().as_str() {
let mode_enum = mode.and_then(|m| match m.to_lowercase().as_str() {
"create" => Some("Create".to_string()),
"exist_ok" => Some("ExistOk".to_string()),
"overwrite" => Some("Overwrite".to_string()),
@@ -340,11 +340,12 @@ impl Connection {
} else {
Some(namespace)
},
mode: mode_str,
mode: mode_enum,
properties,
..Default::default()
};
let response = inner.create_namespace(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
@@ -364,13 +365,12 @@ impl Connection {
let py = self_.py();
future_into_py(py, async move {
use lance_namespace::models::DropNamespaceRequest;
// Mode and Behavior are now string fields
let mode_str = mode.and_then(|m| match m.to_uppercase().as_str() {
let mode_enum = mode.and_then(|m| match m.to_uppercase().as_str() {
"SKIP" => Some("Skip".to_string()),
"FAIL" => Some("Fail".to_string()),
_ => None,
});
let behavior_str = behavior.and_then(|b| match b.to_uppercase().as_str() {
let behavior_enum = behavior.and_then(|b| match b.to_uppercase().as_str() {
"RESTRICT" => Some("Restrict".to_string()),
"CASCADE" => Some("Cascade".to_string()),
_ => None,
@@ -381,11 +381,12 @@ impl Connection {
} else {
Some(namespace)
},
mode: mode_str,
behavior: behavior_str,
mode: mode_enum,
behavior: behavior_enum,
..Default::default()
};
let response = inner.drop_namespace(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
@@ -413,6 +414,7 @@ impl Connection {
..Default::default()
};
let response = inner.describe_namespace(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
@@ -443,6 +445,7 @@ impl Connection {
..Default::default()
};
let response = inner.list_tables(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("tables", response.tables)?;

View File

@@ -40,31 +40,34 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
request_id,
source,
status_code,
} => Python::with_gil(|py| {
let message = err.to_string();
let http_err_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
.getattr(intern!(py, "HttpError"))?;
let err = http_err_cls.call1((
message,
request_id,
status_code.map(|s| s.as_u16()),
))?;
if let Some(cause) = source.source() {
// The HTTP error already includes the first cause. But
// we can add the rest of the chain if there is any more.
let cause_err = http_from_rust_error(
py,
cause,
} => {
#[allow(deprecated)]
Python::with_gil(|py| {
let message = err.to_string();
let http_err_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
.getattr(intern!(py, "HttpError"))?;
let err = http_err_cls.call1((
message,
request_id,
status_code.map(|s| s.as_u16()),
)?;
err.setattr(intern!(py, "__cause__"), cause_err)?;
}
))?;
Err(PyErr::from_value(err))
}),
if let Some(cause) = source.source() {
// The HTTP error already includes the first cause. But
// we can add the rest of the chain if there is any more.
let cause_err = http_from_rust_error(
py,
cause,
request_id,
status_code.map(|s| s.as_u16()),
)?;
err.setattr(intern!(py, "__cause__"), cause_err)?;
}
Err(PyErr::from_value(err))
})
}
LanceError::Retry {
request_id,
request_failures,
@@ -75,33 +78,37 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
max_read_failures,
source,
status_code,
} => Python::with_gil(|py| {
let cause_err = http_from_rust_error(
py,
source.as_ref(),
request_id,
status_code.map(|s| s.as_u16()),
)?;
} =>
{
#[allow(deprecated)]
Python::with_gil(|py| {
let cause_err = http_from_rust_error(
py,
source.as_ref(),
request_id,
status_code.map(|s| s.as_u16()),
)?;
let message = err.to_string();
let retry_error_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
.getattr("RetryError")?;
let err = retry_error_cls.call1((
message,
request_id,
*request_failures,
*connect_failures,
*read_failures,
*max_request_failures,
*max_connect_failures,
*max_read_failures,
status_code.map(|s| s.as_u16()),
))?;
let message = err.to_string();
let retry_error_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
.getattr("RetryError")?;
let err = retry_error_cls.call1((
message,
request_id,
*request_failures,
*connect_failures,
*read_failures,
*max_request_failures,
*max_connect_failures,
*max_read_failures,
status_code.map(|s| s.as_u16()),
))?;
err.setattr(intern!(py, "__cause__"), cause_err)?;
Err(PyErr::from_value(err))
}),
err.setattr(intern!(py, "__cause__"), cause_err)?;
Err(PyErr::from_value(err))
})
}
_ => self.runtime_error(),
},
}

View File

@@ -12,6 +12,7 @@ pub struct PyHeaderProvider {
impl Clone for PyHeaderProvider {
fn clone(&self) -> Self {
#[allow(deprecated)]
Python::with_gil(|py| Self {
provider: self.provider.clone_ref(py),
})
@@ -25,6 +26,7 @@ impl PyHeaderProvider {
/// Get headers from the Python provider (internal implementation)
fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
#[allow(deprecated)]
Python::with_gil(|py| {
// Call the get_headers method
let result = self.provider.call_method0(py, "get_headers");

View File

@@ -19,7 +19,7 @@ use pyo3::{
exceptions::PyRuntimeError,
pyclass, pymethods,
types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
Bound, PyAny, PyRef, PyRefMut, PyResult, Python,
Bound, Py, PyAny, PyRef, PyRefMut, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -281,7 +281,10 @@ impl PyPermutationReader {
let reader = slf.reader.clone();
future_into_py(slf.py(), async move {
let schema = reader.output_schema(selection).await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py))
#[allow(deprecated)]
let py_obj: Py<PyAny> =
Python::with_gil(|py| -> PyResult<Py<PyAny>> { schema.to_pyarrow(py) })?;
Ok(py_obj)
})
}

View File

@@ -29,6 +29,7 @@ use pyo3::types::PyList;
use pyo3::types::{PyDict, PyString};
use pyo3::Bound;
use pyo3::IntoPyObject;
use pyo3::Py;
use pyo3::PyAny;
use pyo3::PyRef;
use pyo3::PyResult;
@@ -453,7 +454,10 @@ impl Query {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py))
#[allow(deprecated)]
let py_obj: Py<PyAny> =
Python::with_gil(|py| -> PyResult<Py<PyAny>> { schema.to_pyarrow(py) })?;
Ok(py_obj)
})
}
@@ -532,7 +536,10 @@ impl TakeQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py))
#[allow(deprecated)]
let py_obj: Py<PyAny> =
Python::with_gil(|py| -> PyResult<Py<PyAny>> { schema.to_pyarrow(py) })?;
Ok(py_obj)
})
}
@@ -627,7 +634,10 @@ impl FTSQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py))
#[allow(deprecated)]
let py_obj: Py<PyAny> =
Python::with_gil(|py| -> PyResult<Py<PyAny>> { schema.to_pyarrow(py) })?;
Ok(py_obj)
})
}
@@ -806,7 +816,10 @@ impl VectorQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py))
#[allow(deprecated)]
let py_obj: Py<PyAny> =
Python::with_gil(|py| -> PyResult<Py<PyAny>> { schema.to_pyarrow(py) })?;
Ok(py_obj)
})
}

View File

@@ -17,11 +17,12 @@ use pyo3::types::PyDict;
/// Internal wrapper around a Python object implementing StorageOptionsProvider
pub struct PyStorageOptionsProvider {
/// The Python object implementing fetch_storage_options()
inner: PyObject,
inner: Py<PyAny>,
}
impl Clone for PyStorageOptionsProvider {
fn clone(&self) -> Self {
#[allow(deprecated)]
Python::with_gil(|py| Self {
inner: self.inner.clone_ref(py),
})
@@ -29,7 +30,8 @@ impl Clone for PyStorageOptionsProvider {
}
impl PyStorageOptionsProvider {
pub fn new(obj: PyObject) -> PyResult<Self> {
pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
#[allow(deprecated)]
Python::with_gil(|py| {
// Verify the object has a fetch_storage_options method
if !obj.bind(py).hasattr("fetch_storage_options")? {
@@ -37,7 +39,9 @@ impl PyStorageOptionsProvider {
"StorageOptionsProvider must implement fetch_storage_options() method",
));
}
Ok(Self { inner: obj })
Ok(Self {
inner: obj.clone_ref(py),
})
})
}
}
@@ -60,6 +64,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
let py_provider = self.py_provider.clone();
tokio::task::spawn_blocking(move || {
#[allow(deprecated)]
Python::with_gil(|py| {
// Call the Python fetch_storage_options method
let result = py_provider
@@ -119,6 +124,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
}
fn provider_id(&self) -> String {
#[allow(deprecated)]
Python::with_gil(|py| {
// Call provider_id() method on the Python object
let obj = self.py_provider.inner.bind(py);
@@ -143,7 +149,7 @@ impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
/// This is the main entry point for converting Python StorageOptionsProvider objects
/// to Rust trait objects that can be used by the Lance ecosystem.
pub fn py_object_to_storage_options_provider(
py_obj: PyObject,
py_obj: Py<PyAny>,
) -> PyResult<Arc<dyn StorageOptionsProvider>> {
let py_provider = PyStorageOptionsProvider::new(py_obj)?;
Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider)))

View File

@@ -21,7 +21,7 @@ use pyo3::{
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
pyclass, pymethods,
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -287,7 +287,10 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let schema = inner.schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py))
#[allow(deprecated)]
let py_obj: Py<PyAny> =
Python::with_gil(|py| -> PyResult<Py<PyAny>> { schema.to_pyarrow(py) })?;
Ok(py_obj)
})
}
@@ -437,6 +440,7 @@ impl Table {
future_into_py(self_.py(), async move {
let stats = inner.index_stats(&index_name).await.infer_error()?;
if let Some(stats) = stats {
#[allow(deprecated)]
Python::with_gil(|py| {
let dict = PyDict::new(py);
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
@@ -467,6 +471,7 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let stats = inner.stats().await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| {
let dict = PyDict::new(py);
dict.set_item("total_bytes", stats.total_bytes)?;
@@ -521,6 +526,7 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let versions = inner.list_versions().await.infer_error()?;
#[allow(deprecated)]
let versions_as_dict = Python::with_gil(|py| {
versions
.iter()
@@ -872,6 +878,7 @@ impl Tags {
let tags = inner.tags().await.infer_error()?;
let res = tags.list().await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| {
let py_dict = PyDict::new(py);
for (key, contents) in res {

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.24.0"
version = "0.23.1"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -9,6 +9,7 @@ use std::sync::Arc;
use arrow_array::RecordBatchReader;
use arrow_schema::{Field, SchemaRef};
use lance::dataset::ReadParams;
use lance::io::ObjectStoreParams;
use lance_namespace::models::{
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
@@ -36,41 +37,20 @@ use crate::remote::{
};
use crate::table::{TableDefinition, WriteOptions};
use crate::Table;
use lance::io::ObjectStoreParams;
pub use lance_encoding::version::LanceFileVersion;
#[cfg(feature = "remote")]
use lance_io::object_store::StorageOptions;
use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
fn merge_storage_options(
store_params: &mut ObjectStoreParams,
pairs: impl IntoIterator<Item = (String, String)>,
) {
fn update_storage_options<F>(store_params: &mut ObjectStoreParams, update: F)
where
F: FnOnce(&mut HashMap<String, String>),
{
let mut options = store_params.storage_options().cloned().unwrap_or_default();
for (key, value) in pairs {
options.insert(key, value);
}
let provider = store_params
.storage_options_accessor
.as_ref()
.and_then(|accessor| accessor.provider().cloned());
let accessor = if let Some(provider) = provider {
StorageOptionsAccessor::with_initial_and_provider(options, provider)
} else {
StorageOptionsAccessor::with_static_options(options)
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
}
fn set_storage_options_provider(
store_params: &mut ObjectStoreParams,
provider: Arc<dyn StorageOptionsProvider>,
) {
let accessor = match store_params.storage_options().cloned() {
Some(options) => StorageOptionsAccessor::with_initial_and_provider(options, provider),
None => StorageOptionsAccessor::with_provider(provider),
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
update(&mut options);
store_params.storage_options_accessor = Some(Arc::new(
StorageOptionsAccessor::with_static_options(options),
));
}
/// A builder for configuring a [`Connection::table_names`] operation
@@ -285,7 +265,9 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
.get_or_insert(Default::default())
.store_params
.get_or_insert(Default::default());
merge_storage_options(store_params, [(key.into(), value.into())]);
update_storage_options(store_params, |options| {
options.insert(key.into(), value.into());
});
self
}
@@ -306,10 +288,11 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
.get_or_insert(Default::default())
.store_params
.get_or_insert(Default::default());
let updates = pairs
.into_iter()
.map(|(key, value)| (key.into(), value.into()));
merge_storage_options(store_params, updates);
update_storage_options(store_params, |options| {
for (key, value) in pairs {
options.insert(key.into(), value.into());
}
});
self
}
@@ -353,15 +336,16 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
.get_or_insert_with(Default::default)
.store_params
.get_or_insert_with(Default::default);
let value = if use_v2_manifest_paths {
"true".to_string()
} else {
"false".to_string()
};
merge_storage_options(
store_params,
[(OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(), value)],
);
update_storage_options(store_params, |options| {
options.insert(
OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(),
if use_v2_manifest_paths {
"true".to_string()
} else {
"false".to_string()
},
);
});
self
}
@@ -377,13 +361,12 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
.get_or_insert_with(Default::default)
.store_params
.get_or_insert_with(Default::default);
merge_storage_options(
store_params,
[(
update_storage_options(store_params, |options| {
options.insert(
OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
data_storage_version.to_string(),
)],
);
);
});
self
}
@@ -414,7 +397,10 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
.get_or_insert(Default::default())
.store_params
.get_or_insert(Default::default());
set_storage_options_provider(store_params, provider);
let initial = store_params.storage_options().cloned().unwrap_or_default();
store_params.storage_options_accessor = Some(Arc::new(
StorageOptionsAccessor::with_initial_and_provider(initial, provider),
));
self
}
}
@@ -483,7 +469,9 @@ impl OpenTableBuilder {
.get_or_insert(Default::default())
.store_options
.get_or_insert(Default::default());
merge_storage_options(store_params, [(key.into(), value.into())]);
update_storage_options(store_params, |options| {
options.insert(key.into(), value.into());
});
self
}
@@ -503,10 +491,11 @@ impl OpenTableBuilder {
.get_or_insert(Default::default())
.store_options
.get_or_insert(Default::default());
let updates = pairs
.into_iter()
.map(|(key, value)| (key.into(), value.into()));
merge_storage_options(store_params, updates);
update_storage_options(store_params, |options| {
for (key, value) in pairs {
options.insert(key.into(), value.into());
}
});
self
}
@@ -536,7 +525,10 @@ impl OpenTableBuilder {
.get_or_insert(Default::default())
.store_options
.get_or_insert(Default::default());
set_storage_options_provider(store_params, provider);
let initial = store_params.storage_options().cloned().unwrap_or_default();
store_params.storage_options_accessor = Some(Arc::new(
StorageOptionsAccessor::with_initial_and_provider(initial, provider),
));
self
}
@@ -892,10 +884,6 @@ pub struct ConnectBuilder {
embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
}
#[cfg(feature = "remote")]
const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 1] =
[("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name")];
impl ConnectBuilder {
/// Create a new [`ConnectOptions`] with the given database URI.
pub fn new(uri: &str) -> Self {
@@ -1079,27 +1067,11 @@ impl ConnectBuilder {
self
}
#[cfg(feature = "remote")]
fn apply_env_defaults(
env_var_to_remote_storage_option: &[(&str, &str)],
options: &mut HashMap<String, String>,
) {
for (env_key, opt_key) in env_var_to_remote_storage_option {
if let Ok(env_value) = std::env::var(env_key) {
if !options.contains_key(*opt_key) {
options.insert((*opt_key).to_string(), env_value);
}
}
}
}
#[cfg(feature = "remote")]
fn execute_remote(self) -> Result<Connection> {
use crate::remote::db::RemoteDatabaseOptions;
let mut merged_options = self.request.options.clone();
Self::apply_env_defaults(&ENV_VARS_TO_STORAGE_OPTS, &mut merged_options);
let options = RemoteDatabaseOptions::parse_from_map(&merged_options)?;
let options = RemoteDatabaseOptions::parse_from_map(&self.request.options)?;
let region = options.region.ok_or_else(|| Error::InvalidInput {
message: "A region is required when connecting to LanceDb Cloud".to_string(),
@@ -1321,6 +1293,8 @@ mod test_utils {
#[cfg(test)]
mod tests {
use std::fs::create_dir_all;
use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
use crate::query::QueryBase;
use crate::query::{ExecutableQuery, QueryExecutionOptions};
@@ -1344,23 +1318,6 @@ mod tests {
assert_eq!(tc.connection.uri(), tc.uri);
}
#[cfg(feature = "remote")]
#[test]
fn test_apply_env_defaults() {
let env_key = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_KEY";
let env_val = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_VAL";
let opts_key = "test_apply_env_defaults_environment_variable_opts_key";
std::env::set_var(env_key, env_val);
let mut options = HashMap::new();
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
assert_eq!(Some(&env_val.to_string()), options.get(opts_key));
options.insert(opts_key.to_string(), "EXPLICIT-VALUE".to_string());
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
assert_eq!(Some(&"EXPLICIT-VALUE".to_string()), options.get(opts_key));
}
#[cfg(not(windows))]
#[tokio::test]
async fn test_connect_relative() {
@@ -1585,27 +1542,18 @@ mod tests {
#[tokio::test]
async fn drop_table() {
let tc = new_test_connection().await.unwrap();
let db = tc.connection;
let tmp_dir = tempdir().unwrap();
if tc.is_remote {
// All the typical endpoints such as s3:///, file-object-store:///, etc. treat drop_table
// as idempotent.
assert!(db.drop_table("invalid_table", &[]).await.is_ok());
} else {
// The behavior of drop_table when using a file:/// endpoint differs from all other
// object providers, in that it returns an error when deleting a non-existent table.
assert!(matches!(
db.drop_table("invalid_table", &[]).await,
Err(crate::Error::TableNotFound { .. }),
));
}
let uri = tmp_dir.path().to_str().unwrap();
let db = connect(uri).execute().await.unwrap();
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
db.create_empty_table("table1", schema.clone())
.execute()
.await
.unwrap();
// drop non-exist table
assert!(matches!(
db.drop_table("invalid_table", &[]).await,
Err(crate::Error::TableNotFound { .. }),
));
create_dir_all(tmp_dir.path().join("table1.lance")).unwrap();
db.drop_table("table1", &[]).await.unwrap();
let tables = db.table_names().execute().await.unwrap();

View File

@@ -356,13 +356,11 @@ impl ListingDatabase {
.clone()
.unwrap_or_else(|| Arc::new(lance::session::Session::default()));
let os_params = ObjectStoreParams {
storage_options_accessor: if options.storage_options.is_empty() {
None
} else {
Some(Arc::new(StorageOptionsAccessor::with_static_options(
storage_options_accessor: Some(Arc::new(
StorageOptionsAccessor::with_static_options(
options.storage_options.clone(),
)))
},
),
)),
..Default::default()
};
let (object_store, base_path) = ObjectStore::from_uri_and_params(
@@ -498,13 +496,9 @@ impl ListingDatabase {
async fn drop_tables(&self, names: Vec<String>) -> Result<()> {
let object_store_params = ObjectStoreParams {
storage_options_accessor: if self.storage_options.is_empty() {
None
} else {
Some(Arc::new(StorageOptionsAccessor::with_static_options(
self.storage_options.clone(),
)))
},
storage_options_accessor: Some(Arc::new(StorageOptionsAccessor::with_static_options(
self.storage_options.clone(),
))),
..Default::default()
};
let mut uri = self.uri.clone();
@@ -604,20 +598,26 @@ impl ListingDatabase {
// will cause a new connection to be created, and that connection will
// be dropped from the cache when python GCs the table object, which
// confounds reuse across tables.
if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
if !self.storage_options.is_empty() {
let store_params = write_params
.store_params
.get_or_insert_with(Default::default);
let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
if !self.storage_options.is_empty() {
self.inherit_storage_options(&mut storage_options);
}
let accessor = if let Some(ref provider) = self.storage_options_provider {
StorageOptionsAccessor::with_initial_and_provider(storage_options, provider.clone())
} else {
StorageOptionsAccessor::with_static_options(storage_options)
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
self.inherit_storage_options(&mut storage_options);
store_params.storage_options_accessor = Some(Arc::new(
StorageOptionsAccessor::with_static_options(storage_options),
));
}
// Set storage options provider if available
if let Some(provider) = self.storage_options_provider.clone() {
let store_params = write_params
.store_params
.get_or_insert_with(Default::default);
let initial = store_params.storage_options().cloned().unwrap_or_default();
store_params.storage_options_accessor = Some(Arc::new(
StorageOptionsAccessor::with_initial_and_provider(initial, provider),
));
}
write_params.data_storage_version = self
@@ -903,13 +903,9 @@ impl Database for ListingDatabase {
validate_table_name(&request.target_table_name)?;
let storage_params = ObjectStoreParams {
storage_options_accessor: if self.storage_options.is_empty() {
None
} else {
Some(Arc::new(StorageOptionsAccessor::with_static_options(
self.storage_options.clone(),
)))
},
storage_options_accessor: Some(Arc::new(StorageOptionsAccessor::with_static_options(
self.storage_options.clone(),
))),
..Default::default()
};
let read_params = ReadParams {
@@ -973,28 +969,30 @@ impl Database for ListingDatabase {
// will cause a new connection to be created, and that connection will
// be dropped from the cache when python GCs the table object, which
// confounds reuse across tables.
if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
if !self.storage_options.is_empty() {
let store_params = request
.lance_read_params
.get_or_insert_with(Default::default)
.store_options
.get_or_insert_with(Default::default);
let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
if !self.storage_options.is_empty() {
self.inherit_storage_options(&mut storage_options);
}
// Preserve request-level provider if no connection-level provider exists
let request_provider = store_params
.storage_options_accessor
.as_ref()
.and_then(|a| a.provider().cloned());
let provider = self.storage_options_provider.clone().or(request_provider);
let accessor = if let Some(provider) = provider {
StorageOptionsAccessor::with_initial_and_provider(storage_options, provider)
} else {
StorageOptionsAccessor::with_static_options(storage_options)
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
self.inherit_storage_options(&mut storage_options);
store_params.storage_options_accessor = Some(Arc::new(
StorageOptionsAccessor::with_static_options(storage_options),
));
}
// Set storage options provider if available
if let Some(provider) = self.storage_options_provider.clone() {
let store_params = request
.lance_read_params
.get_or_insert_with(Default::default)
.store_options
.get_or_insert_with(Default::default);
let initial = store_params.storage_options().cloned().unwrap_or_default();
store_params.storage_options_accessor = Some(Arc::new(
StorageOptionsAccessor::with_initial_and_provider(initial, provider),
));
}
// Some ReadParams are exposed in the OpenTableBuilder, but we also

View File

@@ -9,15 +9,14 @@ use std::sync::Arc;
use async_trait::async_trait;
use lance_namespace::{
models::{
CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
DeclareTableRequest, DescribeNamespaceRequest, DescribeNamespaceResponse,
DescribeTableRequest, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest,
ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
CreateNamespaceRequest, CreateNamespaceResponse, DeclareTableRequest,
DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest,
DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest,
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
},
LanceNamespace,
};
use lance_namespace_impls::ConnectBuilder;
use log::warn;
use crate::database::ReadConsistency;
use crate::error::{Error, Result};
@@ -155,6 +154,7 @@ impl Database for LanceNamespaceDatabase {
table_id.push(request.name.clone());
let describe_request = DescribeTableRequest {
id: Some(table_id.clone()),
version: None,
..Default::default()
};
@@ -205,53 +205,26 @@ impl Database for LanceNamespaceDatabase {
let mut table_id = request.namespace.clone();
table_id.push(request.name.clone());
// Try declare_table first, falling back to create_empty_table for backwards
// compatibility with older namespace clients that don't support declare_table
let declare_request = DeclareTableRequest {
let create_empty_request = DeclareTableRequest {
id: Some(table_id.clone()),
location: None,
vend_credentials: None,
..Default::default()
};
let location = match self.namespace.declare_table(declare_request).await {
Ok(response) => response.location.ok_or_else(|| Error::Runtime {
message: "Table location is missing from declare_table response".to_string(),
})?,
Err(e) => {
// Check if the error is "not supported" and try create_empty_table as fallback
let err_str = e.to_string().to_lowercase();
if err_str.contains("not supported") || err_str.contains("not implemented") {
warn!(
"declare_table is not supported by the namespace client, \
falling back to deprecated create_empty_table. \
create_empty_table is deprecated and will be removed in Lance 3.0.0. \
Please upgrade your namespace client to support declare_table."
);
#[allow(deprecated)]
let create_empty_request = CreateEmptyTableRequest {
id: Some(table_id.clone()),
..Default::default()
};
let create_empty_response = self
.namespace
.declare_table(create_empty_request)
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to declare table: {}", e),
})?;
#[allow(deprecated)]
let create_response = self
.namespace
.create_empty_table(create_empty_request)
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to create empty table: {}", e),
})?;
create_response.location.ok_or_else(|| Error::Runtime {
message: "Table location is missing from create_empty_table response"
.to_string(),
})?
} else {
return Err(Error::Runtime {
message: format!("Failed to declare table: {}", e),
});
}
}
};
let location = create_empty_response
.location
.ok_or_else(|| Error::Runtime {
message: "Table location is missing from create_empty_table response".to_string(),
})?;
let native_table = NativeTable::create_from_namespace(
self.namespace.clone(),
@@ -466,6 +439,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -526,6 +501,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -589,6 +566,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -672,6 +651,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -727,6 +708,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -807,6 +790,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -840,6 +825,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await

View File

@@ -171,7 +171,7 @@ impl Shuffler {
// This is kind of an annoying limitation but if we allow runt clumps from batches then
// clumps will get unaligned and we will mess up the clumps when we do the in-memory
// shuffle step. If this is a problem we can probably figure out a better way to do this.
if !is_last && !(batch.num_rows() as u64).is_multiple_of(clump_size) {
if !is_last && batch.num_rows() as u64 % clump_size != 0 {
return Err(Error::Runtime {
message: format!(
"Expected batch size ({}) to be divisible by clump size ({})",

View File

@@ -1,9 +1,12 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::sync::{
atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
Arc,
use std::{
iter,
sync::{
atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
Arc,
},
};
use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array};
@@ -155,7 +158,7 @@ impl Splitter {
remaining_in_split
};
split_ids.extend(std::iter::repeat_n(split_id as u64, rows_to_add as usize));
split_ids.extend(iter::repeat(split_id as u64).take(rows_to_add as usize));
if done {
// Quit early if we've run out of splits
break;
@@ -659,7 +662,7 @@ mod tests {
assert_eq!(split_batch.num_rows(), total_split_sizes as usize);
let mut expected = Vec::with_capacity(total_split_sizes as usize);
for (i, size) in expected_split_sizes.iter().enumerate() {
expected.extend(std::iter::repeat_n(i as u64, *size as usize));
expected.extend(iter::repeat(i as u64).take(*size as usize));
}
let expected = Arc::new(UInt64Array::from(expected)) as Arc<dyn Array>;

View File

@@ -297,10 +297,10 @@ impl IvfPqIndexBuilder {
}
pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
if dim.is_multiple_of(16) {
if dim % 16 == 0 {
// Should be more aggressive than this default.
dim / 16
} else if dim.is_multiple_of(8) {
} else if dim % 8 == 0 {
dim / 8
} else {
log::warn!(

View File

@@ -51,19 +51,24 @@
//! - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud object store
//! - `db://dbname` - Lance Cloud
//!
//! You can also use [`ConnectBuilder`] to configure the connection to the database.
//! You can also use [`ConnectOptions`] to configure the connection to the database.
//!
//! ```rust
//! # #[cfg(feature = "aws")]
//! # {
//! use object_store::aws::AwsCredential;
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
//! let db = lancedb::connect("data/sample-lancedb")
//! .storage_options([
//! ("aws_access_key_id", "some_key"),
//! ("aws_secret_access_key", "some_secret"),
//! ])
//! .aws_creds(AwsCredential {
//! key_id: "some_key".to_string(),
//! secret_key: "some_secret".to_string(),
//! token: None,
//! })
//! .execute()
//! .await
//! .unwrap();
//! # });
//! # }
//! ```
//!
//! LanceDB uses [arrow-rs](https://github.com/apache/arrow-rs) to define schema, data types and array itself.

View File

@@ -1718,6 +1718,8 @@ mod tests {
let namespace = vec!["test_ns".to_string()];
conn.create_namespace(CreateNamespaceRequest {
id: Some(namespace.clone()),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -1743,6 +1745,8 @@ mod tests {
let list_response = conn
.list_tables(ListTablesRequest {
id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default()
})
.await
@@ -1754,6 +1758,8 @@ mod tests {
let list_response = namespace_client
.list_tables(ListTablesRequest {
id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default()
})
.await
@@ -1794,6 +1800,8 @@ mod tests {
let namespace = vec!["multi_table_ns".to_string()];
conn.create_namespace(CreateNamespaceRequest {
id: Some(namespace.clone()),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -1819,6 +1827,8 @@ mod tests {
let list_response = conn
.list_tables(ListTablesRequest {
id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default()
})
.await

View File

@@ -1877,13 +1877,10 @@ impl NativeTable {
let store_params = params
.store_params
.get_or_insert_with(ObjectStoreParams::default);
let accessor = match store_params.storage_options().cloned() {
Some(options) => {
StorageOptionsAccessor::with_initial_and_provider(options, storage_options_provider)
}
None => StorageOptionsAccessor::with_provider(storage_options_provider),
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
let initial = store_params.storage_options().cloned().unwrap_or_default();
store_params.storage_options_accessor = Some(Arc::new(
StorageOptionsAccessor::with_initial_and_provider(initial, storage_options_provider),
));
// Patch the params if we have a write store wrapper
let params = match write_store_wrapper.clone() {
@@ -2059,7 +2056,7 @@ impl NativeTable {
return provided;
}
let suggested = suggested_num_sub_vectors(dim);
if num_bits.is_some_and(|num_bits| num_bits == 4) && !suggested.is_multiple_of(2) {
if num_bits.is_some_and(|num_bits| num_bits == 4) && suggested % 2 != 0 {
// num_sub_vectors must be even when 4 bits are used
suggested + 1
} else {
@@ -2349,7 +2346,7 @@ impl NativeTable {
};
// Convert select to columns list
let columns = match &vq.base.select {
let columns: Option<Box<QueryTableRequestColumns>> = match &vq.base.select {
Select::All => None,
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
column_names: Some(cols.clone()),
@@ -2407,6 +2404,7 @@ impl NativeTable {
with_row_id: Some(vq.base.with_row_id),
bypass_vector_index: Some(!vq.use_index),
full_text_query,
version: None,
..Default::default()
})
}
@@ -2425,7 +2423,7 @@ impl NativeTable {
.map(|f| self.filter_to_sql(f))
.transpose()?;
let columns = match &q.select {
let columns: Option<Box<QueryTableRequestColumns>> = match &q.select {
Select::All => None,
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
column_names: Some(cols.clone()),
@@ -2469,10 +2467,18 @@ impl NativeTable {
columns,
prefilter: Some(q.prefilter),
offset: q.offset.map(|o| o as i32),
ef: None,
refine_factor: None,
distance_type: None,
nprobes: None,
vector_column: None, // No vector column for plain queries
with_row_id: Some(q.with_row_id),
bypass_vector_index: Some(true), // No vector index for plain queries
full_text_query,
version: None,
fast_search: None,
lower_bound: None,
upper_bound: None,
..Default::default()
})
}
@@ -3400,6 +3406,7 @@ pub struct FragmentSummaryStats {
#[cfg(test)]
#[allow(deprecated)]
mod tests {
use std::iter;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
@@ -4016,7 +4023,7 @@ mod tests {
schema.clone(),
vec![
Arc::new(Int32Array::from_iter_values(offset..(offset + 10))),
Arc::new(Int32Array::from_iter_values(std::iter::repeat_n(age, 10))),
Arc::new(Int32Array::from_iter_values(iter::repeat(age).take(10))),
],
)],
schema,
@@ -5144,16 +5151,15 @@ mod tests {
let any_query = AnyQuery::VectorQuery(vq);
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
let column_names = ns_request
.columns
.as_ref()
.and_then(|cols| cols.column_names.clone());
assert_eq!(ns_request.k, 10);
assert_eq!(ns_request.offset, Some(5));
assert_eq!(ns_request.filter, Some("id > 0".to_string()));
assert_eq!(
ns_request
.columns
.as_ref()
.and_then(|c| c.column_names.as_ref()),
Some(&vec!["id".to_string()])
);
assert_eq!(column_names, Some(vec!["id".to_string()]));
assert_eq!(ns_request.vector_column, Some("vector".to_string()));
assert_eq!(ns_request.distance_type, Some("l2".to_string()));
assert!(ns_request.vector.single_vector.is_some());
@@ -5190,17 +5196,16 @@ mod tests {
let any_query = AnyQuery::Query(q);
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
let column_names = ns_request
.columns
.as_ref()
.and_then(|cols| cols.column_names.clone());
// Plain queries should pass an empty vector
assert_eq!(ns_request.k, 20);
assert_eq!(ns_request.offset, Some(5));
assert_eq!(ns_request.filter, Some("id > 5".to_string()));
assert_eq!(
ns_request
.columns
.as_ref()
.and_then(|c| c.column_names.as_ref()),
Some(&vec!["id".to_string()])
);
assert_eq!(column_names, Some(vec!["id".to_string()]));
assert_eq!(ns_request.with_row_id, Some(true));
assert_eq!(ns_request.bypass_vector_index, Some(true));
assert!(ns_request.vector_column.is_none()); // No vector column for plain queries

View File

@@ -4,6 +4,7 @@
use std::{
borrow::Cow,
collections::{HashMap, HashSet},
iter::repeat,
sync::Arc,
};
@@ -267,10 +268,9 @@ fn create_some_records() -> Result<impl IntoArrow> {
schema.clone(),
vec![
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
Arc::new(StringArray::from_iter(std::iter::repeat_n(
Some("hello world".to_string()),
TOTAL,
))),
Arc::new(StringArray::from_iter(
repeat(Some("hello world".to_string())).take(TOTAL),
)),
],
)
.unwrap()]