mirror of
https://github.com/lancedb/lancedb.git
synced 2026-06-04 12:50:40 +00:00
Compare commits
11 Commits
feat/table
...
codex/upda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d1d77a8d96 | ||
|
|
aafcca2aab | ||
|
|
415d199c15 | ||
|
|
a16676e05f | ||
|
|
4e44262499 | ||
|
|
632375faf1 | ||
|
|
9969191d0d | ||
|
|
1e7326cd8c | ||
|
|
9483b534af | ||
|
|
ac3411e81e | ||
|
|
6f18eb4cce |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.30.1-beta.0"
|
||||
current_version = "0.30.1-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
11
.github/dependabot.yml
vendored
11
.github/dependabot.yml
vendored
@@ -21,3 +21,14 @@ updates:
|
||||
update-types:
|
||||
- minor
|
||||
- patch
|
||||
|
||||
- package-ecosystem: pip
|
||||
directory: /python
|
||||
schedule:
|
||||
interval: weekly
|
||||
# Only update uv.lock, never widen version requirements in pyproject.toml.
|
||||
versioning-strategy: lockfile-only
|
||||
groups:
|
||||
python-deps:
|
||||
patterns:
|
||||
- "*"
|
||||
|
||||
447
Cargo.lock
generated
447
Cargo.lock
generated
@@ -2,6 +2,15 @@
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "addr2line"
|
||||
version = "0.25.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b"
|
||||
dependencies = [
|
||||
"gimli",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "adler2"
|
||||
version = "2.0.1"
|
||||
@@ -42,6 +51,15 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aligned-vec"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b"
|
||||
dependencies = [
|
||||
"equator",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "alloc-no-stdlib"
|
||||
version = "2.0.4"
|
||||
@@ -57,6 +75,15 @@ dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "alloca"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "allocator-api2"
|
||||
version = "0.2.21"
|
||||
@@ -72,6 +99,12 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anes"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "1.0.0"
|
||||
@@ -1169,6 +1202,21 @@ dependencies = [
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.76"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
|
||||
dependencies = [
|
||||
"addr2line",
|
||||
"cfg-if 1.0.4",
|
||||
"libc",
|
||||
"miniz_oxide",
|
||||
"object",
|
||||
"rustc-demangle",
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "base16ct"
|
||||
version = "0.1.1"
|
||||
@@ -1517,6 +1565,12 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cast"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
||||
|
||||
[[package]]
|
||||
name = "cbc"
|
||||
version = "0.1.2"
|
||||
@@ -1622,6 +1676,33 @@ dependencies = [
|
||||
"phf_codegen 0.11.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ciborium"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
|
||||
dependencies = [
|
||||
"ciborium-io",
|
||||
"ciborium-ll",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ciborium-io"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
|
||||
|
||||
[[package]]
|
||||
name = "ciborium-ll"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
|
||||
dependencies = [
|
||||
"ciborium-io",
|
||||
"half",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cipher"
|
||||
version = "0.4.4"
|
||||
@@ -1859,6 +1940,15 @@ dependencies = [
|
||||
"futures-io",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpp_demangle"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f2bb79cb74d735044c972aae58ed0aaa9a837e85b01106a54c39e42e97f62253"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.17"
|
||||
@@ -1922,6 +2012,42 @@ dependencies = [
|
||||
"cfg-if 1.0.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "criterion"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3"
|
||||
dependencies = [
|
||||
"alloca",
|
||||
"anes",
|
||||
"cast",
|
||||
"ciborium",
|
||||
"clap",
|
||||
"criterion-plot",
|
||||
"itertools 0.13.0",
|
||||
"num-traits",
|
||||
"oorandom",
|
||||
"page_size",
|
||||
"plotters",
|
||||
"rayon",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tinytemplate",
|
||||
"tokio",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "criterion-plot"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea"
|
||||
dependencies = [
|
||||
"cast",
|
||||
"itertools 0.13.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.15"
|
||||
@@ -2805,6 +2931,15 @@ dependencies = [
|
||||
"sqlparser 0.61.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "debugid"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d"
|
||||
dependencies = [
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deepsize"
|
||||
version = "0.2.0"
|
||||
@@ -3113,6 +3248,26 @@ dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equator"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc"
|
||||
dependencies = [
|
||||
"equator-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equator-macro"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.2"
|
||||
@@ -3227,6 +3382,18 @@ version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
|
||||
|
||||
[[package]]
|
||||
name = "findshlibs"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fixedbitset"
|
||||
version = "0.5.7"
|
||||
@@ -3306,8 +3473,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "fsst"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"rand 0.9.4",
|
||||
@@ -3618,6 +3785,12 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
|
||||
|
||||
[[package]]
|
||||
name = "git-version"
|
||||
version = "0.3.9"
|
||||
@@ -4281,6 +4454,24 @@ dependencies = [
|
||||
"web-time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inferno"
|
||||
version = "0.11.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"indexmap 2.14.0",
|
||||
"is-terminal",
|
||||
"itoa",
|
||||
"log",
|
||||
"num-format",
|
||||
"once_cell",
|
||||
"quick-xml 0.26.0",
|
||||
"rgb",
|
||||
"str_stack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inout"
|
||||
version = "0.1.4"
|
||||
@@ -4317,6 +4508,17 @@ version = "2.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2"
|
||||
|
||||
[[package]]
|
||||
name = "is-terminal"
|
||||
version = "0.4.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.2"
|
||||
@@ -4574,8 +4776,8 @@ checksum = "e037a2e1d8d5fdbd49b16a4ea09d5d6401c1f29eca5ff29d03d3824dba16256a"
|
||||
|
||||
[[package]]
|
||||
name = "lance"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"arrow",
|
||||
@@ -4607,6 +4809,7 @@ dependencies = [
|
||||
"datafusion-physical-plan",
|
||||
"deepsize",
|
||||
"either",
|
||||
"fst",
|
||||
"futures",
|
||||
"half",
|
||||
"humantime",
|
||||
@@ -4634,6 +4837,7 @@ dependencies = [
|
||||
"rand 0.9.4",
|
||||
"rayon",
|
||||
"roaring",
|
||||
"rustc-hash",
|
||||
"semver",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -4648,8 +4852,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-arrow"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4670,7 +4874,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "lance-arrow-scalar"
|
||||
version = "58.0.0"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4684,7 +4888,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "lance-arrow-stats"
|
||||
version = "58.0.0"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-schema",
|
||||
@@ -4693,8 +4897,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-bitpacking"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"paste",
|
||||
@@ -4703,8 +4907,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-core"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4739,8 +4943,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datafusion"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4770,8 +4974,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datagen"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4789,8 +4993,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-encoding"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4825,8 +5029,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-file"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4857,8 +5061,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-index"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"arrow",
|
||||
@@ -4924,8 +5128,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-io"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4967,8 +5171,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-linalg"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4984,8 +5188,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
@@ -4997,8 +5201,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace-impls"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-ipc",
|
||||
@@ -5033,9 +5237,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace-reqwest-client"
|
||||
version = "0.7.7"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6369eee4682fb11edf538388b43c61ce288b8302fe89bb40944d7daa7faaae99"
|
||||
checksum = "3eefb02ded2c3d4b6b60669bb74822d9fa628e144fc748c79ee31f13f566e87b"
|
||||
dependencies = [
|
||||
"reqwest 0.12.28",
|
||||
"serde",
|
||||
@@ -5047,23 +5251,25 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-select"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-schema",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"deepsize",
|
||||
"itertools 0.13.0",
|
||||
"lance-core",
|
||||
"roaring",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lance-table"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -5103,20 +5309,22 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-testing"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-schema",
|
||||
"criterion",
|
||||
"lance-arrow",
|
||||
"num-traits",
|
||||
"pprof",
|
||||
"rand 0.9.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lance-tokenizer"
|
||||
version = "7.2.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v7.2.0-beta.3#7c070f760fa8e24c8015cb2afbd22c5e6b7898e8"
|
||||
version = "8.0.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v8.0.0-beta.2#f154b4e84942e5b8028f7f4a7a77b1d963107202"
|
||||
dependencies = [
|
||||
"icu_segmenter",
|
||||
"jieba-rs",
|
||||
@@ -5128,7 +5336,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb"
|
||||
version = "0.30.1-beta.0"
|
||||
version = "0.30.1-beta.1"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
@@ -5211,7 +5419,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-nodejs"
|
||||
version = "0.30.1-beta.0"
|
||||
version = "0.30.1-beta.1"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -5234,7 +5442,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-python"
|
||||
version = "0.33.1-beta.0"
|
||||
version = "0.33.1-beta.1"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
@@ -5838,6 +6046,17 @@ dependencies = [
|
||||
"rawpointer",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.26.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"cfg-if 1.0.4",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nohash-hasher"
|
||||
version = "0.2.0"
|
||||
@@ -5932,6 +6151,16 @@ version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967"
|
||||
|
||||
[[package]]
|
||||
name = "num-format"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3"
|
||||
dependencies = [
|
||||
"arrayvec",
|
||||
"itoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.46"
|
||||
@@ -6112,6 +6341,12 @@ dependencies = [
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "oorandom"
|
||||
version = "11.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
|
||||
|
||||
[[package]]
|
||||
name = "opendal"
|
||||
version = "0.57.0"
|
||||
@@ -6387,6 +6622,16 @@ dependencies = [
|
||||
"sha2 0.10.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "page_size"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking"
|
||||
version = "2.2.1"
|
||||
@@ -6692,6 +6937,34 @@ dependencies = [
|
||||
"array-init-cursor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "plotters"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
"plotters-backend",
|
||||
"plotters-svg",
|
||||
"wasm-bindgen",
|
||||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "plotters-backend"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
|
||||
|
||||
[[package]]
|
||||
name = "plotters-svg"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
|
||||
dependencies = [
|
||||
"plotters-backend",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars"
|
||||
version = "0.39.2"
|
||||
@@ -7073,6 +7346,28 @@ version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
|
||||
|
||||
[[package]]
|
||||
name = "pprof"
|
||||
version = "0.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38a01da47675efa7673b032bf8efd8214f1917d89685e07e395ab125ea42b187"
|
||||
dependencies = [
|
||||
"aligned-vec",
|
||||
"backtrace",
|
||||
"cfg-if 1.0.4",
|
||||
"findshlibs",
|
||||
"inferno",
|
||||
"libc",
|
||||
"log",
|
||||
"nix",
|
||||
"once_cell",
|
||||
"smallvec",
|
||||
"spin 0.10.0",
|
||||
"symbolic-demangle",
|
||||
"tempfile",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.21"
|
||||
@@ -7298,6 +7593,15 @@ dependencies = [
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.38.4"
|
||||
@@ -7958,6 +8262,15 @@ dependencies = [
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rgb"
|
||||
version = "0.8.53"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "47b34b781b31e5d73e9fbc8689c70551fd1ade9a19e3e28cfec8580a79290cc4"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.14"
|
||||
@@ -8083,6 +8396,12 @@ dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "2.1.2"
|
||||
@@ -8817,6 +9136,9 @@ name = "spin"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591"
|
||||
dependencies = [
|
||||
"lock_api",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spki"
|
||||
@@ -8927,6 +9249,12 @@ version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e51f1e89f093f99e7432c491c382b88a6860a5adbe6bf02574bf0a08efff1978"
|
||||
|
||||
[[package]]
|
||||
name = "str_stack"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f446288b699d66d0fd2e30d1cfe7869194312524b3b9252594868ed26ef056a"
|
||||
|
||||
[[package]]
|
||||
name = "streaming-decompression"
|
||||
version = "0.1.2"
|
||||
@@ -9016,6 +9344,29 @@ version = "2.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
||||
|
||||
[[package]]
|
||||
name = "symbolic-common"
|
||||
version = "12.18.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "332615d90111d8eeaf86a84dc9bbe9f65d0d8c5cf11b4caccedc37754eb0dcfd"
|
||||
dependencies = [
|
||||
"debugid",
|
||||
"memmap2 0.9.10",
|
||||
"stable_deref_trait",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "symbolic-demangle"
|
||||
version = "12.18.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "912017718eb4d21930546245af9a3475c9dccf15675a5c215664e76621afc471"
|
||||
dependencies = [
|
||||
"cpp_demangle",
|
||||
"rustc-demangle",
|
||||
"symbolic-common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "symlink"
|
||||
version = "0.1.0"
|
||||
@@ -9305,6 +9656,16 @@ dependencies = [
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinytemplate"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinyvec"
|
||||
version = "1.11.0"
|
||||
|
||||
28
Cargo.toml
28
Cargo.toml
@@ -13,20 +13,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=7.2.0-beta.3", default-features = false, "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=7.2.0-beta.3", default-features = false, "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=7.2.0-beta.3", default-features = false, "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=8.0.0-beta.2", default-features = false, "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=8.0.0-beta.2", "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=8.0.0-beta.2", "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=8.0.0-beta.2", "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=8.0.0-beta.2", default-features = false, "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=8.0.0-beta.2", "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=8.0.0-beta.2", "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=8.0.0-beta.2", "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=8.0.0-beta.2", default-features = false, "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=8.0.0-beta.2", "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=8.0.0-beta.2", "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=8.0.0-beta.2", "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=8.0.0-beta.2", "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=8.0.0-beta.2", "tag" = "v8.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "58.0.0", optional = false }
|
||||
|
||||
26
REVIEW.md
Normal file
26
REVIEW.md
Normal file
@@ -0,0 +1,26 @@
|
||||
# Code review guidelines
|
||||
|
||||
Repo-specific guidance for automated PR reviews.
|
||||
|
||||
## Cross-SDK parity
|
||||
|
||||
LanceDB exposes the same core (`rust/lancedb`) through Python, TypeScript (`nodejs`),
|
||||
and Java bindings. Behavioral drift between SDKs is a recurring problem, so watch for
|
||||
parity gaps when reviewing — but only flag real ones:
|
||||
|
||||
* If the change adds or modifies user-facing API or behavior in the shared core
|
||||
(`rust/lancedb`), check whether each binding that should expose it (`python`,
|
||||
`nodejs`) does. A core change with no corresponding binding update is worth a note.
|
||||
* If the change adds or modifies a public API in one SDK but not the other, open the
|
||||
sibling SDK's corresponding module and state whether an equivalent exists. If not,
|
||||
note it as a possible parity gap and suggest a follow-up issue.
|
||||
* For bug fixes, first read the sibling SDK's analogous code path to check whether the
|
||||
same bug exists there. Only raise parity if it actually does. Do not ask to "port" a
|
||||
fix for a bug that only ever existed in one binding.
|
||||
* Stay silent on internal-only refactors, tests, docs, and changes with no cross-SDK
|
||||
surface.
|
||||
* Parity expectations apply to the Python and TypeScript (`nodejs`) SDKs. Java currently
|
||||
implements only the remote table, not the local/embedded backend, so it is expected to
|
||||
be partial — do not flag Java for missing local-only functionality.
|
||||
* Keep parity feedback to a short, clearly-labeled note (e.g. "Possible SDK parity
|
||||
gap: …"). It is advisory, not a merge blocker.
|
||||
@@ -145,6 +145,10 @@ allow = [
|
||||
# CDLA-Permissive-2.0 is a permissive data license used by `webpki-roots`
|
||||
# for the Mozilla CA root bundle. Data-only, distribution-compatible.
|
||||
"CDLA-Permissive-2.0",
|
||||
# CDDL-1.0 (copyleft) is pulled in only as a dev/profiling dependency via
|
||||
# `inferno` -> `pprof` -> `lance-testing`; it is not distributed.
|
||||
# PENDING LEGAL REVIEW before merge.
|
||||
"CDDL-1.0",
|
||||
]
|
||||
confidence-threshold = 0.8
|
||||
# Crates whose license cannot be determined from Cargo metadata but whose
|
||||
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.30.1-beta.0</version>
|
||||
<version>0.30.1-beta.1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.30.1-beta.0</version>
|
||||
<version>0.30.1-beta.1</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.30.1-beta.0</version>
|
||||
<version>0.30.1-beta.1</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>7.2.0-beta.1</lance-core.version>
|
||||
<lance-core.version>8.0.0-beta.2</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.30.1-beta.0"
|
||||
version = "0.30.1-beta.1"
|
||||
publish = false
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.30.1-beta.0",
|
||||
"version": "0.30.1-beta.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.30.1-beta.0",
|
||||
"version": "0.30.1-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.30.1-beta.0",
|
||||
"version": "0.30.1-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.30.1-beta.0",
|
||||
"version": "0.30.1-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.30.1-beta.0",
|
||||
"version": "0.30.1-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.30.1-beta.0",
|
||||
"version": "0.30.1-beta.1",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.30.1-beta.0",
|
||||
"version": "0.30.1-beta.1",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.30.1-beta.0",
|
||||
"version": "0.30.1-beta.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.30.1-beta.0",
|
||||
"version": "0.30.1-beta.1",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.30.1-beta.0",
|
||||
"version": "0.30.1-beta.1",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.33.1-beta.0"
|
||||
current_version = "0.33.1-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.33.1-beta.0"
|
||||
version = "0.33.1-beta.1"
|
||||
publish = false
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
|
||||
@@ -41,6 +41,14 @@ from .rerankers.rrf import RRFReranker
|
||||
from .rerankers.util import check_reranker_result
|
||||
from .util import flatten_columns
|
||||
|
||||
BlobMode = Literal["lazy", "bytes", "descriptions"]
|
||||
|
||||
_BLOB_MODE_TO_HANDLING = {
|
||||
"lazy": "blobs_descriptions",
|
||||
"bytes": "all_binary",
|
||||
"descriptions": "blobs_descriptions",
|
||||
}
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import sys
|
||||
|
||||
@@ -55,7 +63,7 @@ if TYPE_CHECKING:
|
||||
from ._lancedb import VectorQuery as LanceVectorQuery
|
||||
from .common import VEC
|
||||
from .pydantic import LanceModel
|
||||
from .table import Table
|
||||
from .table import AsyncTable, Table
|
||||
|
||||
if sys.version_info >= (3, 11):
|
||||
from typing import Self
|
||||
@@ -65,6 +73,147 @@ if TYPE_CHECKING:
|
||||
T = TypeVar("T", bound="LanceModel")
|
||||
|
||||
|
||||
def _validate_blob_mode(blob_mode: BlobMode) -> None:
|
||||
if blob_mode not in _BLOB_MODE_TO_HANDLING:
|
||||
modes = ", ".join(repr(mode) for mode in _BLOB_MODE_TO_HANDLING)
|
||||
raise ValueError(f"blob_mode must be one of {modes}, got {blob_mode!r}")
|
||||
|
||||
|
||||
def _field_is_blob(field: pa.Field) -> bool:
|
||||
metadata = field.metadata or {}
|
||||
return metadata.get(b"lance-encoding:blob") == b"true" or (
|
||||
metadata.get("lance-encoding:blob") == "true"
|
||||
)
|
||||
|
||||
|
||||
def _schema_has_blob_field(schema: pa.Schema) -> bool:
|
||||
return any(_field_is_blob(field) for field in schema)
|
||||
|
||||
|
||||
def _blob_mode_requires_native_pandas(blob_mode: BlobMode, schema: pa.Schema) -> bool:
|
||||
return blob_mode in ("lazy", "bytes") and _schema_has_blob_field(schema)
|
||||
|
||||
|
||||
def _unsupported_blob_pandas_error(reason: str) -> RuntimeError:
|
||||
return RuntimeError(
|
||||
"blob_mode='lazy' and blob_mode='bytes' require Lance native pandas "
|
||||
f"conversion for queries that return blob columns, but {reason}. "
|
||||
"Use blob_mode='descriptions' or remove blob columns from the projection."
|
||||
)
|
||||
|
||||
|
||||
def _query_is_plain_scan(query: Query) -> bool:
|
||||
return (
|
||||
query.vector is None
|
||||
and query.full_text_query is None
|
||||
and not query.postfilter
|
||||
and not query.order_by
|
||||
)
|
||||
|
||||
|
||||
def _filter_to_sql(filter: Optional[Union[str, Expr]]) -> Optional[str]:
|
||||
if filter is None:
|
||||
return None
|
||||
if isinstance(filter, Expr):
|
||||
return filter.to_sql()
|
||||
return filter
|
||||
|
||||
|
||||
def _projection_to_scanner_kwargs(
|
||||
columns: Optional[
|
||||
Union[
|
||||
List[str], List[Tuple[str, Union[str, Expr]]], Dict[str, Union[str, Expr]]
|
||||
]
|
||||
],
|
||||
) -> Dict[str, Any]:
|
||||
if columns is None:
|
||||
return {}
|
||||
if isinstance(columns, list):
|
||||
if all(isinstance(column, str) for column in columns):
|
||||
return {"columns": columns}
|
||||
if all(isinstance(column, tuple) and len(column) == 2 for column in columns):
|
||||
return {
|
||||
"columns": {
|
||||
name: expr.to_sql() if isinstance(expr, Expr) else expr
|
||||
for name, expr in columns
|
||||
}
|
||||
}
|
||||
# Let Lance raise the detailed projection validation error.
|
||||
return {"columns": columns}
|
||||
|
||||
projection = {}
|
||||
for name, expr in columns.items():
|
||||
if isinstance(expr, Expr):
|
||||
expr = expr.to_sql()
|
||||
projection[name] = expr
|
||||
return {"columns": projection}
|
||||
|
||||
|
||||
def _scanner_kwargs_for_query(query: Query, blob_mode: BlobMode) -> Dict[str, Any]:
|
||||
kwargs = {
|
||||
**_projection_to_scanner_kwargs(query.columns),
|
||||
"filter": _filter_to_sql(query.filter),
|
||||
"limit": query.limit,
|
||||
"offset": query.offset,
|
||||
"with_row_id": query.with_row_id,
|
||||
"fast_search": query.fast_search,
|
||||
"blob_handling": _BLOB_MODE_TO_HANDLING[blob_mode],
|
||||
}
|
||||
return {key: value for key, value in kwargs.items() if value is not None}
|
||||
|
||||
|
||||
def _ensure_lazy_blob_frame(
|
||||
df: "pd.DataFrame", schema: pa.Schema, blob_mode: BlobMode
|
||||
) -> "pd.DataFrame":
|
||||
if blob_mode != "lazy" or not _schema_has_blob_field(schema) or len(df) == 0:
|
||||
return df
|
||||
|
||||
for field in schema:
|
||||
if not _field_is_blob(field) or field.name not in df.columns:
|
||||
continue
|
||||
value = df[field.name].iloc[0]
|
||||
if value is not None and not hasattr(value, "readall"):
|
||||
raise _unsupported_blob_pandas_error(
|
||||
"the Lance scanner did not return lazy blob files"
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
def _scanner_to_pandas(scanner: Any, blob_mode: BlobMode, **kwargs) -> "pd.DataFrame":
|
||||
schema = getattr(scanner, "projected_schema", None)
|
||||
if schema is None:
|
||||
schema = getattr(scanner, "schema", None)
|
||||
if schema is None:
|
||||
schema = getattr(scanner, "dataset_schema", None)
|
||||
if callable(schema):
|
||||
schema = schema()
|
||||
if hasattr(scanner, "to_pandas"):
|
||||
try:
|
||||
df = scanner.to_pandas(blob_mode=blob_mode, **kwargs)
|
||||
except TypeError as err:
|
||||
message = str(err)
|
||||
if "blob_mode" not in message and "unexpected keyword" not in message:
|
||||
raise
|
||||
df = scanner.to_pandas(**kwargs)
|
||||
if schema is not None:
|
||||
return _ensure_lazy_blob_frame(df, schema, blob_mode)
|
||||
return df
|
||||
|
||||
if hasattr(scanner, "to_pyarrow"):
|
||||
reader = scanner.to_pyarrow()
|
||||
tbl = reader.read_all()
|
||||
elif hasattr(scanner, "to_table"):
|
||||
tbl = scanner.to_table()
|
||||
else:
|
||||
reader = scanner.to_reader()
|
||||
tbl = reader.read_all()
|
||||
if blob_mode == "lazy" and _schema_has_blob_field(tbl.schema):
|
||||
raise _unsupported_blob_pandas_error(
|
||||
"the Lance scanner does not expose to_pandas"
|
||||
)
|
||||
return tbl.to_pandas(**kwargs)
|
||||
|
||||
|
||||
# Pydantic validation function for vector queries
|
||||
def ensure_vector_query(
|
||||
val: Any,
|
||||
@@ -718,6 +867,7 @@ class LanceQueryBuilder(ABC):
|
||||
self,
|
||||
flatten: Optional[Union[int, bool]] = None,
|
||||
*,
|
||||
blob_mode: BlobMode = "lazy",
|
||||
timeout: Optional[timedelta] = None,
|
||||
**kwargs,
|
||||
) -> "pd.DataFrame":
|
||||
@@ -737,11 +887,39 @@ class LanceQueryBuilder(ABC):
|
||||
timeout: Optional[timedelta]
|
||||
The maximum time to wait for the query to complete.
|
||||
If None, wait indefinitely.
|
||||
blob_mode: str, default "lazy"
|
||||
Controls how blob columns are returned for plain scan queries.
|
||||
Vector, FTS, hybrid, and other non-native query shapes keep the
|
||||
existing Arrow conversion path and only support blob descriptions.
|
||||
**kwargs
|
||||
Forwarded to pyarrow.Table.to_pandas after query execution and
|
||||
optional flattening.
|
||||
"""
|
||||
_validate_blob_mode(blob_mode)
|
||||
output_schema = getattr(self, "output_schema", None)
|
||||
if output_schema is not None:
|
||||
schema = output_schema()
|
||||
if _blob_mode_requires_native_pandas(blob_mode, schema):
|
||||
native_error = None
|
||||
if flatten is None and timeout is None:
|
||||
try:
|
||||
df = self._plain_scan_to_pandas(blob_mode, **kwargs)
|
||||
if df is not None:
|
||||
return df
|
||||
except Exception as err:
|
||||
native_error = err
|
||||
reason = (
|
||||
"this query shape cannot use Lance native pandas conversion"
|
||||
if native_error is None
|
||||
else str(native_error)
|
||||
)
|
||||
raise _unsupported_blob_pandas_error(reason) from native_error
|
||||
|
||||
tbl = flatten_columns(self.to_arrow(timeout=timeout), flatten)
|
||||
if _blob_mode_requires_native_pandas(blob_mode, tbl.schema):
|
||||
raise _unsupported_blob_pandas_error(
|
||||
"this query shape cannot use Lance native pandas conversion"
|
||||
)
|
||||
return tbl.to_pandas(**kwargs)
|
||||
|
||||
@abstractmethod
|
||||
@@ -1086,6 +1264,19 @@ class LanceQueryBuilder(ABC):
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def _plain_scan_to_pandas(
|
||||
self,
|
||||
blob_mode: BlobMode,
|
||||
**kwargs,
|
||||
) -> Optional["pd.DataFrame"]:
|
||||
query = self.to_query_object()
|
||||
if not _query_is_plain_scan(query):
|
||||
return None
|
||||
|
||||
dataset = self._table.to_lance()
|
||||
scanner = dataset.scanner(**_scanner_kwargs_for_query(query, blob_mode))
|
||||
return _scanner_to_pandas(scanner, blob_mode, **kwargs)
|
||||
|
||||
@abstractmethod
|
||||
def to_query_object(self) -> Query:
|
||||
"""Return a serializable representation of the query
|
||||
@@ -2207,7 +2398,11 @@ class AsyncQueryBase(object):
|
||||
Base class for all async queries (take, scan, vector, fts, hybrid)
|
||||
"""
|
||||
|
||||
def __init__(self, inner: Union[LanceQuery, LanceVectorQuery, LanceTakeQuery]):
|
||||
def __init__(
|
||||
self,
|
||||
inner: Union[LanceQuery, LanceVectorQuery, LanceTakeQuery],
|
||||
table: Optional["AsyncTable"] = None,
|
||||
):
|
||||
"""
|
||||
Construct an AsyncQueryBase
|
||||
|
||||
@@ -2215,6 +2410,7 @@ class AsyncQueryBase(object):
|
||||
[AsyncTable.query][lancedb.table.AsyncTable.query] method to create a query.
|
||||
"""
|
||||
self._inner = inner
|
||||
self._table = table
|
||||
|
||||
def to_query_object(self) -> Query:
|
||||
"""
|
||||
@@ -2357,6 +2553,8 @@ class AsyncQueryBase(object):
|
||||
self,
|
||||
flatten: Optional[Union[int, bool]] = None,
|
||||
timeout: Optional[timedelta] = None,
|
||||
*,
|
||||
blob_mode: BlobMode = "lazy",
|
||||
**kwargs,
|
||||
) -> "pd.DataFrame":
|
||||
"""
|
||||
@@ -2390,13 +2588,55 @@ class AsyncQueryBase(object):
|
||||
The maximum time to wait for the query to complete.
|
||||
If not specified, no timeout is applied. If the query does not
|
||||
complete within the specified time, an error will be raised.
|
||||
blob_mode: str, default "lazy"
|
||||
Controls how blob columns are returned for plain scan queries.
|
||||
Vector, FTS, hybrid, and other non-native query shapes keep the
|
||||
existing Arrow conversion path and only support blob descriptions.
|
||||
**kwargs
|
||||
Forwarded to pyarrow.Table.to_pandas after query execution and
|
||||
optional flattening.
|
||||
"""
|
||||
return (
|
||||
flatten_columns(await self.to_arrow(timeout=timeout), flatten)
|
||||
).to_pandas(**kwargs)
|
||||
_validate_blob_mode(blob_mode)
|
||||
if hasattr(self._inner, "output_schema"):
|
||||
schema = await self.output_schema()
|
||||
if _blob_mode_requires_native_pandas(blob_mode, schema):
|
||||
native_error = None
|
||||
if flatten is None and timeout is None:
|
||||
try:
|
||||
df = await self._plain_scan_to_pandas(blob_mode, **kwargs)
|
||||
if df is not None:
|
||||
return df
|
||||
except Exception as err:
|
||||
native_error = err
|
||||
reason = (
|
||||
"this query shape cannot use Lance native pandas conversion"
|
||||
if native_error is None
|
||||
else str(native_error)
|
||||
)
|
||||
raise _unsupported_blob_pandas_error(reason) from native_error
|
||||
|
||||
tbl = flatten_columns(await self.to_arrow(timeout=timeout), flatten)
|
||||
if _blob_mode_requires_native_pandas(blob_mode, tbl.schema):
|
||||
raise _unsupported_blob_pandas_error(
|
||||
"this query shape cannot use Lance native pandas conversion"
|
||||
)
|
||||
return tbl.to_pandas(**kwargs)
|
||||
|
||||
async def _plain_scan_to_pandas(
|
||||
self,
|
||||
blob_mode: BlobMode,
|
||||
**kwargs,
|
||||
) -> Optional["pd.DataFrame"]:
|
||||
if self._table is None:
|
||||
return None
|
||||
|
||||
query = self.to_query_object()
|
||||
if not _query_is_plain_scan(query):
|
||||
return None
|
||||
|
||||
dataset = await self._table._to_lance()
|
||||
scanner = dataset.scanner(**_scanner_kwargs_for_query(query, blob_mode))
|
||||
return _scanner_to_pandas(scanner, blob_mode, **kwargs)
|
||||
|
||||
async def to_polars(
|
||||
self,
|
||||
@@ -2503,14 +2743,18 @@ class AsyncStandardQuery(AsyncQueryBase):
|
||||
Base class for "standard" async queries (all but take currently)
|
||||
"""
|
||||
|
||||
def __init__(self, inner: Union[LanceQuery, LanceVectorQuery]):
|
||||
def __init__(
|
||||
self,
|
||||
inner: Union[LanceQuery, LanceVectorQuery],
|
||||
table: Optional["AsyncTable"] = None,
|
||||
):
|
||||
"""
|
||||
Construct an AsyncStandardQuery
|
||||
|
||||
This method is not intended to be called directly. Instead, use the
|
||||
[AsyncTable.query][lancedb.table.AsyncTable.query] method to create a query.
|
||||
"""
|
||||
super().__init__(inner)
|
||||
super().__init__(inner, table)
|
||||
|
||||
def where(self, predicate: Union[str, Expr]) -> Self:
|
||||
"""
|
||||
@@ -2616,14 +2860,14 @@ class AsyncStandardQuery(AsyncQueryBase):
|
||||
|
||||
|
||||
class AsyncQuery(AsyncStandardQuery):
|
||||
def __init__(self, inner: LanceQuery):
|
||||
def __init__(self, inner: LanceQuery, table: Optional["AsyncTable"] = None):
|
||||
"""
|
||||
Construct an AsyncQuery
|
||||
|
||||
This method is not intended to be called directly. Instead, use the
|
||||
[AsyncTable.query][lancedb.table.AsyncTable.query] method to create a query.
|
||||
"""
|
||||
super().__init__(inner)
|
||||
super().__init__(inner, table)
|
||||
self._inner = inner
|
||||
|
||||
@classmethod
|
||||
@@ -2707,10 +2951,11 @@ class AsyncQuery(AsyncStandardQuery):
|
||||
new_self = self._inner.nearest_to(query_vectors[0])
|
||||
for v in query_vectors[1:]:
|
||||
new_self.add_query_vector(v)
|
||||
return AsyncVectorQuery(new_self)
|
||||
return AsyncVectorQuery(new_self, self._table)
|
||||
else:
|
||||
return AsyncVectorQuery(
|
||||
self._inner.nearest_to(AsyncQuery._query_vec_to_array(query_vector))
|
||||
self._inner.nearest_to(AsyncQuery._query_vec_to_array(query_vector)),
|
||||
self._table,
|
||||
)
|
||||
|
||||
def nearest_to_text(
|
||||
@@ -2743,17 +2988,18 @@ class AsyncQuery(AsyncStandardQuery):
|
||||
|
||||
if isinstance(query, str):
|
||||
return AsyncFTSQuery(
|
||||
self._inner.nearest_to_text({"query": query, "columns": columns})
|
||||
self._inner.nearest_to_text({"query": query, "columns": columns}),
|
||||
self._table,
|
||||
)
|
||||
# FullTextQuery object
|
||||
return AsyncFTSQuery(self._inner.nearest_to_text({"query": query}))
|
||||
return AsyncFTSQuery(self._inner.nearest_to_text({"query": query}), self._table)
|
||||
|
||||
|
||||
class AsyncFTSQuery(AsyncStandardQuery):
|
||||
"""A query for full text search for LanceDB."""
|
||||
|
||||
def __init__(self, inner: LanceFTSQuery):
|
||||
super().__init__(inner)
|
||||
def __init__(self, inner: LanceFTSQuery, table: Optional["AsyncTable"] = None):
|
||||
super().__init__(inner, table)
|
||||
self._inner = inner
|
||||
self._reranker = None
|
||||
|
||||
@@ -2835,10 +3081,11 @@ class AsyncFTSQuery(AsyncStandardQuery):
|
||||
new_self = self._inner.nearest_to(query_vectors[0])
|
||||
for v in query_vectors[1:]:
|
||||
new_self.add_query_vector(v)
|
||||
return AsyncHybridQuery(new_self)
|
||||
return AsyncHybridQuery(new_self, self._table)
|
||||
else:
|
||||
return AsyncHybridQuery(
|
||||
self._inner.nearest_to(AsyncQuery._query_vec_to_array(query_vector))
|
||||
self._inner.nearest_to(AsyncQuery._query_vec_to_array(query_vector)),
|
||||
self._table,
|
||||
)
|
||||
|
||||
async def to_batches(
|
||||
@@ -3029,7 +3276,7 @@ class AsyncVectorQueryBase:
|
||||
|
||||
|
||||
class AsyncVectorQuery(AsyncStandardQuery, AsyncVectorQueryBase):
|
||||
def __init__(self, inner: LanceVectorQuery):
|
||||
def __init__(self, inner: LanceVectorQuery, table: Optional["AsyncTable"] = None):
|
||||
"""
|
||||
Construct an AsyncVectorQuery
|
||||
|
||||
@@ -3039,7 +3286,7 @@ class AsyncVectorQuery(AsyncStandardQuery, AsyncVectorQueryBase):
|
||||
a vector query. Or you can use
|
||||
[AsyncTable.vector_search][lancedb.table.AsyncTable.vector_search]
|
||||
"""
|
||||
super().__init__(inner)
|
||||
super().__init__(inner, table)
|
||||
self._inner = inner
|
||||
self._reranker = None
|
||||
self._query_string = None
|
||||
@@ -3093,10 +3340,13 @@ class AsyncVectorQuery(AsyncStandardQuery, AsyncVectorQueryBase):
|
||||
|
||||
if isinstance(query, str):
|
||||
return AsyncHybridQuery(
|
||||
self._inner.nearest_to_text({"query": query, "columns": columns})
|
||||
self._inner.nearest_to_text({"query": query, "columns": columns}),
|
||||
self._table,
|
||||
)
|
||||
# FullTextQuery object
|
||||
return AsyncHybridQuery(self._inner.nearest_to_text({"query": query}))
|
||||
return AsyncHybridQuery(
|
||||
self._inner.nearest_to_text({"query": query}), self._table
|
||||
)
|
||||
|
||||
async def to_batches(
|
||||
self,
|
||||
@@ -3123,8 +3373,8 @@ class AsyncHybridQuery(AsyncStandardQuery, AsyncVectorQueryBase):
|
||||
in the `rerank` method to convert the scores to ranks and then normalize them.
|
||||
"""
|
||||
|
||||
def __init__(self, inner: LanceHybridQuery):
|
||||
super().__init__(inner)
|
||||
def __init__(self, inner: LanceHybridQuery, table: Optional["AsyncTable"] = None):
|
||||
super().__init__(inner, table)
|
||||
self._inner = inner
|
||||
self._norm = "score"
|
||||
self._reranker = RRFReranker()
|
||||
@@ -3165,8 +3415,8 @@ class AsyncHybridQuery(AsyncStandardQuery, AsyncVectorQueryBase):
|
||||
max_batch_length: Optional[int] = None,
|
||||
timeout: Optional[timedelta] = None,
|
||||
) -> AsyncRecordBatchReader:
|
||||
fts_query = AsyncFTSQuery(self._inner.to_fts_query())
|
||||
vec_query = AsyncVectorQuery(self._inner.to_vector_query())
|
||||
fts_query = AsyncFTSQuery(self._inner.to_fts_query(), self._table)
|
||||
vec_query = AsyncVectorQuery(self._inner.to_vector_query(), self._table)
|
||||
|
||||
# save the row ID choice that was made on the query builder and force it
|
||||
# to actually fetch the row ids because we need this for reranking
|
||||
@@ -3266,8 +3516,15 @@ class AsyncTakeQuery(AsyncQueryBase):
|
||||
Builder for parameterizing and executing take queries.
|
||||
"""
|
||||
|
||||
def __init__(self, inner: LanceTakeQuery):
|
||||
super().__init__(inner)
|
||||
def __init__(self, inner: LanceTakeQuery, table: Optional["AsyncTable"] = None):
|
||||
super().__init__(inner, table)
|
||||
|
||||
async def _plain_scan_to_pandas(
|
||||
self,
|
||||
blob_mode: BlobMode,
|
||||
**kwargs,
|
||||
) -> Optional["pd.DataFrame"]:
|
||||
return None
|
||||
|
||||
|
||||
class BaseQueryBuilder(object):
|
||||
@@ -3400,6 +3657,8 @@ class BaseQueryBuilder(object):
|
||||
self,
|
||||
flatten: Optional[Union[int, bool]] = None,
|
||||
timeout: Optional[timedelta] = None,
|
||||
*,
|
||||
blob_mode: BlobMode = "lazy",
|
||||
**kwargs,
|
||||
) -> "pd.DataFrame":
|
||||
"""
|
||||
@@ -3433,11 +3692,15 @@ class BaseQueryBuilder(object):
|
||||
The maximum time to wait for the query to complete.
|
||||
If not specified, no timeout is applied. If the query does not
|
||||
complete within the specified time, an error will be raised.
|
||||
blob_mode: str, default "lazy"
|
||||
Controls how blob columns are returned for plain scan queries.
|
||||
**kwargs
|
||||
Forwarded to pyarrow.Table.to_pandas after query execution and
|
||||
optional flattening.
|
||||
"""
|
||||
return LOOP.run(self._inner.to_pandas(flatten, timeout, **kwargs))
|
||||
return LOOP.run(
|
||||
self._inner.to_pandas(flatten, timeout, blob_mode=blob_mode, **kwargs)
|
||||
)
|
||||
|
||||
def to_polars(
|
||||
self,
|
||||
|
||||
@@ -125,6 +125,9 @@ class MRRReranker(Reranker):
|
||||
This cannot reuse rerank_hybrid because MRR semantics require treating
|
||||
each vector result as a separate ranking system.
|
||||
"""
|
||||
if not vector_results:
|
||||
raise ValueError("vector_results must not be empty")
|
||||
|
||||
if not all(isinstance(v, type(vector_results[0])) for v in vector_results):
|
||||
raise ValueError(
|
||||
"All elements in vector_results should be of the same type"
|
||||
|
||||
@@ -82,6 +82,9 @@ class RRFReranker(Reranker):
|
||||
results from multiple vector searches as it doesn't support reranking
|
||||
vector results individually.
|
||||
"""
|
||||
if not vector_results:
|
||||
raise ValueError("vector_results must not be empty")
|
||||
|
||||
# Make sure all elements are of the same type
|
||||
if not all(isinstance(v, type(vector_results[0])) for v in vector_results):
|
||||
raise ValueError(
|
||||
|
||||
@@ -89,6 +89,26 @@ from .index import lang_mapping
|
||||
|
||||
BlobMode = Literal["lazy", "bytes", "descriptions"]
|
||||
|
||||
_VALID_BLOB_MODES = ("lazy", "bytes", "descriptions")
|
||||
|
||||
|
||||
def _validate_blob_mode(blob_mode: BlobMode) -> None:
|
||||
if blob_mode not in _VALID_BLOB_MODES:
|
||||
modes = ", ".join(repr(mode) for mode in _VALID_BLOB_MODES)
|
||||
raise ValueError(f"blob_mode must be one of {modes}, got {blob_mode!r}")
|
||||
|
||||
|
||||
def _field_is_blob(field: pa.Field) -> bool:
|
||||
metadata = field.metadata or {}
|
||||
return metadata.get(b"lance-encoding:blob") == b"true" or (
|
||||
metadata.get("lance-encoding:blob") == "true"
|
||||
)
|
||||
|
||||
|
||||
def _schema_has_blob_field(schema: pa.Schema) -> bool:
|
||||
return any(_field_is_blob(field) for field in schema)
|
||||
|
||||
|
||||
_MODEL_BACKED_TOKENIZER_PREFIXES = ("jieba", "lindera")
|
||||
_MODEL_BACKED_TOKENIZER_ERRORS = (
|
||||
"unknown base tokenizer",
|
||||
@@ -2294,9 +2314,14 @@ class LanceTable(Table):
|
||||
-------
|
||||
pd.DataFrame
|
||||
"""
|
||||
if blob_mode == "lazy" and (
|
||||
self._namespace_client is not None
|
||||
or get_uri_scheme(self._dataset_path) == "memory"
|
||||
_validate_blob_mode(blob_mode)
|
||||
if blob_mode == "descriptions" or not _schema_has_blob_field(self.schema):
|
||||
return self.to_arrow().to_pandas(**kwargs)
|
||||
|
||||
if (
|
||||
blob_mode == "lazy"
|
||||
and self._namespace_client is None
|
||||
and get_uri_scheme(self._dataset_path) == "memory"
|
||||
):
|
||||
return self.to_arrow().to_pandas(**kwargs)
|
||||
|
||||
@@ -4317,7 +4342,7 @@ class AsyncTable:
|
||||
can be executed with methods like [to_arrow][lancedb.query.AsyncQuery.to_arrow],
|
||||
[to_pandas][lancedb.query.AsyncQuery.to_pandas] and more.
|
||||
"""
|
||||
return AsyncQuery(self._inner.query())
|
||||
return AsyncQuery(self._inner.query(), self)
|
||||
|
||||
async def _to_lance(self, **kwargs) -> lance.LanceDataset:
|
||||
try:
|
||||
@@ -4349,7 +4374,13 @@ class AsyncTable:
|
||||
-------
|
||||
pd.DataFrame
|
||||
"""
|
||||
if blob_mode == "lazy":
|
||||
_validate_blob_mode(blob_mode)
|
||||
if blob_mode == "descriptions" or not _schema_has_blob_field(
|
||||
await self.schema()
|
||||
):
|
||||
return (await self.to_arrow()).to_pandas(**kwargs)
|
||||
|
||||
if blob_mode == "lazy" and get_uri_scheme(await self.uri()) == "memory":
|
||||
return (await self.to_arrow()).to_pandas(**kwargs)
|
||||
return (await self._to_lance()).to_pandas(blob_mode=blob_mode, **kwargs)
|
||||
|
||||
@@ -5393,7 +5424,7 @@ class AsyncTable:
|
||||
pa.RecordBatch
|
||||
A record batch containing the rows at the given offsets.
|
||||
"""
|
||||
return AsyncTakeQuery(self._inner.take_offsets(offsets))
|
||||
return AsyncTakeQuery(self._inner.take_offsets(offsets), self)
|
||||
|
||||
def take_row_ids(self, row_ids: list[int]) -> AsyncTakeQuery:
|
||||
"""
|
||||
@@ -5422,7 +5453,7 @@ class AsyncTable:
|
||||
AsyncTakeQuery
|
||||
A query object that can be executed to get the rows.
|
||||
"""
|
||||
return AsyncTakeQuery(self._inner.take_row_ids(row_ids))
|
||||
return AsyncTakeQuery(self._inner.take_row_ids(row_ids), self)
|
||||
|
||||
@property
|
||||
def tags(self) -> AsyncTags:
|
||||
|
||||
@@ -76,6 +76,35 @@ class TestNamespaceConnection:
|
||||
assert len(result) == 0
|
||||
assert list(result.columns) == ["id", "vector", "text"]
|
||||
|
||||
def test_table_to_pandas_blob_lazy_through_namespace(self):
|
||||
"""Namespace-backed tables should use Lance blob-aware pandas conversion."""
|
||||
pytest.importorskip("lance")
|
||||
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||
db.create_namespace(["test_ns"])
|
||||
data = pa.table(
|
||||
{
|
||||
"id": pa.array([1, 2], pa.int64()),
|
||||
"blob": pa.array([b"hello", b"world"], pa.large_binary()),
|
||||
},
|
||||
schema=pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field(
|
||||
"blob",
|
||||
pa.large_binary(),
|
||||
metadata={"lance-encoding:blob": "true"},
|
||||
),
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
table = db.create_table("blob_table", data, namespace_path=["test_ns"])
|
||||
df = table.to_pandas(blob_mode="lazy").sort_values("id")
|
||||
|
||||
blob = df["blob"].iloc[0]
|
||||
assert hasattr(blob, "readall")
|
||||
assert blob.readall() == b"hello"
|
||||
|
||||
def test_open_table_through_namespace(self):
|
||||
"""Test opening an existing table through namespace."""
|
||||
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||
|
||||
@@ -39,6 +39,35 @@ from utils import exception_output
|
||||
from importlib.util import find_spec
|
||||
|
||||
|
||||
def _blob_query_data():
|
||||
return pa.table(
|
||||
{
|
||||
"id": pa.array([1, 2, 3, 4], pa.int64()),
|
||||
"tag": pa.array(["drop", "keep", "keep", "keep"], pa.utf8()),
|
||||
"vector": pa.array(
|
||||
[[1.0, 0.0], [2.0, 0.0], [3.0, 0.0], [4.0, 0.0]],
|
||||
type=pa.list_(pa.float32(), list_size=2),
|
||||
),
|
||||
"blob": pa.array([b"one", b"two", b"three", b"four"], pa.large_binary()),
|
||||
},
|
||||
schema=pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field("tag", pa.utf8()),
|
||||
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
|
||||
pa.field(
|
||||
"blob", pa.large_binary(), metadata={"lance-encoding:blob": "true"}
|
||||
),
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _assert_lazy_blob(value, expected: bytes):
|
||||
assert hasattr(value, "readall")
|
||||
assert value.readall() == expected
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def table(tmpdir_factory) -> lancedb.table.Table:
|
||||
tmp_path = str(tmpdir_factory.mktemp("data"))
|
||||
@@ -181,6 +210,138 @@ async def test_query_to_pandas_kwargs(table, table_async):
|
||||
assert async_df["id"].tolist() == [1, 2]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("blob_mode", ["lazy", "bytes", "descriptions"])
|
||||
def test_plain_scan_query_to_pandas_blob_modes(tmp_db, blob_mode):
|
||||
pytest.importorskip("lance")
|
||||
table = tmp_db.create_table(
|
||||
f"test_query_to_pandas_blob_{blob_mode}", _blob_query_data()
|
||||
)
|
||||
|
||||
df = (
|
||||
table.search()
|
||||
.select(["id", "blob"])
|
||||
.where("id = 1")
|
||||
.to_pandas(blob_mode=blob_mode)
|
||||
)
|
||||
|
||||
assert df["id"].tolist() == [1]
|
||||
if blob_mode == "lazy":
|
||||
_assert_lazy_blob(df["blob"].iloc[0], b"one")
|
||||
elif blob_mode == "bytes":
|
||||
assert df["blob"].tolist() == [b"one"]
|
||||
else:
|
||||
first = df["blob"].iloc[0]
|
||||
assert first != b"one"
|
||||
assert not hasattr(first, "readall")
|
||||
|
||||
|
||||
def test_plain_scan_query_to_pandas_blob_projection(tmp_db):
|
||||
pytest.importorskip("lance")
|
||||
table = tmp_db.create_table(
|
||||
"test_query_to_pandas_blob_projection", _blob_query_data()
|
||||
)
|
||||
|
||||
df = (
|
||||
table.search()
|
||||
.where("id >= 2")
|
||||
.select({"id_alias": "id", "payload": "blob", "double_id": "id * 2"})
|
||||
.limit(2)
|
||||
.offset(1)
|
||||
.to_pandas(blob_mode="bytes")
|
||||
)
|
||||
|
||||
assert df["id_alias"].tolist() == [3, 4]
|
||||
assert df["payload"].tolist() == [b"three", b"four"]
|
||||
assert df["double_id"].tolist() == [6, 8]
|
||||
|
||||
|
||||
def test_plain_scan_query_to_pandas_blob_mode_does_not_collect_arrow(
|
||||
tmp_db, monkeypatch
|
||||
):
|
||||
pytest.importorskip("lance")
|
||||
table = tmp_db.create_table(
|
||||
"test_query_to_pandas_blob_no_arrow_collect", _blob_query_data()
|
||||
)
|
||||
query = table.search().where("id = 1").select(["id", "blob"])
|
||||
|
||||
def fail_to_arrow(*args, **kwargs):
|
||||
raise AssertionError("to_arrow should not be called before native pandas")
|
||||
|
||||
monkeypatch.setattr(query, "to_arrow", fail_to_arrow)
|
||||
|
||||
df = query.to_pandas(blob_mode="bytes")
|
||||
|
||||
assert df["id"].tolist() == [1]
|
||||
assert df["blob"].tolist() == [b"one"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_plain_scan_query_to_pandas_blob_projection(tmp_db_async):
|
||||
pytest.importorskip("lance")
|
||||
table = await tmp_db_async.create_table(
|
||||
"test_async_query_to_pandas_blob_projection", _blob_query_data()
|
||||
)
|
||||
|
||||
lazy_df = await (
|
||||
table.query().where("id = 1").select(["id", "blob"]).to_pandas(blob_mode="lazy")
|
||||
)
|
||||
assert lazy_df["id"].tolist() == [1]
|
||||
_assert_lazy_blob(lazy_df["blob"].iloc[0], b"one")
|
||||
|
||||
bytes_df = await (
|
||||
table.query()
|
||||
.where("id >= 2")
|
||||
.select({"id_alias": "id", "payload": "blob", "double_id": "id * 2"})
|
||||
.limit(2)
|
||||
.offset(1)
|
||||
.to_pandas(blob_mode="bytes")
|
||||
)
|
||||
assert bytes_df["id_alias"].tolist() == [3, 4]
|
||||
assert bytes_df["payload"].tolist() == [b"three", b"four"]
|
||||
assert bytes_df["double_id"].tolist() == [6, 8]
|
||||
|
||||
desc_df = await (
|
||||
table.query()
|
||||
.where("id = 1")
|
||||
.select(["blob"])
|
||||
.to_pandas(blob_mode="descriptions")
|
||||
)
|
||||
first = desc_df["blob"].iloc[0]
|
||||
assert first != b"one"
|
||||
assert not hasattr(first, "readall")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_plain_scan_query_to_pandas_blob_mode_does_not_collect_arrow(
|
||||
tmp_db_async, monkeypatch
|
||||
):
|
||||
pytest.importorskip("lance")
|
||||
table = await tmp_db_async.create_table(
|
||||
"test_async_query_to_pandas_blob_no_arrow_collect", _blob_query_data()
|
||||
)
|
||||
query = table.query().where("id = 1").select(["id", "blob"])
|
||||
|
||||
async def fail_to_arrow(*args, **kwargs):
|
||||
raise AssertionError("to_arrow should not be called before native pandas")
|
||||
|
||||
monkeypatch.setattr(query, "to_arrow", fail_to_arrow)
|
||||
|
||||
df = await query.to_pandas(blob_mode="bytes")
|
||||
|
||||
assert df["id"].tolist() == [1]
|
||||
assert df["blob"].tolist() == [b"one"]
|
||||
|
||||
|
||||
def test_vector_query_to_pandas_blob_mode_requires_native_path(tmp_db):
|
||||
pytest.importorskip("lance")
|
||||
table = tmp_db.create_table("test_vector_query_blob_mode", _blob_query_data())
|
||||
|
||||
with pytest.raises(RuntimeError, match="Lance native pandas conversion"):
|
||||
table.search([1.0, 0.0]).select(["blob", "vector"]).limit(1).to_pandas(
|
||||
blob_mode="lazy"
|
||||
)
|
||||
|
||||
|
||||
def test_order_by_plain_query(mem_db):
|
||||
table = mem_db.create_table(
|
||||
"test_order_by",
|
||||
|
||||
@@ -344,6 +344,12 @@ def test_mrr_reranker(tmp_path):
|
||||
assert len(result_deduped) == len(result)
|
||||
|
||||
|
||||
def test_mrr_reranker_empty_input():
|
||||
reranker = MRRReranker()
|
||||
with pytest.raises(ValueError, match="must not be empty"):
|
||||
reranker.rerank_multivector([])
|
||||
|
||||
|
||||
def test_rrf_reranker_distance():
|
||||
data = pa.table(
|
||||
{
|
||||
|
||||
@@ -26,6 +26,28 @@ from lancedb.table import LanceTable
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
def _blob_test_data():
|
||||
return pa.table(
|
||||
{
|
||||
"id": pa.array([1, 2], pa.int64()),
|
||||
"blob": pa.array([b"hello", b"world"], pa.large_binary()),
|
||||
},
|
||||
schema=pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field(
|
||||
"blob", pa.large_binary(), metadata={"lance-encoding:blob": "true"}
|
||||
),
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _assert_lazy_blob(value, expected: bytes):
|
||||
assert hasattr(value, "readall")
|
||||
assert value.readall() == expected
|
||||
|
||||
|
||||
def test_basic(mem_db: DBConnection):
|
||||
data = [
|
||||
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
@@ -57,27 +79,30 @@ def test_table_to_pandas_default_matches_arrow(tmp_db: DBConnection):
|
||||
pd.testing.assert_frame_equal(table.to_pandas(), expected)
|
||||
|
||||
|
||||
def test_table_to_pandas_blob_bytes(tmp_db: DBConnection):
|
||||
def test_table_to_pandas_invalid_blob_mode_non_blob_table(tmp_db: DBConnection):
|
||||
data = pa.table({"id": [1, 2], "text": ["one", "two"]})
|
||||
table = tmp_db.create_table("test_to_pandas_invalid_blob_mode", data=data)
|
||||
|
||||
with pytest.raises(ValueError, match="blob_mode must be one of"):
|
||||
table.to_pandas(blob_mode="invalid")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("blob_mode", ["lazy", "bytes", "descriptions"])
|
||||
def test_table_to_pandas_blob_modes(tmp_db: DBConnection, blob_mode):
|
||||
pytest.importorskip("lance")
|
||||
data = pa.table(
|
||||
{
|
||||
"id": pa.array([1, 2], pa.int64()),
|
||||
"blob": pa.array([b"hello", b"world"], pa.large_binary()),
|
||||
},
|
||||
schema=pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field(
|
||||
"blob", pa.large_binary(), metadata={"lance-encoding:blob": "true"}
|
||||
),
|
||||
]
|
||||
),
|
||||
)
|
||||
table = tmp_db.create_table("test_to_pandas_blob_bytes", data=data)
|
||||
table = tmp_db.create_table(f"test_to_pandas_blob_{blob_mode}", _blob_test_data())
|
||||
|
||||
df = table.to_pandas(blob_mode="bytes")
|
||||
df = table.to_pandas(blob_mode=blob_mode)
|
||||
|
||||
assert df["blob"].tolist() == [b"hello", b"world"]
|
||||
if blob_mode == "lazy":
|
||||
_assert_lazy_blob(df["blob"].iloc[0], b"hello")
|
||||
_assert_lazy_blob(df["blob"].iloc[1], b"world")
|
||||
elif blob_mode == "bytes":
|
||||
assert df["blob"].tolist() == [b"hello", b"world"]
|
||||
else:
|
||||
first = df["blob"].iloc[0]
|
||||
assert first != b"hello"
|
||||
assert not hasattr(first, "readall")
|
||||
|
||||
|
||||
def test_table_to_pandas_kwargs(tmp_db: DBConnection):
|
||||
@@ -93,22 +118,8 @@ def test_table_to_pandas_kwargs(tmp_db: DBConnection):
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_table_to_pandas_blob_bytes(tmp_db_async: AsyncConnection):
|
||||
pytest.importorskip("lance")
|
||||
data = pa.table(
|
||||
{
|
||||
"id": pa.array([1, 2], pa.int64()),
|
||||
"blob": pa.array([b"hello", b"world"], pa.large_binary()),
|
||||
},
|
||||
schema=pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field(
|
||||
"blob", pa.large_binary(), metadata={"lance-encoding:blob": "true"}
|
||||
),
|
||||
]
|
||||
),
|
||||
)
|
||||
table = await tmp_db_async.create_table(
|
||||
"test_async_to_pandas_blob_bytes", data=data
|
||||
"test_async_to_pandas_blob_bytes", data=_blob_test_data()
|
||||
)
|
||||
|
||||
df = await table.to_pandas(blob_mode="bytes")
|
||||
@@ -116,6 +127,19 @@ async def test_async_table_to_pandas_blob_bytes(tmp_db_async: AsyncConnection):
|
||||
assert df["blob"].tolist() == [b"hello", b"world"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_table_to_pandas_invalid_blob_mode_non_blob_table(
|
||||
tmp_db_async: AsyncConnection,
|
||||
):
|
||||
table = await tmp_db_async.create_table(
|
||||
"test_async_to_pandas_invalid_blob_mode",
|
||||
data=pa.table({"id": [1, 2], "text": ["one", "two"]}),
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="blob_mode must be one of"):
|
||||
await table.to_pandas(blob_mode="invalid")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_table_to_pandas_kwargs(tmp_db_async: AsyncConnection):
|
||||
pd = pytest.importorskip("pandas")
|
||||
@@ -1264,6 +1288,45 @@ def test_add_with_empty_fixed_size_list_drops_bad_rows(mem_db: DBConnection):
|
||||
assert np.allclose(data["embedding"].to_pylist()[0], np.array([0.1] * 16))
|
||||
|
||||
|
||||
def test_add_nullable_struct_with_none(mem_db: DBConnection):
|
||||
"""Regression test for issue #2654: a nullable struct column whose
|
||||
first batch contains only None values must not crash in
|
||||
_align_field_types with AttributeError: 'pyarrow.lib.DataType'
|
||||
object has no attribute 'fields'.
|
||||
|
||||
PyArrow infers an all-None struct column as `null` (not `struct`),
|
||||
so the type-alignment path needs to handle the case where the
|
||||
source field type is null and use the target type directly.
|
||||
"""
|
||||
# Use the v2.1 file format so that nullable structs are supported.
|
||||
table = mem_db.create_table(
|
||||
"test_nullable_struct",
|
||||
schema=pa.schema(
|
||||
[
|
||||
pa.field("id", pa.string()),
|
||||
pa.field(
|
||||
"data",
|
||||
pa.struct([pa.field("x", pa.float32())]),
|
||||
nullable=True,
|
||||
),
|
||||
]
|
||||
),
|
||||
storage_options=dict(new_table_data_storage_version="2.1"),
|
||||
)
|
||||
|
||||
# Adding a row with a non-null struct should work.
|
||||
table.add([{"id": "1", "data": {"x": 1.0}}])
|
||||
|
||||
# Adding a row with None for the nullable struct field should also
|
||||
# work — this is what used to crash.
|
||||
table.add([{"id": "2", "data": None}])
|
||||
|
||||
result = table.to_arrow()
|
||||
assert result.num_rows == 2
|
||||
assert result.column("id").to_pylist() == ["1", "2"]
|
||||
assert result.column("data").to_pylist() == [{"x": 1.0}, None]
|
||||
|
||||
|
||||
def test_add_with_integer_embeddings_preserves_casting(mem_db: DBConnection):
|
||||
class Schema(LanceModel):
|
||||
text: str
|
||||
|
||||
4226
python/uv.lock
generated
4226
python/uv.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.30.1-beta.0"
|
||||
version = "0.30.1-beta.1"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
@@ -23,6 +23,7 @@ use crate::table::DropColumnsResult;
|
||||
use crate::table::MergeResult;
|
||||
use crate::table::Tags;
|
||||
use crate::table::UpdateResult;
|
||||
use crate::table::merge::MergeFilter;
|
||||
use crate::table::query::create_multi_vector_plan;
|
||||
use crate::table::{AlterColumnsResult, FieldMetadataUpdate, UpdateFieldMetadataResult};
|
||||
use crate::table::{AnyQuery, Filter, Predicate, PreprocessingOutput, TableStatistics};
|
||||
@@ -2266,13 +2267,34 @@ impl TryFrom<MergeInsertBuilder> for MergeInsertRequest {
|
||||
}
|
||||
let on = value.on[0].clone();
|
||||
|
||||
let when_matched_update_all_filt = match value.when_matched_update_all_filt {
|
||||
Some(MergeFilter::Sql(sql)) => Some(sql),
|
||||
Some(MergeFilter::Expr(_)) => {
|
||||
return Err(Error::NotSupported {
|
||||
message: "DataFusion expressions are not supported on remote tables".into(),
|
||||
});
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
let when_not_matched_by_source_delete_filt =
|
||||
match value.when_not_matched_by_source_delete_filt {
|
||||
Some(MergeFilter::Sql(sql)) => Some(sql),
|
||||
Some(MergeFilter::Expr(_)) => {
|
||||
return Err(Error::NotSupported {
|
||||
message: "DataFusion expressions are not supported on remote tables".into(),
|
||||
});
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
on,
|
||||
when_matched_update_all: value.when_matched_update_all,
|
||||
when_matched_update_all_filt: value.when_matched_update_all_filt,
|
||||
when_matched_update_all_filt,
|
||||
when_not_matched_insert_all: value.when_not_matched_insert_all,
|
||||
when_not_matched_by_source_delete: value.when_not_matched_by_source_delete,
|
||||
when_not_matched_by_source_delete_filt: value.when_not_matched_by_source_delete_filt,
|
||||
when_not_matched_by_source_delete_filt,
|
||||
// Only serialize use_index when it's false for backwards compatibility
|
||||
use_index: value.use_index,
|
||||
})
|
||||
|
||||
@@ -53,6 +53,12 @@ pub struct MergeResult {
|
||||
pub num_rows: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum MergeFilter {
|
||||
Sql(String),
|
||||
Expr(datafusion_expr::Expr),
|
||||
}
|
||||
|
||||
/// A builder used to create and run a merge insert operation
|
||||
///
|
||||
/// See [`super::Table::merge_insert`] for more context
|
||||
@@ -61,10 +67,10 @@ pub struct MergeInsertBuilder {
|
||||
table: Arc<dyn BaseTable>,
|
||||
pub(crate) on: Vec<String>,
|
||||
pub(crate) when_matched_update_all: bool,
|
||||
pub(crate) when_matched_update_all_filt: Option<String>,
|
||||
pub(crate) when_matched_update_all_filt: Option<MergeFilter>,
|
||||
pub(crate) when_not_matched_insert_all: bool,
|
||||
pub(crate) when_not_matched_by_source_delete: bool,
|
||||
pub(crate) when_not_matched_by_source_delete_filt: Option<String>,
|
||||
pub(crate) when_not_matched_by_source_delete_filt: Option<MergeFilter>,
|
||||
pub(crate) timeout: Option<Duration>,
|
||||
pub(crate) use_index: bool,
|
||||
pub(crate) use_lsm_write: Option<bool>,
|
||||
@@ -110,7 +116,14 @@ impl MergeInsertBuilder {
|
||||
/// For example, "target.last_update < source.last_update"
|
||||
pub fn when_matched_update_all(&mut self, condition: Option<String>) -> &mut Self {
|
||||
self.when_matched_update_all = true;
|
||||
self.when_matched_update_all_filt = condition;
|
||||
self.when_matched_update_all_filt = condition.map(MergeFilter::Sql);
|
||||
self
|
||||
}
|
||||
|
||||
/// Similar to [`Self::when_matched_update_all`] but accepts a DataFusion logical expression directly.
|
||||
pub fn when_matched_update_all_expr(&mut self, condition: datafusion_expr::Expr) -> &mut Self {
|
||||
self.when_matched_update_all = true;
|
||||
self.when_matched_update_all_filt = Some(MergeFilter::Expr(condition));
|
||||
self
|
||||
}
|
||||
|
||||
@@ -132,7 +145,17 @@ impl MergeInsertBuilder {
|
||||
/// limit what rows are deleted.
|
||||
pub fn when_not_matched_by_source_delete(&mut self, filter: Option<String>) -> &mut Self {
|
||||
self.when_not_matched_by_source_delete = true;
|
||||
self.when_not_matched_by_source_delete_filt = filter;
|
||||
self.when_not_matched_by_source_delete_filt = filter.map(MergeFilter::Sql);
|
||||
self
|
||||
}
|
||||
|
||||
/// Similar to [`Self::when_not_matched_by_source_delete`] but accepts a DataFusion logical expression directly.
|
||||
pub fn when_not_matched_by_source_delete_expr(
|
||||
&mut self,
|
||||
filter: datafusion_expr::Expr,
|
||||
) -> &mut Self {
|
||||
self.when_not_matched_by_source_delete = true;
|
||||
self.when_not_matched_by_source_delete_filt = Some(MergeFilter::Expr(filter));
|
||||
self
|
||||
}
|
||||
|
||||
@@ -234,7 +257,12 @@ pub(crate) async fn execute_merge_insert(
|
||||
) {
|
||||
(false, _) => builder.when_matched(WhenMatched::DoNothing),
|
||||
(true, None) => builder.when_matched(WhenMatched::UpdateAll),
|
||||
(true, Some(filt)) => builder.when_matched(WhenMatched::update_if(&dataset, &filt)?),
|
||||
(true, Some(MergeFilter::Sql(filt))) => {
|
||||
builder.when_matched(WhenMatched::update_if(&dataset, &filt)?)
|
||||
}
|
||||
(true, Some(MergeFilter::Expr(expr))) => {
|
||||
builder.when_matched(WhenMatched::update_if_expr(expr))
|
||||
}
|
||||
};
|
||||
if params.when_not_matched_insert_all {
|
||||
builder.when_not_matched(lance::dataset::WhenNotMatched::InsertAll);
|
||||
@@ -242,10 +270,12 @@ pub(crate) async fn execute_merge_insert(
|
||||
builder.when_not_matched(lance::dataset::WhenNotMatched::DoNothing);
|
||||
}
|
||||
if params.when_not_matched_by_source_delete {
|
||||
let behavior = if let Some(filter) = params.when_not_matched_by_source_delete_filt {
|
||||
WhenNotMatchedBySource::delete_if(dataset.as_ref(), &filter)?
|
||||
} else {
|
||||
WhenNotMatchedBySource::Delete
|
||||
let behavior = match params.when_not_matched_by_source_delete_filt {
|
||||
Some(MergeFilter::Sql(filter)) => {
|
||||
WhenNotMatchedBySource::delete_if(dataset.as_ref(), &filter)?
|
||||
}
|
||||
Some(MergeFilter::Expr(expr)) => WhenNotMatchedBySource::DeleteIf(expr),
|
||||
None => WhenNotMatchedBySource::Delete,
|
||||
};
|
||||
builder.when_not_matched_by_source(behavior);
|
||||
} else {
|
||||
@@ -386,6 +416,45 @@ mod tests {
|
||||
merge_insert_builder.execute(new_batches).await.unwrap();
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 25);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_merge_insert_expr() {
|
||||
use datafusion_expr::{col, lit};
|
||||
|
||||
let conn = connect("memory://").execute().await.unwrap();
|
||||
|
||||
// Create a dataset with i=0..10
|
||||
let batches = merge_insert_test_batches(0, 0);
|
||||
let table = conn
|
||||
.create_table("my_table_expr", batches)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 10);
|
||||
|
||||
// Conditional update that only replaces the age=0 data
|
||||
let new_batches = merge_insert_test_batches(5, 3);
|
||||
let mut merge_insert_builder = table.merge_insert(&["i"]);
|
||||
// use expression: target.age = 0
|
||||
let expr = col("target.age").eq(lit(0));
|
||||
merge_insert_builder.when_matched_update_all_expr(expr);
|
||||
merge_insert_builder.execute(new_batches).await.unwrap();
|
||||
assert_eq!(
|
||||
table.count_rows(Some("age = 3".to_string())).await.unwrap(),
|
||||
5
|
||||
);
|
||||
|
||||
// Delete with expression
|
||||
// Create new batches with i=10..20 (so target rows i=0..9 are not matched by source)
|
||||
let new_batches = merge_insert_test_batches(10, 0); // won't insert or update since we don't enable matched/unmatched actions
|
||||
let mut merge_insert_builder = table.merge_insert(&["i"]);
|
||||
// delete if target.age = 3
|
||||
let delete_expr = col("target.age").eq(lit(3));
|
||||
merge_insert_builder.when_not_matched_by_source_delete_expr(delete_expr);
|
||||
let result = merge_insert_builder.execute(new_batches).await.unwrap();
|
||||
assert_eq!(result.num_deleted_rows, 5);
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 5);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
Reference in New Issue
Block a user