Compare commits

..

7 Commits

Author SHA1 Message Date
Ryan Green
1a747260f6 add missing field 2026-06-24 16:20:37 -02:30
Ryan Green
a0e2eb0c03 wip 2026-06-24 15:29:22 -02:30
Ryan Green
e88524ca0e cleanup 2026-06-24 14:02:07 -02:30
Ryan Green
f50e2f22f5 fix: ensure read freshness provider is built into namespace client 2026-06-24 13:15:23 -02:30
LanceDB Robot
ebf8d55ede chore: update lance dependency to v9.0.0-beta.4 (#3570)
Bumps the Lance dependencies to v9.0.0-beta.4 and refreshes the
generated lockfile metadata. No compatibility fixes were required beyond
the dependency updates. Triggered by
https://github.com/lance-format/lance/releases/tag/v9.0.0-beta.4
2026-06-24 10:16:29 -05:00
Raphael Malikian
0ba70d96c3 fix: add missing stacklevel=2 to warnings.warn() and fix broken message concatenation (Fixes #3563) (#3564)
Fixes #3563

## Summary

- Add `stacklevel=2` to 10 `warnings.warn()` calls across 4 files
- Fix broken message concatenation in `table.py` where the second string
was incorrectly passed as the `category` parameter

## Problem

Multiple `warnings.warn()` calls in the `python/lancedb/` codebase were
missing the `stacklevel` parameter. Without `stacklevel=2`, warnings
point to library internals instead of the caller's code, making it
impossible for users to identify which of their function calls triggered
the warning.

Additionally, two calls in `table.py` (lines 3411 and 3420) had a more
serious bug: the deprecation message was split across two separate
string arguments, causing the second string to be passed as the
`category` parameter instead of being concatenated with the first
string. This would cause `TypeError` when the warning was triggered.

## Changes

| File | Fixes | Description |
|------|-------|-------------|
| `embeddings/colpali.py` | 1 | Add `stacklevel=2` to
`use_token_pooling` deprecation warning |
| `remote/db.py` | 3 | Add `stacklevel=2` to `request_thread_pool`,
`connection_timeout`, `read_timeout` deprecation warnings |
| `remote/table.py` | 3 | Add `stacklevel=2` to `cleanup_old_versions`,
`compact_files`, `optimize` no-op warnings |
| `table.py` | 3 | Fix broken message concatenation for
`data_storage_version` and `enable_v2_manifest_paths` deprecation
warnings + add `stacklevel=2` to `retrain` deprecation warning |

## Verification

```python
# All warnings.warn() calls now have stacklevel
python3 -c "import ast, os; ..."
# Result: All warnings.warn() calls now have stacklevel!
```

## Changelog

| Date | Change | Author |
|------|--------|--------|
| 2026-06-20 | Fix missing stacklevel=2 in 10 warnings.warn() calls +
fix broken message concatenation | rtmalikian |

### Files Changed
- `python/python/lancedb/embeddings/colpali.py` — Add stacklevel=2
- `python/python/lancedb/remote/db.py` — Add stacklevel=2 to 3
deprecation warnings
- `python/python/lancedb/remote/table.py` — Add stacklevel=2 to 3 no-op
warnings
- `python/python/lancedb/table.py` — Fix broken message concatenation +
add stacklevel=2

### Verification
- AST-based audit confirms all `warnings.warn()` calls now include
`stacklevel=2`
- Syntax check passes for all 4 modified files

---

**About the Author:** Raphael Malikian — Clinical AI Solutions
Architect. I specialise in building and fixing AI/ML systems for
healthcare, including vector databases, RAG pipelines, and clinical NLP.
If you need help with your project or think I can add value to your
organisation, feel free to reach out — I'd love to connect.

📧 rtmalikian@gmail.com
🔗 GitHub: https://github.com/rtmalikian
🔗 LinkedIn:
http://www.linkedin.com/in/raphael-t-malikian-mbbs-bsc-hons-71075436a

---

**Disclosure:** This code was developed with assistance from **Hermes
Agent** (Nous Research). All changes were reviewed, tested against the
actual codebase, and verified for correctness.

Signed-off-by: rtmalikian <rtmalikian@gmail.com>
2026-06-23 13:42:59 -07:00
Lance Release
0749532c3c Bump version: 0.31.0-beta.1 → 0.31.0-beta.2 2026-06-23 16:23:08 +00:00
26 changed files with 317 additions and 114 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.31.0-beta.1"
current_version = "0.31.0-beta.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

96
Cargo.lock generated
View File

@@ -3432,8 +3432,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"rand 0.9.4",
@@ -4735,8 +4735,8 @@ checksum = "e037a2e1d8d5fdbd49b16a4ea09d5d6401c1f29eca5ff29d03d3824dba16256a"
[[package]]
name = "lance"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arc-swap",
"arrow",
@@ -4771,7 +4771,7 @@ dependencies = [
"futures",
"half",
"humantime",
"itertools 0.13.0",
"itertools 0.14.0",
"lance-arrow",
"lance-core",
"lance-datafusion",
@@ -4810,8 +4810,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4832,7 +4832,7 @@ dependencies = [
[[package]]
name = "lance-arrow-scalar"
version = "58.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4846,7 +4846,7 @@ dependencies = [
[[package]]
name = "lance-arrow-stats"
version = "58.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4855,8 +4855,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrayref",
"paste",
@@ -4865,8 +4865,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4878,7 +4878,7 @@ dependencies = [
"datafusion-common",
"datafusion-sql",
"futures",
"itertools 0.13.0",
"itertools 0.14.0",
"lance-arrow",
"lance-derive",
"libc",
@@ -4904,8 +4904,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow",
"arrow-array",
@@ -4935,8 +4935,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow",
"arrow-array",
@@ -4953,8 +4953,8 @@ dependencies = [
[[package]]
name = "lance-derive"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"proc-macro2",
"quote",
@@ -4963,8 +4963,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4980,7 +4980,7 @@ dependencies = [
"futures",
"hex",
"hyperloglogplus",
"itertools 0.13.0",
"itertools 0.14.0",
"lance-arrow",
"lance-bitpacking",
"lance-core",
@@ -4999,8 +4999,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -5030,8 +5030,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arc-swap",
"arrow",
@@ -5056,7 +5056,7 @@ dependencies = [
"fst",
"futures",
"half",
"itertools 0.13.0",
"itertools 0.14.0",
"jieba-rs",
"jsonb",
"lance-arrow",
@@ -5096,8 +5096,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow",
"arrow-arith",
@@ -5138,8 +5138,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -5155,8 +5155,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow",
"async-trait",
@@ -5168,8 +5168,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow",
"arrow-ipc",
@@ -5223,15 +5223,15 @@ dependencies = [
[[package]]
name = "lance-select"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-schema",
"byteorder",
"bytes",
"itertools 0.13.0",
"itertools 0.14.0",
"lance-core",
"roaring",
"tracing",
@@ -5239,8 +5239,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow",
"arrow-array",
@@ -5279,8 +5279,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -5293,8 +5293,8 @@ dependencies = [
[[package]]
name = "lance-tokenizer"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"icu_segmenter",
"jieba-rs",
@@ -5307,7 +5307,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.31.0-beta.1"
version = "0.31.0-beta.2"
dependencies = [
"ahash",
"anyhow",
@@ -5390,7 +5390,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.31.0-beta.1"
version = "0.31.0-beta.2"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -5415,7 +5415,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.34.0-beta.1"
version = "0.34.0-beta.2"
dependencies = [
"arrow",
"async-trait",

View File

@@ -13,20 +13,20 @@ categories = ["database-implementations"]
rust-version = "1.91.0"
[workspace.dependencies]
lance = { "version" = "=9.0.0-beta.2", default-features = false, "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=9.0.0-beta.2", default-features = false, "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=9.0.0-beta.2", default-features = false, "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=9.0.0-beta.4", default-features = false, "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=9.0.0-beta.4", default-features = false, "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=9.0.0-beta.4", default-features = false, "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "58.0.0", optional = false }

View File

@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId>
<version>0.31.0-beta.1</version>
<version>0.31.0-beta.2</version>
</dependency>
```

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.31.0-beta.1</version>
<version>0.31.0-beta.2</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.31.0-beta.1</version>
<version>0.31.0-beta.2</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>
@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>9.0.0-beta.2</lance-core.version>
<lance-core.version>9.0.0-beta.4</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.31.0-beta.1"
version = "0.31.0-beta.2"
publish = false
license.workspace = true
description.workspace = true

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.31.0-beta.1",
"version": "0.31.0-beta.2",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.31.0-beta.1",
"version": "0.31.0-beta.2",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.31.0-beta.1",
"version": "0.31.0-beta.2",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.31.0-beta.1",
"version": "0.31.0-beta.2",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.31.0-beta.1",
"version": "0.31.0-beta.2",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.31.0-beta.1",
"version": "0.31.0-beta.2",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.31.0-beta.1",
"version": "0.31.0-beta.2",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.31.0-beta.1",
"version": "0.31.0-beta.2",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.31.0-beta.1",
"version": "0.31.0-beta.2",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.31.0-beta.1",
"version": "0.31.0-beta.2",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -81,6 +81,7 @@ class ColPaliEmbeddings(EmbeddingFunction):
warnings.warn(
"use_token_pooling is deprecated, use pooling_strategy=None instead",
DeprecationWarning,
stacklevel=2,
)
self.pooling_strategy = None

View File

@@ -373,6 +373,19 @@ def _convert_pyarrow_schema_to_json(schema: pa.Schema) -> JsonArrowSchema:
return JsonArrowSchema(fields=fields, metadata=meta)
def _builds_namespace_natively(
namespace_client_impl: Optional[str],
namespace_client_properties: Optional[Dict[str, str]],
) -> bool:
"""Whether ``connect_namespace_client`` builds the namespace client natively
in Rust (installing the read-freshness context provider) rather than wrapping
the pre-built Python client.
Must mirror Rust ``build_namespace_natively`` in ``python/src/connection.rs``.
"""
return namespace_client_impl == "rest" and bool(namespace_client_properties)
class LanceNamespaceDBConnection(DBConnection):
"""
A LanceDB connection that uses a namespace for table management.
@@ -432,6 +445,13 @@ class LanceNamespaceDBConnection(DBConnection):
)
self._namespace_client_impl = namespace_client_impl
self._namespace_client_properties = namespace_client_properties
# When the namespace client is built natively (see Rust
# ``build_namespace_natively``), the underlying Rust table performs
# QueryTable pushdown through the read-freshness context provider, which
# the pure-Python ``query_table`` path bypasses.
self._route_pushdown_to_rust = _builds_namespace_natively(
namespace_client_impl, namespace_client_properties
)
self._inner = AsyncConnection(
_connect_namespace_client(
namespace_client,
@@ -543,6 +563,7 @@ class LanceNamespaceDBConnection(DBConnection):
namespace_path=namespace_path,
namespace_client=self._namespace_client,
pushdown_operations=self._namespace_client_pushdown_operations,
route_pushdown_to_rust=self._route_pushdown_to_rust,
_async=async_table,
)
@@ -580,6 +601,7 @@ class LanceNamespaceDBConnection(DBConnection):
namespace_path=namespace_path,
namespace_client=self._namespace_client,
pushdown_operations=self._namespace_client_pushdown_operations,
route_pushdown_to_rust=self._route_pushdown_to_rust,
_async=async_table,
)
if branch is not None:

View File

@@ -124,6 +124,7 @@ class RemoteDBConnection(DBConnection):
"request_thread_pool is no longer used and will be removed in "
"a future release.",
DeprecationWarning,
stacklevel=2,
)
if connection_timeout is not None:
@@ -132,6 +133,7 @@ class RemoteDBConnection(DBConnection):
"release. Please use client_config.timeout_config.connect_timeout "
"instead.",
DeprecationWarning,
stacklevel=2,
)
client_config.timeout_config.connect_timeout = timedelta(
seconds=connection_timeout
@@ -142,6 +144,7 @@ class RemoteDBConnection(DBConnection):
"read_timeout is deprecated and will be removed in a future release. "
"Please use client_config.timeout_config.read_timeout instead.",
DeprecationWarning,
stacklevel=2,
)
client_config.timeout_config.read_timeout = timedelta(seconds=read_timeout)

View File

@@ -845,7 +845,8 @@ class RemoteTable(Table):
"""
warnings.warn(
"cleanup_old_versions() is a no-op on LanceDB Cloud. "
"Tables are automatically cleaned up and optimized."
"Tables are automatically cleaned up and optimized.",
stacklevel=2,
)
pass
@@ -857,7 +858,8 @@ class RemoteTable(Table):
"""
warnings.warn(
"compact_files() is a no-op on LanceDB Cloud. "
"Tables are automatically compacted and optimized."
"Tables are automatically compacted and optimized.",
stacklevel=2,
)
pass
@@ -874,7 +876,8 @@ class RemoteTable(Table):
"""
warnings.warn(
"optimize() is a no-op on LanceDB Cloud. "
"Indices are optimized automatically."
"Indices are optimized automatically.",
stacklevel=2,
)
pass

View File

@@ -2022,6 +2022,7 @@ class LanceTable(Table):
namespace_client: Optional[Any] = None,
managed_versioning: Optional[bool] = None,
pushdown_operations: Optional[set] = None,
route_pushdown_to_rust: bool = False,
_async: AsyncTable = None,
):
if namespace_path is None:
@@ -2031,6 +2032,14 @@ class LanceTable(Table):
self._location = location # Store location for use in _dataset_path
self._namespace_client = namespace_client
self._pushdown_operations = pushdown_operations or set()
# When the connection built the namespace client natively (e.g. an
# enterprise "rest" connection), the underlying Rust table already
# executes QueryTable pushdown itself -- and, unlike this Python urllib3
# path, it routes through the read-freshness context provider that emits
# the ``x-lancedb-min-timestamp`` header. So we must defer pushdown to
# Rust instead of calling the Python ``namespace_client.query_table``
# directly, or reads silently bypass read-freshness (stale results).
self._route_pushdown_to_rust = route_pushdown_to_rust
if _async is not None:
self._table = _async
else:
@@ -2241,6 +2250,7 @@ class LanceTable(Table):
namespace_path=self._namespace_path,
namespace_client=self._namespace_client,
pushdown_operations=self._pushdown_operations,
route_pushdown_to_rust=self._route_pushdown_to_rust,
location=self._location,
_async=async_table,
)
@@ -2391,8 +2401,11 @@ class LanceTable(Table):
Returns
-------
pa.Table"""
if _should_push_down_query_table(
self._namespace_client, self._pushdown_operations
if (
_should_push_down_query_table(
self._namespace_client, self._pushdown_operations
)
and not self._route_pushdown_to_rust
):
return self._execute_query(Query()).read_all()
@@ -3344,6 +3357,7 @@ class LanceTable(Table):
location: Optional[str] = None,
namespace_client: Optional[Any] = None,
pushdown_operations: Optional[set] = None,
route_pushdown_to_rust: bool = False,
):
"""
Create a new table.
@@ -3406,21 +3420,24 @@ class LanceTable(Table):
self._location = location
self._namespace_client = namespace_client
self._pushdown_operations = pushdown_operations or set()
self._route_pushdown_to_rust = route_pushdown_to_rust
if data_storage_version is not None:
warnings.warn(
"setting data_storage_version directly on create_table is deprecated. ",
"setting data_storage_version directly on create_table is deprecated. "
"Use database_options instead.",
DeprecationWarning,
stacklevel=2,
)
if storage_options is None:
storage_options = {}
storage_options["new_table_data_storage_version"] = data_storage_version
if enable_v2_manifest_paths is not None:
warnings.warn(
"setting enable_v2_manifest_paths directly on create_table is ",
"setting enable_v2_manifest_paths directly on create_table is "
"deprecated. Use database_options instead.",
DeprecationWarning,
stacklevel=2,
)
if storage_options is None:
storage_options = {}
@@ -3517,6 +3534,7 @@ class LanceTable(Table):
_should_push_down_query_table(
self._namespace_client, self._pushdown_operations
)
and not self._route_pushdown_to_rust
and self.current_branch() is None
):
from lancedb.namespace import _execute_server_side_query
@@ -4258,6 +4276,7 @@ class AsyncTable:
namespace_path: Optional[List[str]] = None,
namespace_client: Optional[Any] = None,
pushdown_operations: Optional[set] = None,
route_pushdown_to_rust: bool = False,
):
"""Create a new AsyncTable object.
@@ -4270,6 +4289,9 @@ class AsyncTable:
self._namespace_path = namespace_path or []
self._namespace_client = namespace_client
self._pushdown_operations = pushdown_operations or set()
# See LanceTable.__init__: defer QueryTable pushdown to Rust (which emits
# the read-freshness header) for natively-built namespace clients.
self._route_pushdown_to_rust = route_pushdown_to_rust
def _set_namespace_context(
self,
@@ -4277,10 +4299,12 @@ class AsyncTable:
namespace_path: Optional[List[str]] = None,
namespace_client: Optional[Any] = None,
pushdown_operations: Optional[set] = None,
route_pushdown_to_rust: bool = False,
) -> "AsyncTable":
self._namespace_path = namespace_path or []
self._namespace_client = namespace_client
self._pushdown_operations = pushdown_operations or set()
self._route_pushdown_to_rust = route_pushdown_to_rust
return self
def __repr__(self):
@@ -4490,8 +4514,11 @@ class AsyncTable:
-------
pa.Table
"""
if _should_push_down_query_table(
self._namespace_client, self._pushdown_operations
if (
_should_push_down_query_table(
self._namespace_client, self._pushdown_operations
)
and not self._route_pushdown_to_rust
):
return (await self._execute_query(Query())).read_all()
@@ -5175,8 +5202,11 @@ class AsyncTable:
batch_size: Optional[int] = None,
timeout: Optional[timedelta] = None,
) -> pa.RecordBatchReader:
if _should_push_down_query_table(
self._namespace_client, self._pushdown_operations
if (
_should_push_down_query_table(
self._namespace_client, self._pushdown_operations
)
and not self._route_pushdown_to_rust
):
from lancedb.namespace import _execute_server_side_query
@@ -5662,6 +5692,7 @@ class AsyncTable:
"The 'retrain' parameter is deprecated and will be removed in a "
"future version.",
DeprecationWarning,
stacklevel=2,
)
return await self._inner.optimize(

View File

@@ -65,6 +65,9 @@ def _namespace_lance_table(namespace_client: _NamespaceClient) -> LanceTable:
table._namespace_path = ["geneva"]
table._namespace_client = namespace_client
table._pushdown_operations = {"QueryTable"}
# This test exercises the Python-side pushdown path (non-native client), so
# pushdown is not routed to Rust.
table._route_pushdown_to_rust = False
return table

View File

@@ -610,24 +610,38 @@ pub fn connect_namespace_client(
namespace_client_impl: Option<String>,
namespace_client_properties: Option<HashMap<String, String>>,
) -> PyResult<Connection> {
let namespace_client = extract_namespace_arc(py, namespace_client)?;
let read_consistency_interval = read_consistency_interval.map(Duration::from_secs_f64);
let namespace_client_pushdown_operations =
parse_namespace_client_pushdown_operations(namespace_client_pushdown_operations)?;
let ns_impl = namespace_client_impl.unwrap_or_else(|| "python".to_string());
let ns_properties = namespace_client_properties.unwrap_or_default();
let storage_options = storage_options.unwrap_or_default();
let session = session.map(|s| s.inner.clone());
let database = LanceNamespaceDatabase::from_namespace_client(
namespace_client,
ns_impl,
ns_properties,
storage_options,
read_consistency_interval,
session,
namespace_client_pushdown_operations,
);
// Prefer building the namespace natively from (impl, properties) so the
// read-freshness provider installed
let database = if build_namespace_natively(namespace_client_impl.as_deref(), &ns_properties) {
let ns_impl = namespace_client_impl.expect("impl present per build_namespace_natively");
crate::runtime::block_on(LanceNamespaceDatabase::connect(
&ns_impl,
ns_properties,
storage_options,
read_consistency_interval,
session,
namespace_client_pushdown_operations,
))
.infer_error()?
} else {
let namespace_client = extract_namespace_arc(py, namespace_client)?;
LanceNamespaceDatabase::from_namespace_client(
namespace_client,
namespace_client_impl.unwrap_or_else(|| "python".to_string()),
ns_properties,
storage_options,
read_consistency_interval,
session,
namespace_client_pushdown_operations,
)
};
Ok(Connection::new(LanceConnection::new(
Arc::new(database),
@@ -635,6 +649,16 @@ pub fn connect_namespace_client(
)))
}
/// Whether to build the namespace natively (from impl + properties) instead of
/// wrapping a pre-built client. Native construction is required for the
/// read-freshness provider to be installed
fn build_namespace_natively(
namespace_client_impl: Option<&str>,
namespace_client_properties: &HashMap<String, String>,
) -> bool {
matches!(namespace_client_impl, Some("rest")) && !namespace_client_properties.is_empty()
}
#[derive(FromPyObject)]
pub struct PyClientConfig {
user_agent: String,
@@ -733,3 +757,36 @@ impl From<PyClientConfig> for lancedb::remote::ClientConfig {
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn props(pairs: &[(&str, &str)]) -> HashMap<String, String> {
pairs
.iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect()
}
#[test]
fn native_build_only_for_rest_with_properties() {
let rest = props(&[("uri", "http://localhost:10024")]);
// rest + non-empty properties -> build natively (installs the
// read-freshness provider so checkout_latest() busts the server cache).
assert!(build_namespace_natively(Some("rest"), &rest));
// dir is local (no server cache) -> wrap the pre-built client unchanged.
assert!(!build_namespace_natively(
Some("dir"),
&props(&[("root", "/tmp")])
));
// No impl: only a pre-built client was handed in -> wrap it as-is.
assert!(!build_namespace_natively(None, &rest));
// rest but no properties: nothing to build a connection from -> wrap.
assert!(!build_namespace_natively(Some("rest"), &HashMap::new()));
}
}

View File

@@ -56,6 +56,15 @@ fn get_runtime() -> &'static runtime::Runtime {
unsafe { &*new_ptr }
}
/// Block the current thread on a future using the shared runtime.
///
/// For sync `#[pyfunction]`s that need to drive an async operation (e.g.
/// building a namespace client). Must not be called from within the runtime's
/// own worker threads.
pub fn block_on<F: std::future::Future>(fut: F) -> F::Output {
get_runtime().block_on(fut)
}
/// Runs in async-signal context after `fork()` in the child. We can only
/// touch atomics here; we deliberately leak the previous runtime because
/// dropping a tokio `Runtime` would try to join its (now-dead) worker

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.31.0-beta.1"
version = "0.31.0-beta.2"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -579,24 +579,45 @@ fn array_to_f32_vec(arr: &Arc<dyn arrow_array::Array>) -> Result<Vec<f32>> {
})
}
/// Magic bytes that prefix (and suffix) the Arrow IPC *file* format.
const ARROW_IPC_FILE_MAGIC: &[u8] = b"ARROW1";
/// Parse Arrow IPC response from the namespace server.
///
/// The server may return either the Arrow IPC *file* format or the *stream*
/// format. REST/phalanx returns the file format (it begins with the `ARROW1`
/// magic); reading that with a `StreamReader` fails with "failed to fill whole
/// buffer". Detect the magic and pick the matching reader so both are handled.
async fn parse_arrow_ipc_response(bytes: bytes::Bytes) -> Result<DatasetRecordBatchStream> {
use arrow_ipc::reader::StreamReader;
use arrow_ipc::reader::{FileReader, StreamReader};
use std::io::Cursor;
let cursor = Cursor::new(bytes);
let reader = StreamReader::try_new(cursor, None).map_err(|e| Error::Runtime {
message: format!("Failed to parse Arrow IPC response: {}", e),
})?;
// Collect all record batches
let schema = reader.schema();
let batches: Vec<_> = reader
.into_iter()
.collect::<std::result::Result<Vec<_>, _>>()
.map_err(|e| Error::Runtime {
message: format!("Failed to read Arrow IPC batches: {}", e),
let (schema, batches) = if bytes.starts_with(ARROW_IPC_FILE_MAGIC) {
let reader = FileReader::try_new(Cursor::new(bytes), None).map_err(|e| Error::Runtime {
message: format!("Failed to parse Arrow IPC file response: {}", e),
})?;
let schema = reader.schema();
let batches = reader
.into_iter()
.collect::<std::result::Result<Vec<_>, _>>()
.map_err(|e| Error::Runtime {
message: format!("Failed to read Arrow IPC file batches: {}", e),
})?;
(schema, batches)
} else {
let reader =
StreamReader::try_new(Cursor::new(bytes), None).map_err(|e| Error::Runtime {
message: format!("Failed to parse Arrow IPC response: {}", e),
})?;
let schema = reader.schema();
let batches = reader
.into_iter()
.collect::<std::result::Result<Vec<_>, _>>()
.map_err(|e| Error::Runtime {
message: format!("Failed to read Arrow IPC batches: {}", e),
})?;
(schema, batches)
};
// Create a stream from the batches
let stream = futures::stream::iter(batches.into_iter().map(Ok));
@@ -624,6 +645,59 @@ mod tests {
FixedSizeListArray::try_new_from_values(Float32Array::from(values), dimension).unwrap()
}
#[tokio::test]
async fn test_parse_arrow_ipc_response_handles_file_and_stream() {
use arrow_array::{Int32Array, RecordBatch};
use arrow_ipc::writer::{FileWriter, StreamWriter};
use arrow_schema::{DataType, Field, Schema};
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
let batch = RecordBatch::try_new(
schema.clone(),
vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef],
)
.unwrap();
// Arrow IPC *file* format -- what REST/phalanx returns. Previously this
// failed with "failed to fill whole buffer" because we used a StreamReader.
let mut file_buf = Vec::new();
{
let mut writer = FileWriter::try_new(&mut file_buf, &schema).unwrap();
writer.write(&batch).unwrap();
writer.finish().unwrap();
}
assert!(file_buf.starts_with(ARROW_IPC_FILE_MAGIC));
let rows: usize = parse_arrow_ipc_response(bytes::Bytes::from(file_buf))
.await
.unwrap()
.try_collect::<Vec<_>>()
.await
.unwrap()
.iter()
.map(|b| b.num_rows())
.sum();
assert_eq!(rows, 3);
// Arrow IPC *stream* format must still parse.
let mut stream_buf = Vec::new();
{
let mut writer = StreamWriter::try_new(&mut stream_buf, &schema).unwrap();
writer.write(&batch).unwrap();
writer.finish().unwrap();
}
assert!(!stream_buf.starts_with(ARROW_IPC_FILE_MAGIC));
let rows: usize = parse_arrow_ipc_response(bytes::Bytes::from(stream_buf))
.await
.unwrap()
.try_collect::<Vec<_>>()
.await
.unwrap()
.iter()
.map(|b| b.num_rows())
.sum();
assert_eq!(rows, 3);
}
#[test]
fn test_convert_to_namespace_query_vector() {
let query_vector = Arc::new(Float32Array::from(vec![1.0, 2.0, 3.0, 4.0]));