Compare commits

..

1 Commits

Author SHA1 Message Date
Lance Release
21e4d2d3ba Bump version: 0.31.0-beta.3 → 0.31.0-beta.4 2026-06-29 11:11:46 +00:00
36 changed files with 234 additions and 1424 deletions

View File

@@ -1,137 +0,0 @@
---
name: lancedb-branch-ops
description: Branch management for LanceDB tables via the REST API. Use this skill whenever someone wants to create, delete, list, or switch branches on a LanceDB table — or needs to make sure a write (metadata update, index build, etc.) lands on a specific branch instead of main. Invoke it even without the word "branch" if context makes clear they want an experimental copy of a table, want to isolate changes, or want to confirm a mutation didn't touch main. Covers: branches/list, branches/create, branches/delete, and passing "branch" in describe/update_field_metadata/create_index to target a non-main version.
---
## Goal
Manage branches on a LanceDB table: list what exists, create new ones, delete stale ones, and direct read/write operations at a specific branch without touching main.
## Step 0: Establish the connection
Use the `lancedb-connect` skill to resolve the base URL and auth headers (`x-api-key`, `x-lancedb-database`). Skip this only if the connection is already known from the current conversation.
All examples below use `{base_url}` — substitute the resolved endpoint and include the auth headers on every request.
## The branch model (important)
LanceDB branches are named snapshots that diverge from the table's current state at creation time. There is **no checkout command** — you never switch the whole table to a branch. Instead, you **pass `"branch": "<name>"` in the request body** of any operation to target that branch. Omitting the key (or sending an empty body) always targets main.
`branches/list` returns only non-main branches. Main always exists and is not listed.
## List branches
```http
POST {base_url}/v1/table/{table_id}/branches/list
Content-Type: application/json
{}
```
Response:
```json
{
"branches": {
"experiment-reindex": {"parentVersion": 1, "createAt": 1782506085, "manifestSize": 1029}
}
}
```
If `branches` is `{}`, the table has no branches besides main.
## Create a branch
```http
POST {base_url}/v1/table/{table_id}/branches/create
Content-Type: application/json
{"name": "experiment-reindex"}
```
HTTP 200 with `{}` body = success. The branch is created off the table's current state on main.
Verify by calling `branches/list` and confirming the new name appears.
## Delete a branch
```http
POST {base_url}/v1/table/{table_id}/branches/delete
Content-Type: application/json
{"name": "stale-2024"}
```
HTTP 200 with `{}` body = success. Only the branch pointer is removed — main and all row data remain intact.
Verify by calling `branches/list` (name gone) and `describe` with no branch param (main still responds).
## Operate on a specific branch
Pass `"branch": "<name>"` in the body of any operation to scope it to that branch:
**Read schema on a branch:**
```http
POST {base_url}/v1/table/{table_id}/describe
Content-Type: application/json
{"branch": "wip-branch"}
```
**Write metadata to a branch (not main):**
```http
POST {base_url}/v1/table/{table_id}/update_field_metadata
Content-Type: application/json
{
"branch": "wip-branch",
"updates": [
{
"path": "category",
"metadata": {"lancedb:description": "Product category label."},
"replace": false
}
]
}
```
**Build an index on a branch:**
```http
POST {base_url}/v1/table/{table_id}/create_index
Content-Type: application/json
{
"branch": "wip-branch",
"column": "category",
"index_type": "BTREE"
}
```
## Verifying isolation
After writing to a branch, always confirm the change did NOT land on main:
```bash
# Should show the new metadata
curl -s -X POST {base_url}/v1/table/{table_id}/describe \
-H "x-api-key: <key>" -H "x-lancedb-database: <db>" \
-H "content-type: application/json" \
-d '{"branch": "wip-branch"}'
# Should NOT show the new metadata
curl -s -X POST {base_url}/v1/table/{table_id}/describe \
-H "x-api-key: <key>" -H "x-lancedb-database: <db>" \
-H "content-type: application/json" \
-d '{}'
```
## Quick reference
| Goal | Endpoint | Body |
|------|----------|------|
| List all branches | `branches/list` | `{}` |
| Create a branch | `branches/create` | `{"name": "..."}` |
| Delete a branch | `branches/delete` | `{"name": "..."}` |
| Read schema on branch | `describe` | `{"branch": "..."}` |
| Write metadata on branch | `update_field_metadata` | `{"branch": "...", "updates": [...]}` |
| Build index on branch | `create_index` | `{"branch": "...", "column": ..., "index_type": ...}` |
| Target main (default) | any endpoint | omit `"branch"` key |

132
Cargo.lock generated
View File

@@ -157,9 +157,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.103"
version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a4385e2e34eb35d6b3efe798b9eb88096925d87726c0798709bf56d9ed84af3"
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
[[package]]
name = "approx"
@@ -1297,6 +1297,15 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
[[package]]
name = "bitpacking"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96a7139abd3d9cebf8cd6f920a389cf3dc9576172e32f4563f188cae3c3eb019"
dependencies = [
"crunchy",
]
[[package]]
name = "bitvec"
version = "1.0.1"
@@ -3177,9 +3186,9 @@ dependencies = [
[[package]]
name = "env_filter"
version = "2.0.0"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "900d271a03799a1ee8d1ca9b19893b48ca674a9284fefcfb85f05e74ed314217"
checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef"
dependencies = [
"log",
"regex",
@@ -3187,9 +3196,9 @@ dependencies = [
[[package]]
name = "env_logger"
version = "0.11.11"
version = "0.11.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de671bd27a75a797dc9ae289ba1e77276e75e2026408aab65185384e2d5cd3f6"
checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a"
dependencies = [
"anstream",
"anstyle",
@@ -3423,8 +3432,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow-array",
"rand 0.9.4",
@@ -4726,8 +4735,8 @@ checksum = "e037a2e1d8d5fdbd49b16a4ea09d5d6401c1f29eca5ff29d03d3824dba16256a"
[[package]]
name = "lance"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arc-swap",
"arrow",
@@ -4745,6 +4754,7 @@ dependencies = [
"async_cell",
"aws-credential-types",
"aws-sdk-dynamodb",
"bitpacking",
"byteorder",
"bytes",
"chrono",
@@ -4763,7 +4773,6 @@ dependencies = [
"humantime",
"itertools 0.14.0",
"lance-arrow",
"lance-bitpacking",
"lance-core",
"lance-datafusion",
"lance-encoding",
@@ -4801,8 +4810,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4823,7 +4832,7 @@ dependencies = [
[[package]]
name = "lance-arrow-scalar"
version = "58.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4837,7 +4846,7 @@ dependencies = [
[[package]]
name = "lance-arrow-stats"
version = "58.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4846,19 +4855,18 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrayref",
"crunchy",
"paste",
"seq-macro",
]
[[package]]
name = "lance-core"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4896,8 +4904,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow",
"arrow-array",
@@ -4927,8 +4935,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow",
"arrow-array",
@@ -4945,8 +4953,8 @@ dependencies = [
[[package]]
name = "lance-derive"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"proc-macro2",
"quote",
@@ -4955,8 +4963,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4991,8 +4999,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -5022,8 +5030,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arc-swap",
"arrow",
@@ -5035,6 +5043,7 @@ dependencies = [
"async-channel",
"async-recursion",
"async-trait",
"bitpacking",
"bitvec",
"bytes",
"chrono",
@@ -5052,7 +5061,6 @@ dependencies = [
"jsonb",
"lance-arrow",
"lance-arrow-stats",
"lance-bitpacking",
"lance-core",
"lance-datafusion",
"lance-datagen",
@@ -5088,8 +5096,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow",
"arrow-arith",
@@ -5130,8 +5138,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -5147,8 +5155,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow",
"async-trait",
@@ -5160,8 +5168,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow",
"arrow-ipc",
@@ -5215,8 +5223,8 @@ dependencies = [
[[package]]
name = "lance-select"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -5231,8 +5239,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow",
"arrow-array",
@@ -5271,8 +5279,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -5285,8 +5293,8 @@ dependencies = [
[[package]]
name = "lance-tokenizer"
version = "9.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.10#e25b71e74b89d10c57b412d111bde087117383f3"
version = "9.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.8#71c4aa2174971e98acb7e256fde1e1589024f5bc"
dependencies = [
"icu_segmenter",
"jieba-rs",
@@ -5299,7 +5307,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.31.0-beta.4"
version = "0.31.0-beta.3"
dependencies = [
"ahash",
"anyhow",
@@ -5383,7 +5391,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.31.0-beta.4"
version = "0.31.0-beta.3"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -5408,7 +5416,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.34.0-beta.4"
version = "0.34.0-beta.3"
dependencies = [
"arrow",
"async-trait",
@@ -5641,9 +5649,9 @@ dependencies = [
[[package]]
name = "log"
version = "0.4.33"
version = "0.4.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ceec5bc11778974d1bcb055b18002eba7f4b3518b6a0081b3af5f21666da9ad"
checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a"
[[package]]
name = "loom"
@@ -5951,9 +5959,9 @@ dependencies = [
[[package]]
name = "napi"
version = "3.9.4"
version = "3.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b41bda2ac390efb5e8d22025d925ccc3f3807d8c1bea6d19b36127247c4b8f83"
checksum = "fbd9f9295f3ff5921e78a71222c3361a8216f7760b1a99a6ad4e8441de18bbb9"
dependencies = [
"bitflags 2.11.1",
"chrono",
@@ -5976,9 +5984,9 @@ checksum = "c9c366d2c8c60b86fa632df75f745509b52f9128f91a6bad4c796e44abb505e1"
[[package]]
name = "napi-derive"
version = "3.5.7"
version = "3.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61d66f70256ad5aef58659966064471d0ad90e2897bc36a5a5e0389c85aabc1e"
checksum = "89b3f766e04667e6da0e181e2da4f85475d5a6513b7cf6a80bea184e224a5b42"
dependencies = [
"convert_case",
"ctor 1.0.5",
@@ -5990,9 +5998,9 @@ dependencies = [
[[package]]
name = "napi-derive-backend"
version = "5.0.5"
version = "5.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81b4b08f15eed7a2a20c3f4c6314013fc3ac890a3afa9892b594485299ebdb2d"
checksum = "0d5af30503edf933ce7377cf6d4c877a62b0f1107ea05585f1b5e430e88d5baf"
dependencies = [
"convert_case",
"proc-macro2",
@@ -10120,9 +10128,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "uuid"
version = "1.23.4"
version = "1.23.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf80a72845275afea99e7f2b434723d3bc7e38470fcd1c7ed39a599c73319a53"
checksum = "144d6b123cef80b301b8f72a9e2ca4370ddec21950d0a103dd22c437006d2db7"
dependencies = [
"getrandom 0.4.2",
"js-sys",

View File

@@ -13,20 +13,20 @@ categories = ["database-implementations"]
rust-version = "1.91.0"
[workspace.dependencies]
lance = { "version" = "=9.0.0-beta.10", default-features = false, "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=9.0.0-beta.10", default-features = false, "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=9.0.0-beta.10", default-features = false, "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=9.0.0-beta.10", "tag" = "v9.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=9.0.0-beta.8", default-features = false, "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=9.0.0-beta.8", "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=9.0.0-beta.8", "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=9.0.0-beta.8", "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=9.0.0-beta.8", default-features = false, "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=9.0.0-beta.8", "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=9.0.0-beta.8", "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=9.0.0-beta.8", "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=9.0.0-beta.8", default-features = false, "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=9.0.0-beta.8", "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=9.0.0-beta.8", "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=9.0.0-beta.8", "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=9.0.0-beta.8", "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=9.0.0-beta.8", "tag" = "v9.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "58.0.0", optional = false }

View File

@@ -1,29 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / OAuthFlowType
# Enumeration: OAuthFlowType
OAuth authentication flow types.
## Enumeration Members
### AzureManagedIdentity
```ts
AzureManagedIdentity: "azure_managed_identity";
```
Azure Managed Identity via IMDS.
***
### ClientCredentials
```ts
ClientCredentials: "client_credentials";
```
Client Credentials grant (service-to-service / M2M).

View File

@@ -12,7 +12,6 @@
## Enumerations
- [FullTextQueryType](enumerations/FullTextQueryType.md)
- [OAuthFlowType](enumerations/OAuthFlowType.md)
- [Occur](enumerations/Occur.md)
- [Operator](enumerations/Operator.md)
@@ -86,8 +85,6 @@
- [ListNamespacesResponse](interfaces/ListNamespacesResponse.md)
- [LsmWriteSpec](interfaces/LsmWriteSpec.md)
- [MergeResult](interfaces/MergeResult.md)
- [NativeOAuthConfig](interfaces/NativeOAuthConfig.md)
- [OAuthConfig](interfaces/OAuthConfig.md)
- [OpenTableOptions](interfaces/OpenTableOptions.md)
- [OptimizeOptions](interfaces/OptimizeOptions.md)
- [OptimizeStats](interfaces/OptimizeStats.md)

View File

@@ -64,19 +64,6 @@ client used by manifest-enabled native connections.
***
### oauthConfig?
```ts
optional oauthConfig: NativeOAuthConfig;
```
(For LanceDB cloud only): OAuth configuration for IdP-based
authentication (e.g., Azure Entra ID). When set, token acquisition
and refresh are handled entirely in Rust. TypeScript users should pass
the public `OAuthConfig` type exported from `@lancedb/lancedb`.
***
### readConsistencyInterval?
```ts

View File

@@ -1,88 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / NativeOAuthConfig
# Interface: NativeOAuthConfig
OAuth configuration for LanceDB authentication.
This is the generated napi-rs binding shape. TypeScript users should prefer
the public `OAuthConfig` type exported from `@lancedb/lancedb`.
All token acquisition and refresh is handled in the Rust layer.
## Properties
### clientId
```ts
clientId: string;
```
Application / Client ID.
***
### clientSecret?
```ts
optional clientSecret: string;
```
Client secret (required for client_credentials).
***
### flow?
```ts
optional flow: string;
```
Authentication flow: "client_credentials" or "azure_managed_identity"
***
### issuerUrl
```ts
issuerUrl: string;
```
OIDC issuer URL or OAuth authority URL.
For Azure: `https://login.microsoftonline.com/{tenant_id}/v2.0`
***
### managedIdentityClientId?
```ts
optional managedIdentityClientId: string;
```
Client ID for user-assigned managed identity (azure_managed_identity).
***
### refreshBufferSecs?
```ts
optional refreshBufferSecs: number;
```
Seconds before expiry to trigger proactive refresh (default: 300).
Keep this well below the token TTL; if it is greater than or equal to
the TTL, each request refreshes the token.
***
### scopes
```ts
scopes: string[];
```
OAuth scopes to request. For Azure managed identity, exactly one scope
or resource is required. For example: `["api://{app_id}/.default"]`

View File

@@ -1,111 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / OAuthConfig
# Interface: OAuthConfig
OAuth configuration for LanceDB authentication.
This is the public TypeScript OAuth configuration type. The generated
`NativeOAuthConfig` type has the same runtime shape but is an implementation
detail of the napi-rs binding.
All token acquisition and refresh is handled in the Rust layer.
This config is passed through to Rust via napi-rs.
## Examples
```typescript
const config: OAuthConfig = {
issuerUrl: "https://login.microsoftonline.com/{tenant}/v2.0",
clientId: "app-id",
clientSecret: "secret",
scopes: ["api://lancedb-api/.default"],
};
```
```typescript
const config: OAuthConfig = {
issuerUrl: "https://login.microsoftonline.com/{tenant}/v2.0",
clientId: "app-id",
scopes: ["api://lancedb-api/.default"],
flow: OAuthFlowType.AzureManagedIdentity,
};
```
## Properties
### clientId
```ts
clientId: string;
```
Application / Client ID.
***
### clientSecret?
```ts
optional clientSecret: string;
```
Client secret (required for ClientCredentials).
***
### flow?
```ts
optional flow: OAuthFlowType;
```
Authentication flow (default: ClientCredentials).
***
### issuerUrl
```ts
issuerUrl: string;
```
OIDC issuer URL or OAuth authority URL.
For Azure: `https://login.microsoftonline.com/{tenant_id}/v2.0`
***
### managedIdentityClientId?
```ts
optional managedIdentityClientId: string;
```
Client ID for user-assigned managed identity (AzureManagedIdentity).
***
### refreshBufferSecs?
```ts
optional refreshBufferSecs: number;
```
Seconds before expiry to trigger proactive refresh (default: 300).
Keep this well below the token TTL; if it is greater than or equal to
the TTL, each request refreshes the token.
***
### scopes
```ts
scopes: string[];
```
OAuth scopes to request.
For Azure managed identity, exactly one scope or resource is required.
For example: `["api://{app_id}/.default"]`

View File

@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>9.0.0-beta.10</lance-core.version>
<lance-core.version>9.0.0-beta.8</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>

View File

@@ -52,7 +52,6 @@ export {
SplitHashOptions,
SplitSequentialOptions,
ShuffleOptions,
OAuthConfig as NativeOAuthConfig,
} from "./native.js";
export {
@@ -131,8 +130,6 @@ export {
TokenResponse,
} from "./header";
export { OAuthConfig, OAuthFlowType } from "./oauth";
export { MergeInsertBuilder, WriteExecutionOptions } from "./merge";
export * as embedding from "./embedding";

View File

@@ -1,76 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
/**
* OAuth authentication flow types.
*/
export enum OAuthFlowType {
/** Client Credentials grant (service-to-service / M2M). */
ClientCredentials = "client_credentials",
/** Azure Managed Identity via IMDS. */
AzureManagedIdentity = "azure_managed_identity",
}
/**
* OAuth configuration for LanceDB authentication.
*
* This is the public TypeScript OAuth configuration type. The generated
* `NativeOAuthConfig` type has the same runtime shape but is an implementation
* detail of the napi-rs binding.
*
* All token acquisition and refresh is handled in the Rust layer.
* This config is passed through to Rust via napi-rs.
*
* @example Client Credentials (service-to-service):
* ```typescript
* const config: OAuthConfig = {
* issuerUrl: "https://login.microsoftonline.com/{tenant}/v2.0",
* clientId: "app-id",
* clientSecret: "secret",
* scopes: ["api://lancedb-api/.default"],
* };
* ```
*
* @example Azure Managed Identity:
* ```typescript
* const config: OAuthConfig = {
* issuerUrl: "https://login.microsoftonline.com/{tenant}/v2.0",
* clientId: "app-id",
* scopes: ["api://lancedb-api/.default"],
* flow: OAuthFlowType.AzureManagedIdentity,
* };
* ```
*/
export interface OAuthConfig {
/**
* OIDC issuer URL or OAuth authority URL.
* For Azure: `https://login.microsoftonline.com/{tenant_id}/v2.0`
*/
issuerUrl: string;
/** Application / Client ID. */
clientId: string;
/**
* OAuth scopes to request.
* For Azure managed identity, exactly one scope or resource is required.
* For example: `["api://{app_id}/.default"]`
*/
scopes: string[];
/** Authentication flow (default: ClientCredentials). */
flow?: OAuthFlowType;
/** Client secret (required for ClientCredentials). */
clientSecret?: string;
/** Client ID for user-assigned managed identity (AzureManagedIdentity). */
managedIdentityClientId?: string;
/**
* Seconds before expiry to trigger proactive refresh (default: 300).
* Keep this well below the token TTL; if it is greater than or equal to
* the TTL, each request refreshes the token.
*/
refreshBufferSecs?: number;
}

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.31.0-beta.4",
"version": "0.31.0-beta.3",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.31.0-beta.4",
"version": "0.31.0-beta.3",
"cpu": [
"x64",
"arm64"

View File

@@ -112,12 +112,6 @@ impl Connection {
builder = builder.client_config(rust_config);
if let Some(oauth_config) = options.oauth_config {
let config: lancedb::remote::oauth::OAuthConfig =
oauth_config.try_into().default_error()?;
builder = builder.oauth_config(config);
}
if let Some(api_key) = options.api_key {
builder = builder.api_key(&api_key);
}

View File

@@ -65,11 +65,6 @@ pub struct ConnectionOptions {
/// (For LanceDB cloud only): the host to use for LanceDB cloud. Used
/// for testing purposes.
pub host_override: Option<String>,
/// (For LanceDB cloud only): OAuth configuration for IdP-based
/// authentication (e.g., Azure Entra ID). When set, token acquisition
/// and refresh are handled entirely in Rust. TypeScript users should pass
/// the public `OAuthConfig` type exported from `@lancedb/lancedb`.
pub oauth_config: Option<remote::OAuthConfig>,
}
#[napi(object)]

View File

@@ -3,7 +3,7 @@
use std::time::Duration;
use lancedb::{ipc::ipc_file_to_batches, table::merge::MergeInsertBuilder};
use lancedb::{arrow::IntoArrow, ipc::ipc_file_to_batches, table::merge::MergeInsertBuilder};
use napi::bindgen_prelude::*;
use napi_derive::napi;
@@ -66,9 +66,11 @@ impl NativeMergeInsertBuilder {
#[napi(catch_unwind)]
pub async fn execute(&self, buf: Buffer) -> napi::Result<MergeResult> {
let data = ipc_file_to_batches(buf.to_vec()).map_err(|e| {
napi::Error::from_reason(format!("Failed to read IPC file: {}", convert_error(&e)))
})?;
let data = ipc_file_to_batches(buf.to_vec())
.and_then(IntoArrow::into_arrow)
.map_err(|e| {
napi::Error::from_reason(format!("Failed to read IPC file: {}", convert_error(&e)))
})?;
let this = self.clone();

View File

@@ -3,7 +3,6 @@
use std::collections::HashMap;
use lancedb::error::Error;
use napi_derive::*;
/// Timeout configuration for remote HTTP client.
@@ -141,84 +140,6 @@ impl From<TlsConfig> for lancedb::remote::TlsConfig {
}
}
/// OAuth configuration for LanceDB authentication.
///
/// This is the generated napi-rs binding shape. TypeScript users should prefer
/// the public `OAuthConfig` type exported from `@lancedb/lancedb`.
///
/// All token acquisition and refresh is handled in the Rust layer.
#[napi(object)]
#[derive(Clone)]
pub struct OAuthConfig {
/// OIDC issuer URL or OAuth authority URL.
/// For Azure: `https://login.microsoftonline.com/{tenant_id}/v2.0`
pub issuer_url: String,
/// Application / Client ID.
pub client_id: String,
/// OAuth scopes to request. For Azure managed identity, exactly one scope
/// or resource is required. For example: `["api://{app_id}/.default"]`
pub scopes: Vec<String>,
/// Authentication flow: "client_credentials" or "azure_managed_identity"
pub flow: Option<String>,
/// Client secret (required for client_credentials).
pub client_secret: Option<String>,
/// Client ID for user-assigned managed identity (azure_managed_identity).
pub managed_identity_client_id: Option<String>,
/// Seconds before expiry to trigger proactive refresh (default: 300).
/// Keep this well below the token TTL; if it is greater than or equal to
/// the TTL, each request refreshes the token.
pub refresh_buffer_secs: Option<u32>,
}
impl std::fmt::Debug for OAuthConfig {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("OAuthConfig")
.field("issuer_url", &self.issuer_url)
.field("client_id", &self.client_id)
.field("scopes", &self.scopes)
.field("flow", &self.flow)
.field(
"client_secret",
&self.client_secret.as_deref().map(|_| "<redacted>"),
)
.field(
"managed_identity_client_id",
&self.managed_identity_client_id,
)
.field("refresh_buffer_secs", &self.refresh_buffer_secs)
.finish()
}
}
impl TryFrom<OAuthConfig> for lancedb::remote::oauth::OAuthConfig {
type Error = Error;
fn try_from(config: OAuthConfig) -> Result<Self, Self::Error> {
use lancedb::remote::oauth::OAuthFlow;
let flow = match config.flow.as_deref().unwrap_or("client_credentials") {
"client_credentials" => OAuthFlow::ClientCredentials,
"azure_managed_identity" => OAuthFlow::AzureManagedIdentity {
client_id: config.managed_identity_client_id,
},
other => {
return Err(Error::InvalidInput {
message: format!("Unknown OAuth flow type: {other}"),
});
}
};
Ok(Self {
issuer_url: config.issuer_url,
client_id: config.client_id,
client_secret: config.client_secret,
scopes: config.scopes,
flow,
refresh_buffer_secs: config.refresh_buffer_secs.map(|v| v as u64),
})
}
}
impl From<ClientConfig> for lancedb::remote::ClientConfig {
fn from(config: ClientConfig) -> Self {
Self {
@@ -235,45 +156,3 @@ impl From<ClientConfig> for lancedb::remote::ClientConfig {
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_unknown_oauth_flow_returns_invalid_input() {
let config = OAuthConfig {
issuer_url: "https://issuer.example.com".to_string(),
client_id: "client-id".to_string(),
scopes: vec!["scope".to_string()],
flow: Some("typo".to_string()),
client_secret: None,
managed_identity_client_id: None,
refresh_buffer_secs: None,
};
let err = lancedb::remote::oauth::OAuthConfig::try_from(config).unwrap_err();
assert!(matches!(
err,
Error::InvalidInput { message }
if message == "Unknown OAuth flow type: typo"
));
}
#[test]
fn test_oauth_config_debug_redacts_client_secret() {
let config = OAuthConfig {
issuer_url: "https://issuer.example.com".to_string(),
client_id: "client-id".to_string(),
scopes: vec!["scope".to_string()],
flow: Some("client_credentials".to_string()),
client_secret: Some("super-secret".to_string()),
managed_identity_client_id: None,
refresh_buffer_secs: None,
};
let debug = format!("{config:?}");
assert!(!debug.contains("super-secret"));
assert!(debug.contains("client_secret: Some(\"<redacted>\")"));
}
}

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.34.0-beta.5"
current_version = "0.34.0-beta.4"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.34.0-beta.5"
version = "0.34.0-beta.4"
publish = false
edition.workspace = true
description = "Python bindings for LanceDB"

View File

@@ -89,8 +89,6 @@ def connect(
If presented, connect to LanceDB cloud.
Otherwise, connect to a database on file system or cloud storage.
Can be set via environment variable `LANCEDB_API_KEY`.
OAuth configuration is currently supported only by ``connect_async``;
synchronous LanceDB Cloud connections require an API key.
region: str, default "us-east-1"
The region to use for LanceDB Cloud.
host_override: str, optional
@@ -342,7 +340,6 @@ async def connect_async(
session: Optional[Session] = None,
manifest_enabled: bool = False,
namespace_client_properties: Optional[Dict[str, str]] = None,
oauth_config=None,
) -> AsyncConnection:
"""Connect to a LanceDB database.
@@ -392,10 +389,6 @@ async def connect_async(
namespace_client_properties : dict, optional
Additional directory namespace client properties to use with
``manifest_enabled=True``.
oauth_config : OAuthConfig, optional
OAuth configuration for LanceDB Cloud/Enterprise. This is supported by
``connect_async`` only; synchronous ``connect`` uses API key
authentication for ``db://`` URIs.
Examples
--------
@@ -442,7 +435,6 @@ async def connect_async(
session,
manifest_enabled,
namespace_client_properties,
oauth_config,
)
)

View File

@@ -280,24 +280,6 @@ async def connect(
session: Optional[Session],
manifest_enabled: bool = False,
namespace_client_properties: Optional[Dict[str, str]] = None,
oauth_config: Optional[Any] = None,
) -> Connection: ...
def connect_namespace(
namespace_client_impl: str,
namespace_client_properties: Dict[str, str],
read_consistency_interval: Optional[float] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
namespace_client_pushdown_operations: Optional[List[str]] = None,
) -> Connection: ...
def connect_namespace_client(
namespace_client: Any,
read_consistency_interval: Optional[float] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
namespace_client_pushdown_operations: Optional[List[str]] = None,
namespace_client_impl: Optional[str] = None,
namespace_client_properties: Optional[Dict[str, str]] = None,
) -> Connection: ...
class RecordBatchStream:

View File

@@ -38,11 +38,8 @@ from lance_namespace_urllib3_client.models.query_table_request_vector import (
QueryTableRequestVector,
)
from lance_namespace_urllib3_client.models.string_fts_query import StringFtsQuery
from lance_namespace.errors import NamespaceNotEmptyError, TableNotFoundError
from lancedb._lancedb import (
connect_namespace as _connect_namespace,
connect_namespace_client as _connect_namespace_client,
)
from lance_namespace.errors import TableNotFoundError
from lancedb._lancedb import connect_namespace_client as _connect_namespace_client
from lancedb.background_loop import LOOP
from lancedb.db import AsyncConnection, DBConnection
from lancedb.namespace_utils import (
@@ -389,10 +386,6 @@ def _builds_namespace_natively(
return namespace_client_impl == "rest" and bool(namespace_client_properties)
def _supports_native_sync_namespace(namespace_client_impl: str) -> bool:
return namespace_client_impl in {"dir", "rest"}
class LanceNamespaceDBConnection(DBConnection):
"""
A LanceDB connection that uses a namespace for table management.
@@ -403,7 +396,7 @@ class LanceNamespaceDBConnection(DBConnection):
def __init__(
self,
namespace_client: Optional[LanceNamespace] = None,
namespace_client: LanceNamespace,
*,
read_consistency_interval: Optional[timedelta] = None,
storage_options: Optional[Dict[str, str]] = None,
@@ -411,8 +404,6 @@ class LanceNamespaceDBConnection(DBConnection):
namespace_client_pushdown_operations: Optional[List[str]] = None,
namespace_client_impl: Optional[str] = None,
namespace_client_properties: Optional[Dict[str, str]] = None,
_inner: Optional[AsyncConnection] = None,
_route_pushdown_to_rust: Optional[bool] = None,
):
"""
Initialize a namespace-based LanceDB connection.
@@ -458,36 +449,26 @@ class LanceNamespaceDBConnection(DBConnection):
# ``build_namespace_natively``), the underlying Rust table performs
# QueryTable pushdown through the read-freshness context provider, which
# the pure-Python ``query_table`` path bypasses.
self._route_pushdown_to_rust = (
_route_pushdown_to_rust
if _route_pushdown_to_rust is not None
else _builds_namespace_natively(
namespace_client_impl, namespace_client_properties
self._route_pushdown_to_rust = _builds_namespace_natively(
namespace_client_impl, namespace_client_properties
)
self._inner = AsyncConnection(
_connect_namespace_client(
namespace_client,
read_consistency_interval=(
read_consistency_interval.total_seconds()
if read_consistency_interval is not None
else None
),
storage_options=self.storage_options or None,
session=session,
namespace_client_pushdown_operations=(
list(self._namespace_client_pushdown_operations)
),
namespace_client_impl=namespace_client_impl,
namespace_client_properties=namespace_client_properties,
)
)
if _inner is not None:
self._inner = _inner
else:
if namespace_client is None:
raise ValueError("namespace_client is required without a native _inner")
self._inner = AsyncConnection(
_connect_namespace_client(
namespace_client,
read_consistency_interval=(
read_consistency_interval.total_seconds()
if read_consistency_interval is not None
else None
),
storage_options=self.storage_options or None,
session=session,
namespace_client_pushdown_operations=(
list(self._namespace_client_pushdown_operations)
),
namespace_client_impl=namespace_client_impl,
namespace_client_properties=namespace_client_properties,
)
)
self._uri = self._inner.uri
@override
def serialize(self) -> str:
@@ -533,11 +514,11 @@ class LanceNamespaceDBConnection(DBConnection):
)
if namespace_path is None:
namespace_path = []
return LOOP.run(
self._inner.table_names(
namespace_path=namespace_path, start_after=page_token, limit=limit
)
request = ListTablesRequest(
id=namespace_path, page_token=page_token, limit=limit
)
response = self._namespace_client.list_tables(request)
return response.tables if response.tables else []
@override
def create_table(
@@ -608,8 +589,8 @@ class LanceNamespaceDBConnection(DBConnection):
index_cache_size=index_cache_size,
)
)
except (RuntimeError, ValueError) as e:
if "Table not found" in str(e) or "was not found" in str(e):
except RuntimeError as e:
if "Table not found" in str(e):
table_id = namespace_path + [name]
raise TableNotFoundError(f"Table not found: {'$'.join(table_id)}")
raise
@@ -631,9 +612,12 @@ class LanceNamespaceDBConnection(DBConnection):
@override
def drop_table(self, name: str, namespace_path: Optional[List[str]] = None):
# Use namespace drop_table directly
if namespace_path is None:
namespace_path = []
LOOP.run(self._inner.drop_table(name, namespace_path=namespace_path))
table_id = namespace_path + [name]
request = DropTableRequest(id=table_id)
self._namespace_client.drop_table(request)
@override
def rename_table(
@@ -647,19 +631,14 @@ class LanceNamespaceDBConnection(DBConnection):
cur_namespace_path = []
if new_namespace_path is None:
new_namespace_path = []
try:
LOOP.run(
self._inner.rename_table(
cur_name,
new_name,
cur_namespace_path=cur_namespace_path,
new_namespace_path=new_namespace_path,
)
)
except RuntimeError as e:
if "rename_table not implemented" in str(e):
raise NotImplementedError("rename_table not implemented") from e
raise
cur_table_id = cur_namespace_path + [cur_name]
new_namespace_id = new_namespace_path if new_namespace_path else None
request = RenameTableRequest(
id=cur_table_id,
new_table_name=new_name,
new_namespace_id=new_namespace_id,
)
self._namespace_client.rename_table(request)
@override
def drop_database(self):
@@ -671,7 +650,8 @@ class LanceNamespaceDBConnection(DBConnection):
def drop_all_tables(self, namespace_path: Optional[List[str]] = None):
if namespace_path is None:
namespace_path = []
LOOP.run(self._inner.drop_all_tables(namespace_path=namespace_path))
for table_name in self.table_names(namespace_path=namespace_path):
self.drop_table(table_name, namespace_path=namespace_path)
@override
def list_namespaces(
@@ -701,10 +681,13 @@ class LanceNamespaceDBConnection(DBConnection):
"""
if namespace_path is None:
namespace_path = []
return LOOP.run(
self._inner.list_namespaces(
namespace_path=namespace_path, page_token=page_token, limit=limit
)
request = ListNamespacesRequest(
id=namespace_path, page_token=page_token, limit=limit
)
response = self._namespace_client.list_namespaces(request)
return ListNamespacesResponse(
namespaces=response.namespaces if response.namespaces else [],
page_token=response.page_token,
)
@override
@@ -732,12 +715,14 @@ class LanceNamespaceDBConnection(DBConnection):
CreateNamespaceResponse
Response containing the properties of the created namespace.
"""
return LOOP.run(
self._inner.create_namespace(
namespace_path=namespace_path,
mode=mode,
properties=properties,
)
request = CreateNamespaceRequest(
id=namespace_path,
mode=_normalize_create_namespace_mode(mode),
properties=properties,
)
response = self._namespace_client.create_namespace(request)
return CreateNamespaceResponse(
properties=response.properties if hasattr(response, "properties") else None
)
@override
@@ -765,18 +750,20 @@ class LanceNamespaceDBConnection(DBConnection):
DropNamespaceResponse
Response containing properties and transaction_id if applicable.
"""
try:
return LOOP.run(
self._inner.drop_namespace(
namespace_path=namespace_path,
mode=mode,
behavior=behavior,
)
)
except RuntimeError as e:
if "Namespace not empty" in str(e):
raise NamespaceNotEmptyError(str(e)) from e
raise
request = DropNamespaceRequest(
id=namespace_path,
mode=_normalize_drop_namespace_mode(mode),
behavior=_normalize_drop_namespace_behavior(behavior),
)
response = self._namespace_client.drop_namespace(request)
return DropNamespaceResponse(
properties=(
response.properties if hasattr(response, "properties") else None
),
transaction_id=(
response.transaction_id if hasattr(response, "transaction_id") else None
),
)
@override
def describe_namespace(
@@ -795,7 +782,11 @@ class LanceNamespaceDBConnection(DBConnection):
DescribeNamespaceResponse
Response containing the namespace properties.
"""
return LOOP.run(self._inner.describe_namespace(namespace_path))
request = DescribeNamespaceRequest(id=namespace_path)
response = self._namespace_client.describe_namespace(request)
return DescribeNamespaceResponse(
properties=response.properties if hasattr(response, "properties") else None
)
@override
def list_tables(
@@ -825,10 +816,13 @@ class LanceNamespaceDBConnection(DBConnection):
"""
if namespace_path is None:
namespace_path = []
return LOOP.run(
self._inner.list_tables(
namespace_path=namespace_path, page_token=page_token, limit=limit
)
request = ListTablesRequest(
id=namespace_path, page_token=page_token, limit=limit
)
response = self._namespace_client.list_tables(request)
return ListTablesResponse(
tables=response.tables if response.tables else [],
page_token=response.page_token,
)
def _lance_table_from_uri(
@@ -884,18 +878,6 @@ class LanceNamespaceDBConnection(DBConnection):
LanceNamespace
The namespace client for this connection.
"""
if self._namespace_client is None:
if (
self._namespace_client_impl is None
or self._namespace_client_properties is None
):
raise ValueError(
"Cannot construct a Python namespace client without "
"namespace implementation properties"
)
self._namespace_client = namespace_connect(
self._namespace_client_impl, self._namespace_client_properties
)
return self._namespace_client
@@ -1360,33 +1342,6 @@ def connect_namespace(
LanceNamespaceDBConnection
A namespace-based connection to LanceDB
"""
if _supports_native_sync_namespace(namespace_client_impl):
inner = AsyncConnection(
_connect_namespace(
namespace_client_impl,
namespace_client_properties,
read_consistency_interval=(
read_consistency_interval.total_seconds()
if read_consistency_interval is not None
else None
),
storage_options=storage_options,
session=session,
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
)
)
return LanceNamespaceDBConnection(
namespace_client=None,
read_consistency_interval=read_consistency_interval,
storage_options=storage_options,
session=session,
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
namespace_client_impl=namespace_client_impl,
namespace_client_properties=namespace_client_properties,
_inner=inner,
_route_pushdown_to_rust=True,
)
namespace_client = namespace_connect(
namespace_client_impl, namespace_client_properties
)

View File

@@ -48,14 +48,6 @@ class PermutationBuilder:
By default, the permutation builder will create a single split that contains all
rows in the same order as the base table.
"""
if not hasattr(table, "_inner"):
raise TypeError(
f"PermutationBuilder requires a local LanceTable, "
f"got {type(table).__name__}. "
"The permutation API is not supported on remote tables. "
"Remote tables connect to LanceDB Cloud or Enterprise and do not have "
"direct access to the underlying Lance dataset needed for permutations."
)
self._async = async_permutation_builder(table)
def split_random(

View File

@@ -9,7 +9,6 @@ from typing import List, Optional
from lancedb import __version__
from .header import HeaderProvider
from .oauth import OAuthConfig, OAuthFlowType
__all__ = [
"TimeoutConfig",
@@ -17,8 +16,6 @@ __all__ = [
"TlsConfig",
"ClientConfig",
"HeaderProvider",
"OAuthConfig",
"OAuthFlowType",
]

View File

@@ -1,75 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Optional
class OAuthFlowType(str, Enum):
"""OAuth authentication flow types."""
CLIENT_CREDENTIALS = "client_credentials"
"""Client Credentials grant (service-to-service / M2M)."""
AZURE_MANAGED_IDENTITY = "azure_managed_identity"
"""Azure Managed Identity via IMDS."""
@dataclass
class OAuthConfig:
"""OAuth configuration for LanceDB authentication.
All token acquisition and refresh is handled in the Rust layer.
This config is passed through to Rust via PyO3.
Parameters
----------
issuer_url : str
OIDC issuer URL or OAuth authority URL.
For Azure: ``https://login.microsoftonline.com/{tenant_id}/v2.0``
client_id : str
Application / Client ID.
scopes : List[str]
OAuth scopes to request.
For Azure managed identity, exactly one scope or resource is required.
For example: ``["api://{app_id}/.default"]``
flow : OAuthFlowType
Authentication flow to use. Default: CLIENT_CREDENTIALS.
client_secret : Optional[str]
Client secret (required for CLIENT_CREDENTIALS).
managed_identity_client_id : Optional[str]
Client ID for user-assigned managed identity (AZURE_MANAGED_IDENTITY).
refresh_buffer_secs : Optional[int]
Seconds before expiry to trigger proactive refresh (default: 300).
Keep this well below the token TTL; if it is greater than or equal to
the TTL, each request refreshes the token.
Examples
--------
Client Credentials (service-to-service):
>>> config = OAuthConfig(
... issuer_url="https://login.microsoftonline.com/{tenant}/v2.0",
... client_id="app-id",
... client_secret="secret",
... scopes=["api://lancedb-api/.default"],
... )
Azure Managed Identity:
>>> config = OAuthConfig(
... issuer_url="https://login.microsoftonline.com/{tenant}/v2.0",
... client_id="app-id",
... scopes=["api://lancedb-api/.default"],
... flow=OAuthFlowType.AZURE_MANAGED_IDENTITY,
... )
"""
issuer_url: str
client_id: str
scopes: List[str]
flow: OAuthFlowType = OAuthFlowType.CLIENT_CREDENTIALS
client_secret: Optional[str] = field(default=None, repr=False)
managed_identity_client_id: Optional[str] = None
refresh_buffer_secs: Optional[int] = None

View File

@@ -2142,19 +2142,12 @@ class LanceTable(Table):
branch = self.current_branch()
version = None if branch is not None else self.version
namespace_client = self._namespace_client
if namespace_client is None:
conn_uri = getattr(self._conn, "uri", "")
if get_uri_scheme(conn_uri) == "namespace":
namespace_client = self._conn.namespace_client()
self._namespace_client = namespace_client
if namespace_client is not None:
if self._namespace_client is not None:
table_id = self._namespace_path + [self.name]
ds = lance.dataset(
version=version,
storage_options=self._conn.storage_options,
namespace_client=namespace_client,
namespace_client=self._namespace_client,
table_id=table_id,
**kwargs,
)

View File

@@ -5,7 +5,6 @@
import tempfile
import shutil
import importlib
import pytest
import pyarrow as pa
import lancedb
@@ -104,40 +103,6 @@ class TestNamespaceConnection:
assert isinstance(db, lancedb.LanceNamespaceDBConnection)
assert len(list(db.table_names())) == 0
def test_sync_builtin_namespace_uses_rust_without_python_client(self, monkeypatch):
"""Built-in sync namespace connections should not construct or call the
Python namespace client for normal namespace/table management."""
namespace_module = importlib.import_module("lancedb.namespace")
def fail_namespace_connect(*args, **kwargs):
raise AssertionError("Python namespace client should not be constructed")
monkeypatch.setattr(
namespace_module, "namespace_connect", fail_namespace_connect
)
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
assert isinstance(db, lancedb.LanceNamespaceDBConnection)
assert db._namespace_client is None
assert db._route_pushdown_to_rust is True
db.create_namespace(["test_ns"])
assert "test_ns" in db.list_namespaces().namespaces
schema = pa.schema([pa.field("id", pa.int64())])
table = db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
assert table.namespace == ["test_ns"]
assert "test_table" in db.table_names(namespace_path=["test_ns"])
assert "test_table" in db.list_tables(namespace_path=["test_ns"]).tables
opened = db.open_table("test_table", namespace_path=["test_ns"])
assert opened.namespace == ["test_ns"]
db.drop_table("test_table", namespace_path=["test_ns"])
assert db.list_tables(namespace_path=["test_ns"]).tables == []
db.drop_namespace(["test_ns"])
assert "test_ns" not in db.list_namespaces().namespaces
def test_create_table_through_namespace(self):
"""Test creating a table through namespace."""
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
@@ -853,11 +818,10 @@ class TestPushdownOperations:
)
assert db._route_pushdown_to_rust is True
def test_route_pushdown_to_rust_for_native_dir(self):
"""The sync dir connection is natively built and defers QueryTable
pushdown to Rust."""
def test_route_pushdown_to_rust_false_for_dir(self):
"""A non-native (dir) connection keeps the Python pushdown path."""
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
assert db._route_pushdown_to_rust is True
assert db._route_pushdown_to_rust is False
def test_async_route_pushdown_to_rust_for_native_rest(self):
"""The async connection must not silently bypass the read-freshness fix:

View File

@@ -1137,16 +1137,6 @@ def test_namespace_open_table_with_branch_version(tmp_path):
assert db.open_table("t", namespace_path=["ns1"], branch="exp").count_rows() == 3
def test_namespace_root_table_to_lance_uses_namespace_client(tmp_path):
pytest.importorskip("lance") # "dir" impl is lance.namespace.DirectoryNamespace
db = lancedb.connect_namespace("dir", {"root": str(tmp_path)})
table = db.create_table("t", [{"i": 0}])
assert table._namespace_client is None
assert table.to_lance().count_rows() == 1
assert table._namespace_client is not None
@pytest.mark.asyncio
async def test_async_namespace_open_table_with_branch_version(tmp_path):
pytest.importorskip("lance") # "dir" impl is lance.namespace.DirectoryNamespace

View File

@@ -539,7 +539,7 @@ impl Connection {
}
#[pyfunction]
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None, manifest_enabled=false, namespace_client_properties=None, oauth_config=None))]
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None, manifest_enabled=false, namespace_client_properties=None))]
#[allow(clippy::too_many_arguments)]
pub fn connect(
py: Python<'_>,
@@ -553,7 +553,6 @@ pub fn connect(
session: Option<crate::session::Session>,
manifest_enabled: bool,
namespace_client_properties: Option<HashMap<String, String>>,
oauth_config: Option<crate::oauth::PyOAuthConfig>,
) -> PyResult<Bound<'_, PyAny>> {
future_into_py(py, async move {
let mut builder = lancedb::connect(&uri);
@@ -583,11 +582,6 @@ pub fn connect(
if let Some(client_config) = client_config {
builder = builder.client_config(client_config.into());
}
if let Some(oauth_config) = oauth_config {
let config: lancedb::remote::oauth::OAuthConfig =
oauth_config.try_into().infer_error()?;
builder = builder.oauth_config(config);
}
if let Some(session) = session {
builder = builder.session(session.inner.clone());
}
@@ -655,46 +649,6 @@ pub fn connect_namespace_client(
)))
}
#[pyfunction]
#[pyo3(signature = (
namespace_client_impl,
namespace_client_properties,
read_consistency_interval=None,
storage_options=None,
session=None,
namespace_client_pushdown_operations=None,
))]
#[allow(clippy::too_many_arguments)]
pub fn connect_namespace(
namespace_client_impl: String,
namespace_client_properties: HashMap<String, String>,
read_consistency_interval: Option<f64>,
storage_options: Option<HashMap<String, String>>,
session: Option<crate::session::Session>,
namespace_client_pushdown_operations: Option<Vec<String>>,
) -> PyResult<Connection> {
let read_consistency_interval = read_consistency_interval.map(Duration::from_secs_f64);
let namespace_client_pushdown_operations =
parse_namespace_client_pushdown_operations(namespace_client_pushdown_operations)?;
let mut builder =
lancedb::connect_namespace(&namespace_client_impl, namespace_client_properties)
.pushdown_operations(namespace_client_pushdown_operations);
if let Some(storage_options) = storage_options {
builder = builder.storage_options(storage_options);
}
if let Some(read_consistency_interval) = read_consistency_interval {
builder = builder.read_consistency_interval(read_consistency_interval);
}
if let Some(session) = session {
builder = builder.session(session.inner.clone());
}
Ok(Connection::new(
crate::runtime::block_on(builder.execute()).infer_error()?,
))
}
/// Whether to build the namespace natively (from impl + properties) instead of
/// wrapping a pre-built client. Native construction is required for the
/// read-freshness provider to be installed

View File

@@ -2,7 +2,7 @@
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use arrow::RecordBatchStream;
use connection::{Connection, connect, connect_namespace, connect_namespace_client};
use connection::{Connection, connect, connect_namespace_client};
use env_logger::Env;
use expr::{PyExpr, expr_col, expr_func, expr_lit};
use index::IndexConfig;
@@ -26,7 +26,6 @@ pub mod expr;
pub mod header;
pub mod index;
pub mod namespace;
pub mod oauth;
pub mod permutation;
pub mod query;
pub mod runtime;
@@ -62,7 +61,6 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyPermutationReader>()?;
m.add_class::<PyExpr>()?;
m.add_function(wrap_pyfunction!(connect, m)?)?;
m.add_function(wrap_pyfunction!(connect_namespace, m)?)?;
m.add_function(wrap_pyfunction!(connect_namespace_client, m)?)?;
m.add_function(wrap_pyfunction!(permutation::async_permutation_builder, m)?)?;
m.add_function(wrap_pyfunction!(util::validate_table_name, m)?)?;

View File

@@ -1,72 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use pyo3::FromPyObject;
use lancedb::error::Error;
use lancedb::remote::oauth::{OAuthConfig, OAuthFlow};
/// Python-side OAuth configuration, extracted via FromPyObject.
/// Maps to `lancedb.remote.oauth.OAuthConfig` Python dataclass.
#[derive(FromPyObject)]
pub struct PyOAuthConfig {
pub issuer_url: String,
pub client_id: String,
pub scopes: Vec<String>,
pub flow: String,
pub client_secret: Option<String>,
pub managed_identity_client_id: Option<String>,
pub refresh_buffer_secs: Option<u64>,
}
impl TryFrom<PyOAuthConfig> for OAuthConfig {
type Error = Error;
fn try_from(py: PyOAuthConfig) -> Result<Self, Self::Error> {
let flow = match py.flow.as_str() {
"client_credentials" => OAuthFlow::ClientCredentials,
"azure_managed_identity" => OAuthFlow::AzureManagedIdentity {
client_id: py.managed_identity_client_id,
},
other => {
return Err(Error::InvalidInput {
message: format!("Unknown OAuth flow type: {other}"),
});
}
};
Ok(Self {
issuer_url: py.issuer_url,
client_id: py.client_id,
client_secret: py.client_secret,
scopes: py.scopes,
flow,
refresh_buffer_secs: py.refresh_buffer_secs,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_unknown_oauth_flow_returns_invalid_input() {
let config = PyOAuthConfig {
issuer_url: "https://issuer.example.com".to_string(),
client_id: "client-id".to_string(),
scopes: vec!["scope".to_string()],
flow: "typo".to_string(),
client_secret: None,
managed_identity_client_id: None,
refresh_buffer_secs: None,
};
let err = OAuthConfig::try_from(config).unwrap_err();
assert!(matches!(
err,
Error::InvalidInput { message }
if message == "Unknown OAuth flow type: typo"
));
}
}

View File

@@ -1,33 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import importlib.util
import sys
from pathlib import Path
def _load_oauth_module():
oauth_path = (
Path(__file__).parents[1] / "python" / "lancedb" / "remote" / "oauth.py"
)
spec = importlib.util.spec_from_file_location("lancedb_remote_oauth", oauth_path)
module = importlib.util.module_from_spec(spec)
assert spec.loader is not None
sys.modules[spec.name] = module
spec.loader.exec_module(module)
return module
def test_oauth_config_repr_redacts_client_secret():
oauth = _load_oauth_module()
config = oauth.OAuthConfig(
issuer_url="https://issuer.example.com",
client_id="client-id",
scopes=["scope"],
client_secret="super-secret",
)
rendered = repr(config)
assert "super-secret" not in rendered
assert "client_secret" not in rendered

View File

@@ -166,10 +166,6 @@ required-features = ["bedrock"]
[[example]]
name = "simple"
[[example]]
name = "polars"
required-features = ["polars"]
[[example]]
name = "full_text_search"

View File

@@ -1,47 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
//! This example demonstrates ingesting a Polars DataFrame into LanceDB and
//! reading it back out as a Polars DataFrame.
use lancedb::arrow::IntoPolars;
use lancedb::query::ExecutableQuery;
use lancedb::{Result, connect};
use polars::prelude::{DataFrame, NamedFrom, Series};
fn make_dataframe() -> DataFrame {
let ids = Series::new("id", &[1i32, 2, 3, 4, 5]);
let names = Series::new("name", &["Alice", "Bob", "Carol", "Dave", "Eve"]);
let scores = Series::new("score", &[9.5f64, 8.1, 7.3, 9.0, 6.5]);
DataFrame::new(vec![ids, names, scores]).unwrap()
}
#[tokio::main]
async fn main() -> Result<()> {
let tmp = tempfile::tempdir().unwrap();
let db = connect(tmp.path().to_str().unwrap()).execute().await?;
// Ingest a Polars DataFrame directly — DataFrame now implements Scannable.
let df = make_dataframe();
println!("Input DataFrame:\n{df}");
let table = db.create_table("people", df).execute().await?;
// Append more rows.
let more = DataFrame::new(vec![
Series::new("id", &[6i32, 7]),
Series::new("name", &["Frank", "Grace"]),
Series::new("score", &[7.8f64, 8.9]),
])
.unwrap();
table.add(more).execute().await?;
// Read back as a Polars DataFrame.
let result_df = table.query().execute().await?.into_polars().await?;
println!(
"\nRound-tripped DataFrame ({} rows):\n{result_df}",
result_df.height()
);
Ok(())
}

View File

@@ -112,14 +112,54 @@ impl<S: Stream<Item = Result<arrow_array::RecordBatch>>> RecordBatchStream
/// A trait for converting incoming data to Arrow
///
/// Integrations should implement this trait to allow data to be
/// imported directly from the integration. For example, implementing
/// this trait for `Vec<Vec<...>>` would allow the `Vec` to be directly
/// used in methods like [`crate::connection::Connection::create_table`]
/// or [`crate::table::Table::add`]
pub trait IntoArrow {
/// Convert the data into an iterator of Arrow batches
fn into_arrow(self) -> Result<Box<dyn arrow_array::RecordBatchReader + Send>>;
}
pub type BoxedRecordBatchReader = Box<dyn arrow_array::RecordBatchReader + Send>;
impl<T: arrow_array::RecordBatchReader + Send + 'static> IntoArrow for T {
fn into_arrow(self) -> Result<Box<dyn arrow_array::RecordBatchReader + Send>> {
Ok(Box::new(self))
}
}
/// A trait for converting incoming data to Arrow asynchronously
///
/// Serves the same purpose as [`IntoArrow`], but for asynchronous data.
///
/// Note: Arrow has no async equivalent to RecordBatchReader and so
pub trait IntoArrowStream {
/// Convert the data into a stream of Arrow batches
fn into_arrow(self) -> Result<SendableRecordBatchStream>;
}
impl<S: Stream<Item = Result<arrow_array::RecordBatch>>> SimpleRecordBatchStream<S> {
pub fn new(stream: S, schema: Arc<arrow_schema::Schema>) -> Self {
Self { schema, stream }
}
}
impl IntoArrowStream for SendableRecordBatchStream {
fn into_arrow(self) -> Result<SendableRecordBatchStream> {
Ok(self)
}
}
impl IntoArrowStream for datafusion_physical_plan::SendableRecordBatchStream {
fn into_arrow(self) -> Result<SendableRecordBatchStream> {
let schema = self.schema();
let stream = self.map_err(|df_err| df_err.into());
Ok(Box::pin(SimpleRecordBatchStream::new(stream, schema)))
}
}
pub trait LanceDbDatagenExt {
fn into_ldb_stream(
self,
@@ -224,7 +264,9 @@ impl IntoPolars for SendableRecordBatchStream {
#[cfg(all(test, feature = "polars"))]
mod tests {
use super::SendableRecordBatchStream;
use crate::arrow::{IntoPolars, PolarsDataFrameRecordBatchReader, SimpleRecordBatchStream};
use crate::arrow::{
IntoArrow, IntoPolars, PolarsDataFrameRecordBatchReader, SimpleRecordBatchStream,
};
use polars::prelude::{DataFrame, NamedFrom, Series};
fn get_record_batch_reader_from_polars() -> Box<dyn arrow_array::RecordBatchReader + Send> {
@@ -238,7 +280,10 @@ mod tests {
float_series = Series::new("float", &[2.0]);
let df2 = DataFrame::new(vec![string_series, int_series, float_series]).unwrap();
Box::new(PolarsDataFrameRecordBatchReader::new(df1.vstack(&df2).unwrap()).unwrap())
PolarsDataFrameRecordBatchReader::new(df1.vstack(&df2).unwrap())
.unwrap()
.into_arrow()
.unwrap()
}
#[test]

View File

@@ -185,43 +185,6 @@ impl Scannable for SendableRecordBatchStream {
}
}
#[cfg(feature = "polars")]
impl Scannable for polars::frame::DataFrame {
fn schema(&self) -> SchemaRef {
crate::polars_arrow_convertors::convert_polars_df_schema_to_arrow_rb_schema(
self.schema().clone(),
)
.expect("failed to convert Polars DataFrame schema to Arrow schema")
}
fn scan_as_stream(&mut self) -> SendableRecordBatchStream {
let schema = Scannable::schema(self);
let batches: crate::Result<Vec<RecordBatch>> =
match crate::arrow::PolarsDataFrameRecordBatchReader::new(self.clone()) {
Err(e) => Err(e),
Ok(reader) => reader.map(|b| b.map_err(Into::into)).collect(),
};
match batches {
Err(e) => Box::pin(SimpleRecordBatchStream {
schema,
stream: once(async move { Err(e) }),
}),
Ok(batches) => {
let stream = futures::stream::iter(batches.into_iter().map(Ok));
Box::pin(SimpleRecordBatchStream { schema, stream })
}
}
}
fn num_rows(&self) -> Option<usize> {
Some(self.height())
}
fn rescannable(&self) -> bool {
true
}
}
#[async_trait]
impl StreamingWriteSource for Box<dyn Scannable> {
fn arrow_schema(&self) -> SchemaRef {
@@ -1126,60 +1089,4 @@ mod tests {
);
}
}
#[cfg(feature = "polars")]
mod polars_tests {
use super::*;
use crate::arrow::IntoPolars;
use crate::query::ExecutableQuery;
use polars::prelude::{DataFrame, NamedFrom, Series};
fn make_df() -> DataFrame {
DataFrame::new(vec![
Series::new("id", &[1i32, 2, 3]),
Series::new("val", &[1.1f64, 2.2, 3.3]),
])
.unwrap()
}
#[tokio::test]
async fn test_dataframe_scannable_round_trip() {
let tmp = tempfile::tempdir().unwrap();
let db = crate::connect(tmp.path().to_str().unwrap())
.execute()
.await
.unwrap();
let df = make_df();
let table = db.create_table("t", df.clone()).execute().await.unwrap();
// Append the same rows again.
table.add(df.clone()).execute().await.unwrap();
let result = table
.query()
.execute()
.await
.unwrap()
.into_polars()
.await
.unwrap();
assert_eq!(result.height(), df.height() * 2);
assert_eq!(result.schema(), df.schema());
}
#[tokio::test]
async fn test_dataframe_scannable_rescannable() {
let mut df = make_df();
assert!(df.rescannable());
let batches1: Vec<RecordBatch> = df.scan_as_stream().try_collect().await.unwrap();
assert_eq!(batches1.iter().map(|b| b.num_rows()).sum::<usize>(), 3);
// Can be scanned again.
let batches2: Vec<RecordBatch> = df.scan_as_stream().try_collect().await.unwrap();
assert_eq!(batches2.iter().map(|b| b.num_rows()).sum::<usize>(), 3);
}
}
}

View File

@@ -70,29 +70,18 @@ use tokio::sync::RwLock;
const REQUEST_TIMEOUT_HEADER: HeaderName = HeaderName::from_static("x-request-timeout-ms");
const MIN_VERSION_HEADER: HeaderName = HeaderName::from_static("x-lancedb-min-version");
const MIN_TIMESTAMP_HEADER: HeaderName = HeaderName::from_static("x-lancedb-min-timestamp");
const MIN_READ_VERSION_HEADER: HeaderName = HeaderName::from_static("x-lancedb-min-read-version");
const VERSION_HEADER: HeaderName = HeaderName::from_static("x-lancedb-version");
const METRIC_TYPE_KEY: &str = "metric_type";
const INDEX_TYPE_KEY: &str = "index_type";
const SCHEMA_CACHE_TTL: Duration = Duration::from_secs(30);
const SCHEMA_CACHE_REFRESH_WINDOW: Duration = Duration::from_secs(5);
/// Per-table state driving the freshness headers (`x-lancedb-min-version`,
/// `x-lancedb-min-timestamp`, and `x-lancedb-min-read-version`) sent on read
/// requests.
/// Per-table state driving the freshness headers (`x-lancedb-min-version` and
/// `x-lancedb-min-timestamp`) sent on read requests.
#[derive(Debug, Default, Clone, Copy)]
struct FreshnessState {
/// Provides read-your-write within a single handle: writes that return a
/// version update this, and reads send it as `x-lancedb-min-version`.
min_version: Option<u64>,
/// Highest dataset version observed in a *read* response on this handle.
/// Reads send it as `x-lancedb-min-read-version` so a load-balanced query
/// node whose cache is behind this version must refresh before serving,
/// giving monotonic reads across nodes regardless of which one the load
/// balancer routes to. Sourced only from reads (always committed dataset
/// versions), never from writes (which may return WAL entry ids), so it is
/// unaffected by the WAL/version mismatch that retired `min_version`.
min_read_version: Option<u64>,
/// Wall-clock time captured at the last [`BaseTable::checkout_latest`]
/// call. Subsequent reads send
/// `max(baseline, now - read_consistency_interval)` as
@@ -113,7 +102,6 @@ struct FreshnessState {
struct FreshnessHeaders {
min_version: Option<u64>,
min_timestamp: Option<SystemTime>,
min_read_version: Option<u64>,
}
impl FreshnessHeaders {
@@ -125,9 +113,6 @@ impl FreshnessHeaders {
let dt: chrono::DateTime<chrono::Utc> = ts.into();
request = request.header(MIN_TIMESTAMP_HEADER, dt.to_rfc3339());
}
if let Some(v) = self.min_read_version {
request = request.header(MIN_READ_VERSION_HEADER, v.to_string());
}
request
}
}
@@ -899,7 +884,6 @@ impl<S: HttpSend> RemoteTable<S> {
self.client.read_consistency_interval,
SystemTime::now(),
),
min_read_version: state.min_read_version,
}
}
@@ -921,30 +905,6 @@ impl<S: HttpSend> RemoteTable<S> {
state.min_version = Some(state.min_version.map_or(version, |v| v.max(version)));
}
/// Record a dataset version observed in a *read* response so subsequent
/// reads request at least this version via `x-lancedb-min-read-version`,
/// giving monotonic reads across load-balanced query nodes. A returned `0`
/// (or absent header from an old server) is ignored.
fn track_read_version(&self, version: u64) {
if version == 0 {
return;
}
let mut state = self.freshness.lock().unwrap();
state.min_read_version = Some(state.min_read_version.map_or(version, |v| v.max(version)));
}
/// Parse the `x-lancedb-version` response header (the dataset version a read
/// reflects) and fold it into the read-version watermark.
fn track_read_version_from_headers(&self, headers: &reqwest::header::HeaderMap) {
if let Some(version) = headers
.get(&VERSION_HEADER)
.and_then(|value| value.to_str().ok())
.and_then(|value| value.parse::<u64>().ok())
{
self.track_read_version(version);
}
}
async fn execute_query(
&self,
query: &AnyQuery,
@@ -968,7 +928,6 @@ impl<S: HttpSend> RemoteTable<S> {
let futures = requests.into_iter().map(|req| async move {
let (request_id, response) = self.send(req, true).await?;
self.track_read_version_from_headers(response.headers());
self.read_arrow_stream(&request_id, response).await
});
let streams = futures::future::try_join_all(futures);
@@ -1586,12 +1545,11 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
*write_guard = None;
drop(write_guard);
// Drop any per-handle read/write tracking; subsequent reads use the
// Drop any per-handle write tracking; subsequent reads use the
// baseline timestamp captured now to guarantee freshness.
*self.freshness.lock().unwrap() = FreshnessState {
min_version: None,
checkout_baseline: Some(SystemTime::now()),
min_read_version: None,
};
// Invalidate schema cache since we're switching versions
@@ -1847,7 +1805,6 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
}
};
self.track_read_version_from_headers(response.headers());
let body = response.text().await.err_to_http(request_id.clone())?;
serde_json::from_str(&body).map_err(|e| Error::Http {
@@ -7167,7 +7124,6 @@ mod tests {
let state = FreshnessState {
min_version: None,
checkout_baseline: Some(baseline),
min_read_version: None,
};
assert_eq!(compute_min_timestamp(&state, None, now), Some(baseline));
@@ -7192,7 +7148,6 @@ mod tests {
let state = FreshnessState {
min_version: None,
checkout_baseline: Some(baseline),
min_read_version: None,
};
assert_eq!(
compute_min_timestamp(&state, Some(Duration::from_secs(10)), now),
@@ -7204,7 +7159,6 @@ mod tests {
let state = FreshnessState {
min_version: None,
checkout_baseline: Some(recent_baseline),
min_read_version: None,
};
assert_eq!(
compute_min_timestamp(&state, Some(Duration::from_secs(60)), now),
@@ -7349,106 +7303,6 @@ mod tests {
);
}
/// A handler that records every request's headers and answers each read with
/// an `x-lancedb-version` response header taken from `versions` (by call
/// index, saturating at the last entry). An empty string means "no header".
fn read_version_handler(
versions: &'static [&'static str],
) -> (
impl Fn(reqwest::Request) -> http::Response<String> + Clone + Send + Sync + 'static,
Arc<std::sync::Mutex<Vec<http::HeaderMap>>>,
) {
let requests = Arc::new(std::sync::Mutex::new(Vec::new()));
let requests_c = requests.clone();
let call = Arc::new(AtomicUsize::new(0));
let handler = move |request: reqwest::Request| {
requests_c.lock().unwrap().push(request.headers().clone());
let i = call.fetch_add(1, Ordering::SeqCst).min(versions.len() - 1);
let mut builder = http::Response::builder().status(200);
if !versions[i].is_empty() {
builder = builder.header("x-lancedb-version", versions[i]);
}
builder.body("42".to_string()).unwrap()
};
(handler, requests)
}
#[tokio::test]
async fn test_read_version_watermark_tracked_and_sent() {
let (handler, requests) = read_version_handler(&["100", "100"]);
let table = Table::new_with_handler("my_table", handler);
// First read has no watermark yet; the response advertises version 100,
// so the second read must floor the server at 100.
table.count_rows(None).await.unwrap();
table.count_rows(None).await.unwrap();
let reqs = requests.lock().unwrap();
assert!(!reqs[0].contains_key("x-lancedb-min-read-version"));
assert_eq!(
reqs[1]
.get("x-lancedb-min-read-version")
.unwrap()
.to_str()
.unwrap(),
"100"
);
}
#[tokio::test]
async fn test_read_version_watermark_keeps_max() {
// Server reports 100 then a stale 50; the watermark must not regress.
let (handler, requests) = read_version_handler(&["100", "50", "50"]);
let table = Table::new_with_handler("my_table", handler);
table.count_rows(None).await.unwrap();
table.count_rows(None).await.unwrap();
table.count_rows(None).await.unwrap();
let reqs = requests.lock().unwrap();
assert_eq!(
reqs[2]
.get("x-lancedb-min-read-version")
.unwrap()
.to_str()
.unwrap(),
"100"
);
}
#[tokio::test]
async fn test_read_version_absent_header_no_watermark() {
// An old server that doesn't return the version header leaves the
// watermark unset, preserving backward compatibility.
let (handler, requests) = read_version_handler(&[""]);
let table = Table::new_with_handler("my_table", handler);
table.count_rows(None).await.unwrap();
table.count_rows(None).await.unwrap();
let reqs = requests.lock().unwrap();
assert!(!reqs[1].contains_key("x-lancedb-min-read-version"));
}
#[tokio::test]
async fn test_read_version_watermark_reset_on_checkout_latest() {
let (handler, requests) = read_version_handler(&["100", "100"]);
let table = Table::new_with_handler("my_table", handler);
table.count_rows(None).await.unwrap();
table.checkout_latest().await.unwrap();
table.count_rows(None).await.unwrap();
// The read after checkout_latest starts from a clean slate.
let reqs = requests.lock().unwrap();
assert!(
!reqs
.last()
.unwrap()
.contains_key("x-lancedb-min-read-version")
);
}
/// Like `capturing_handler`, but keeps a per-path snapshot of the headers
/// from every request so tests can assert on a specific endpoint.
#[allow(clippy::type_complexity)]