Compare commits

..

6 Commits

Author SHA1 Message Date
Jack Ye
fda4fe436b feat(rust): add OAuth header provider 2026-06-24 15:33:57 -07:00
Jack Ye
fe287dc98c fix(remote): support namespace clients with dynamic headers
Bridge LanceDB dynamic header providers into Lance Namespace dynamic context providers for live remote namespace clients.
2026-06-24 15:30:00 -07:00
Jack Ye
411568b72c fix(remote): omit empty api key header (#3573)
## Summary

Skip inserting the x-api-key header when the configured API key is
empty.

This lets bearer-token or other dynamic-header authentication avoid
sending an empty static API key header alongside the real auth header.
2026-06-24 13:25:59 -07:00
LanceDB Robot
ebf8d55ede chore: update lance dependency to v9.0.0-beta.4 (#3570)
Bumps the Lance dependencies to v9.0.0-beta.4 and refreshes the
generated lockfile metadata. No compatibility fixes were required beyond
the dependency updates. Triggered by
https://github.com/lance-format/lance/releases/tag/v9.0.0-beta.4
2026-06-24 10:16:29 -05:00
Raphael Malikian
0ba70d96c3 fix: add missing stacklevel=2 to warnings.warn() and fix broken message concatenation (Fixes #3563) (#3564)
Fixes #3563

## Summary

- Add `stacklevel=2` to 10 `warnings.warn()` calls across 4 files
- Fix broken message concatenation in `table.py` where the second string
was incorrectly passed as the `category` parameter

## Problem

Multiple `warnings.warn()` calls in the `python/lancedb/` codebase were
missing the `stacklevel` parameter. Without `stacklevel=2`, warnings
point to library internals instead of the caller's code, making it
impossible for users to identify which of their function calls triggered
the warning.

Additionally, two calls in `table.py` (lines 3411 and 3420) had a more
serious bug: the deprecation message was split across two separate
string arguments, causing the second string to be passed as the
`category` parameter instead of being concatenated with the first
string. This would cause `TypeError` when the warning was triggered.

## Changes

| File | Fixes | Description |
|------|-------|-------------|
| `embeddings/colpali.py` | 1 | Add `stacklevel=2` to
`use_token_pooling` deprecation warning |
| `remote/db.py` | 3 | Add `stacklevel=2` to `request_thread_pool`,
`connection_timeout`, `read_timeout` deprecation warnings |
| `remote/table.py` | 3 | Add `stacklevel=2` to `cleanup_old_versions`,
`compact_files`, `optimize` no-op warnings |
| `table.py` | 3 | Fix broken message concatenation for
`data_storage_version` and `enable_v2_manifest_paths` deprecation
warnings + add `stacklevel=2` to `retrain` deprecation warning |

## Verification

```python
# All warnings.warn() calls now have stacklevel
python3 -c "import ast, os; ..."
# Result: All warnings.warn() calls now have stacklevel!
```

## Changelog

| Date | Change | Author |
|------|--------|--------|
| 2026-06-20 | Fix missing stacklevel=2 in 10 warnings.warn() calls +
fix broken message concatenation | rtmalikian |

### Files Changed
- `python/python/lancedb/embeddings/colpali.py` — Add stacklevel=2
- `python/python/lancedb/remote/db.py` — Add stacklevel=2 to 3
deprecation warnings
- `python/python/lancedb/remote/table.py` — Add stacklevel=2 to 3 no-op
warnings
- `python/python/lancedb/table.py` — Fix broken message concatenation +
add stacklevel=2

### Verification
- AST-based audit confirms all `warnings.warn()` calls now include
`stacklevel=2`
- Syntax check passes for all 4 modified files

---

**About the Author:** Raphael Malikian — Clinical AI Solutions
Architect. I specialise in building and fixing AI/ML systems for
healthcare, including vector databases, RAG pipelines, and clinical NLP.
If you need help with your project or think I can add value to your
organisation, feel free to reach out — I'd love to connect.

📧 rtmalikian@gmail.com
🔗 GitHub: https://github.com/rtmalikian
🔗 LinkedIn:
http://www.linkedin.com/in/raphael-t-malikian-mbbs-bsc-hons-71075436a

---

**Disclosure:** This code was developed with assistance from **Hermes
Agent** (Nous Research). All changes were reviewed, tested against the
actual codebase, and verified for correctness.

Signed-off-by: rtmalikian <rtmalikian@gmail.com>
2026-06-23 13:42:59 -07:00
Lance Release
0749532c3c Bump version: 0.31.0-beta.1 → 0.31.0-beta.2 2026-06-23 16:23:08 +00:00
14 changed files with 1282 additions and 85 deletions

97
Cargo.lock generated
View File

@@ -3432,8 +3432,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"rand 0.9.4",
@@ -4735,8 +4735,8 @@ checksum = "e037a2e1d8d5fdbd49b16a4ea09d5d6401c1f29eca5ff29d03d3824dba16256a"
[[package]]
name = "lance"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arc-swap",
"arrow",
@@ -4771,7 +4771,7 @@ dependencies = [
"futures",
"half",
"humantime",
"itertools 0.13.0",
"itertools 0.14.0",
"lance-arrow",
"lance-core",
"lance-datafusion",
@@ -4810,8 +4810,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4832,7 +4832,7 @@ dependencies = [
[[package]]
name = "lance-arrow-scalar"
version = "58.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4846,7 +4846,7 @@ dependencies = [
[[package]]
name = "lance-arrow-stats"
version = "58.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4855,8 +4855,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrayref",
"paste",
@@ -4865,8 +4865,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4878,7 +4878,7 @@ dependencies = [
"datafusion-common",
"datafusion-sql",
"futures",
"itertools 0.13.0",
"itertools 0.14.0",
"lance-arrow",
"lance-derive",
"libc",
@@ -4904,8 +4904,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow",
"arrow-array",
@@ -4935,8 +4935,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow",
"arrow-array",
@@ -4953,8 +4953,8 @@ dependencies = [
[[package]]
name = "lance-derive"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"proc-macro2",
"quote",
@@ -4963,8 +4963,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4980,7 +4980,7 @@ dependencies = [
"futures",
"hex",
"hyperloglogplus",
"itertools 0.13.0",
"itertools 0.14.0",
"lance-arrow",
"lance-bitpacking",
"lance-core",
@@ -4999,8 +4999,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -5030,8 +5030,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arc-swap",
"arrow",
@@ -5056,7 +5056,7 @@ dependencies = [
"fst",
"futures",
"half",
"itertools 0.13.0",
"itertools 0.14.0",
"jieba-rs",
"jsonb",
"lance-arrow",
@@ -5096,8 +5096,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow",
"arrow-arith",
@@ -5138,8 +5138,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -5155,8 +5155,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow",
"async-trait",
@@ -5168,8 +5168,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow",
"arrow-ipc",
@@ -5223,15 +5223,15 @@ dependencies = [
[[package]]
name = "lance-select"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-schema",
"byteorder",
"bytes",
"itertools 0.13.0",
"itertools 0.14.0",
"lance-core",
"roaring",
"tracing",
@@ -5239,8 +5239,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow",
"arrow-array",
@@ -5279,8 +5279,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -5293,8 +5293,8 @@ dependencies = [
[[package]]
name = "lance-tokenizer"
version = "9.0.0-beta.2"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.2#23211989de648fefc4454f5eee09ec176f0a465b"
version = "9.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v9.0.0-beta.4#8eed3129a8b2c078e5f6f6612cb44fd9dccf4b39"
dependencies = [
"icu_segmenter",
"jieba-rs",
@@ -5307,7 +5307,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.31.0-beta.1"
version = "0.31.0-beta.2"
dependencies = [
"ahash",
"anyhow",
@@ -5384,13 +5384,14 @@ dependencies = [
"tokenizers",
"tokio",
"url",
"urlencoding",
"uuid",
"walkdir",
]
[[package]]
name = "lancedb-nodejs"
version = "0.31.0-beta.1"
version = "0.31.0-beta.2"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -5415,7 +5416,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.34.0-beta.1"
version = "0.34.0-beta.2"
dependencies = [
"arrow",
"async-trait",

View File

@@ -13,20 +13,20 @@ categories = ["database-implementations"]
rust-version = "1.91.0"
[workspace.dependencies]
lance = { "version" = "=9.0.0-beta.2", default-features = false, "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=9.0.0-beta.2", default-features = false, "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=9.0.0-beta.2", default-features = false, "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=9.0.0-beta.2", "tag" = "v9.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=9.0.0-beta.4", default-features = false, "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=9.0.0-beta.4", default-features = false, "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=9.0.0-beta.4", default-features = false, "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=9.0.0-beta.4", "tag" = "v9.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "58.0.0", optional = false }

View File

@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>9.0.0-beta.2</lance-core.version>
<lance-core.version>9.0.0-beta.4</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.31.0-beta.1",
"version": "0.31.0-beta.2",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.31.0-beta.1",
"version": "0.31.0-beta.2",
"cpu": [
"x64",
"arm64"

View File

@@ -81,6 +81,7 @@ class ColPaliEmbeddings(EmbeddingFunction):
warnings.warn(
"use_token_pooling is deprecated, use pooling_strategy=None instead",
DeprecationWarning,
stacklevel=2,
)
self.pooling_strategy = None

View File

@@ -124,6 +124,7 @@ class RemoteDBConnection(DBConnection):
"request_thread_pool is no longer used and will be removed in "
"a future release.",
DeprecationWarning,
stacklevel=2,
)
if connection_timeout is not None:
@@ -132,6 +133,7 @@ class RemoteDBConnection(DBConnection):
"release. Please use client_config.timeout_config.connect_timeout "
"instead.",
DeprecationWarning,
stacklevel=2,
)
client_config.timeout_config.connect_timeout = timedelta(
seconds=connection_timeout
@@ -142,6 +144,7 @@ class RemoteDBConnection(DBConnection):
"read_timeout is deprecated and will be removed in a future release. "
"Please use client_config.timeout_config.read_timeout instead.",
DeprecationWarning,
stacklevel=2,
)
client_config.timeout_config.read_timeout = timedelta(seconds=read_timeout)

View File

@@ -845,7 +845,8 @@ class RemoteTable(Table):
"""
warnings.warn(
"cleanup_old_versions() is a no-op on LanceDB Cloud. "
"Tables are automatically cleaned up and optimized."
"Tables are automatically cleaned up and optimized.",
stacklevel=2,
)
pass
@@ -857,7 +858,8 @@ class RemoteTable(Table):
"""
warnings.warn(
"compact_files() is a no-op on LanceDB Cloud. "
"Tables are automatically compacted and optimized."
"Tables are automatically compacted and optimized.",
stacklevel=2,
)
pass
@@ -874,7 +876,8 @@ class RemoteTable(Table):
"""
warnings.warn(
"optimize() is a no-op on LanceDB Cloud. "
"Indices are optimized automatically."
"Indices are optimized automatically.",
stacklevel=2,
)
pass

View File

@@ -3409,18 +3409,20 @@ class LanceTable(Table):
if data_storage_version is not None:
warnings.warn(
"setting data_storage_version directly on create_table is deprecated. ",
"setting data_storage_version directly on create_table is deprecated. "
"Use database_options instead.",
DeprecationWarning,
stacklevel=2,
)
if storage_options is None:
storage_options = {}
storage_options["new_table_data_storage_version"] = data_storage_version
if enable_v2_manifest_paths is not None:
warnings.warn(
"setting enable_v2_manifest_paths directly on create_table is ",
"setting enable_v2_manifest_paths directly on create_table is "
"deprecated. Use database_options instead.",
DeprecationWarning,
stacklevel=2,
)
if storage_options is None:
storage_options = {}
@@ -5662,6 +5664,7 @@ class AsyncTable:
"The 'retrain' parameter is deprecated and will be removed in a "
"future version.",
DeprecationWarning,
stacklevel=2,
)
return await self._inner.optimize(

View File

@@ -50,7 +50,7 @@ lance-namespace = { workspace = true }
lance-namespace-impls = { workspace = true }
moka = { workspace = true }
pin-project = { workspace = true }
tokio = { version = "1.23", features = ["rt-multi-thread"] }
tokio = { version = "1.23", features = ["rt-multi-thread", "sync"] }
log.workspace = true
async-trait = "0"
bytes = "1"
@@ -75,6 +75,7 @@ reqwest = { version = "0.12.0", default-features = false, features = [
"stream",
], optional = true }
http = { version = "1", optional = true } # Matching what is in reqwest
urlencoding = { version = "2", optional = true }
uuid = { version = "1.7.0", features = ["v4", "v5"] }
polars-arrow = { version = ">=0.37,<0.40.0", optional = true }
polars = { version = ">=0.37,<0.40.0", optional = true }
@@ -93,6 +94,7 @@ semver = { workspace = true }
anyhow = "1"
tempfile = "3.5.0"
random_word = { version = "0.4.3", features = ["en"] }
tokio = { version = "1.23", features = ["io-util", "macros", "net", "rt-multi-thread", "sync"] }
uuid = { version = "1.7.0", features = ["v4"] }
walkdir = "2"
aws-sdk-dynamodb = { version = "1.55.0" }
@@ -129,7 +131,13 @@ huggingface = [
"lance-namespace-impls/dir-huggingface",
]
dynamodb = ["lance/dynamodb", "aws"]
remote = ["dep:reqwest", "dep:http", "lance-namespace-impls/rest", "lance-namespace-impls/rest-adapter"]
remote = [
"dep:reqwest",
"dep:http",
"dep:urlencoding",
"lance-namespace-impls/rest",
"lance-namespace-impls/rest-adapter",
]
fp16kernels = ["lance-linalg/fp16kernels"]
s3-test = []
bedrock = ["dep:aws-sdk-bedrockruntime"]

View File

@@ -576,6 +576,9 @@ impl Connection {
/// For LanceNamespaceDatabase, it is the underlying LanceNamespace.
/// For ListingDatabase, it is the equivalent DirectoryNamespace.
/// For RemoteDatabase, it is the equivalent RestNamespace.
///
/// Remote connections using dynamic headers forward them through the
/// namespace client's per-request context provider.
pub async fn namespace_client(&self) -> Result<Arc<dyn lance_namespace::LanceNamespace>> {
self.internal.namespace_client().await
}
@@ -584,6 +587,9 @@ impl Connection {
/// Returns (impl_type, properties) where:
/// - impl_type: "dir" for DirectoryNamespace, "rest" for RestNamespace
/// - properties: configuration properties for the namespace
///
/// Remote connections using dynamic headers cannot be exported because the
/// namespace client config only carries static headers.
pub async fn namespace_client_config(
&self,
) -> Result<(String, std::collections::HashMap<String, String>)> {
@@ -661,6 +667,8 @@ pub struct ConnectRequest {
pub struct ConnectBuilder {
request: ConnectRequest,
embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
#[cfg(feature = "remote")]
oauth_config: Option<crate::remote::OAuthConfig>,
}
#[cfg(feature = "remote")]
@@ -682,6 +690,8 @@ impl ConnectBuilder {
session: None,
},
embedding_registry: None,
#[cfg(feature = "remote")]
oauth_config: None,
}
}
@@ -770,6 +780,19 @@ impl ConnectBuilder {
self
}
/// Configure OAuth authentication for LanceDB Cloud/Enterprise.
///
/// This creates an [`OAuthHeaderProvider`](crate::remote::OAuthHeaderProvider)
/// from the given config and sets it as the header provider. OAuth cannot
/// be combined with an API key or another header provider.
///
/// Token acquisition and refresh are handled in Rust.
#[cfg(feature = "remote")]
pub fn oauth_config(mut self, config: crate::remote::OAuthConfig) -> Self {
self.oauth_config = Some(config);
self
}
/// Provide a custom [`EmbeddingRegistry`] to use for this connection.
pub fn embedding_registry(mut self, registry: Arc<dyn EmbeddingRegistry>) -> Self {
self.embedding_registry = Some(registry);
@@ -915,9 +938,40 @@ impl ConnectBuilder {
let region = options.region.ok_or_else(|| Error::InvalidInput {
message: "A region is required when connecting to LanceDb Cloud".to_string(),
})?;
let api_key = options.api_key.ok_or_else(|| Error::InvalidInput {
message: "An api_key is required when connecting to LanceDb Cloud".to_string(),
})?;
let api_key = match (&self.oauth_config, &options.api_key) {
(Some(_), None) => String::new(),
(Some(_), Some(_)) => {
return Err(Error::InvalidInput {
message:
"api_key and oauth_config cannot both be set when connecting to LanceDb Cloud"
.to_string(),
});
}
(None, Some(key)) => key.clone(),
(None, None) => {
return Err(Error::InvalidInput {
message:
"An api_key or oauth_config is required when connecting to LanceDb Cloud"
.to_string(),
});
}
};
if self.oauth_config.is_some() && self.request.client_config.header_provider.is_some() {
return Err(Error::InvalidInput {
message:
"oauth_config and client_config.header_provider cannot both be set when connecting to LanceDb Cloud"
.to_string(),
});
}
let mut client_config = self.request.client_config;
if let Some(oauth_config) = self.oauth_config {
let provider = crate::remote::OAuthHeaderProvider::new(oauth_config)?;
client_config.header_provider =
Some(Arc::new(provider) as Arc<dyn crate::remote::HeaderProvider>);
}
let storage_options = StorageOptions(options.storage_options.clone());
let internal = Arc::new(crate::remote::db::RemoteDatabase::try_new(
@@ -925,7 +979,7 @@ impl ConnectBuilder {
&api_key,
&region,
options.host_override,
self.request.client_config,
client_config,
storage_options.into(),
self.request.read_consistency_interval,
)?);
@@ -1234,6 +1288,83 @@ mod tests {
assert_eq!(Some(&"EXPLICIT-VALUE".to_string()), options.get(opts_key));
}
#[cfg(feature = "remote")]
#[tokio::test]
async fn test_connect_rejects_api_key_with_oauth_config() {
let oauth_config = crate::remote::OAuthConfig {
issuer_url: "https://issuer.example.com".to_string(),
client_id: "client-id".to_string(),
client_secret: Some("secret".to_string()),
scopes: vec!["scope".to_string()],
flow: crate::remote::OAuthFlow::ClientCredentials,
refresh_buffer_secs: None,
};
let result = ConnectBuilder::new("db://my-container/my-prefix")
.region("us-east-1")
.api_key("my-api-key")
.oauth_config(oauth_config)
.execute()
.await;
match result {
Err(Error::InvalidInput { message })
if message
== "api_key and oauth_config cannot both be set when connecting to LanceDb Cloud" =>
{}
Err(err) => panic!("expected InvalidInput, got {err:?}"),
Ok(_) => panic!("expected api_key and oauth_config to be rejected"),
}
}
#[cfg(feature = "remote")]
#[tokio::test]
async fn test_connect_rejects_header_provider_with_oauth_config() {
#[derive(Debug)]
struct TestHeaderProvider;
#[async_trait::async_trait]
impl crate::remote::HeaderProvider for TestHeaderProvider {
async fn get_headers(&self) -> Result<HashMap<String, String>> {
Ok(HashMap::from([(
"authorization".to_string(),
"Bearer token".to_string(),
)]))
}
}
let oauth_config = crate::remote::OAuthConfig {
issuer_url: "https://issuer.example.com".to_string(),
client_id: "client-id".to_string(),
client_secret: Some("secret".to_string()),
scopes: vec!["scope".to_string()],
flow: crate::remote::OAuthFlow::ClientCredentials,
refresh_buffer_secs: None,
};
let client_config = crate::remote::ClientConfig {
header_provider: Some(
Arc::new(TestHeaderProvider) as Arc<dyn crate::remote::HeaderProvider>
),
..Default::default()
};
let result = ConnectBuilder::new("db://my-container/my-prefix")
.region("us-east-1")
.client_config(client_config)
.oauth_config(oauth_config)
.execute()
.await;
match result {
Err(Error::InvalidInput { message })
if message
== "oauth_config and client_config.header_provider cannot both be set when connecting to LanceDb Cloud" =>
{}
Err(err) => panic!("expected InvalidInput, got {err:?}"),
Ok(_) => panic!("expected header_provider and oauth_config to be rejected"),
}
}
#[cfg(not(windows))]
#[tokio::test]
async fn test_connect_relative() {

View File

@@ -8,6 +8,7 @@
pub(crate) mod client;
pub(crate) mod db;
pub mod oauth;
mod retry;
pub(crate) mod table;
pub(crate) mod util;
@@ -20,3 +21,4 @@ const JSON_CONTENT_TYPE: &str = "application/json";
pub use client::{ClientConfig, HeaderProvider, RetryConfig, TimeoutConfig, TlsConfig};
pub use db::{RemoteDatabaseOptions, RemoteDatabaseOptionsBuilder};
pub use oauth::{OAuthConfig, OAuthFlow, OAuthHeaderProvider};

View File

@@ -459,12 +459,14 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
config: &ClientConfig,
) -> Result<HeaderMap> {
let mut headers = HeaderMap::new();
headers.insert(
HeaderName::from_static("x-api-key"),
HeaderValue::from_str(api_key).map_err(|_| Error::InvalidInput {
message: "non-ascii api key provided".to_string(),
})?,
);
if !api_key.is_empty() {
headers.insert(
HeaderName::from_static("x-api-key"),
HeaderValue::from_str(api_key).map_err(|_| Error::InvalidInput {
message: "non-ascii api key provided".to_string(),
})?,
);
}
if region == "local" {
let host = format!("{}.local.api.lancedb.com", db_name);
headers.insert(
@@ -1005,6 +1007,33 @@ mod tests {
assert!(!config_tls.assert_hostname);
}
#[test]
fn test_default_headers_skip_empty_api_key() {
let headers = RestfulLanceDbClient::<Sender>::default_headers(
"",
"us-east-1",
"db-name",
false,
&RemoteOptions::default(),
None,
&ClientConfig::default(),
)
.unwrap();
assert!(!headers.contains_key("x-api-key"));
let headers = RestfulLanceDbClient::<Sender>::default_headers(
"api-key",
"us-east-1",
"db-name",
false,
&RemoteOptions::default(),
None,
&ClientConfig::default(),
)
.unwrap();
assert_eq!(headers.get("x-api-key").unwrap(), "api-key");
}
// Test implementation of HeaderProvider
#[derive(Debug, Clone)]
struct TestHeaderProvider {

View File

@@ -7,6 +7,7 @@ use std::sync::Arc;
use async_trait::async_trait;
use http::StatusCode;
use lance_io::object_store::StorageOptions;
use lance_namespace_impls::{DynamicContextProvider, OperationInfo};
use moka::future::Cache;
use reqwest::header::CONTENT_TYPE;
@@ -26,7 +27,9 @@ use crate::remote::util::stream_as_body;
use crate::table::BaseTable;
use super::ARROW_STREAM_CONTENT_TYPE;
use super::client::{ClientConfig, HttpSend, RequestResultExt, RestfulLanceDbClient, Sender};
use super::client::{
ClientConfig, HeaderProvider, HttpSend, RequestResultExt, RestfulLanceDbClient, Sender,
};
use super::table::RemoteTable;
use super::util::parse_server_version;
@@ -194,10 +197,66 @@ pub struct RemoteDatabase<S: HttpSend = Sender> {
uri: String,
/// Headers to pass to the namespace client for authentication
namespace_headers: HashMap<String, String>,
namespace_context_provider: Option<Arc<dyn DynamicContextProvider>>,
/// TLS configuration for mTLS support
tls_config: Option<super::client::TlsConfig>,
}
#[derive(Clone)]
struct NamespaceHeaderProviderContext {
header_provider: Arc<dyn HeaderProvider>,
}
impl std::fmt::Debug for NamespaceHeaderProviderContext {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("NamespaceHeaderProviderContext")
.field("header_provider", &"Some(...)")
.finish()
}
}
impl DynamicContextProvider for NamespaceHeaderProviderContext {
fn provide_context(&self, _info: &OperationInfo) -> HashMap<String, String> {
let header_provider = Arc::clone(&self.header_provider);
let handle = match std::thread::Builder::new()
.name("lancedb-namespace-headers".to_string())
.spawn(move || {
tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.map_err(|e| Error::Runtime {
message: format!(
"Failed to create runtime for namespace header provider: {e}"
),
})?
.block_on(header_provider.get_headers())
}) {
Ok(handle) => handle,
Err(err) => {
log::warn!("Failed to spawn dynamic namespace header provider thread: {err}");
return HashMap::new();
}
};
let headers = handle.join();
match headers {
Ok(Ok(headers)) => headers
.into_iter()
.map(|(key, value)| (format!("headers.{key}"), value))
.collect(),
Ok(Err(err)) => {
log::warn!("Failed to get dynamic namespace headers: {err}");
HashMap::new()
}
Err(_) => {
log::warn!("Dynamic namespace header provider panicked");
HashMap::new()
}
}
}
}
impl RemoteDatabase {
pub fn try_new(
uri: &str,
@@ -228,6 +287,16 @@ impl RemoteDatabase {
})
.collect();
let namespace_context_provider =
client_config
.header_provider
.as_ref()
.map(|header_provider| {
Arc::new(NamespaceHeaderProviderContext {
header_provider: Arc::clone(header_provider),
}) as Arc<dyn DynamicContextProvider>
});
let client = RestfulLanceDbClient::try_new(
&parsed,
region,
@@ -247,6 +316,7 @@ impl RemoteDatabase {
table_cache,
uri: uri.to_owned(),
namespace_headers,
namespace_context_provider,
tls_config: client_config.tls_config,
})
}
@@ -271,6 +341,7 @@ mod test_utils {
table_cache: Cache::new(0),
uri: "http://localhost".to_string(),
namespace_headers: HashMap::new(),
namespace_context_provider: None,
tls_config: None,
}
}
@@ -281,11 +352,18 @@ mod test_utils {
T: Into<reqwest::Body>,
{
let client = client_with_handler_and_config(handler, config.clone());
let namespace_context_provider =
config.header_provider.as_ref().map(|header_provider| {
Arc::new(NamespaceHeaderProviderContext {
header_provider: Arc::clone(header_provider),
}) as Arc<dyn DynamicContextProvider>
});
Self {
client,
table_cache: Cache::new(0),
uri: "http://localhost".to_string(),
namespace_headers: config.extra_headers.clone(),
namespace_context_provider,
tls_config: config.tls_config.clone(),
}
}
@@ -759,9 +837,12 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
// Create a RestNamespace pointing to the same remote host with the same authentication headers
let mut builder = lance_namespace_impls::RestNamespaceBuilder::new(self.client.host())
.delimiter(&self.client.id_delimiter)
// TODO: support header provider
.headers(self.namespace_headers.clone());
if let Some(context_provider) = &self.namespace_context_provider {
builder = builder.context_provider(Arc::clone(context_provider));
}
// Apply mTLS configuration if present
if let Some(tls_config) = &self.tls_config {
if let Some(cert_file) = &tls_config.cert_file {
@@ -781,6 +862,14 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
}
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)> {
if self.namespace_context_provider.is_some() {
return Err(Error::NotSupported {
message:
"Cannot export a namespace client config when dynamic headers are configured; use LanceDB connection namespace methods instead"
.to_string(),
});
}
let mut properties = HashMap::new();
properties.insert("uri".to_string(), self.client.host().to_string());
properties.insert("delimiter".to_string(), self.client.id_delimiter.clone());
@@ -832,12 +921,13 @@ impl From<StorageOptions> for RemoteOptions {
#[cfg(test)]
mod tests {
use super::build_cache_key;
use super::{NamespaceHeaderProviderContext, build_cache_key};
use std::collections::HashMap;
use std::sync::{Arc, OnceLock};
use arrow_array::{Int32Array, RecordBatch};
use arrow_schema::{DataType, Field, Schema};
use lance_namespace_impls::{DynamicContextProvider, OperationInfo};
use crate::connection::ConnectBuilder;
use crate::{
@@ -1702,6 +1792,75 @@ mod tests {
assert!(namespace_client.is_ok());
}
#[test]
fn test_namespace_header_provider_context_maps_headers() {
#[derive(Debug)]
struct TestHeaderProvider;
#[async_trait::async_trait]
impl HeaderProvider for TestHeaderProvider {
async fn get_headers(&self) -> crate::Result<HashMap<String, String>> {
Ok(HashMap::from([(
"authorization".to_string(),
"Bearer token".to_string(),
)]))
}
}
let context_provider = NamespaceHeaderProviderContext {
header_provider: Arc::new(TestHeaderProvider) as Arc<dyn HeaderProvider>,
};
let context =
context_provider.provide_context(&OperationInfo::new("list_tables", "namespace"));
assert_eq!(
context.get("headers.authorization"),
Some(&"Bearer token".to_string())
);
}
#[tokio::test]
async fn test_namespace_client_supports_dynamic_headers() {
#[derive(Debug)]
struct TestHeaderProvider;
#[async_trait::async_trait]
impl HeaderProvider for TestHeaderProvider {
async fn get_headers(&self) -> crate::Result<HashMap<String, String>> {
Ok(HashMap::from([(
"authorization".to_string(),
"Bearer token".to_string(),
)]))
}
}
let client_config = ClientConfig {
header_provider: Some(Arc::new(TestHeaderProvider) as Arc<dyn HeaderProvider>),
..Default::default()
};
let conn = Connection::new_with_handler_and_config(
|_| {
http::Response::builder()
.status(200)
.body(r#"{"tables": []}"#)
.unwrap()
},
client_config,
);
let namespace_client = conn.namespace_client().await;
assert!(namespace_client.is_ok());
match conn.namespace_client_config().await {
Err(Error::NotSupported { message })
if message.contains("dynamic headers are configured") => {}
Err(err) => panic!("expected NotSupported, got {err:?}"),
Ok(_) => panic!("expected namespace_client_config to reject dynamic headers"),
}
}
/// Integration tests using RestAdapter to run RemoteDatabase against a real namespace server
mod rest_adapter_integration {
use super::*;

View File

@@ -0,0 +1,857 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::collections::HashMap;
use std::net::IpAddr;
use std::sync::Arc;
use std::time::{Duration, Instant};
use log::debug;
use reqwest::Client;
use serde::Deserialize;
use tokio::sync::RwLock;
use crate::error::{Error, Result};
use crate::remote::client::HeaderProvider;
const DEFAULT_REFRESH_BUFFER_SECS: u64 = 300;
const DEFAULT_TOKEN_TTL_SECS: u64 = 3600;
const AZURE_IMDS_ENDPOINT: &str = "http://169.254.169.254/metadata/identity/oauth2/token";
const AZURE_IMDS_API_VERSION: &str = "2018-02-01";
/// OAuth authentication flow configuration.
#[derive(Debug, Clone)]
pub enum OAuthFlow {
/// Client Credentials grant (service-to-service / M2M).
/// Requires `client_secret` in [`OAuthConfig`].
ClientCredentials,
/// Azure Managed Identity via IMDS.
/// Works on Azure VMs, AKS, App Service, and Azure Functions.
AzureManagedIdentity {
/// Client ID for user-assigned managed identity.
/// Omit for system-assigned managed identity.
client_id: Option<String>,
},
}
/// OAuth configuration for LanceDB authentication.
///
/// All token acquisition and refresh is handled in the Rust layer.
/// Python and TypeScript bindings expose this as a plain config object.
#[derive(Clone)]
pub struct OAuthConfig {
/// OIDC issuer URL or OAuth authority URL.
/// For Azure: `https://login.microsoftonline.com/{tenant_id}/v2.0`
pub issuer_url: String,
/// Application / Client ID.
pub client_id: String,
/// Client secret (required for `ClientCredentials`, optional for others).
pub client_secret: Option<String>,
/// OAuth scopes to request.
/// For Azure managed identity, exactly one scope or resource is required.
/// For example: `["api://{app_id}/.default"]`
pub scopes: Vec<String>,
/// Authentication flow to use.
pub flow: OAuthFlow,
/// Seconds before token expiry to trigger proactive refresh (default: 300).
/// Keep this well below the token TTL; if it is greater than or equal to
/// the TTL, each request refreshes the token.
pub refresh_buffer_secs: Option<u64>,
}
impl std::fmt::Debug for OAuthConfig {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("OAuthConfig")
.field("issuer_url", &self.issuer_url)
.field("client_id", &self.client_id)
.field(
"client_secret",
&self.client_secret.as_deref().map(|_| "<redacted>"),
)
.field("scopes", &self.scopes)
.field("flow", &self.flow)
.field("refresh_buffer_secs", &self.refresh_buffer_secs)
.finish()
}
}
// -- OIDC Discovery --
#[derive(Debug, Deserialize)]
struct OidcDiscovery {
token_endpoint: String,
}
// -- Token Response --
#[derive(Deserialize)]
struct TokenResponse {
access_token: String,
/// Token lifetime in seconds.
/// Some providers (Azure IMDS) return this as a string, so we accept both.
#[serde(default, deserialize_with = "deserialize_optional_u64_or_string")]
expires_in: Option<u64>,
#[serde(default)]
#[allow(dead_code)]
token_type: Option<String>,
}
impl std::fmt::Debug for TokenResponse {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("TokenResponse")
.field("access_token", &"<redacted>")
.field("expires_in", &self.expires_in)
.field("token_type", &self.token_type)
.finish()
}
}
fn deserialize_optional_u64_or_string<'de, D>(
deserializer: D,
) -> std::result::Result<Option<u64>, D::Error>
where
D: serde::Deserializer<'de>,
{
use serde::de;
struct U64OrString;
impl<'de> de::Visitor<'de> for U64OrString {
type Value = Option<u64>;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("an integer, an integer-valued float, a numeric string, or null")
}
fn visit_u64<E: de::Error>(self, v: u64) -> std::result::Result<Self::Value, E> {
Ok(Some(v))
}
fn visit_i64<E: de::Error>(self, v: i64) -> std::result::Result<Self::Value, E> {
if v < 0 {
return Err(E::custom(format!("invalid expires_in value: {v}")));
}
Ok(Some(v as u64))
}
fn visit_f64<E: de::Error>(self, v: f64) -> std::result::Result<Self::Value, E> {
if !v.is_finite() || v < 0.0 || v.fract() != 0.0 || v > u64::MAX as f64 {
return Err(E::custom(format!("invalid expires_in value: {v}")));
}
Ok(Some(v as u64))
}
fn visit_str<E: de::Error>(self, v: &str) -> std::result::Result<Self::Value, E> {
v.parse::<u64>().map(Some).map_err(de::Error::custom)
}
fn visit_none<E: de::Error>(self) -> std::result::Result<Self::Value, E> {
Ok(None)
}
fn visit_unit<E: de::Error>(self) -> std::result::Result<Self::Value, E> {
Ok(None)
}
}
deserializer.deserialize_any(U64OrString)
}
// -- Internal Token State --
struct TokenState {
access_token: Option<String>,
expires_at: Option<Instant>,
}
impl TokenState {
fn new() -> Self {
Self {
access_token: None,
expires_at: None,
}
}
fn is_expired(&self, buffer: Duration) -> bool {
match (self.access_token.as_ref(), self.expires_at) {
(Some(_), Some(expires_at)) => Instant::now() + buffer >= expires_at,
(None, _) => true,
(Some(_), None) => true,
}
}
fn update(&mut self, resp: &TokenResponse) {
self.access_token = Some(resp.access_token.clone());
let expires_in = resp.expires_in.unwrap_or(DEFAULT_TOKEN_TTL_SECS);
self.expires_at = Some(Instant::now() + Duration::from_secs(expires_in));
}
}
/// OAuth header provider that manages the full token lifecycle.
///
/// Implements [`HeaderProvider`] to inject `Authorization: Bearer <token>`
/// headers into every LanceDB request, with automatic token refresh.
pub struct OAuthHeaderProvider {
config: OAuthConfig,
http_client: Client,
token_state: Arc<RwLock<TokenState>>,
/// Cached OIDC discovery document
discovery: Arc<RwLock<Option<OidcDiscovery>>>,
refresh_buffer: Duration,
}
impl std::fmt::Debug for OAuthHeaderProvider {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("OAuthHeaderProvider")
.field("issuer_url", &self.config.issuer_url)
.field("client_id", &self.config.client_id)
.field("flow", &self.config.flow)
.finish()
}
}
impl OAuthHeaderProvider {
/// Create a new OAuth header provider from configuration.
pub fn new(config: OAuthConfig) -> Result<Self> {
// Validate config upfront
if matches!(config.flow, OAuthFlow::ClientCredentials) && config.client_secret.is_none() {
return Err(Error::InvalidInput {
message: "client_secret is required for ClientCredentials flow".to_string(),
});
}
if config.scopes.is_empty() {
return Err(Error::InvalidInput {
message: "At least one OAuth scope is required".to_string(),
});
}
if matches!(config.flow, OAuthFlow::AzureManagedIdentity { .. }) && config.scopes.len() != 1
{
return Err(Error::InvalidInput {
message: "AzureManagedIdentity flow requires exactly one OAuth scope or resource"
.to_string(),
});
}
Self::validate_issuer_transport(&config)?;
let http_client = Client::builder()
.timeout(Duration::from_secs(30))
.build()
.map_err(|e| Error::Runtime {
message: format!("Failed to create HTTP client for OAuth: {e}"),
})?;
let refresh_buffer = Duration::from_secs(
config
.refresh_buffer_secs
.unwrap_or(DEFAULT_REFRESH_BUFFER_SECS),
);
Ok(Self {
config,
http_client,
token_state: Arc::new(RwLock::new(TokenState::new())),
discovery: Arc::new(RwLock::new(None)),
refresh_buffer,
})
}
fn validate_issuer_transport(config: &OAuthConfig) -> Result<()> {
if !matches!(config.flow, OAuthFlow::ClientCredentials) {
return Ok(());
}
let issuer = url::Url::parse(&config.issuer_url).map_err(|e| Error::InvalidInput {
message: format!("Invalid OAuth issuer_url: {e}"),
})?;
match issuer.scheme() {
"https" => Ok(()),
"http" if Self::is_loopback_issuer(&issuer) => Ok(()),
_ => Err(Error::InvalidInput {
message:
"ClientCredentials OAuth issuer_url must use https, except for loopback hosts"
.to_string(),
}),
}
}
fn is_loopback_issuer(issuer: &url::Url) -> bool {
let Some(host) = issuer.host_str() else {
return false;
};
host.eq_ignore_ascii_case("localhost")
|| host
.parse::<IpAddr>()
.map(|addr| addr.is_loopback())
.unwrap_or(false)
}
/// Get a valid access token, refreshing if necessary.
async fn get_valid_token(&self) -> Result<String> {
// Fast path: check if current token is still valid
{
let state = self.token_state.read().await;
if !state.is_expired(self.refresh_buffer)
&& let Some(ref token) = state.access_token
{
return Ok(token.clone());
}
}
// Slow path: acquire or refresh token
let mut state = self.token_state.write().await;
// Double-check after acquiring write lock
if !state.is_expired(self.refresh_buffer)
&& let Some(ref token) = state.access_token
{
return Ok(token.clone());
}
debug!("Acquiring new OAuth token via {:?} flow", self.config.flow);
let resp = self.acquire_token().await?;
state.update(&resp);
Ok(resp.access_token)
}
/// Acquire a new token using the configured flow.
async fn acquire_token(&self) -> Result<TokenResponse> {
match &self.config.flow {
OAuthFlow::ClientCredentials => self.acquire_client_credentials().await,
OAuthFlow::AzureManagedIdentity { client_id } => {
self.acquire_managed_identity(client_id.as_deref()).await
}
}
}
// -- OIDC Discovery --
async fn get_discovery(&self) -> Result<OidcDiscovery> {
{
let cached = self.discovery.read().await;
if let Some(ref disc) = *cached {
return Ok(OidcDiscovery {
token_endpoint: disc.token_endpoint.clone(),
});
}
}
let mut cache = self.discovery.write().await;
// Double-check
if let Some(ref disc) = *cache {
return Ok(OidcDiscovery {
token_endpoint: disc.token_endpoint.clone(),
});
}
let discovery_url = format!(
"{}/.well-known/openid-configuration",
self.config.issuer_url.trim_end_matches('/')
);
debug!("Fetching OIDC discovery from {}", discovery_url);
let resp = self
.http_client
.get(&discovery_url)
.send()
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to fetch OIDC discovery document: {e}"),
})?;
if !resp.status().is_success() {
return Err(Error::Runtime {
message: format!(
"OIDC discovery failed with status {}: {}",
resp.status(),
resp.text().await.unwrap_or_default()
),
});
}
let disc: OidcDiscovery = resp.json().await.map_err(|e| Error::Runtime {
message: format!("Failed to parse OIDC discovery document: {e}"),
})?;
let result = OidcDiscovery {
token_endpoint: disc.token_endpoint.clone(),
};
*cache = Some(disc);
Ok(result)
}
async fn get_token_endpoint(&self) -> Result<String> {
self.get_discovery().await.map(|disc| disc.token_endpoint)
}
fn scopes_string(&self) -> String {
self.config.scopes.join(" ")
}
fn managed_identity_resource(&self) -> Result<String> {
let [scope] = self.config.scopes.as_slice() else {
return Err(Error::InvalidInput {
message: "AzureManagedIdentity flow requires exactly one OAuth scope or resource"
.to_string(),
});
};
Ok(scope.strip_suffix("/.default").unwrap_or(scope).to_string())
}
// -- Client Credentials Flow --
async fn acquire_client_credentials(&self) -> Result<TokenResponse> {
let client_secret = self
.config
.client_secret
.as_ref()
.ok_or(Error::InvalidInput {
message: "client_secret is required for ClientCredentials flow".to_string(),
})?;
let token_endpoint = self.get_token_endpoint().await?;
let params = [
("grant_type", "client_credentials"),
("client_id", &self.config.client_id),
("client_secret", client_secret),
("scope", &self.scopes_string()),
];
self.post_token_request(&token_endpoint, &params).await
}
// -- Azure Managed Identity Flow --
async fn acquire_managed_identity(&self, mi_client_id: Option<&str>) -> Result<TokenResponse> {
let resource = self.managed_identity_resource()?;
let mut url = format!(
"{AZURE_IMDS_ENDPOINT}?api-version={AZURE_IMDS_API_VERSION}&resource={}",
urlencoding::encode(&resource),
);
if let Some(cid) = mi_client_id {
url.push_str(&format!("&client_id={}", urlencoding::encode(cid)));
}
let resp = self
.http_client
.get(&url)
.header("Metadata", "true")
.send()
.await
.map_err(|e| Error::Runtime {
message: format!("Azure IMDS request failed: {e}"),
})?;
if !resp.status().is_success() {
return Err(Error::Runtime {
message: format!(
"Azure IMDS returned status {}: {}",
resp.status(),
resp.text().await.unwrap_or_default()
),
});
}
resp.json().await.map_err(|e| Error::Runtime {
message: format!("Failed to parse IMDS token response: {e}"),
})
}
// -- Shared Helpers --
async fn post_token_request(
&self,
endpoint: &str,
params: &[(&str, &str)],
) -> Result<TokenResponse> {
let resp = self
.http_client
.post(endpoint)
.form(params)
.send()
.await
.map_err(|e| Error::Runtime {
message: format!("Token request to {endpoint} failed: {e}"),
})?;
if !resp.status().is_success() {
return Err(Error::Runtime {
message: format!(
"Token request failed with status {}: {}",
resp.status(),
resp.text().await.unwrap_or_default()
),
});
}
resp.json().await.map_err(|e| Error::Runtime {
message: format!("Failed to parse token response: {e}"),
})
}
}
#[async_trait::async_trait]
impl HeaderProvider for OAuthHeaderProvider {
async fn get_headers(&self) -> Result<HashMap<String, String>> {
let token = self.get_valid_token().await?;
Ok(HashMap::from([(
"authorization".to_string(),
format!("Bearer {token}"),
)]))
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::sync::atomic::{AtomicUsize, Ordering};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::{TcpListener, TcpStream};
use tokio::task::JoinHandle;
#[test]
fn test_token_state_expiry() {
let mut state = TokenState::new();
assert!(state.is_expired(Duration::from_secs(0)));
state.access_token = Some("tok".to_string());
state.expires_at = Some(Instant::now() + Duration::from_secs(600));
assert!(!state.is_expired(Duration::from_secs(300)));
assert!(state.is_expired(Duration::from_secs(601)));
state.expires_at = None;
assert!(state.is_expired(Duration::from_secs(0)));
}
#[test]
fn test_token_state_uses_default_expiry() {
let mut state = TokenState::new();
let response = TokenResponse {
access_token: "tok".to_string(),
expires_in: None,
token_type: None,
};
state.update(&response);
assert!(!state.is_expired(Duration::from_secs(DEFAULT_TOKEN_TTL_SECS - 1)));
assert!(state.is_expired(Duration::from_secs(DEFAULT_TOKEN_TTL_SECS + 1)));
}
#[test]
fn test_token_response_accepts_float_expires_in() {
let response: TokenResponse =
serde_json::from_str(r#"{"access_token":"tok","expires_in":3600.0}"#).unwrap();
assert_eq!(response.expires_in, Some(3600));
}
#[test]
fn test_token_response_rejects_negative_expires_in() {
let err =
serde_json::from_str::<TokenResponse>(r#"{"access_token":"tok","expires_in":-1}"#)
.unwrap_err();
assert!(err.to_string().contains("invalid expires_in value: -1"));
}
#[test]
fn test_token_response_debug_redacts_access_token() {
let response = TokenResponse {
access_token: "secret-token".to_string(),
expires_in: Some(3600),
token_type: Some("Bearer".to_string()),
};
let debug = format!("{response:?}");
assert!(!debug.contains("secret-token"));
assert!(debug.contains("access_token: \"<redacted>\""));
}
#[test]
fn test_scopes_string() {
let config = OAuthConfig {
issuer_url: "https://login.microsoftonline.com/tenant/v2.0".to_string(),
client_id: "app-id".to_string(),
client_secret: Some("secret".to_string()),
scopes: vec!["scope1".to_string(), "scope2".to_string()],
flow: OAuthFlow::ClientCredentials,
refresh_buffer_secs: None,
};
let provider = OAuthHeaderProvider::new(config).unwrap();
assert_eq!(provider.scopes_string(), "scope1 scope2");
}
#[test]
fn test_oauth_config_debug_redacts_client_secret() {
let config = OAuthConfig {
issuer_url: "https://issuer.example.com".to_string(),
client_id: "client-id".to_string(),
client_secret: Some("super-secret".to_string()),
scopes: vec!["scope".to_string()],
flow: OAuthFlow::ClientCredentials,
refresh_buffer_secs: None,
};
let debug = format!("{config:?}");
assert!(!debug.contains("super-secret"));
assert!(debug.contains("client_secret: Some(\"<redacted>\")"));
}
#[test]
fn test_managed_identity_resource_from_default_scope() {
let config = OAuthConfig {
issuer_url: "https://login.microsoftonline.com/tenant/v2.0".to_string(),
client_id: "app-id".to_string(),
client_secret: None,
scopes: vec!["api://test/.default".to_string()],
flow: OAuthFlow::AzureManagedIdentity { client_id: None },
refresh_buffer_secs: None,
};
let provider = OAuthHeaderProvider::new(config).unwrap();
assert_eq!(provider.managed_identity_resource().unwrap(), "api://test");
}
#[test]
fn test_managed_identity_resource_without_default_suffix() {
let config = OAuthConfig {
issuer_url: "https://login.microsoftonline.com/tenant/v2.0".to_string(),
client_id: "app-id".to_string(),
client_secret: None,
scopes: vec!["api://test".to_string()],
flow: OAuthFlow::AzureManagedIdentity { client_id: None },
refresh_buffer_secs: None,
};
let provider = OAuthHeaderProvider::new(config).unwrap();
assert_eq!(provider.managed_identity_resource().unwrap(), "api://test");
}
#[test]
fn test_managed_identity_rejects_multiple_scopes() {
let config = OAuthConfig {
issuer_url: "https://login.microsoftonline.com/tenant/v2.0".to_string(),
client_id: "app-id".to_string(),
client_secret: None,
scopes: vec![
"api://test-a/.default".to_string(),
"api://test-b/.default".to_string(),
],
flow: OAuthFlow::AzureManagedIdentity { client_id: None },
refresh_buffer_secs: None,
};
assert!(OAuthHeaderProvider::new(config).is_err());
}
#[tokio::test]
async fn test_token_endpoint_requires_discovery_success() {
let (issuer_url, server) = spawn_discovery_error_server().await;
let config = OAuthConfig {
issuer_url,
client_id: "client-id".to_string(),
client_secret: Some("secret".to_string()),
scopes: vec!["scope".to_string()],
flow: OAuthFlow::ClientCredentials,
refresh_buffer_secs: None,
};
let provider = OAuthHeaderProvider::new(config).unwrap();
let err = provider.get_token_endpoint().await.unwrap_err();
assert!(matches!(
err,
Error::Runtime { message }
if message.contains("OIDC discovery failed with status 503")
));
server.await.unwrap();
}
#[test]
fn test_client_credentials_requires_secret() {
let config = OAuthConfig {
issuer_url: "https://login.microsoftonline.com/tenant/v2.0".to_string(),
client_id: "app-id".to_string(),
client_secret: None,
scopes: vec!["scope".to_string()],
flow: OAuthFlow::ClientCredentials,
refresh_buffer_secs: None,
};
assert!(OAuthHeaderProvider::new(config).is_err());
}
#[test]
fn test_client_credentials_rejects_insecure_non_loopback_issuer() {
let config = OAuthConfig {
issuer_url: "http://issuer.example.com".to_string(),
client_id: "app-id".to_string(),
client_secret: Some("secret".to_string()),
scopes: vec!["scope".to_string()],
flow: OAuthFlow::ClientCredentials,
refresh_buffer_secs: None,
};
let err = OAuthHeaderProvider::new(config).unwrap_err();
assert!(matches!(
err,
Error::InvalidInput { message }
if message == "ClientCredentials OAuth issuer_url must use https, except for loopback hosts"
));
}
#[test]
fn test_empty_scopes_rejected() {
let config = OAuthConfig {
issuer_url: "https://login.microsoftonline.com/tenant/v2.0".to_string(),
client_id: "app-id".to_string(),
client_secret: None,
scopes: vec![],
flow: OAuthFlow::AzureManagedIdentity { client_id: None },
refresh_buffer_secs: None,
};
assert!(OAuthHeaderProvider::new(config).is_err());
}
#[tokio::test]
async fn test_client_credentials_token_lifecycle() {
let (issuer_url, token_requests, server) = spawn_oauth_server().await;
let config = OAuthConfig {
issuer_url,
client_id: "client-id".to_string(),
client_secret: Some("secret".to_string()),
scopes: vec!["scope".to_string()],
flow: OAuthFlow::ClientCredentials,
refresh_buffer_secs: Some(0),
};
let provider = OAuthHeaderProvider::new(config).unwrap();
let headers = provider.get_headers().await.unwrap();
assert_eq!(headers.get("authorization").unwrap(), "Bearer token-1");
assert_eq!(token_requests.load(Ordering::SeqCst), 1);
let headers = provider.get_headers().await.unwrap();
assert_eq!(headers.get("authorization").unwrap(), "Bearer token-1");
assert_eq!(token_requests.load(Ordering::SeqCst), 1);
provider.token_state.write().await.expires_at =
Some(Instant::now() - Duration::from_secs(1));
let headers = provider.get_headers().await.unwrap();
assert_eq!(headers.get("authorization").unwrap(), "Bearer token-2");
assert_eq!(token_requests.load(Ordering::SeqCst), 2);
server.await.unwrap();
}
async fn spawn_oauth_server() -> (String, Arc<AtomicUsize>, JoinHandle<()>) {
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
let addr = listener.local_addr().unwrap();
let issuer_url = format!("http://{addr}");
let token_requests = Arc::new(AtomicUsize::new(0));
let server_token_requests = Arc::clone(&token_requests);
let server = tokio::spawn(async move {
for _ in 0..3 {
let (mut stream, _) = listener.accept().await.unwrap();
let (request_line, body) = read_http_request(&mut stream).await;
if request_line.starts_with("GET /.well-known/openid-configuration ") {
let discovery = format!(r#"{{"token_endpoint":"http://{addr}/token"}}"#);
write_json_response(&mut stream, "200 OK", &discovery).await;
} else if request_line.starts_with("POST /token ") {
assert!(body.contains("grant_type=client_credentials"));
assert!(body.contains("client_id=client-id"));
assert!(body.contains("client_secret=secret"));
assert!(body.contains("scope=scope"));
let token_num = server_token_requests.fetch_add(1, Ordering::SeqCst) + 1;
let token = format!(
r#"{{"access_token":"token-{token_num}","expires_in":3600,"token_type":"Bearer"}}"#
);
write_json_response(&mut stream, "200 OK", &token).await;
} else {
write_json_response(&mut stream, "404 Not Found", "{}").await;
}
}
});
(issuer_url, token_requests, server)
}
async fn spawn_discovery_error_server() -> (String, JoinHandle<()>) {
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
let addr = listener.local_addr().unwrap();
let issuer_url = format!("http://{addr}");
let server = tokio::spawn(async move {
let (mut stream, _) = listener.accept().await.unwrap();
let (request_line, _) = read_http_request(&mut stream).await;
assert!(request_line.starts_with("GET /.well-known/openid-configuration "));
write_json_response(&mut stream, "503 Service Unavailable", "{}").await;
});
(issuer_url, server)
}
async fn read_http_request(stream: &mut TcpStream) -> (String, String) {
let mut buffer = Vec::new();
let mut header_end = None;
while header_end.is_none() {
let mut chunk = [0; 1024];
let read = stream.read(&mut chunk).await.unwrap();
assert_ne!(read, 0, "connection closed before request headers");
buffer.extend_from_slice(&chunk[..read]);
header_end = find_subsequence(&buffer, b"\r\n\r\n").map(|pos| pos + 4);
}
let header_end = header_end.unwrap();
let headers = String::from_utf8_lossy(&buffer[..header_end]).to_string();
let request_line = headers.lines().next().unwrap_or_default().to_string();
let content_length = headers
.lines()
.find_map(|line| {
let (name, value) = line.split_once(':')?;
name.eq_ignore_ascii_case("content-length")
.then(|| value.trim().parse::<usize>().ok())
.flatten()
})
.unwrap_or(0);
while buffer.len() < header_end + content_length {
let mut chunk = [0; 1024];
let read = stream.read(&mut chunk).await.unwrap();
assert_ne!(read, 0, "connection closed before request body");
buffer.extend_from_slice(&chunk[..read]);
}
let body =
String::from_utf8_lossy(&buffer[header_end..header_end + content_length]).to_string();
(request_line, body)
}
fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
haystack
.windows(needle.len())
.position(|window| window == needle)
}
async fn write_json_response(stream: &mut TcpStream, status: &str, body: &str) {
let response = format!(
"HTTP/1.1 {status}\r\ncontent-type: application/json\r\ncontent-length: {}\r\nconnection: close\r\n\r\n{body}",
body.len()
);
stream.write_all(response.as_bytes()).await.unwrap();
}
}