Compare commits

..

9 Commits

Author SHA1 Message Date
Xuanwo
7c37ba216a style(python): format permutation pickle tests 2026-04-09 14:47:06 +08:00
Xuanwo
768d84845c feat(python): support pickling permutations 2026-04-09 00:32:48 +08:00
Xuanwo
2d380d1669 Track permutation reopen metadata 2026-04-08 17:34:05 +08:00
Jack Ye
a898dc81c2 feat: add user_id field to ClientConfig for user identification (#3240)
## Summary

- Add a `user_id` field to `ClientConfig` that allows users to identify
themselves to LanceDB Cloud/Enterprise
- The user_id is sent as the `x-lancedb-user-id` HTTP header in all
requests
- Supports three configuration methods:
  - Direct assignment via `ClientConfig.user_id`
  - Environment variable `LANCEDB_USER_ID`
  - Indirect env var lookup via `LANCEDB_USER_ID_ENV_KEY`

Closes #3230

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-06 11:20:10 -07:00
Lance Release
de3f8097e7 Bump version: 0.28.0-beta.0 → 0.28.0-beta.1 2026-04-05 02:51:18 +00:00
Lance Release
0ac59de5f1 Bump version: 0.31.0-beta.0 → 0.31.0-beta.1 2026-04-05 02:50:52 +00:00
LanceDB Robot
d082c2d2ac chore: update lance dependency to v5.0.0-beta.5 (#3237)
## Summary
- update Rust Lance workspace dependencies to `v5.0.0-beta.5` using
`ci/set_lance_version.py`
- update Java `lance-core` dependency property to `5.0.0-beta.5`
- refresh Cargo lockfile to the new Lance tag

## Verification
- `cargo clippy --workspace --tests --all-features -- -D warnings`
- `cargo fmt --all`

## Upstream Tag
- https://github.com/lance-format/lance/releases/tag/v5.0.0-beta.5

---------

Co-authored-by: Jack Ye <yezhaoqin@gmail.com>
2026-04-04 19:49:51 -07:00
Zelys
9d8699f99e feat(python): support Enum types in Pydantic to Arrow schema conversion (#3232)
## Summary

Fixes #1846.

Python `Enum` fields raised `TypeError: Converting Pydantic type to
Arrow Type: unsupported type <enum 'SomethingTypes'>` when converting a
Pydantic model to an Arrow schema.

The fix adds Enum detection in `_pydantic_type_to_arrow_type`. When an
Enum subclass is encountered, the value type of its members is inspected
and mapped to the appropriate Arrow type:

- `str`-valued enums (e.g. `class Status(str, Enum)`) → `pa.dictionary(pa.int32(), pa.utf8())` (dictionary-encoded strings)
- `int`-valued enums (e.g. `class Priority(int, Enum)`) → `pa.int64()`
- Other homogeneous value types → the Arrow type for that Python type
- Mixed-value or empty enums → `pa.utf8()` (safe fallback)

This covers the common `(str, Enum)` and `(int, Enum)` mixin patterns
used in practice.

## Changes

- `python/python/lancedb/pydantic.py`: add Enum branch in
`_pydantic_type_to_arrow_type`
- `python/python/tests/test_pydantic.py`: add `test_enum_types` covering
`str`, `int`, and `Optional` Enum fields

## Note on #2395

PR #2395 handles `StrEnum` (Python 3.11+) specifically, using a
dictionary-encoded type. This PR handles the broader `(str, Enum)` /
`(int, Enum)` mixin pattern that works across all Python versions:
string-valued enums are dictionary-encoded, and other single-type enums
are stored as the Arrow type of their value.

AI assistance was used in developing this fix.
2026-04-03 10:40:49 -07:00
Lance Release
aa2c7b3591 Bump version: 0.27.2 → 0.28.0-beta.0 2026-04-03 08:45:56 +00:00
39 changed files with 864 additions and 99 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.27.2"
current_version = "0.28.0-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

70
Cargo.lock generated
View File

@@ -3072,8 +3072,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4134,8 +4134,8 @@ dependencies = [
[[package]]
name = "lance"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow",
"arrow-arith",
@@ -4201,8 +4201,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4222,8 +4222,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrayref",
"paste",
@@ -4232,8 +4232,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4270,8 +4270,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow",
"arrow-array",
@@ -4301,8 +4301,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow",
"arrow-array",
@@ -4320,8 +4320,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4358,8 +4358,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4391,8 +4391,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow",
"arrow-arith",
@@ -4456,8 +4456,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow",
"arrow-arith",
@@ -4501,8 +4501,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4518,8 +4518,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow",
"async-trait",
@@ -4532,8 +4532,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4578,8 +4578,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow",
"arrow-array",
@@ -4618,8 +4618,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "5.0.0-beta.4"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4630,7 +4630,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.27.2"
version = "0.28.0-beta.1"
dependencies = [
"ahash",
"anyhow",
@@ -4712,7 +4712,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.27.2"
version = "0.28.0-beta.1"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4734,7 +4734,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.30.2"
version = "0.31.0-beta.1"
dependencies = [
"arrow",
"async-trait",

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.91.0"
[workspace.dependencies]
lance = { "version" = "=5.0.0-beta.4", default-features = false, "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=5.0.0-beta.4", default-features = false, "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=5.0.0-beta.4", default-features = false, "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }

View File

@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId>
<version>0.27.2</version>
<version>0.28.0-beta.1</version>
</dependency>
```

View File

@@ -53,3 +53,18 @@ optional tlsConfig: TlsConfig;
```ts
optional userAgent: string;
```
***
### userId?
```ts
optional userId: string;
```
User identifier for tracking purposes.
This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
variable that contains the user ID value.

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.27.2-final.0</version>
<version>0.28.0-beta.1</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.27.2-final.0</version>
<version>0.28.0-beta.1</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>
@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>5.0.0-beta.4</lance-core.version>
<lance-core.version>5.0.0-beta.5</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.27.2"
version = "0.28.0-beta.1"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.27.2",
"version": "0.28.0-beta.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.27.2",
"version": "0.28.0-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.27.2",
"version": "0.28.0-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.27.2",
"version": "0.28.0-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.27.2",
"version": "0.28.0-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.27.2",
"version": "0.28.0-beta.1",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.27.2",
"version": "0.28.0-beta.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.27.2",
"version": "0.28.0-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.27.2",
"version": "0.28.0-beta.1",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.27.2",
"version": "0.28.0-beta.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -92,6 +92,13 @@ pub struct ClientConfig {
pub extra_headers: Option<HashMap<String, String>>,
pub id_delimiter: Option<String>,
pub tls_config: Option<TlsConfig>,
/// User identifier for tracking purposes.
///
/// This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
/// It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
/// Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
/// variable that contains the user ID value.
pub user_id: Option<String>,
}
impl From<TimeoutConfig> for lancedb::remote::TimeoutConfig {
@@ -145,6 +152,7 @@ impl From<ClientConfig> for lancedb::remote::ClientConfig {
id_delimiter: config.id_delimiter,
tls_config: config.tls_config.map(Into::into),
header_provider: None, // the header provider is set separately later
user_id: config.user_id,
}
}
}

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.31.0-beta.0"
current_version = "0.31.0-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.31.0-beta.0"
version = "0.31.0-beta.1"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -45,7 +45,7 @@ repository = "https://github.com/lancedb/lancedb"
[project.optional-dependencies]
pylance = [
"pylance>=5.0.0b3",
"pylance>=5.0.0b5",
]
tests = [
"aiohttp>=3.9.0",
@@ -59,7 +59,7 @@ tests = [
"polars>=0.19, <=1.3.0",
"tantivy>=0.20.0",
"pyarrow-stubs>=16.0",
"pylance>=5.0.0b3",
"pylance>=5.0.0b5",
"requests>=2.31.0",
"datafusion>=52,<53",
]

View File

@@ -151,6 +151,9 @@ class Connection(object):
async def drop_all_tables(
self, namespace_path: Optional[List[str]] = None
) -> None: ...
# Async accessor for the configuration needed to build this connection's
# namespace client. AsyncConnection.namespace_client awaits a method of this
# name on `self._inner` (presumably this class; confirm) and reads the
# "impl" and "properties" keys of the returned dict.
async def namespace_client_config(
self,
) -> Dict[str, Any]: ...
class Table:
def name(self) -> str: ...

View File

@@ -23,11 +23,13 @@ from lancedb.embeddings.registry import EmbeddingFunctionRegistry
from lancedb.common import data_to_reader, sanitize_uri, validate_schema
from lancedb.background_loop import LOOP
from lance_namespace import (
LanceNamespace,
ListNamespacesResponse,
CreateNamespaceResponse,
DropNamespaceResponse,
DescribeNamespaceResponse,
ListTablesResponse,
connect as namespace_connect,
)
from . import __version__
@@ -507,6 +509,26 @@ class DBConnection(EnforceOverrides):
def uri(self) -> str:
return self._uri
def namespace_client(self) -> LanceNamespace:
    """Return the namespace client equivalent to this connection.

    The kind of client depends on the connection type:

    - native storage connections: a DirectoryNamespace pointing to the
      same root with the same storage options
    - namespace connections: the backing namespace client
    - enterprise (remote) connections: a RestNamespace with the same URI
      and authentication headers

    This base implementation does not build a client; it always raises.

    Returns
    -------
    LanceNamespace
        The namespace client for this connection.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.
    """
    message = "namespace_client is not supported for this connection type"
    raise NotImplementedError(message)
class LanceDBConnection(DBConnection):
"""
@@ -1044,6 +1066,20 @@ class LanceDBConnection(DBConnection):
)
)
@override
def namespace_client(self) -> LanceNamespace:
    """Return the namespace client equivalent to this connection.

    The result is a DirectoryNamespace pointing to the same root with the
    same storage options. The call is delegated to the wrapped connection
    (`self._conn`) and driven to completion with `LOOP.run`.

    Returns
    -------
    LanceNamespace
        The namespace client for this connection.
    """
    pending_client = self._conn.namespace_client()
    return LOOP.run(pending_client)
@deprecation.deprecated(
deprecated_in="0.15.1",
removed_in="0.17",
@@ -1716,6 +1752,25 @@ class AsyncConnection(object):
namespace_path = []
await self._inner.drop_all_tables(namespace_path=namespace_path)
async def namespace_client(self) -> LanceNamespace:
    """Return the namespace client equivalent to this connection.

    The kind of client depends on the connection type:

    - native storage connections: a DirectoryNamespace pointing to the
      same root with the same storage options
    - namespace connections: the backing namespace client
    - enterprise (remote) connections: a RestNamespace with the same URI
      and authentication headers

    Returns
    -------
    LanceNamespace
        The namespace client for this connection.
    """
    # The inner connection reports which namespace implementation to use
    # ("impl") and the properties to configure it with ("properties").
    client_config = await self._inner.namespace_client_config()
    impl = client_config["impl"]
    properties = client_config["properties"]
    return namespace_connect(impl, properties)
@deprecation.deprecated(
deprecated_in="0.15.1",
removed_in="0.17",

View File

@@ -890,6 +890,20 @@ class LanceNamespaceDBConnection(DBConnection):
pushdown_operations=self._pushdown_operations,
)
@override
def namespace_client(self) -> LanceNamespace:
    """Return the namespace client backing this connection.

    Namespace connections already hold their client (the one that was
    provided during construction), so it is returned as-is.

    Returns
    -------
    LanceNamespace
        The namespace client for this connection.
    """
    backing_client = self._namespace_client
    return backing_client
class AsyncLanceNamespaceDBConnection:
"""
@@ -1387,6 +1401,19 @@ class AsyncLanceNamespaceDBConnection:
page_token=response.page_token,
)
async def namespace_client(self) -> LanceNamespace:
    """Return the namespace client backing this connection.

    Namespace connections already hold their client (the one that was
    provided during construction), so it is returned as-is.

    Returns
    -------
    LanceNamespace
        The namespace client for this connection.
    """
    backing_client = self._namespace_client
    return backing_client
def connect_namespace(
namespace_client_impl: str,

View File

@@ -1,21 +1,98 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import json
import pickle
from datetime import timedelta
from typing import Any, Callable, Iterator, Literal, Optional, TYPE_CHECKING, Union
import pyarrow as pa
from deprecation import deprecated
from lancedb import AsyncConnection, DBConnection
import pyarrow as pa
import json
from ._lancedb import async_permutation_builder, PermutationReader
from .table import LanceTable
from .background_loop import LOOP
from .table import LanceTable
from .util import batch_to_tensor, batch_to_tensor_rows
from typing import Any, Callable, Iterator, Literal, Optional, TYPE_CHECKING, Union
if TYPE_CHECKING:
from lancedb.dependencies import pandas as pd, numpy as np, polars as pl
def _builtin_transform(format: str) -> Callable[[pa.RecordBatch], Any]:
    """Resolve a built-in output format name to its transform callable.

    Parameters
    ----------
    format: str
        One of "python", "python_col", "numpy", "pandas", "arrow", "torch",
        "torch_col" or "polars".

    Returns
    -------
    Callable[[pa.RecordBatch], Any]
        The transform registered for ``format``.

    Raises
    ------
    ValueError
        If ``format`` is not one of the names listed above.
    """
    # Each entry is wrapped in a lambda so that only the selected transform is
    # looked up. The "polars" entry is produced by calling
    # Transforms.arrow2polars(); the others are referenced directly.
    resolvers = (
        ("python", lambda: Transforms.arrow2python),
        ("python_col", lambda: Transforms.arrow2pythoncol),
        ("numpy", lambda: Transforms.arrow2numpy),
        ("pandas", lambda: Transforms.arrow2pandas),
        ("arrow", lambda: Transforms.arrow2arrow),
        ("torch", lambda: batch_to_tensor_rows),
        ("torch_col", lambda: batch_to_tensor),
        ("polars", lambda: Transforms.arrow2polars()),
    )
    for name, resolve in resolvers:
        if format == name:
            return resolve()
    raise ValueError(f"Invalid format: {format}")
def _table_to_state(
    table: Union[LanceTable, dict[str, Any]],
) -> dict[str, Any]:
    """Convert a table into a plain dict from which it can be reopened.

    Parameters
    ----------
    table: Union[LanceTable, dict[str, Any]]
        A live table, or a state dict previously produced by this function.

    Returns
    -------
    dict[str, Any]
        The keys "uri", "name", "version", "storage_options",
        "read_consistency_interval_secs" and "namespace_path". A dict
        input is returned unchanged.

    Raises
    ------
    pickle.PicklingError
        If the table is not a LanceTable, is namespace-backed, or lives in
        an in-memory ("memory://") database.
    """
    # Already reduced to state: nothing to convert.
    if isinstance(table, dict):
        return table

    # Find the first reason (if any) why this table cannot be pickled.
    rejection = None
    if not isinstance(table, LanceTable):
        rejection = "Permutation pickling only supports LanceTable-backed permutations"
    elif table._namespace_client is not None:
        rejection = "Permutation pickling does not yet support namespace-backed tables"
    elif table._conn.uri.startswith("memory://"):
        rejection = "Permutation pickling does not support in-memory databases"
    if rejection is not None:
        raise pickle.PicklingError(rejection)

    # Best effort: any failure to read the interval is treated as "not set".
    try:
        interval = table._conn.read_consistency_interval
    except Exception:
        interval = None

    return {
        "uri": table._conn.uri,
        "name": table.name,
        "version": table.version,
        "storage_options": table.initial_storage_options(),
        # Stored as seconds rather than as a timedelta object.
        "read_consistency_interval_secs": (
            None if interval is None else interval.total_seconds()
        ),
        "namespace_path": list(table.namespace),
    }
def _table_from_state(state: dict[str, Any]) -> LanceTable:
    """Reopen a table from a state dict with the keys written by `_table_to_state`.

    Connects to ``state["uri"]`` with the stored storage options and read
    consistency interval, opens the table ``state["name"]`` under
    ``state["namespace_path"]``, and checks out ``state["version"]``.

    Parameters
    ----------
    state: dict[str, Any]
        Must contain "uri", "name", "version", "storage_options",
        "read_consistency_interval_secs" and "namespace_path".

    Returns
    -------
    LanceTable
        The reopened table, checked out at the recorded version.
    """
    from . import connect

    # The interval is stored as seconds (or None); rebuild the timedelta.
    interval_secs = state["read_consistency_interval_secs"]
    if interval_secs is None:
        interval = None
    else:
        interval = timedelta(seconds=interval_secs)

    database = connect(
        state["uri"],
        read_consistency_interval=interval,
        storage_options=state["storage_options"],
    )
    reopened = database.open_table(
        state["name"], namespace_path=state["namespace_path"]
    )
    reopened.checkout(state["version"])
    return reopened
class PermutationBuilder:
"""
A utility for creating a "permutation table" which is a table that defines an
@@ -385,6 +462,13 @@ class Permutation:
selection: dict[str, str],
batch_size: int,
transform_fn: Callable[pa.RecordBatch, Any],
*,
base_table: Union[LanceTable, dict[str, Any]],
permutation_table: Optional[Union[LanceTable, dict[str, Any]]],
split: int,
offset: Optional[int] = None,
limit: Optional[int] = None,
transform_spec: Optional[str] = None,
):
"""
Internal constructor. Use [from_tables](#from_tables) instead.
@@ -395,6 +479,93 @@ class Permutation:
self.selection = selection
self.transform_fn = transform_fn
self.batch_size = batch_size
self._transform_spec = transform_spec
# These fields are used to reconstruct the permutation in a new process.
self._base_table = base_table
self._permutation_table = permutation_table
self._split = split
self._offset = offset
self._limit = limit
def _reopen_metadata(self) -> dict[str, Any]:
return {
"base_table": self._base_table,
"permutation_table": self._permutation_table,
"split": self._split,
"offset": self._offset,
"limit": self._limit,
"transform_spec": self._transform_spec,
}
def __getstate__(self) -> dict[str, Any]:
    """Build the state that is pickled for this permutation.

    The reader is not part of the state. Tables are recorded as reopen
    state dicts (see `_table_to_state`) instead of live handles.

    Returns
    -------
    dict[str, Any]
        A dict with "selection", "batch_size", "transform" and "reopen".
        "transform" is ``{"kind": "builtin", "format": ...}`` when a
        transform spec is recorded, otherwise
        ``{"kind": "callable", "transform_fn": ...}``.

    Raises
    ------
    pickle.PicklingError
        Propagated from `_table_to_state` when a table cannot be pickled.
    """
    spec = self._transform_spec
    if spec is None:
        # No recorded format name: the callable itself is stored.
        transform_state = {"kind": "callable", "transform_fn": self.transform_fn}
    else:
        transform_state = {"kind": "builtin", "format": spec}

    # Store reopen state instead of live LanceTable handles.
    reopen_state = dict(self._reopen_metadata())
    reopen_state["base_table"] = _table_to_state(self._base_table)
    permutation_table = self._permutation_table
    reopen_state["permutation_table"] = (
        None if permutation_table is None else _table_to_state(permutation_table)
    )

    return {
        "selection": self.selection,
        "batch_size": self.batch_size,
        "transform": transform_state,
        "reopen": reopen_state,
    }
def __setstate__(self, state: dict[str, Any]) -> None:
    """Restore a permutation from the state produced by `__getstate__`.

    Reopens the base table (and the permutation table, if one was
    recorded), rebuilds the reader from them, and re-applies the recorded
    offset and limit. Afterwards ``_base_table`` and ``_permutation_table``
    hold the reopen state dicts from ``state``, not the reopened tables.

    Parameters
    ----------
    state: dict[str, Any]
        A dict with "selection", "batch_size", "transform" and "reopen".
    """
    reopen = state["reopen"]

    base_state = reopen["base_table"]
    base_table = _table_from_state(base_state)

    perm_state = reopen["permutation_table"]
    if perm_state is None:
        permutation_table = None
    else:
        permutation_table = _table_from_state(perm_state)

    split = reopen["split"]
    offset = reopen["offset"]
    limit = reopen["limit"]

    async def rebuild_reader():
        # Start from the tables, then layer the offset and limit back on.
        rebuilt = await PermutationReader.from_tables(
            base_table, permutation_table, split
        )
        if offset is not None:
            rebuilt = await rebuilt.with_offset(offset)
        if limit is not None:
            rebuilt = await rebuilt.with_limit(limit)
        return rebuilt

    # A "builtin" transform is looked up again by its format name; any other
    # kind carries the callable in the state.
    transform = state["transform"]
    is_builtin = transform["kind"] == "builtin"
    transform_spec = transform["format"] if is_builtin else None
    transform_fn = (
        _builtin_transform(transform_spec)
        if is_builtin
        else transform["transform_fn"]
    )

    self.reader = LOOP.run(rebuild_reader())
    self.selection = state["selection"]
    self.batch_size = state["batch_size"]
    self.transform_fn = transform_fn
    self._transform_spec = transform_spec
    self._base_table = base_state
    self._permutation_table = perm_state
    self._split = split
    self._offset = offset
    self._limit = limit
def _with_selection(self, selection: dict[str, str]) -> "Permutation":
"""
@@ -403,7 +574,13 @@ class Permutation:
Does not validate the selection and replaces it entirely. This is not
intended for public use.
"""
return Permutation(self.reader, selection, self.batch_size, self.transform_fn)
return Permutation(
self.reader,
selection,
self.batch_size,
self.transform_fn,
**self._reopen_metadata(),
)
def _with_reader(self, reader: PermutationReader) -> "Permutation":
"""
@@ -411,13 +588,25 @@ class Permutation:
This is an internal method and should not be used directly.
"""
return Permutation(reader, self.selection, self.batch_size, self.transform_fn)
return Permutation(
reader,
self.selection,
self.batch_size,
self.transform_fn,
**self._reopen_metadata(),
)
def with_batch_size(self, batch_size: int) -> "Permutation":
"""
Creates a new permutation with the given batch size
"""
return Permutation(self.reader, self.selection, batch_size, self.transform_fn)
return Permutation(
self.reader,
self.selection,
batch_size,
self.transform_fn,
**self._reopen_metadata(),
)
@classmethod
def identity(cls, table: LanceTable) -> "Permutation":
@@ -491,7 +680,14 @@ class Permutation:
schema = await reader.output_schema(None)
initial_selection = {name: name for name in schema.names}
return cls(
reader, initial_selection, DEFAULT_BATCH_SIZE, Transforms.arrow2python
reader,
initial_selection,
DEFAULT_BATCH_SIZE,
Transforms.arrow2python,
base_table=base_table,
permutation_table=permutation_table,
split=split,
transform_spec="python",
)
return LOOP.run(do_from_tables())
@@ -732,24 +928,16 @@ class Permutation:
this method.
"""
assert format is not None, "format is required"
if format == "python":
return self.with_transform(Transforms.arrow2python)
if format == "python_col":
return self.with_transform(Transforms.arrow2pythoncol)
elif format == "numpy":
return self.with_transform(Transforms.arrow2numpy)
elif format == "pandas":
return self.with_transform(Transforms.arrow2pandas)
elif format == "arrow":
return self.with_transform(Transforms.arrow2arrow)
elif format == "torch":
return self.with_transform(batch_to_tensor_rows)
elif format == "torch_col":
return self.with_transform(batch_to_tensor)
elif format == "polars":
return self.with_transform(Transforms.arrow2polars())
else:
raise ValueError(f"Invalid format: {format}")
return Permutation(
self.reader,
self.selection,
self.batch_size,
_builtin_transform(format),
**{
**self._reopen_metadata(),
"transform_spec": format,
},
)
def with_transform(self, transform: Callable[pa.RecordBatch, Any]) -> "Permutation":
"""
@@ -762,7 +950,16 @@ class Permutation:
for expensive operations such as image decoding.
"""
assert transform is not None, "transform is required"
return Permutation(self.reader, self.selection, self.batch_size, transform)
return Permutation(
self.reader,
self.selection,
self.batch_size,
transform,
**{
**self._reopen_metadata(),
"transform_spec": None,
},
)
def __getitem__(self, index: int) -> Any:
"""
@@ -800,7 +997,16 @@ class Permutation:
async def do_with_skip():
reader = await self.reader.with_offset(skip)
return self._with_reader(reader)
return Permutation(
reader,
self.selection,
self.batch_size,
self.transform_fn,
**{
**self._reopen_metadata(),
"offset": skip,
},
)
return LOOP.run(do_with_skip())
@@ -823,7 +1029,16 @@ class Permutation:
async def do_with_take():
reader = await self.reader.with_limit(limit)
return self._with_reader(reader)
return Permutation(
reader,
self.selection,
self.batch_size,
self.transform_fn,
**{
**self._reopen_metadata(),
"limit": limit,
},
)
return LOOP.run(do_with_take())

View File

@@ -10,6 +10,7 @@ import sys
import types
from abc import ABC, abstractmethod
from datetime import date, datetime
from enum import Enum
from typing import (
TYPE_CHECKING,
Any,
@@ -314,6 +315,19 @@ def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
return pa.list_(pa.list_(tp.value_arrow_type(), tp.dim()))
# For regular Vector
return pa.list_(tp.value_arrow_type(), tp.dim())
if _safe_issubclass(tp, Enum):
# Map Enum to the Arrow type of its value.
# For string-valued enums, use dictionary encoding for efficiency.
# For integer enums, use the native type.
# Fall back to utf8 for mixed-type or empty enums.
value_types = {type(m.value) for m in tp}
if len(value_types) == 1:
value_type = value_types.pop()
if value_type is str:
# Use dictionary encoding for string enums
return pa.dictionary(pa.int32(), pa.utf8())
return _py_type_to_arrow_type(value_type, field)
return pa.utf8()
return _py_type_to_arrow_type(tp, field)

View File

@@ -145,6 +145,33 @@ class TlsConfig:
@dataclass
class ClientConfig:
"""Configuration for the LanceDB Cloud HTTP client.
Attributes
----------
user_agent: str
User agent string sent with requests.
retry_config: RetryConfig
Configuration for retrying failed requests.
timeout_config: Optional[TimeoutConfig]
Configuration for request timeouts.
extra_headers: Optional[dict]
Additional headers to include in requests.
id_delimiter: Optional[str]
The delimiter to use when constructing object identifiers.
tls_config: Optional[TlsConfig]
TLS/mTLS configuration for secure connections.
header_provider: Optional[HeaderProvider]
Provider for dynamic headers to be added to each request.
user_id: Optional[str]
User identifier for tracking purposes. This is sent as the
`x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
This can also be set via the `LANCEDB_USER_ID` environment variable.
Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another
environment variable that contains the user ID value.
"""
user_agent: str = f"LanceDB-Python-Client/{__version__}"
retry_config: RetryConfig = field(default_factory=RetryConfig)
timeout_config: Optional[TimeoutConfig] = field(default_factory=TimeoutConfig)
@@ -152,6 +179,7 @@ class ClientConfig:
id_delimiter: Optional[str] = None
tls_config: Optional[TlsConfig] = None
header_provider: Optional["HeaderProvider"] = None
user_id: Optional[str] = None
def __post_init__(self):
if isinstance(self.retry_config, dict):

View File

@@ -24,6 +24,7 @@ from ..common import DATA
from ..db import DBConnection, LOOP
from ..embeddings import EmbeddingFunctionConfig
from lance_namespace import (
LanceNamespace,
CreateNamespaceResponse,
DescribeNamespaceResponse,
DropNamespaceResponse,
@@ -570,6 +571,19 @@ class RemoteDBConnection(DBConnection):
)
)
@override
def namespace_client(self) -> LanceNamespace:
    """Return the namespace client that mirrors this remote connection.

    The client is a RestNamespace configured with the same URI and
    authentication headers as this connection.

    Returns
    -------
    LanceNamespace
        The namespace client for this connection.
    """
    # The underlying connection exposes an awaitable; drive it to completion
    # on the shared background loop so this method stays synchronous.
    pending = self._conn.namespace_client()
    return LOOP.run(pending)
async def close(self):
"""Close the connection to the database."""
self._conn.close()

View File

@@ -3,6 +3,7 @@
import re
import sys
from datetime import timedelta
import os
@@ -1048,3 +1049,59 @@ def test_clone_table_deep_clone_fails(tmp_path):
source_uri = os.path.join(tmp_path, "source.lance")
with pytest.raises(Exception, match="Deep clone is not yet implemented"):
db.clone_table("cloned", source_uri, is_shallow=False)
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
def test_namespace_client_native_storage(tmp_path):
    """A native-storage connection exposes a DirectoryNamespace client."""
    from lance.namespace import DirectoryNamespace

    client = lancedb.connect(tmp_path).namespace_client()

    assert isinstance(client, DirectoryNamespace)
    # The namespace id is expected to contain the database root path.
    root = str(tmp_path)
    assert root in client.namespace_id()
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
def test_namespace_client_with_storage_options(tmp_path):
    """namespace_client() yields a DirectoryNamespace when storage options are set."""
    from lance.namespace import DirectoryNamespace

    options = {"timeout": "10s"}
    connection = lancedb.connect(tmp_path, storage_options=options)

    client = connection.namespace_client()
    assert isinstance(client, DirectoryNamespace)
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
def test_namespace_client_operations(tmp_path):
    """The namespace client can list a table created through the connection."""
    connection = lancedb.connect(tmp_path)
    client = connection.namespace_client()

    # The table is created via the regular connection, after the namespace
    # client has already been obtained.
    rows = [{"id": 1, "text": "hello", "vector": [1.0, 2.0]}]
    connection.create_table("test_table", data=rows)

    from lance_namespace import ListTablesRequest

    # An empty id addresses the root namespace.
    listing = client.list_tables(ListTablesRequest(id=[]))

    # Entries may be plain strings or objects exposing a ``name`` attribute.
    seen = []
    for entry in listing.tables:
        seen.append(entry.name if hasattr(entry, "name") else entry)
    assert "test_table" in seen
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
def test_namespace_client_namespace_connection(tmp_path):
    """A namespace connection hands back its backing DirectoryNamespace client."""
    from lance.namespace import DirectoryNamespace

    root = str(tmp_path)
    connection = lancedb.connect_namespace("dir", {"root": root})

    client = connection.namespace_client()
    assert isinstance(client, DirectoryNamespace)
    assert root in client.namespace_id()

View File

@@ -3,6 +3,7 @@
import pyarrow as pa
import math
import pickle
import pytest
from lancedb import DBConnection, Table, connect
@@ -599,6 +600,87 @@ def test_limit_offset(some_permutation: Permutation):
some_permutation.with_skip(500).with_take(500).num_rows
def test_permutation_pickle_rejects_in_memory_tables(mem_db: DBConnection):
    """Pickling a permutation over an in-memory database raises PicklingError."""
    source = mem_db.create_table("identity_table", pa.table({"id": range(10)}))
    identity = Permutation.identity(source)

    with pytest.raises(pickle.PicklingError, match="in-memory databases"):
        pickle.dumps(identity)
def test_identity_permutation_pickle_roundtrip_preserves_table_version(tmp_path):
    """Skip/take/format survive a pickle round trip across a later table append."""
    columns = {"id": range(10), "value": range(10)}
    table = connect(tmp_path).create_table("identity_table", pa.table(columns))

    permutation = Permutation.identity(table)
    permutation = permutation.with_skip(2).with_take(3)
    permutation = permutation.with_format("python_col")
    payload = pickle.dumps(permutation)

    # Mutate the base table between dumping and loading the permutation.
    table.add(pa.table({"id": [10], "value": [10]}))

    restored = pickle.loads(payload)
    assert restored.num_rows == 3

    expected = [{"id": [2, 3, 4], "value": [2, 3, 4]}]
    assert list(restored.iter(10, skip_last_batch=False)) == expected
def test_permutation_pickle_roundtrip_with_persisted_permutation_table(tmp_path):
    """A permutation over a persisted permutation table round-trips via pickle."""
    db = connect(tmp_path)
    base = db.create_table(
        "base_table",
        pa.table({"id": range(1000), "value": range(1000)}),
    )

    # Build and persist a shuffled train/test split of the base table.
    builder = permutation_builder(base)
    builder = builder.split_random(
        ratios=[0.95, 0.05], seed=42, split_names=["train", "test"]
    )
    builder = builder.shuffle(seed=42)
    builder = builder.persist(db, "persisted_permutation")
    permutation_table = builder.execute()

    # Apply every kind of view adjustment so each one is exercised by the
    # round trip: projection, rename, batch size, skip, take and format.
    permutation = Permutation.from_tables(base, permutation_table, "test")
    permutation = permutation.select_columns(["id"]).rename_column("id", "row_id")
    permutation = permutation.with_batch_size(32)
    permutation = permutation.with_skip(5).with_take(10)
    permutation = permutation.with_format("arrow")

    restored = pickle.loads(pickle.dumps(permutation))

    assert restored.batch_size == 32
    assert restored.column_names == ["row_id"]
    assert restored.num_rows == 10

    restored_rows = restored.__getitems__([0, 1, 2]).to_pylist()
    original_rows = permutation.__getitems__([0, 1, 2]).to_pylist()
    assert restored_rows == original_rows
def test_permutation_pickle_roundtrip_preserves_builtin_polars_format(tmp_path):
    """The built-in "polars" format is still applied after a pickle round trip."""
    # Skip the test entirely when polars is not installed.
    pl = pytest.importorskip("polars")

    columns = {"id": range(5), "value": range(5)}
    table = connect(tmp_path).create_table("polars_table", pa.table(columns))

    permutation = Permutation.identity(table).with_take(2)
    permutation = permutation.with_format("polars")
    restored = pickle.loads(pickle.dumps(permutation))

    frame = restored.__getitems__([0, 1])
    assert isinstance(frame, pl.DataFrame)
    expected = {"id": [0, 1], "value": [0, 1]}
    assert frame.to_dict(as_series=False) == expected
def test_remove_columns(some_permutation: Permutation):
assert some_permutation.remove_columns(["value"]).schema == pa.schema(
[("id", pa.int64())]

View File

@@ -3,6 +3,7 @@
import json
from datetime import date, datetime
from enum import Enum
from typing import List, Optional, Tuple
import pyarrow as pa
@@ -673,3 +674,29 @@ async def test_aliases_in_lance_model_async(mem_db_async):
assert hasattr(model, "name")
assert hasattr(model, "distance")
assert model.distance < 0.01
def test_enum_types():
    """Enum fields map to the Arrow type of their member values (issue #1846)."""

    class StrStatus(str, Enum):
        PENDING = "pending"
        RUNNING = "running"
        DONE = "done"

    class IntPriority(int, Enum):
        LOW = 1
        MEDIUM = 2
        HIGH = 3

    class TestModel(pydantic.BaseModel):
        status: StrStatus
        priority: IntPriority
        opt_status: Optional[StrStatus] = None

    schema = pydantic_to_schema(TestModel)

    # String-valued enums are expected as dictionary-encoded utf8,
    # integer-valued enums as plain int64.
    assert schema.field("status").type == pa.dictionary(pa.int32(), pa.utf8())
    assert schema.field("priority").type == pa.int64()

    # Wrapping the enum in Optional keeps the Arrow type and marks the field
    # nullable.
    optional_field = schema.field("opt_status")
    assert optional_field.type == pa.dictionary(pa.int32(), pa.utf8())
    assert optional_field.nullable

View File

@@ -474,6 +474,25 @@ impl Connection {
})
})
}
/// Describe how to construct a namespace client equivalent to this connection.
///
/// The returned awaitable resolves to a dict with:
/// - "impl": "dir" for DirectoryNamespace, "rest" for RestNamespace
/// - "properties": configuration properties for the namespace
#[pyo3(signature = ())]
pub fn namespace_client_config(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
    let py = self_.py();
    // Clone the inner connection so the future owns it and does not borrow `self_`.
    let connection = self_.get_inner()?.clone();
    future_into_py(py, async move {
        let (impl_type, properties) = connection
            .namespace_client_config()
            .await
            .infer_error()?;
        // Build the Python-side result once the async lookup has finished.
        Python::attach(|py| -> PyResult<Py<PyDict>> {
            let config = PyDict::new(py);
            config.set_item("impl", impl_type)?;
            config.set_item("properties", properties)?;
            Ok(config.unbind())
        })
    })
}
}
#[pyfunction]
@@ -528,6 +547,7 @@ pub struct PyClientConfig {
id_delimiter: Option<String>,
tls_config: Option<PyClientTlsConfig>,
header_provider: Option<Py<PyAny>>,
user_id: Option<String>,
}
#[derive(FromPyObject)]
@@ -612,6 +632,7 @@ impl From<PyClientConfig> for lancedb::remote::ClientConfig {
id_delimiter: value.id_delimiter,
tls_config: value.tls_config.map(Into::into),
header_provider,
user_id: value.user_id,
}
}
}

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.27.2"
version = "0.28.0-beta.1"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -541,6 +541,16 @@ impl Connection {
self.internal.namespace_client().await
}
/// Describe how to construct a namespace client equivalent to this connection.
///
/// Yields `(impl_type, properties)` where:
/// - `impl_type`: "dir" for DirectoryNamespace, "rest" for RestNamespace
/// - `properties`: configuration properties for the namespace
///
/// The work is delegated to the underlying database implementation.
pub async fn namespace_client_config(
    &self,
) -> Result<(String, std::collections::HashMap<String, String>)> {
    let (impl_type, properties) = self.internal.namespace_client_config().await?;
    Ok((impl_type, properties))
}
/// List tables with pagination support
pub async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
self.internal.list_tables(request).await

View File

@@ -265,4 +265,13 @@ pub trait Database:
/// For ListingDatabase, it is the equivalent DirectoryNamespace.
/// For RemoteDatabase, it is the equivalent RestNamespace.
async fn namespace_client(&self) -> Result<Arc<dyn LanceNamespace>>;
/// Get the configuration for constructing an equivalent namespace client.
/// Returns (impl_type, properties) where:
/// - impl_type: "dir" for DirectoryNamespace, "rest" for RestNamespace
/// - properties: configuration properties for the namespace
///
/// This is useful for Python bindings where we want to return a Python
/// namespace object rather than a Rust trait object.
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)>;
}

View File

@@ -1099,6 +1099,15 @@ impl Database for ListingDatabase {
})?;
Ok(Arc::new(namespace) as Arc<dyn lance_namespace::LanceNamespace>)
}
/// Describe the directory namespace equivalent to this listing database.
///
/// The properties hold the database URI under `root`, plus every storage
/// option re-keyed with a `storage.` prefix. The implementation type is
/// always "dir".
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)> {
    let root = std::iter::once(("root".to_string(), self.uri.clone()));
    let storage = self
        .storage_options
        .iter()
        .map(|(name, setting)| (format!("storage.{}", name), setting.clone()));

    let properties: HashMap<String, String> = root.chain(storage).collect();
    Ok(("dir".to_string(), properties))
}
}
#[cfg(test)]

View File

@@ -45,6 +45,10 @@ pub struct LanceNamespaceDatabase {
uri: String,
// Operations to push down to the namespace server
pushdown_operations: HashSet<PushdownOperation>,
// Namespace implementation type (e.g., "dir", "rest")
ns_impl: String,
// Namespace properties used to construct the namespace client
ns_properties: HashMap<String, String>,
}
impl LanceNamespaceDatabase {
@@ -74,6 +78,8 @@ impl LanceNamespaceDatabase {
session,
uri: format!("namespace://{}", ns_impl),
pushdown_operations,
ns_impl: ns_impl.to_string(),
ns_properties,
})
}
}
@@ -345,6 +351,10 @@ impl Database for LanceNamespaceDatabase {
async fn namespace_client(&self) -> Result<Arc<dyn LanceNamespace>> {
Ok(self.namespace.clone())
}
/// Return copies of the implementation type and properties stored on this
/// database (`ns_impl` and `ns_properties`).
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)> {
    let impl_type = self.ns_impl.clone();
    let properties = self.ns_properties.clone();
    Ok((impl_type, properties))
}
}
#[cfg(test)]

View File

@@ -52,6 +52,13 @@ pub struct ClientConfig {
pub tls_config: Option<TlsConfig>,
/// Provider for custom headers to be added to each request
pub header_provider: Option<Arc<dyn HeaderProvider>>,
/// User identifier for tracking purposes.
///
/// This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
/// It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
/// Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
/// variable that contains the user ID value.
pub user_id: Option<String>,
}
impl std::fmt::Debug for ClientConfig {
@@ -67,6 +74,7 @@ impl std::fmt::Debug for ClientConfig {
"header_provider",
&self.header_provider.as_ref().map(|_| "Some(...)"),
)
.field("user_id", &self.user_id)
.finish()
}
}
@@ -81,10 +89,41 @@ impl Default for ClientConfig {
id_delimiter: None,
tls_config: None,
header_provider: None,
user_id: None,
}
}
}
impl ClientConfig {
/// Resolve the user ID from the config or environment variables.
///
/// Resolution order:
/// 1. If `user_id` is set in the config, use that value
/// 2. If `LANCEDB_USER_ID` environment variable is set, use that value
/// 3. If `LANCEDB_USER_ID_ENV_KEY` is set, read the env var it points to
/// 4. Otherwise, return None
pub fn resolve_user_id(&self) -> Option<String> {
if self.user_id.is_some() {
return self.user_id.clone();
}
if let Ok(user_id) = std::env::var("LANCEDB_USER_ID")
&& !user_id.is_empty()
{
return Some(user_id);
}
if let Ok(env_key) = std::env::var("LANCEDB_USER_ID_ENV_KEY")
&& let Ok(user_id) = std::env::var(&env_key)
&& !user_id.is_empty()
{
return Some(user_id);
}
None
}
}
/// How to handle timeouts for HTTP requests.
#[derive(Clone, Default, Debug)]
pub struct TimeoutConfig {
@@ -464,6 +503,15 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
);
}
if let Some(user_id) = config.resolve_user_id() {
headers.insert(
HeaderName::from_static("x-lancedb-user-id"),
HeaderValue::from_str(&user_id).map_err(|_| Error::InvalidInput {
message: format!("non-ascii user_id '{}' provided", user_id),
})?,
);
}
Ok(headers)
}
@@ -1072,4 +1120,91 @@ mod tests {
_ => panic!("Expected Runtime error"),
}
}
/// Serializes the `resolve_user_id` tests that touch the environment.
///
/// Environment variables are process-wide and the test harness may run tests
/// on several threads at once, so unsynchronized `set_var` / `remove_var`
/// calls in different tests can observe each other's values.
static USER_ID_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

/// Every environment variable read or written by the `resolve_user_id` tests.
const USER_ID_ENV_VARS: [&str; 3] = [
    "LANCEDB_USER_ID",
    "LANCEDB_USER_ID_ENV_KEY",
    "MY_CUSTOM_USER_ID",
];

/// Removes every variable in `USER_ID_ENV_VARS`.
///
/// Must only be called while `USER_ID_ENV_LOCK` is held.
fn clear_user_id_env() {
    for key in USER_ID_ENV_VARS {
        // SAFETY: all environment mutation in these tests happens while
        // `USER_ID_ENV_LOCK` is held, so the tests do not race with each other.
        // NOTE(review): code outside these tests that touches the environment
        // concurrently is not covered by this lock.
        unsafe {
            std::env::remove_var(key);
        }
    }
}

/// Holds `USER_ID_ENV_LOCK` for the duration of a test and resets the
/// user-id environment variables on creation and on drop, so a failing
/// assertion cannot leak state into another test.
struct UserIdEnvGuard {
    _lock: std::sync::MutexGuard<'static, ()>,
}

impl UserIdEnvGuard {
    /// Acquires the lock, clears all user-id variables, then sets `vars`.
    fn with_vars(vars: &[(&str, &str)]) -> Self {
        // A poisoned lock only means another test panicked; the environment is
        // reset below, so it is safe to keep going.
        let lock = USER_ID_ENV_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        clear_user_id_env();
        for (key, value) in vars {
            // SAFETY: see `clear_user_id_env`; the lock is held here.
            unsafe {
                std::env::set_var(key, value);
            }
        }
        Self { _lock: lock }
    }
}

impl Drop for UserIdEnvGuard {
    fn drop(&mut self) {
        // Runs before `_lock` is released, so cleanup is still serialized.
        clear_user_id_env();
    }
}

#[test]
fn test_resolve_user_id_direct_value() {
    let config = ClientConfig {
        user_id: Some("direct-user-id".to_string()),
        ..Default::default()
    };
    assert_eq!(config.resolve_user_id(), Some("direct-user-id".to_string()));
}

#[test]
fn test_resolve_user_id_none() {
    // No variables set: every source is empty.
    let _env = UserIdEnvGuard::with_vars(&[]);
    let config = ClientConfig::default();
    assert_eq!(config.resolve_user_id(), None);
}

#[test]
fn test_resolve_user_id_from_env() {
    let _env = UserIdEnvGuard::with_vars(&[("LANCEDB_USER_ID", "env-user-id")]);
    let config = ClientConfig::default();
    assert_eq!(config.resolve_user_id(), Some("env-user-id".to_string()));
}

#[test]
fn test_resolve_user_id_from_env_key() {
    // `LANCEDB_USER_ID` is left unset so the indirect lookup is reached.
    let _env = UserIdEnvGuard::with_vars(&[
        ("LANCEDB_USER_ID_ENV_KEY", "MY_CUSTOM_USER_ID"),
        ("MY_CUSTOM_USER_ID", "custom-env-user-id"),
    ]);
    let config = ClientConfig::default();
    assert_eq!(
        config.resolve_user_id(),
        Some("custom-env-user-id".to_string())
    );
}

#[test]
fn test_resolve_user_id_direct_takes_precedence() {
    let _env = UserIdEnvGuard::with_vars(&[("LANCEDB_USER_ID", "env-user-id")]);
    let config = ClientConfig {
        user_id: Some("direct-user-id".to_string()),
        ..Default::default()
    };
    assert_eq!(config.resolve_user_id(), Some("direct-user-id".to_string()));
}

#[test]
fn test_resolve_user_id_empty_env_ignored() {
    // An empty `LANCEDB_USER_ID` must be treated the same as an unset one.
    let _env = UserIdEnvGuard::with_vars(&[("LANCEDB_USER_ID", "")]);
    let config = ClientConfig::default();
    assert_eq!(config.resolve_user_id(), None);
}
}

View File

@@ -777,6 +777,32 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
let namespace = builder.build();
Ok(Arc::new(namespace) as Arc<dyn lance_namespace::LanceNamespace>)
}
/// Describe the REST namespace equivalent to this remote database.
///
/// The properties contain the client host under `uri`, the id delimiter
/// under `delimiter`, each namespace header under `header.<name>`, and,
/// when a TLS config is present, its settings under `tls.*`. The
/// implementation type is always "rest".
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)> {
    let mut properties: HashMap<String, String> = HashMap::from([
        ("uri".to_string(), self.client.host().to_string()),
        ("delimiter".to_string(), self.client.id_delimiter.clone()),
    ]);

    properties.extend(
        self.namespace_headers
            .iter()
            .map(|(name, value)| (format!("header.{}", name), value.clone())),
    );

    if let Some(tls) = &self.tls_config {
        // These settings are optional and only emitted when configured.
        let optional_settings = [
            ("tls.cert_file", &tls.cert_file),
            ("tls.key_file", &tls.key_file),
            ("tls.ssl_ca_cert", &tls.ssl_ca_cert),
        ];
        for (property, setting) in optional_settings {
            if let Some(value) = setting {
                properties.insert(property.to_string(), value.clone());
            }
        }
        // The hostname assertion flag is always emitted alongside a TLS config.
        properties.insert(
            "tls.assert_hostname".to_string(),
            tls.assert_hostname.to_string(),
        );
    }

    Ok(("rest".to_string(), properties))
}
}
/// RemoteOptions contains a subset of StorageOptions that are compatible with Remote LanceDB connections