mirror of
https://github.com/lancedb/lancedb.git
synced 2026-04-11 02:10:40 +00:00
Compare commits
11 Commits
python-v0.
...
codex/upda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3bed9b6db8 | ||
|
|
2807ad6854 | ||
|
|
4761fa9bcb | ||
|
|
4c2939d66e | ||
|
|
a813ce2f71 | ||
|
|
a898dc81c2 | ||
|
|
de3f8097e7 | ||
|
|
0ac59de5f1 | ||
|
|
d082c2d2ac | ||
|
|
9d8699f99e | ||
|
|
aa2c7b3591 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.27.2"
|
||||
current_version = "0.28.0-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
1
.github/workflows/nodejs.yml
vendored
1
.github/workflows/nodejs.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- nodejs/**
|
||||
- rust/**
|
||||
- docs/src/js/**
|
||||
|
||||
1
.github/workflows/python.yml
vendored
1
.github/workflows/python.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- python/**
|
||||
- rust/**
|
||||
- .github/workflows/python.yml
|
||||
|
||||
1
.github/workflows/rust.yml
vendored
1
.github/workflows/rust.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- rust/**
|
||||
- .github/workflows/rust.yml
|
||||
|
||||
|
||||
71
Cargo.lock
generated
71
Cargo.lock
generated
@@ -3072,8 +3072,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "fsst"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"rand 0.9.2",
|
||||
@@ -4134,8 +4134,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4201,13 +4201,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-arrow"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-data",
|
||||
"arrow-ipc",
|
||||
"arrow-ord",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
@@ -4222,8 +4223,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-bitpacking"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"paste",
|
||||
@@ -4232,8 +4233,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-core"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4270,8 +4271,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datafusion"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4301,8 +4302,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datagen"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4320,8 +4321,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-encoding"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4358,8 +4359,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-file"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4391,8 +4392,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-index"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4456,8 +4457,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-io"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4501,8 +4502,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-linalg"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4518,8 +4519,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
@@ -4532,8 +4533,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace-impls"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-ipc",
|
||||
@@ -4578,8 +4579,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-table"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4618,8 +4619,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-testing"
|
||||
version = "5.0.0-beta.4"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.4#d9068e76a301df9e21d7282419f24f61a11375ac"
|
||||
version = "5.1.0-beta.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-schema",
|
||||
@@ -4630,7 +4631,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb"
|
||||
version = "0.27.2"
|
||||
version = "0.28.0-beta.1"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
@@ -4712,7 +4713,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-nodejs"
|
||||
version = "0.27.2"
|
||||
version = "0.28.0-beta.1"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4734,7 +4735,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-python"
|
||||
version = "0.30.2"
|
||||
version = "0.31.0-beta.1"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
|
||||
28
Cargo.toml
28
Cargo.toml
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=5.0.0-beta.4", default-features = false, "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=5.0.0-beta.4", default-features = false, "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=5.0.0-beta.4", default-features = false, "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=5.0.0-beta.4", "tag" = "v5.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=5.1.0-beta.1", default-features = false, "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=5.1.0-beta.1", default-features = false, "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=5.1.0-beta.1", default-features = false, "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "57.2", optional = false }
|
||||
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.27.2</version>
|
||||
<version>0.28.0-beta.1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -53,3 +53,18 @@ optional tlsConfig: TlsConfig;
|
||||
```ts
|
||||
optional userAgent: string;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### userId?
|
||||
|
||||
```ts
|
||||
optional userId: string;
|
||||
```
|
||||
|
||||
User identifier for tracking purposes.
|
||||
|
||||
This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
|
||||
It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
|
||||
Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
|
||||
variable that contains the user ID value.
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.27.2-final.0</version>
|
||||
<version>0.28.0-beta.1</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.27.2-final.0</version>
|
||||
<version>0.28.0-beta.1</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>5.0.0-beta.4</lance-core.version>
|
||||
<lance-core.version>5.1.0-beta.1</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.27.2"
|
||||
version = "0.28.0-beta.1"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.1",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.1",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.1",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.27.2",
|
||||
"version": "0.28.0-beta.1",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -92,6 +92,13 @@ pub struct ClientConfig {
|
||||
pub extra_headers: Option<HashMap<String, String>>,
|
||||
pub id_delimiter: Option<String>,
|
||||
pub tls_config: Option<TlsConfig>,
|
||||
/// User identifier for tracking purposes.
|
||||
///
|
||||
/// This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
|
||||
/// It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
|
||||
/// Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
|
||||
/// variable that contains the user ID value.
|
||||
pub user_id: Option<String>,
|
||||
}
|
||||
|
||||
impl From<TimeoutConfig> for lancedb::remote::TimeoutConfig {
|
||||
@@ -145,6 +152,7 @@ impl From<ClientConfig> for lancedb::remote::ClientConfig {
|
||||
id_delimiter: config.id_delimiter,
|
||||
tls_config: config.tls_config.map(Into::into),
|
||||
header_provider: None, // the header provider is set separately later
|
||||
user_id: config.user_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.31.0-beta.0"
|
||||
current_version = "0.31.0-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.31.0-beta.0"
|
||||
version = "0.31.0-beta.1"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -45,7 +45,7 @@ repository = "https://github.com/lancedb/lancedb"
|
||||
|
||||
[project.optional-dependencies]
|
||||
pylance = [
|
||||
"pylance>=5.0.0b3",
|
||||
"pylance>=5.0.0b5",
|
||||
]
|
||||
tests = [
|
||||
"aiohttp>=3.9.0",
|
||||
@@ -59,7 +59,7 @@ tests = [
|
||||
"polars>=0.19, <=1.3.0",
|
||||
"tantivy>=0.20.0",
|
||||
"pyarrow-stubs>=16.0",
|
||||
"pylance>=5.0.0b3",
|
||||
"pylance>=5.0.0b5",
|
||||
"requests>=2.31.0",
|
||||
"datafusion>=52,<53",
|
||||
]
|
||||
@@ -83,7 +83,7 @@ embeddings = [
|
||||
"colpali-engine>=0.3.10",
|
||||
"huggingface_hub>=0.19.0",
|
||||
"InstructorEmbedding>=1.0.1",
|
||||
"google.generativeai>=0.3.0",
|
||||
"google-genai>=1.0.0",
|
||||
"boto3>=1.28.57",
|
||||
"awscli>=1.44.38",
|
||||
"botocore>=1.31.57",
|
||||
|
||||
@@ -151,6 +151,9 @@ class Connection(object):
|
||||
async def drop_all_tables(
|
||||
self, namespace_path: Optional[List[str]] = None
|
||||
) -> None: ...
|
||||
async def namespace_client_config(
|
||||
self,
|
||||
) -> Dict[str, Any]: ...
|
||||
|
||||
class Table:
|
||||
def name(self) -> str: ...
|
||||
|
||||
@@ -23,11 +23,13 @@ from lancedb.embeddings.registry import EmbeddingFunctionRegistry
|
||||
from lancedb.common import data_to_reader, sanitize_uri, validate_schema
|
||||
from lancedb.background_loop import LOOP
|
||||
from lance_namespace import (
|
||||
LanceNamespace,
|
||||
ListNamespacesResponse,
|
||||
CreateNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
ListTablesResponse,
|
||||
connect as namespace_connect,
|
||||
)
|
||||
|
||||
from . import __version__
|
||||
@@ -507,6 +509,26 @@ class DBConnection(EnforceOverrides):
|
||||
def uri(self) -> str:
|
||||
return self._uri
|
||||
|
||||
def namespace_client(self) -> LanceNamespace:
|
||||
"""Get the equivalent namespace client for this connection.
|
||||
|
||||
For native storage connections, this returns a DirectoryNamespace
|
||||
pointing to the same root with the same storage options.
|
||||
|
||||
For namespace connections, this returns the backing namespace client.
|
||||
|
||||
For enterprise (remote) connections, this returns a RestNamespace
|
||||
with the same URI and authentication headers.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceNamespace
|
||||
The namespace client for this connection.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"namespace_client is not supported for this connection type"
|
||||
)
|
||||
|
||||
|
||||
class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
@@ -1044,6 +1066,20 @@ class LanceDBConnection(DBConnection):
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def namespace_client(self) -> LanceNamespace:
|
||||
"""Get the equivalent namespace client for this connection.
|
||||
|
||||
Returns a DirectoryNamespace pointing to the same root with the
|
||||
same storage options.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceNamespace
|
||||
The namespace client for this connection.
|
||||
"""
|
||||
return LOOP.run(self._conn.namespace_client())
|
||||
|
||||
@deprecation.deprecated(
|
||||
deprecated_in="0.15.1",
|
||||
removed_in="0.17",
|
||||
@@ -1716,6 +1752,25 @@ class AsyncConnection(object):
|
||||
namespace_path = []
|
||||
await self._inner.drop_all_tables(namespace_path=namespace_path)
|
||||
|
||||
async def namespace_client(self) -> LanceNamespace:
|
||||
"""Get the equivalent namespace client for this connection.
|
||||
|
||||
For native storage connections, this returns a DirectoryNamespace
|
||||
pointing to the same root with the same storage options.
|
||||
|
||||
For namespace connections, this returns the backing namespace client.
|
||||
|
||||
For enterprise (remote) connections, this returns a RestNamespace
|
||||
with the same URI and authentication headers.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceNamespace
|
||||
The namespace client for this connection.
|
||||
"""
|
||||
config = await self._inner.namespace_client_config()
|
||||
return namespace_connect(config["impl"], config["properties"])
|
||||
|
||||
@deprecation.deprecated(
|
||||
deprecated_in="0.15.1",
|
||||
removed_in="0.17",
|
||||
|
||||
@@ -19,10 +19,10 @@ from .utils import TEXT, api_key_not_found_help
|
||||
@register("gemini-text")
|
||||
class GeminiText(TextEmbeddingFunction):
|
||||
"""
|
||||
An embedding function that uses the Google's Gemini API. Requires GOOGLE_API_KEY to
|
||||
An embedding function that uses Google's Gemini API. Requires GOOGLE_API_KEY to
|
||||
be set.
|
||||
|
||||
https://ai.google.dev/docs/embeddings_guide
|
||||
https://ai.google.dev/gemini-api/docs/embeddings
|
||||
|
||||
Supports various tasks types:
|
||||
| Task Type | Description |
|
||||
@@ -46,9 +46,12 @@ class GeminiText(TextEmbeddingFunction):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str, default "models/embedding-001"
|
||||
The name of the model to use. See the Gemini documentation for a list of
|
||||
available models.
|
||||
name: str, default "gemini-embedding-001"
|
||||
The name of the model to use. Supported models include:
|
||||
- "gemini-embedding-001" (768 dimensions)
|
||||
|
||||
Note: The legacy "models/embedding-001" format is also supported but
|
||||
"gemini-embedding-001" is recommended.
|
||||
|
||||
query_task_type: str, default "retrieval_query"
|
||||
Sets the task type for the queries.
|
||||
@@ -77,7 +80,7 @@ class GeminiText(TextEmbeddingFunction):
|
||||
|
||||
"""
|
||||
|
||||
name: str = "models/embedding-001"
|
||||
name: str = "gemini-embedding-001"
|
||||
query_task_type: str = "retrieval_query"
|
||||
source_task_type: str = "retrieval_document"
|
||||
|
||||
@@ -114,23 +117,48 @@ class GeminiText(TextEmbeddingFunction):
|
||||
texts: list[str] or np.ndarray (of str)
|
||||
The texts to embed
|
||||
"""
|
||||
if (
|
||||
kwargs.get("task_type") == "retrieval_document"
|
||||
): # Provide a title to use existing API design
|
||||
title = "Embedding of a document"
|
||||
kwargs["title"] = title
|
||||
from google.genai import types
|
||||
|
||||
return [
|
||||
self.client.embed_content(model=self.name, content=text, **kwargs)[
|
||||
"embedding"
|
||||
]
|
||||
for text in texts
|
||||
]
|
||||
task_type = kwargs.get("task_type")
|
||||
|
||||
# Build content objects for embed_content
|
||||
contents = []
|
||||
for text in texts:
|
||||
if task_type == "retrieval_document":
|
||||
# Provide a title for retrieval_document task
|
||||
contents.append(
|
||||
{"parts": [{"text": "Embedding of a document"}, {"text": text}]}
|
||||
)
|
||||
else:
|
||||
contents.append({"parts": [{"text": text}]})
|
||||
|
||||
# Build config
|
||||
config_kwargs = {}
|
||||
if task_type:
|
||||
config_kwargs["task_type"] = task_type.upper() # API expects uppercase
|
||||
|
||||
# Call embed_content for each content
|
||||
embeddings = []
|
||||
for content in contents:
|
||||
config = (
|
||||
types.EmbedContentConfig(**config_kwargs) if config_kwargs else None
|
||||
)
|
||||
response = self.client.models.embed_content(
|
||||
model=self.name,
|
||||
contents=content,
|
||||
config=config,
|
||||
)
|
||||
embeddings.append(response.embeddings[0].values)
|
||||
|
||||
return embeddings
|
||||
|
||||
@cached_property
|
||||
def client(self):
|
||||
genai = attempt_import_or_raise("google.generativeai", "google.generativeai")
|
||||
attempt_import_or_raise("google.genai", "google-genai")
|
||||
|
||||
if not os.environ.get("GOOGLE_API_KEY"):
|
||||
api_key_not_found_help("google")
|
||||
return genai
|
||||
|
||||
from google import genai as genai_module
|
||||
|
||||
return genai_module.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
|
||||
|
||||
@@ -890,6 +890,20 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
)
|
||||
|
||||
@override
|
||||
def namespace_client(self) -> LanceNamespace:
|
||||
"""Get the namespace client for this connection.
|
||||
|
||||
For namespace connections, this returns the backing namespace client
|
||||
that was provided during construction.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceNamespace
|
||||
The namespace client for this connection.
|
||||
"""
|
||||
return self._namespace_client
|
||||
|
||||
|
||||
class AsyncLanceNamespaceDBConnection:
|
||||
"""
|
||||
@@ -1387,6 +1401,19 @@ class AsyncLanceNamespaceDBConnection:
|
||||
page_token=response.page_token,
|
||||
)
|
||||
|
||||
async def namespace_client(self) -> LanceNamespace:
|
||||
"""Get the namespace client for this connection.
|
||||
|
||||
For namespace connections, this returns the backing namespace client
|
||||
that was provided during construction.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceNamespace
|
||||
The namespace client for this connection.
|
||||
"""
|
||||
return self._namespace_client
|
||||
|
||||
|
||||
def connect_namespace(
|
||||
namespace_client_impl: str,
|
||||
|
||||
@@ -284,9 +284,8 @@ class Permutations:
|
||||
self.permutation_table = permutation_table
|
||||
|
||||
if permutation_table.schema.metadata is not None:
|
||||
split_names = permutation_table.schema.metadata.get(
|
||||
b"split_names", None
|
||||
).decode("utf-8")
|
||||
raw = permutation_table.schema.metadata.get(b"split_names")
|
||||
split_names = raw.decode("utf-8") if raw is not None else None
|
||||
if split_names is not None:
|
||||
self.split_names = json.loads(split_names)
|
||||
self.split_dict = {
|
||||
@@ -460,9 +459,8 @@ class Permutation:
|
||||
f"Cannot create a permutation on split `{split}`"
|
||||
" because no split names are defined in the permutation table"
|
||||
)
|
||||
split_names = permutation_table.schema.metadata.get(
|
||||
b"split_names", None
|
||||
).decode("utf-8")
|
||||
raw = permutation_table.schema.metadata.get(b"split_names")
|
||||
split_names = raw.decode("utf-8") if raw is not None else None
|
||||
if split_names is None:
|
||||
raise ValueError(
|
||||
f"Cannot create a permutation on split `{split}`"
|
||||
|
||||
@@ -10,6 +10,7 @@ import sys
|
||||
import types
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import date, datetime
|
||||
from enum import Enum
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
@@ -314,6 +315,19 @@ def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
|
||||
return pa.list_(pa.list_(tp.value_arrow_type(), tp.dim()))
|
||||
# For regular Vector
|
||||
return pa.list_(tp.value_arrow_type(), tp.dim())
|
||||
if _safe_issubclass(tp, Enum):
|
||||
# Map Enum to the Arrow type of its value.
|
||||
# For string-valued enums, use dictionary encoding for efficiency.
|
||||
# For integer enums, use the native type.
|
||||
# Fall back to utf8 for mixed-type or empty enums.
|
||||
value_types = {type(m.value) for m in tp}
|
||||
if len(value_types) == 1:
|
||||
value_type = value_types.pop()
|
||||
if value_type is str:
|
||||
# Use dictionary encoding for string enums
|
||||
return pa.dictionary(pa.int32(), pa.utf8())
|
||||
return _py_type_to_arrow_type(value_type, field)
|
||||
return pa.utf8()
|
||||
return _py_type_to_arrow_type(tp, field)
|
||||
|
||||
|
||||
|
||||
@@ -145,6 +145,33 @@ class TlsConfig:
|
||||
|
||||
@dataclass
|
||||
class ClientConfig:
|
||||
"""Configuration for the LanceDB Cloud HTTP client.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
user_agent: str
|
||||
User agent string sent with requests.
|
||||
retry_config: RetryConfig
|
||||
Configuration for retrying failed requests.
|
||||
timeout_config: Optional[TimeoutConfig]
|
||||
Configuration for request timeouts.
|
||||
extra_headers: Optional[dict]
|
||||
Additional headers to include in requests.
|
||||
id_delimiter: Optional[str]
|
||||
The delimiter to use when constructing object identifiers.
|
||||
tls_config: Optional[TlsConfig]
|
||||
TLS/mTLS configuration for secure connections.
|
||||
header_provider: Optional[HeaderProvider]
|
||||
Provider for dynamic headers to be added to each request.
|
||||
user_id: Optional[str]
|
||||
User identifier for tracking purposes. This is sent as the
|
||||
`x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
|
||||
|
||||
This can also be set via the `LANCEDB_USER_ID` environment variable.
|
||||
Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another
|
||||
environment variable that contains the user ID value.
|
||||
"""
|
||||
|
||||
user_agent: str = f"LanceDB-Python-Client/{__version__}"
|
||||
retry_config: RetryConfig = field(default_factory=RetryConfig)
|
||||
timeout_config: Optional[TimeoutConfig] = field(default_factory=TimeoutConfig)
|
||||
@@ -152,6 +179,7 @@ class ClientConfig:
|
||||
id_delimiter: Optional[str] = None
|
||||
tls_config: Optional[TlsConfig] = None
|
||||
header_provider: Optional["HeaderProvider"] = None
|
||||
user_id: Optional[str] = None
|
||||
|
||||
def __post_init__(self):
|
||||
if isinstance(self.retry_config, dict):
|
||||
|
||||
@@ -24,6 +24,7 @@ from ..common import DATA
|
||||
from ..db import DBConnection, LOOP
|
||||
from ..embeddings import EmbeddingFunctionConfig
|
||||
from lance_namespace import (
|
||||
LanceNamespace,
|
||||
CreateNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
@@ -570,6 +571,19 @@ class RemoteDBConnection(DBConnection):
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def namespace_client(self) -> LanceNamespace:
|
||||
"""Get the equivalent namespace client for this connection.
|
||||
|
||||
Returns a RestNamespace with the same URI and authentication headers.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceNamespace
|
||||
The namespace client for this connection.
|
||||
"""
|
||||
return LOOP.run(self._conn.namespace_client())
|
||||
|
||||
async def close(self):
|
||||
"""Close the connection to the database."""
|
||||
self._conn.close()
|
||||
|
||||
@@ -270,15 +270,17 @@ def _sanitize_data(
|
||||
reader,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
target_schema=target_schema,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
if target_schema is None:
|
||||
target_schema, reader = _infer_target_schema(reader)
|
||||
|
||||
if metadata:
|
||||
new_metadata = target_schema.metadata or {}
|
||||
new_metadata.update(metadata)
|
||||
target_schema = target_schema.with_metadata(new_metadata)
|
||||
target_schema = target_schema.with_metadata(
|
||||
_merge_metadata(target_schema.metadata, metadata)
|
||||
)
|
||||
|
||||
_validate_schema(target_schema)
|
||||
reader = _cast_to_target_schema(reader, target_schema, allow_subschema)
|
||||
@@ -294,7 +296,7 @@ def _cast_to_target_schema(
|
||||
# pa.Table.cast expects field order not to be changed.
|
||||
# Lance doesn't care about field order, so we don't need to rearrange fields
|
||||
# to match the target schema. We just need to correctly cast the fields.
|
||||
if reader.schema == target_schema:
|
||||
if reader.schema.equals(target_schema, check_metadata=True):
|
||||
# Fast path when the schemas are already the same
|
||||
return reader
|
||||
|
||||
@@ -314,7 +316,13 @@ def _cast_to_target_schema(
|
||||
def gen():
|
||||
for batch in reader:
|
||||
# Table but not RecordBatch has cast.
|
||||
yield pa.Table.from_batches([batch]).cast(reordered_schema).to_batches()[0]
|
||||
cast_batches = (
|
||||
pa.Table.from_batches([batch]).cast(reordered_schema).to_batches()
|
||||
)
|
||||
if cast_batches:
|
||||
yield pa.RecordBatch.from_arrays(
|
||||
cast_batches[0].columns, schema=reordered_schema
|
||||
)
|
||||
|
||||
return pa.RecordBatchReader.from_batches(reordered_schema, gen())
|
||||
|
||||
@@ -332,37 +340,51 @@ def _align_field_types(
|
||||
if target_field is None:
|
||||
raise ValueError(f"Field '{field.name}' not found in target schema")
|
||||
if pa.types.is_struct(target_field.type):
|
||||
new_type = pa.struct(
|
||||
_align_field_types(
|
||||
field.type.fields,
|
||||
target_field.type.fields,
|
||||
if pa.types.is_struct(field.type):
|
||||
new_type = pa.struct(
|
||||
_align_field_types(
|
||||
field.type.fields,
|
||||
target_field.type.fields,
|
||||
)
|
||||
)
|
||||
)
|
||||
else:
|
||||
new_type = target_field.type
|
||||
elif pa.types.is_list(target_field.type):
|
||||
new_type = pa.list_(
|
||||
_align_field_types(
|
||||
[field.type.value_field],
|
||||
[target_field.type.value_field],
|
||||
)[0]
|
||||
)
|
||||
if _is_list_like(field.type):
|
||||
new_type = pa.list_(
|
||||
_align_field_types(
|
||||
[field.type.value_field],
|
||||
[target_field.type.value_field],
|
||||
)[0]
|
||||
)
|
||||
else:
|
||||
new_type = target_field.type
|
||||
elif pa.types.is_large_list(target_field.type):
|
||||
new_type = pa.large_list(
|
||||
_align_field_types(
|
||||
[field.type.value_field],
|
||||
[target_field.type.value_field],
|
||||
)[0]
|
||||
)
|
||||
if _is_list_like(field.type):
|
||||
new_type = pa.large_list(
|
||||
_align_field_types(
|
||||
[field.type.value_field],
|
||||
[target_field.type.value_field],
|
||||
)[0]
|
||||
)
|
||||
else:
|
||||
new_type = target_field.type
|
||||
elif pa.types.is_fixed_size_list(target_field.type):
|
||||
new_type = pa.list_(
|
||||
_align_field_types(
|
||||
[field.type.value_field],
|
||||
[target_field.type.value_field],
|
||||
)[0],
|
||||
target_field.type.list_size,
|
||||
)
|
||||
if _is_list_like(field.type):
|
||||
new_type = pa.list_(
|
||||
_align_field_types(
|
||||
[field.type.value_field],
|
||||
[target_field.type.value_field],
|
||||
)[0],
|
||||
target_field.type.list_size,
|
||||
)
|
||||
else:
|
||||
new_type = target_field.type
|
||||
else:
|
||||
new_type = target_field.type
|
||||
new_fields.append(pa.field(field.name, new_type, field.nullable))
|
||||
new_fields.append(
|
||||
pa.field(field.name, new_type, field.nullable, target_field.metadata)
|
||||
)
|
||||
return new_fields
|
||||
|
||||
|
||||
@@ -440,6 +462,7 @@ def sanitize_create_table(
|
||||
schema = data.schema
|
||||
|
||||
if metadata:
|
||||
metadata = _merge_metadata(schema.metadata, metadata)
|
||||
schema = schema.with_metadata(metadata)
|
||||
# Need to apply metadata to the data as well
|
||||
if isinstance(data, pa.Table):
|
||||
@@ -492,9 +515,9 @@ def _append_vector_columns(
|
||||
vector columns to the table.
|
||||
"""
|
||||
if schema is None:
|
||||
metadata = metadata or {}
|
||||
metadata = _merge_metadata(metadata)
|
||||
else:
|
||||
metadata = schema.metadata or metadata or {}
|
||||
metadata = _merge_metadata(schema.metadata, metadata)
|
||||
functions = EmbeddingFunctionRegistry.get_instance().parse_functions(metadata)
|
||||
|
||||
if not functions:
|
||||
@@ -3211,43 +3234,157 @@ def _handle_bad_vectors(
|
||||
reader: pa.RecordBatchReader,
|
||||
on_bad_vectors: Literal["error", "drop", "fill", "null"] = "error",
|
||||
fill_value: float = 0.0,
|
||||
target_schema: Optional[pa.Schema] = None,
|
||||
metadata: Optional[dict] = None,
|
||||
) -> pa.RecordBatchReader:
|
||||
vector_columns = []
|
||||
vector_columns = _find_vector_columns(reader.schema, target_schema, metadata)
|
||||
if not vector_columns:
|
||||
return reader
|
||||
|
||||
for field in reader.schema:
|
||||
# They can provide a 'vector' column that isn't yet a FSL
|
||||
named_vector_col = (
|
||||
(
|
||||
pa.types.is_list(field.type)
|
||||
or pa.types.is_large_list(field.type)
|
||||
or pa.types.is_fixed_size_list(field.type)
|
||||
)
|
||||
and pa.types.is_floating(field.type.value_type)
|
||||
and field.name == VECTOR_COLUMN_NAME
|
||||
)
|
||||
# TODO: we're making an assumption that fixed size list of 10 or more
|
||||
# is a vector column. This is definitely a bit hacky.
|
||||
likely_vector_col = (
|
||||
pa.types.is_fixed_size_list(field.type)
|
||||
and pa.types.is_floating(field.type.value_type)
|
||||
and (field.type.list_size >= 10)
|
||||
)
|
||||
|
||||
if named_vector_col or likely_vector_col:
|
||||
vector_columns.append(field.name)
|
||||
output_schema = _vector_output_schema(reader.schema, vector_columns)
|
||||
|
||||
def gen():
|
||||
for batch in reader:
|
||||
for name in vector_columns:
|
||||
pending_dims = []
|
||||
for vector_column in vector_columns:
|
||||
dim = vector_column["expected_dim"]
|
||||
if target_schema is not None and dim is None:
|
||||
dim = _infer_vector_dim(batch[vector_column["name"]])
|
||||
pending_dims.append(vector_column)
|
||||
batch = _handle_bad_vector_column(
|
||||
batch,
|
||||
vector_column_name=name,
|
||||
vector_column_name=vector_column["name"],
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
expected_dim=dim,
|
||||
expected_value_type=vector_column["expected_value_type"],
|
||||
)
|
||||
yield batch
|
||||
for vector_column in pending_dims:
|
||||
if vector_column["expected_dim"] is None:
|
||||
vector_column["expected_dim"] = _infer_vector_dim(
|
||||
batch[vector_column["name"]]
|
||||
)
|
||||
if batch.schema.equals(output_schema, check_metadata=True):
|
||||
yield batch
|
||||
continue
|
||||
|
||||
return pa.RecordBatchReader.from_batches(reader.schema, gen())
|
||||
cast_batches = (
|
||||
pa.Table.from_batches([batch]).cast(output_schema).to_batches()
|
||||
)
|
||||
if cast_batches:
|
||||
yield pa.RecordBatch.from_arrays(
|
||||
cast_batches[0].columns,
|
||||
schema=output_schema,
|
||||
)
|
||||
|
||||
return pa.RecordBatchReader.from_batches(output_schema, gen())
|
||||
|
||||
|
||||
def _find_vector_columns(
    reader_schema: pa.Schema,
    target_schema: Optional[pa.Schema],
    metadata: Optional[dict],
) -> List[dict]:
    """Pick the columns that should be treated as vector columns.

    Parameters
    ----------
    reader_schema: pa.Schema
        Schema of the incoming data.
    target_schema: Optional[pa.Schema]
        Schema the data is being written into, if known.
    metadata: Optional[dict]
        Extra schema metadata; merged over ``target_schema.metadata`` before
        embedding function configs are parsed from it.

    Returns
    -------
    List[dict]
        One dict per selected column with the keys ``"name"``,
        ``"expected_dim"`` and ``"expected_value_type"``. Without a target
        schema both expectations are ``None``.
    """
    if target_schema is None:
        # No target schema: decide from the incoming schema alone.
        vector_columns = []
        for field in reader_schema:
            # A float list column named like the default vector column
            # qualifies even if it is not (yet) a fixed size list.
            named_vector_col = (
                _is_list_like(field.type)
                and pa.types.is_floating(field.type.value_type)
                and field.name == VECTOR_COLUMN_NAME
            )
            # Heuristic: a float fixed size list of 10 or more values is
            # assumed to be a vector column.
            likely_vector_col = (
                pa.types.is_fixed_size_list(field.type)
                and pa.types.is_floating(field.type.value_type)
                and (field.type.list_size >= 10)
            )
            if named_vector_col or likely_vector_col:
                vector_columns.append(
                    {
                        "name": field.name,
                        "expected_dim": None,
                        "expected_value_type": None,
                    }
                )
        return vector_columns

    reader_column_names = set(reader_schema.names)
    active_metadata = _merge_metadata(target_schema.metadata, metadata)
    embedding_function_columns = set(
        EmbeddingFunctionRegistry.get_instance().parse_functions(active_metadata).keys()
    )
    vector_columns = []
    for field in target_schema:
        # Only columns actually present in the incoming data are considered.
        if field.name not in reader_column_names:
            continue
        # The target column itself must be a list of floats.
        if not _is_list_like(field.type) or not pa.types.is_floating(
            field.type.value_type
        ):
            continue

        reader_field = reader_schema.field(field.name)
        named_vector_col = (
            field.name in embedding_function_columns
            or field.name == VECTOR_COLUMN_NAME
            or (field.name == "embedding" and pa.types.is_fixed_size_list(field.type))
        )
        # Incoming data that is already a float fixed size list of 10 or more
        # values is handled regardless of the column name.
        typed_fixed_vector_col = (
            pa.types.is_fixed_size_list(reader_field.type)
            and pa.types.is_floating(reader_field.type.value_type)
            and reader_field.type.list_size >= 10
        )

        if named_vector_col or typed_fixed_vector_col:
            vector_columns.append(
                {
                    "name": field.name,
                    # The dimension is only known up front for fixed size lists.
                    "expected_dim": (
                        field.type.list_size
                        if pa.types.is_fixed_size_list(field.type)
                        else None
                    ),
                    "expected_value_type": field.type.value_type,
                }
            )

    return vector_columns
|
||||
|
||||
|
||||
def _vector_output_schema(
    reader_schema: pa.Schema,
    vector_columns: List[dict],
) -> pa.Schema:
    """Build the schema of the data after bad-vector handling.

    Fields that are not listed in ``vector_columns`` keep their type; listed
    fields get the type computed by ``_vector_output_type``. Field names,
    nullability, field metadata and schema metadata are carried over from
    ``reader_schema``.
    """
    columns_by_name = {column["name"]: column for column in vector_columns}
    fields = []
    for field in reader_schema:
        column = columns_by_name.get(field.name)
        if column is None:
            output_type = field.type
        else:
            output_type = _vector_output_type(field, column)
        fields.append(pa.field(field.name, output_type, field.nullable, field.metadata))
    return pa.schema(fields, metadata=reader_schema.metadata)
|
||||
|
||||
|
||||
def _vector_output_type(field: pa.Field, vector_column: dict) -> pa.DataType:
    """Return the Arrow type a vector column has after bad-vector handling.

    Parameters
    ----------
    field: pa.Field
        The incoming field.
    vector_column: dict
        Entry with the keys ``"expected_value_type"`` and ``"expected_dim"``.

    Returns
    -------
    pa.DataType
        * ``field.type`` unchanged when it is not list-like;
        * a variable-size list of the expected value type when one is given
          and the incoming values are null- or integer-typed;
        * a variable-size list of the incoming value type when the incoming
          fixed size list length differs from the expected dimension;
        * otherwise ``field.type`` unchanged.
    """
    if not _is_list_like(field.type):
        return field.type

    # pa.types.is_integer is true for signed and unsigned integer types alike,
    # so no separate unsigned check is needed.
    if vector_column["expected_value_type"] is not None and (
        pa.types.is_null(field.type.value_type)
        or pa.types.is_integer(field.type.value_type)
    ):
        return pa.list_(vector_column["expected_value_type"])

    if (
        vector_column["expected_dim"] is not None
        and pa.types.is_fixed_size_list(field.type)
        and field.type.list_size != vector_column["expected_dim"]
    ):
        return pa.list_(field.type.value_type)

    return field.type
|
||||
|
||||
|
||||
def _handle_bad_vector_column(
|
||||
@@ -3255,6 +3392,8 @@ def _handle_bad_vector_column(
|
||||
vector_column_name: str,
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
expected_dim: Optional[int] = None,
|
||||
expected_value_type: Optional[pa.DataType] = None,
|
||||
) -> pa.RecordBatch:
|
||||
"""
|
||||
Ensure that the vector column exists and has type fixed_size_list(float)
|
||||
@@ -3271,14 +3410,39 @@ def _handle_bad_vector_column(
|
||||
fill_value: float, default 0.0
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
"""
|
||||
position = data.column_names.index(vector_column_name)
|
||||
vec_arr = data[vector_column_name]
|
||||
if not _is_list_like(vec_arr.type):
|
||||
return data
|
||||
|
||||
has_nan = has_nan_values(vec_arr)
|
||||
if (
|
||||
expected_dim is not None
|
||||
and pa.types.is_fixed_size_list(vec_arr.type)
|
||||
and vec_arr.type.list_size != expected_dim
|
||||
):
|
||||
vec_arr = pa.array(vec_arr.to_pylist(), type=pa.list_(vec_arr.type.value_type))
|
||||
data = data.set_column(position, vector_column_name, vec_arr)
|
||||
|
||||
if pa.types.is_fixed_size_list(vec_arr.type):
|
||||
if expected_value_type is not None and (
|
||||
pa.types.is_integer(vec_arr.type.value_type)
|
||||
or pa.types.is_unsigned_integer(vec_arr.type.value_type)
|
||||
):
|
||||
vec_arr = pa.array(vec_arr.to_pylist(), type=pa.list_(expected_value_type))
|
||||
data = data.set_column(position, vector_column_name, vec_arr)
|
||||
|
||||
if pa.types.is_floating(vec_arr.type.value_type):
|
||||
has_nan = has_nan_values(vec_arr)
|
||||
else:
|
||||
has_nan = pa.array([False] * len(vec_arr))
|
||||
|
||||
if expected_dim is not None:
|
||||
dim = expected_dim
|
||||
elif pa.types.is_fixed_size_list(vec_arr.type):
|
||||
dim = vec_arr.type.list_size
|
||||
else:
|
||||
dim = _modal_list_size(vec_arr)
|
||||
dim = _infer_vector_dim(vec_arr)
|
||||
if dim is None:
|
||||
return data
|
||||
has_wrong_dim = pc.not_equal(pc.list_value_length(vec_arr), dim)
|
||||
|
||||
has_bad_vectors = pc.any(has_nan).as_py() or pc.any(has_wrong_dim).as_py()
|
||||
@@ -3316,13 +3480,12 @@ def _handle_bad_vector_column(
|
||||
)
|
||||
vec_arr = pc.if_else(
|
||||
is_bad,
|
||||
pa.scalar([fill_value] * dim),
|
||||
pa.scalar([fill_value] * dim, type=vec_arr.type),
|
||||
vec_arr,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Invalid value for on_bad_vectors: {on_bad_vectors}")
|
||||
|
||||
position = data.column_names.index(vector_column_name)
|
||||
return data.set_column(position, vector_column_name, vec_arr)
|
||||
|
||||
|
||||
@@ -3343,6 +3506,28 @@ def has_nan_values(arr: Union[pa.ListArray, pa.ChunkedArray]) -> pa.BooleanArray
|
||||
return pc.is_in(indices, has_nan_indices)
|
||||
|
||||
|
||||
def _is_list_like(data_type: pa.DataType) -> bool:
    """Return True for list, large list and fixed size list Arrow types."""
    return (
        pa.types.is_list(data_type)
        or pa.types.is_large_list(data_type)
        or pa.types.is_fixed_size_list(data_type)
    )
|
||||
|
||||
|
||||
def _merge_metadata(*metadata_dicts: Optional[dict]) -> dict:
|
||||
merged = {}
|
||||
for metadata in metadata_dicts:
|
||||
if metadata is None:
|
||||
continue
|
||||
for key, value in metadata.items():
|
||||
if isinstance(key, str):
|
||||
key = key.encode("utf-8")
|
||||
if isinstance(value, str):
|
||||
value = value.encode("utf-8")
|
||||
merged[key] = value
|
||||
return merged
|
||||
|
||||
|
||||
def _name_suggests_vector_column(field_name: str) -> bool:
|
||||
"""Check if a field name indicates a vector column."""
|
||||
name_lower = field_name.lower()
|
||||
@@ -3410,6 +3595,16 @@ def _modal_list_size(arr: Union[pa.ListArray, pa.ChunkedArray]) -> int:
|
||||
return pc.mode(pc.list_value_length(arr))[0].as_py()["mode"]
|
||||
|
||||
|
||||
def _infer_vector_dim(arr: Union[pa.Array, pa.ChunkedArray]) -> Optional[int]:
    """Infer a vector dimension from the list lengths found in ``arr``.

    Returns
    -------
    Optional[int]
        The mode of the list lengths greater than zero, or ``None`` when
        ``arr`` is not list-like or no such length is left after filtering.
    """
    if not _is_list_like(arr.type):
        return None
    lengths = pc.list_value_length(arr)
    # Empty lists carry no information about the intended dimension.
    lengths = pc.filter(lengths, pc.greater(lengths, 0))
    if len(lengths) == 0:
        return None
    return pc.mode(lengths)[0].as_py()["mode"]
|
||||
|
||||
|
||||
def _validate_schema(schema: pa.Schema):
|
||||
"""
|
||||
Make sure the metadata is valid utf8
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
|
||||
import re
|
||||
import sys
|
||||
from datetime import timedelta
|
||||
import os
|
||||
|
||||
@@ -1048,3 +1049,59 @@ def test_clone_table_deep_clone_fails(tmp_path):
|
||||
source_uri = os.path.join(tmp_path, "source.lance")
|
||||
with pytest.raises(Exception, match="Deep clone is not yet implemented"):
|
||||
db.clone_table("cloned", source_uri, is_shallow=False)
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
def test_namespace_client_native_storage(tmp_path):
    """Test namespace_client() returns DirectoryNamespace for native storage."""
    from lance.namespace import DirectoryNamespace

    db = lancedb.connect(tmp_path)
    ns_client = db.namespace_client()

    assert isinstance(ns_client, DirectoryNamespace)
    assert str(tmp_path) in ns_client.namespace_id()
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
def test_namespace_client_with_storage_options(tmp_path):
    """Test namespace_client() preserves storage options."""
    from lance.namespace import DirectoryNamespace

    storage_options = {"timeout": "10s"}
    db = lancedb.connect(tmp_path, storage_options=storage_options)
    ns_client = db.namespace_client()

    # NOTE(review): only the client type is asserted here; the storage
    # options themselves are not inspected.
    assert isinstance(ns_client, DirectoryNamespace)
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
def test_namespace_client_operations(tmp_path):
    """Test that namespace_client() returns a functional namespace client."""
    db = lancedb.connect(tmp_path)
    ns_client = db.namespace_client()

    # Create a table through the main db connection
    data = [{"id": 1, "text": "hello", "vector": [1.0, 2.0]}]
    db.create_table("test_table", data=data)

    # Verify the namespace client can see the table
    from lance_namespace import ListTablesRequest

    # id=[] means root namespace
    response = ns_client.list_tables(ListTablesRequest(id=[]))
    # Tables can be strings or objects with name attribute
    table_names = [t.name if hasattr(t, "name") else t for t in response.tables]
    assert "test_table" in table_names
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
def test_namespace_client_namespace_connection(tmp_path):
    """Test namespace_client() returns the backing client for namespace connections."""
    from lance.namespace import DirectoryNamespace

    db = lancedb.connect_namespace("dir", {"root": str(tmp_path)})
    ns_client = db.namespace_client()

    assert isinstance(ns_client, DirectoryNamespace)
    assert str(tmp_path) in ns_client.namespace_id()
|
||||
|
||||
@@ -522,6 +522,50 @@ def test_no_split_names(some_table: Table):
|
||||
assert permutations[1].num_rows == 500
|
||||
|
||||
|
||||
def test_permutations_metadata_without_split_names_key(mem_db: DBConnection):
    """Regression: schema metadata present but missing split_names key must not crash.

    Previously, `.get(b"split_names", None).decode()` was called unconditionally,
    so any permutation table whose metadata dict had other keys but no split_names
    raised AttributeError: 'NoneType' has no attribute 'decode'.
    """
    base = mem_db.create_table("base_nosplit", pa.table({"x": range(10)}))

    # Build a permutation-like table that carries some metadata but NOT split_names.
    raw = pa.table(
        {
            "row_id": pa.array(range(10), type=pa.uint64()),
            "split_id": pa.array([0] * 10, type=pa.uint32()),
        }
    ).replace_schema_metadata({b"other_key": b"other_value"})
    perm_tbl = mem_db.create_table("perm_nosplit", raw)

    permutations = Permutations(base, perm_tbl)
    assert permutations.split_names == []
    assert permutations.split_dict == {}
|
||||
|
||||
|
||||
def test_from_tables_string_split_missing_names_key(mem_db: DBConnection):
    """Regression: from_tables() with a string split must raise ValueError, not
    AttributeError.

    Previously, `.get(b"split_names", None).decode()` crashed with AttributeError
    when the metadata dict existed but had no split_names key.
    """
    base = mem_db.create_table("base_strsplit", pa.table({"x": range(10)}))

    # Same permutation-like table as above: metadata present, no split_names.
    raw = pa.table(
        {
            "row_id": pa.array(range(10), type=pa.uint64()),
            "split_id": pa.array([0] * 10, type=pa.uint32()),
        }
    ).replace_schema_metadata({b"other_key": b"other_value"})
    perm_tbl = mem_db.create_table("perm_strsplit", raw)

    with pytest.raises(ValueError, match="no split names are defined"):
        Permutation.from_tables(base, perm_tbl, split="train")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def some_perm_table(some_table: Table) -> Table:
|
||||
return (
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
import json
|
||||
from datetime import date, datetime
|
||||
from enum import Enum
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import pyarrow as pa
|
||||
@@ -673,3 +674,29 @@ async def test_aliases_in_lance_model_async(mem_db_async):
|
||||
assert hasattr(model, "name")
|
||||
assert hasattr(model, "distance")
|
||||
assert model.distance < 0.01
|
||||
|
||||
|
||||
def test_enum_types():
    """Enum fields should map to the Arrow type of their value (issue #1846)."""

    class StrStatus(str, Enum):
        PENDING = "pending"
        RUNNING = "running"
        DONE = "done"

    class IntPriority(int, Enum):
        LOW = 1
        MEDIUM = 2
        HIGH = 3

    class TestModel(pydantic.BaseModel):
        status: StrStatus
        priority: IntPriority
        opt_status: Optional[StrStatus] = None

    schema = pydantic_to_schema(TestModel)

    # String enums are dictionary encoded; integer enums use int64.
    assert schema.field("status").type == pa.dictionary(pa.int32(), pa.utf8())
    assert schema.field("priority").type == pa.int64()
    assert schema.field("opt_status").type == pa.dictionary(pa.int32(), pa.utf8())
    assert schema.field("opt_status").nullable
|
||||
|
||||
@@ -1049,6 +1049,231 @@ def test_add_with_nans(mem_db: DBConnection):
|
||||
assert np.allclose(v, np.array([0.0, 0.0]))
|
||||
|
||||
|
||||
def test_add_with_empty_fixed_size_list_drops_bad_rows(mem_db: DBConnection):
    """With on_bad_vectors="drop", a row with an empty embedding is dropped and
    the 16-value row is kept."""

    class Schema(LanceModel):
        text: str
        embedding: Vector(16)

    table = mem_db.create_table("test_empty_embeddings", schema=Schema)
    table.add(
        [
            {"text": "hello", "embedding": []},
            {"text": "bar", "embedding": [0.1] * 16},
        ],
        on_bad_vectors="drop",
    )

    data = table.to_arrow()
    assert data["text"].to_pylist() == ["bar"]
    assert np.allclose(data["embedding"].to_pylist()[0], np.array([0.1] * 16))
|
||||
|
||||
|
||||
def test_add_with_integer_embeddings_preserves_casting(mem_db: DBConnection):
    """Integer embedding values added with on_bad_vectors="drop" are stored as
    floats."""

    class Schema(LanceModel):
        text: str
        embedding: Vector(4)

    table = mem_db.create_table("test_integer_embeddings", schema=Schema)
    table.add(
        [{"text": "foo", "embedding": [1, 2, 3, 4]}],
        on_bad_vectors="drop",
    )

    assert table.to_arrow()["embedding"].to_pylist() == [[1.0, 2.0, 3.0, 4.0]]
|
||||
|
||||
|
||||
def test_on_bad_vectors_does_not_handle_non_vector_fixed_size_lists(
    mem_db: DBConnection,
):
    """A wrong-length `bbox` fixed size list next to a valid `vector` raises
    instead of being dropped."""
    schema = pa.schema(
        [
            pa.field("vector", pa.list_(pa.float32(), 4)),
            pa.field("bbox", pa.list_(pa.float32(), 4)),
        ]
    )
    table = mem_db.create_table("test_bbox_schema", schema=schema)

    with pytest.raises(RuntimeError, match="FixedSizeListType"):
        table.add(
            [{"vector": [1.0, 2.0, 3.0, 4.0], "bbox": [0.0, 1.0]}],
            on_bad_vectors="drop",
        )
|
||||
|
||||
|
||||
def test_on_bad_vectors_does_not_handle_custom_named_fixed_size_lists(
    mem_db: DBConnection,
):
    """An empty value in a fixed size list column named `features` raises
    instead of being dropped."""
    schema = pa.schema([pa.field("features", pa.list_(pa.float32(), 16))])
    table = mem_db.create_table("test_custom_named_fixed_size_vector", schema=schema)

    with pytest.raises(RuntimeError, match="FixedSizeListType"):
        table.add(
            [
                {"features": []},
                {"features": [0.1] * 16},
            ],
            on_bad_vectors="drop",
        )
|
||||
|
||||
|
||||
def test_on_bad_vectors_with_schema_list_vector_still_sanitizes(mem_db: DBConnection):
    """For a variable-size list `vector` column, the row whose length differs
    from the others is dropped."""
    schema = pa.schema([pa.field("vector", pa.list_(pa.float32()))])
    table = mem_db.create_table("test_schema_list_vector", schema=schema)
    table.add(
        [
            {"vector": [1.0, 2.0]},
            {"vector": [3.0]},
            {"vector": [4.0, 5.0]},
        ],
        on_bad_vectors="drop",
    )

    assert table.to_arrow()["vector"].to_pylist() == [[1.0, 2.0], [4.0, 5.0]]
|
||||
|
||||
|
||||
def test_on_bad_vectors_handles_typed_custom_fixed_vectors_for_list_schema(
    mem_db: DBConnection,
):
    """Arrow data typed as a 16-value float fixed size list has its NaN row
    dropped even though the column is named `vec`."""
    schema = pa.schema([pa.field("vec", pa.list_(pa.float32()))])
    table = mem_db.create_table("test_typed_custom_fixed_vector", schema=schema)
    data = pa.table(
        {
            "vec": pa.array(
                [[float("nan")] * 16, [1.0] * 16],
                type=pa.list_(pa.float32(), 16),
            )
        }
    )

    table.add(data, on_bad_vectors="drop")

    assert table.to_arrow()["vec"].to_pylist() == [[1.0] * 16]
|
||||
|
||||
|
||||
def test_on_bad_vectors_fill_preserves_arrow_nested_vector_type(mem_db: DBConnection):
    """With on_bad_vectors="fill", a NaN-containing row of a float32 fixed size
    list is replaced by the fill value."""
    schema = pa.schema([pa.field("vector", pa.list_(pa.float32()))])
    table = mem_db.create_table("test_fill_arrow_nested_type", schema=schema)
    data = pa.table(
        {
            "vector": pa.array(
                [[1.0, 2.0], [float("nan"), 3.0]],
                type=pa.list_(pa.float32(), 2),
            )
        }
    )

    table.add(
        data,
        on_bad_vectors="fill",
        fill_value=0.0,
    )

    assert table.to_arrow()["vector"].to_pylist() == [[1.0, 2.0], [0.0, 0.0]]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("table_name", "batch1", "expected"),
    [
        (
            "test_schema_list_vector_empty_prefix",
            pa.record_batch({"vector": [[], []]}),
            [[], [], [1.0, 2.0], [3.0, 4.0]],
        ),
        (
            "test_schema_list_vector_all_bad_prefix",
            pa.record_batch({"vector": [[float("nan")] * 3, [float("nan")] * 3]}),
            [[1.0, 2.0], [3.0, 4.0]],
        ),
    ],
)
def test_on_bad_vectors_with_schema_list_vector_ignores_invalid_prefix_batches(
    mem_db: DBConnection,
    table_name: str,
    batch1: pa.RecordBatch,
    expected: list,
):
    """A first batch of only empty or only NaN vectors must not cause the
    valid 2-value rows of the second batch to be dropped."""
    schema = pa.schema([pa.field("vector", pa.list_(pa.float32()))])
    table = mem_db.create_table(table_name, schema=schema)
    batch2 = pa.record_batch({"vector": [[1.0, 2.0], [3.0, 4.0]]})
    reader = pa.RecordBatchReader.from_batches(batch1.schema, [batch1, batch2])

    table.add(reader, on_bad_vectors="drop")

    assert table.to_arrow()["vector"].to_pylist() == expected
|
||||
|
||||
|
||||
def test_on_bad_vectors_with_multiple_vectors_locks_dim_after_final_drop(
    mem_db: DBConnection,
):
    """With two embedding-function vector columns read over two batches, only
    the first row (3-value `vec1`, NaN in `vec2`) is dropped."""
    registry = EmbeddingFunctionRegistry.get_instance()
    func = MockTextEmbeddingFunction.create()
    metadata = registry.get_table_metadata(
        [
            EmbeddingFunctionConfig(
                source_column="text1", vector_column="vec1", function=func
            ),
            EmbeddingFunctionConfig(
                source_column="text2", vector_column="vec2", function=func
            ),
        ]
    )
    schema = pa.schema(
        [
            pa.field("vec1", pa.list_(pa.float32())),
            pa.field("vec2", pa.list_(pa.float32())),
        ],
        metadata=metadata,
    )
    table = mem_db.create_table("test_multi_vector_dim_lock", schema=schema)
    batch1 = pa.record_batch(
        {
            "vec1": [[1.0, 2.0, 3.0], [10.0, 11.0]],
            "vec2": [[float("nan"), 0.0], [5.0, 6.0]],
        }
    )
    batch2 = pa.record_batch(
        {
            "vec1": [[20.0, 21.0], [30.0, 31.0]],
            "vec2": [[7.0, 8.0], [9.0, 10.0]],
        }
    )
    reader = pa.RecordBatchReader.from_batches(batch1.schema, [batch1, batch2])

    table.add(reader, on_bad_vectors="drop")

    data = table.to_arrow()
    assert data["vec1"].to_pylist() == [[10.0, 11.0], [20.0, 21.0], [30.0, 31.0]]
    assert data["vec2"].to_pylist() == [[5.0, 6.0], [7.0, 8.0], [9.0, 10.0]]
|
||||
|
||||
|
||||
def test_on_bad_vectors_does_not_handle_non_vector_list_columns(mem_db: DBConnection):
    """A float list column named `embedding_history` keeps rows of differing
    lengths under on_bad_vectors="drop"."""
    schema = pa.schema([pa.field("embedding_history", pa.list_(pa.float32()))])
    table = mem_db.create_table("test_non_vector_list_schema", schema=schema)
    table.add(
        [
            {"embedding_history": [1.0, 2.0]},
            {"embedding_history": [3.0]},
        ],
        on_bad_vectors="drop",
    )

    assert table.to_arrow()["embedding_history"].to_pylist() == [
        [1.0, 2.0],
        [3.0],
    ]
|
||||
|
||||
|
||||
def test_on_bad_vectors_all_null_schema_vector_batches_do_not_crash(
    mem_db: DBConnection,
):
    """Adding a single row whose nullable vector is None succeeds and the
    None is stored."""
    schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2), nullable=True)])
    table = mem_db.create_table("test_all_null_vector_batch", schema=schema)

    table.add([{"vector": None}], on_bad_vectors="drop")

    assert table.to_arrow()["vector"].to_pylist() == [None]
|
||||
|
||||
|
||||
def test_restore(mem_db: DBConnection):
|
||||
table = mem_db.create_table(
|
||||
"my_table",
|
||||
|
||||
@@ -15,8 +15,10 @@ from lancedb.table import (
|
||||
_cast_to_target_schema,
|
||||
_handle_bad_vectors,
|
||||
_into_pyarrow_reader,
|
||||
_sanitize_data,
|
||||
_infer_target_schema,
|
||||
_merge_metadata,
|
||||
_sanitize_data,
|
||||
sanitize_create_table,
|
||||
)
|
||||
import pyarrow as pa
|
||||
import pandas as pd
|
||||
@@ -304,6 +306,117 @@ def test_handle_bad_vectors_noop():
|
||||
assert output["vector"] == vector
|
||||
|
||||
|
||||
def test_handle_bad_vectors_updates_reader_schema_for_target_schema():
    """Integer vectors checked against a float32 target schema come back from
    _handle_bad_vectors as a float32 list, in both schema and data."""
    data = pa.table({"vector": [[1, 2, 3, 4]]})
    target_schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 4))])

    output = _handle_bad_vectors(
        data.to_reader(),
        on_bad_vectors="drop",
        target_schema=target_schema,
    )

    assert output.schema == pa.schema([pa.field("vector", pa.list_(pa.float32()))])
    assert output.read_all()["vector"].to_pylist() == [[1.0, 2.0, 3.0, 4.0]]
|
||||
|
||||
|
||||
def test_sanitize_data_keeps_target_field_metadata():
    """After _sanitize_data the field carries the target schema's field
    metadata, not the source's."""
    source_field = pa.field(
        "vector",
        pa.list_(pa.float32(), 2),
        metadata={b"source": b"drop-me"},
    )
    target_field = pa.field(
        "vector",
        pa.list_(pa.float32(), 2),
        metadata={b"target": b"keep-me"},
    )
    data = pa.table(
        {"vector": pa.array([[1.0, 2.0]], type=pa.list_(pa.float32(), 2))},
        schema=pa.schema([source_field]),
    )

    output = _sanitize_data(
        data,
        target_schema=pa.schema([target_field]),
        on_bad_vectors="drop",
    ).read_all()

    assert output.schema.field("vector").metadata == {b"target": b"keep-me"}
|
||||
|
||||
|
||||
def test_sanitize_data_uses_separate_embedding_metadata_for_bad_vectors():
    """Bad-vector handling works when embedding metadata is passed separately.

    The target schema only carries a plain ``note`` entry; the embedding
    configuration (for a non-default vector column name) is supplied through
    the ``metadata`` argument. The row with a 9-element vector must be dropped
    and both metadata sources must show up on the output schema.
    """
    registry = EmbeddingFunctionRegistry.get_instance()
    embedding_conf = EmbeddingFunctionConfig(
        source_column="text",
        vector_column="custom_vector",
        function=MockTextEmbeddingFunction.create(),
    )
    embedding_metadata = registry.get_table_metadata([embedding_conf])

    target_schema = pa.schema(
        {"text": pa.string(), "custom_vector": pa.list_(pa.float32(), 10)},
        metadata={b"note": b"keep-me"},
    )
    rows = pa.table(
        {
            "text": ["bad", "good"],
            # First vector is one element short of the 10-wide target column.
            "custom_vector": [[1.0] * 9, [2.0] * 10],
        }
    )

    sanitized = _sanitize_data(
        rows,
        target_schema=target_schema,
        metadata=embedding_metadata,
        on_bad_vectors="drop",
    ).read_all()

    assert sanitized["text"].to_pylist() == ["good"]
    assert sanitized.schema.metadata[b"note"] == b"keep-me"
    assert b"embedding_functions" in sanitized.schema.metadata
|
||||
|
||||
|
||||
def test_sanitize_create_table_merges_and_overrides_embedding_metadata():
    """Explicit metadata is merged into, and wins over, schema metadata.

    The schema already carries a ``note`` entry plus embedding metadata for
    ``old_vector``. Metadata for ``custom_vector`` is passed explicitly; after
    `sanitize_create_table` the note must survive and the parsed embedding
    functions must refer to ``custom_vector`` only.
    """
    registry = EmbeddingFunctionRegistry.get_instance()
    stale_conf = EmbeddingFunctionConfig(
        source_column="text",
        vector_column="old_vector",
        function=MockTextEmbeddingFunction.create(),
    )
    fresh_conf = EmbeddingFunctionConfig(
        source_column="text",
        vector_column="custom_vector",
        function=MockTextEmbeddingFunction.create(),
    )
    explicit_metadata = registry.get_table_metadata([fresh_conf])

    columns = {
        "text": pa.string(),
        "custom_vector": pa.list_(pa.float32(), 10),
    }
    schema_metadata = _merge_metadata(
        {b"note": b"keep-me"},
        registry.get_table_metadata([stale_conf]),
    )
    input_schema = pa.schema(columns, metadata=schema_metadata)

    sanitized = sanitize_create_table(
        pa.table({"text": ["good"]}),
        input_schema,
        metadata=explicit_metadata,
        on_bad_vectors="drop",
    )
    out_data, out_schema = sanitized

    assert out_schema.metadata[b"note"] == b"keep-me"
    assert b"embedding_functions" in out_schema.metadata
    assert out_data.schema.metadata[b"note"] == b"keep-me"

    parsed = EmbeddingFunctionRegistry.get_instance().parse_functions(
        out_schema.metadata
    )
    assert set(parsed.keys()) == {"custom_vector"}
|
||||
|
||||
|
||||
class TestModel(lancedb.pydantic.LanceModel):
|
||||
a: Optional[int]
|
||||
b: Optional[int]
|
||||
|
||||
@@ -474,6 +474,25 @@ impl Connection {
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the configuration for constructing an equivalent namespace client.
|
||||
/// Returns a dict with:
|
||||
/// - "impl": "dir" for DirectoryNamespace, "rest" for RestNamespace
|
||||
/// - "properties": configuration properties for the namespace
|
||||
#[pyo3(signature = ())]
|
||||
pub fn namespace_client_config(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
let py = self_.py();
|
||||
future_into_py(py, async move {
|
||||
let (impl_type, properties) = inner.namespace_client_config().await.infer_error()?;
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("impl", impl_type)?;
|
||||
dict.set_item("properties", properties)?;
|
||||
Ok(dict.unbind())
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
@@ -528,6 +547,7 @@ pub struct PyClientConfig {
|
||||
id_delimiter: Option<String>,
|
||||
tls_config: Option<PyClientTlsConfig>,
|
||||
header_provider: Option<Py<PyAny>>,
|
||||
user_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(FromPyObject)]
|
||||
@@ -612,6 +632,7 @@ impl From<PyClientConfig> for lancedb::remote::ClientConfig {
|
||||
id_delimiter: value.id_delimiter,
|
||||
tls_config: value.tls_config.map(Into::into),
|
||||
header_provider,
|
||||
user_id: value.user_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
[toolchain]
|
||||
channel = "1.91.0"
|
||||
channel = "1.94.0"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.27.2"
|
||||
version = "0.28.0-beta.1"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
@@ -541,6 +541,16 @@ impl Connection {
|
||||
self.internal.namespace_client().await
|
||||
}
|
||||
|
||||
/// Get the configuration for constructing an equivalent namespace client.
|
||||
/// Returns (impl_type, properties) where:
|
||||
/// - impl_type: "dir" for DirectoryNamespace, "rest" for RestNamespace
|
||||
/// - properties: configuration properties for the namespace
|
||||
pub async fn namespace_client_config(
|
||||
&self,
|
||||
) -> Result<(String, std::collections::HashMap<String, String>)> {
|
||||
self.internal.namespace_client_config().await
|
||||
}
|
||||
|
||||
/// List tables with pagination support
|
||||
pub async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
|
||||
self.internal.list_tables(request).await
|
||||
|
||||
@@ -265,4 +265,13 @@ pub trait Database:
|
||||
/// For ListingDatabase, it is the equivalent DirectoryNamespace.
|
||||
/// For RemoteDatabase, it is the equivalent RestNamespace.
|
||||
async fn namespace_client(&self) -> Result<Arc<dyn LanceNamespace>>;
|
||||
|
||||
/// Get the configuration for constructing an equivalent namespace client.
|
||||
/// Returns (impl_type, properties) where:
|
||||
/// - impl_type: "dir" for DirectoryNamespace, "rest" for RestNamespace
|
||||
/// - properties: configuration properties for the namespace
|
||||
///
|
||||
/// This is useful for Python bindings where we want to return a Python
|
||||
/// namespace object rather than a Rust trait object.
|
||||
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)>;
|
||||
}
|
||||
|
||||
@@ -1099,6 +1099,15 @@ impl Database for ListingDatabase {
|
||||
})?;
|
||||
Ok(Arc::new(namespace) as Arc<dyn lance_namespace::LanceNamespace>)
|
||||
}
|
||||
|
||||
/// Report the "dir" namespace configuration for this listing database.
///
/// The properties hold the database URI under `root` and every storage
/// option under a `storage.`-prefixed key.
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)> {
    let root_entry = std::iter::once(("root".to_string(), self.uri.clone()));
    let storage_entries = (&self.storage_options)
        .into_iter()
        .map(|(key, value)| (format!("storage.{}", key), value.clone()));

    let properties: HashMap<String, String> = root_entry.chain(storage_entries).collect();
    Ok(("dir".to_string(), properties))
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -45,6 +45,10 @@ pub struct LanceNamespaceDatabase {
|
||||
uri: String,
|
||||
// Operations to push down to the namespace server
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
// Namespace implementation type (e.g., "dir", "rest")
|
||||
ns_impl: String,
|
||||
// Namespace properties used to construct the namespace client
|
||||
ns_properties: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl LanceNamespaceDatabase {
|
||||
@@ -74,6 +78,8 @@ impl LanceNamespaceDatabase {
|
||||
session,
|
||||
uri: format!("namespace://{}", ns_impl),
|
||||
pushdown_operations,
|
||||
ns_impl: ns_impl.to_string(),
|
||||
ns_properties,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -345,6 +351,10 @@ impl Database for LanceNamespaceDatabase {
|
||||
/// Hand out another shared handle to the namespace this database wraps.
async fn namespace_client(&self) -> Result<Arc<dyn LanceNamespace>> {
    let namespace = Arc::clone(&self.namespace);
    Ok(namespace)
}
|
||||
|
||||
/// Return copies of the stored namespace implementation type and properties.
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)> {
    let impl_type = self.ns_impl.clone();
    let properties = self.ns_properties.clone();
    Ok((impl_type, properties))
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -177,6 +177,7 @@ impl BedrockEmbeddingFunction {
|
||||
))
|
||||
.send()
|
||||
.await
|
||||
.map_err(Box::new)
|
||||
})
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@@ -52,6 +52,13 @@ pub struct ClientConfig {
|
||||
pub tls_config: Option<TlsConfig>,
|
||||
/// Provider for custom headers to be added to each request
|
||||
pub header_provider: Option<Arc<dyn HeaderProvider>>,
|
||||
/// User identifier for tracking purposes.
|
||||
///
|
||||
/// This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
|
||||
/// It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
|
||||
/// Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
|
||||
/// variable that contains the user ID value.
|
||||
pub user_id: Option<String>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for ClientConfig {
|
||||
@@ -67,6 +74,7 @@ impl std::fmt::Debug for ClientConfig {
|
||||
"header_provider",
|
||||
&self.header_provider.as_ref().map(|_| "Some(...)"),
|
||||
)
|
||||
.field("user_id", &self.user_id)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
@@ -81,10 +89,41 @@ impl Default for ClientConfig {
|
||||
id_delimiter: None,
|
||||
tls_config: None,
|
||||
header_provider: None,
|
||||
user_id: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ClientConfig {
|
||||
/// Resolve the user ID from the config or environment variables.
|
||||
///
|
||||
/// Resolution order:
|
||||
/// 1. If `user_id` is set in the config, use that value
|
||||
/// 2. If `LANCEDB_USER_ID` environment variable is set, use that value
|
||||
/// 3. If `LANCEDB_USER_ID_ENV_KEY` is set, read the env var it points to
|
||||
/// 4. Otherwise, return None
|
||||
pub fn resolve_user_id(&self) -> Option<String> {
|
||||
if self.user_id.is_some() {
|
||||
return self.user_id.clone();
|
||||
}
|
||||
|
||||
if let Ok(user_id) = std::env::var("LANCEDB_USER_ID")
|
||||
&& !user_id.is_empty()
|
||||
{
|
||||
return Some(user_id);
|
||||
}
|
||||
|
||||
if let Ok(env_key) = std::env::var("LANCEDB_USER_ID_ENV_KEY")
|
||||
&& let Ok(user_id) = std::env::var(&env_key)
|
||||
&& !user_id.is_empty()
|
||||
{
|
||||
return Some(user_id);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// How to handle timeouts for HTTP requests.
|
||||
#[derive(Clone, Default, Debug)]
|
||||
pub struct TimeoutConfig {
|
||||
@@ -464,6 +503,15 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(user_id) = config.resolve_user_id() {
|
||||
headers.insert(
|
||||
HeaderName::from_static("x-lancedb-user-id"),
|
||||
HeaderValue::from_str(&user_id).map_err(|_| Error::InvalidInput {
|
||||
message: format!("non-ascii user_id '{}' provided", user_id),
|
||||
})?,
|
||||
);
|
||||
}
|
||||
|
||||
Ok(headers)
|
||||
}
|
||||
|
||||
@@ -1072,4 +1120,91 @@ mod tests {
|
||||
_ => panic!("Expected Runtime error"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Serializes the tests below that read or mutate the process-wide
/// `LANCEDB_USER_ID*` environment variables.
///
/// The test harness runs tests on multiple threads, so without this lock one
/// test can observe a variable that a sibling test has just set or removed.
static USER_ID_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

/// Acquire [`USER_ID_ENV_LOCK`], recovering from poisoning.
///
/// A failed assertion in one test poisons the mutex; the guarded value is
/// `()`, so it is safe to keep using the lock afterwards.
fn lock_user_id_env() -> std::sync::MutexGuard<'static, ()> {
    USER_ID_ENV_LOCK
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
}

/// A directly configured `user_id` is returned unchanged.
#[test]
fn test_resolve_user_id_direct_value() {
    let config = ClientConfig {
        user_id: Some("direct-user-id".to_string()),
        ..Default::default()
    };
    assert_eq!(config.resolve_user_id(), Some("direct-user-id".to_string()));
}

/// With no configured value and no environment variables, nothing resolves.
#[test]
fn test_resolve_user_id_none() {
    let _env_guard = lock_user_id_env();
    let config = ClientConfig::default();
    // Clear env vars that might be set from other tests
    // SAFETY: test-only; `_env_guard` serializes this against the other
    // user-id tests. Assumes no other thread touches the environment
    // concurrently (TODO confirm for the rest of the test suite).
    unsafe {
        std::env::remove_var("LANCEDB_USER_ID");
        std::env::remove_var("LANCEDB_USER_ID_ENV_KEY");
    }
    assert_eq!(config.resolve_user_id(), None);
}

/// `LANCEDB_USER_ID` is used when no direct value is configured.
#[test]
fn test_resolve_user_id_from_env() {
    let _env_guard = lock_user_id_env();
    // SAFETY: test-only; env mutation is serialized by `_env_guard`.
    unsafe {
        std::env::set_var("LANCEDB_USER_ID", "env-user-id");
    }
    let config = ClientConfig::default();
    assert_eq!(config.resolve_user_id(), Some("env-user-id".to_string()));
    // SAFETY: test-only; env mutation is serialized by `_env_guard`.
    unsafe {
        std::env::remove_var("LANCEDB_USER_ID");
    }
}

/// `LANCEDB_USER_ID_ENV_KEY` names another variable that holds the user ID.
#[test]
fn test_resolve_user_id_from_env_key() {
    let _env_guard = lock_user_id_env();
    // SAFETY: test-only; env mutation is serialized by `_env_guard`.
    unsafe {
        std::env::remove_var("LANCEDB_USER_ID");
        std::env::set_var("LANCEDB_USER_ID_ENV_KEY", "MY_CUSTOM_USER_ID");
        std::env::set_var("MY_CUSTOM_USER_ID", "custom-env-user-id");
    }
    let config = ClientConfig::default();
    assert_eq!(
        config.resolve_user_id(),
        Some("custom-env-user-id".to_string())
    );
    // SAFETY: test-only; env mutation is serialized by `_env_guard`.
    unsafe {
        std::env::remove_var("LANCEDB_USER_ID_ENV_KEY");
        std::env::remove_var("MY_CUSTOM_USER_ID");
    }
}

/// A directly configured `user_id` wins over `LANCEDB_USER_ID`.
#[test]
fn test_resolve_user_id_direct_takes_precedence() {
    let _env_guard = lock_user_id_env();
    // SAFETY: test-only; env mutation is serialized by `_env_guard`.
    unsafe {
        std::env::set_var("LANCEDB_USER_ID", "env-user-id");
    }
    let config = ClientConfig {
        user_id: Some("direct-user-id".to_string()),
        ..Default::default()
    };
    assert_eq!(config.resolve_user_id(), Some("direct-user-id".to_string()));
    // SAFETY: test-only; env mutation is serialized by `_env_guard`.
    unsafe {
        std::env::remove_var("LANCEDB_USER_ID");
    }
}

/// An empty `LANCEDB_USER_ID` is treated the same as an unset one.
#[test]
fn test_resolve_user_id_empty_env_ignored() {
    let _env_guard = lock_user_id_env();
    // SAFETY: test-only; env mutation is serialized by `_env_guard`.
    unsafe {
        std::env::set_var("LANCEDB_USER_ID", "");
        std::env::remove_var("LANCEDB_USER_ID_ENV_KEY");
    }
    let config = ClientConfig::default();
    assert_eq!(config.resolve_user_id(), None);
    // SAFETY: test-only; env mutation is serialized by `_env_guard`.
    unsafe {
        std::env::remove_var("LANCEDB_USER_ID");
    }
}
|
||||
}
|
||||
|
||||
@@ -777,6 +777,32 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
|
||||
let namespace = builder.build();
|
||||
Ok(Arc::new(namespace) as Arc<dyn lance_namespace::LanceNamespace>)
|
||||
}
|
||||
|
||||
/// Report the "rest" namespace configuration for this remote database.
///
/// The properties contain the client host under `uri`, the id delimiter
/// under `delimiter`, each namespace header under a `header.`-prefixed key
/// and, when a TLS configuration is present, its settings under `tls.*`.
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)> {
    let mut properties = HashMap::from([
        ("uri".to_string(), self.client.host().to_string()),
        ("delimiter".to_string(), self.client.id_delimiter.clone()),
    ]);
    properties.extend(
        (&self.namespace_headers)
            .into_iter()
            .map(|(key, value)| (format!("header.{}", key), value.clone())),
    );

    // Add TLS configuration if present
    if let Some(tls) = &self.tls_config {
        // Optional file settings are only emitted when they are configured.
        let optional_files = [
            ("tls.cert_file", &tls.cert_file),
            ("tls.key_file", &tls.key_file),
            ("tls.ssl_ca_cert", &tls.ssl_ca_cert),
        ];
        for (name, maybe_path) in optional_files {
            if let Some(path) = maybe_path {
                properties.insert(name.to_string(), path.clone());
            }
        }
        properties.insert(
            "tls.assert_hostname".to_string(),
            tls.assert_hostname.to_string(),
        );
    }

    Ok(("rest".to_string(), properties))
}
|
||||
}
|
||||
|
||||
/// RemoteOptions contains a subset of StorageOptions that are compatible with Remote LanceDB connections
|
||||
|
||||
Reference in New Issue
Block a user