mirror of
https://github.com/lancedb/lancedb.git
synced 2026-04-16 21:00:40 +00:00
Compare commits
19 Commits
feature/wa
...
rust-neste
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a3e3c1ad5c | ||
|
|
c6e43b545d | ||
|
|
5807ad464b | ||
|
|
513c2215c5 | ||
|
|
10879d99b8 | ||
|
|
4e6a1d5dce | ||
|
|
13d2759356 | ||
|
|
7f52ec8c36 | ||
|
|
c6ae0de3ee | ||
|
|
231f0655ce | ||
|
|
8c52977c59 | ||
|
|
359710a0bf | ||
|
|
1f1726369d | ||
|
|
df354abae4 | ||
|
|
11bc674548 | ||
|
|
5593460823 | ||
|
|
2807ad6854 | ||
|
|
4761fa9bcb | ||
|
|
4c2939d66e |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.28.0-beta.1"
|
||||
current_version = "0.28.0-beta.5"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
2
.github/ISSUE_TEMPLATE/documentation.yml
vendored
2
.github/ISSUE_TEMPLATE/documentation.yml
vendored
@@ -18,6 +18,6 @@ body:
|
||||
label: Link
|
||||
description: >
|
||||
Provide a link to the existing documentation, if applicable.
|
||||
placeholder: ex. https://lancedb.com/docs/tables/...
|
||||
placeholder: ex. https://docs.lancedb.com/tables/...
|
||||
validations:
|
||||
required: false
|
||||
|
||||
1
.github/workflows/nodejs.yml
vendored
1
.github/workflows/nodejs.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- nodejs/**
|
||||
- rust/**
|
||||
- docs/src/js/**
|
||||
|
||||
1
.github/workflows/python.yml
vendored
1
.github/workflows/python.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- python/**
|
||||
- rust/**
|
||||
- .github/workflows/python.yml
|
||||
|
||||
1
.github/workflows/rust.yml
vendored
1
.github/workflows/rust.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- rust/**
|
||||
- .github/workflows/rust.yml
|
||||
|
||||
|
||||
73
Cargo.lock
generated
73
Cargo.lock
generated
@@ -3072,8 +3072,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "fsst"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"rand 0.9.2",
|
||||
@@ -4134,13 +4134,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-ipc",
|
||||
"arrow-ord",
|
||||
"arrow-row",
|
||||
@@ -4201,13 +4202,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-arrow"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-data",
|
||||
"arrow-ipc",
|
||||
"arrow-ord",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
@@ -4222,8 +4224,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-bitpacking"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"paste",
|
||||
@@ -4232,8 +4234,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-core"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4270,12 +4272,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datafusion"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-ord",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
@@ -4301,8 +4304,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datagen"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4320,8 +4323,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-encoding"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4358,8 +4361,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-file"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4391,8 +4394,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-index"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4456,8 +4459,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-io"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4501,8 +4504,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-linalg"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4518,8 +4521,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
@@ -4532,8 +4535,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace-impls"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-ipc",
|
||||
@@ -4578,8 +4581,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-table"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4618,8 +4621,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-testing"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.3"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.3#0a0d53b5ef4b0f65bb775ce63bdcabc79d1d14b8"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-schema",
|
||||
@@ -4630,7 +4633,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb"
|
||||
version = "0.28.0-beta.1"
|
||||
version = "0.28.0-beta.5"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
@@ -4712,7 +4715,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-nodejs"
|
||||
version = "0.28.0-beta.1"
|
||||
version = "0.28.0-beta.5"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4734,7 +4737,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-python"
|
||||
version = "0.31.0-beta.1"
|
||||
version = "0.31.0-beta.5"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
|
||||
28
Cargo.toml
28
Cargo.toml
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=5.1.0-beta.3", default-features = false, "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=5.1.0-beta.3", default-features = false, "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=5.1.0-beta.3", default-features = false, "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "57.2", optional = false }
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# **The Multimodal AI Lakehouse**
|
||||
|
||||
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://lancedb.com/docs) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
|
||||
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://docs.lancedb.com) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
|
||||
|
||||
**The ultimate multimodal data platform for AI/ML applications.**
|
||||
|
||||
@@ -57,7 +57,7 @@ LanceDB is a central location where developers can build, train and analyze thei
|
||||
|
||||
## **How to Install**:
|
||||
|
||||
Follow the [Quickstart](https://lancedb.com/docs/quickstart/) doc to set up LanceDB locally.
|
||||
Follow the [Quickstart](https://docs.lancedb.com/quickstart) doc to set up LanceDB locally.
|
||||
|
||||
**API & SDK:** We also support Python, Typescript and Rust SDKs
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# LanceDB Documentation
|
||||
|
||||
LanceDB docs are available at [lancedb.com/docs](https://lancedb.com/docs).
|
||||
LanceDB docs are available at [docs.lancedb.com](https://docs.lancedb.com).
|
||||
|
||||
The SDK docs are built and deployed automatically by [Github Actions](../.github/workflows/docs.yml)
|
||||
whenever a commit is pushed to the `main` branch. So it is possible for the docs to show
|
||||
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.28.0-beta.1</version>
|
||||
<version>0.28.0-beta.5</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
|
||||
The [quickstart](https://docs.lancedb.com/quickstart/) contains more complete examples.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -89,4 +89,4 @@ optional storageOptions: Record<string, string>;
|
||||
|
||||
(For LanceDB OSS only): configuration for object storage.
|
||||
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
|
||||
@@ -97,4 +97,4 @@ Configuration for object storage.
|
||||
Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
|
||||
@@ -42,4 +42,4 @@ Configuration for object storage.
|
||||
Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.28.0-beta.1</version>
|
||||
<version>0.28.0-beta.5</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.28.0-beta.1</version>
|
||||
<version>0.28.0-beta.5</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>5.0.0-beta.5</lance-core.version>
|
||||
<lance-core.version>5.1.0-beta.3</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.28.0-beta.1"
|
||||
version = "0.28.0-beta.5"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -30,7 +30,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
|
||||
The [quickstart](https://docs.lancedb.com/quickstart/) contains more complete examples.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ export interface CreateTableOptions {
|
||||
* Options already set on the connection will be inherited by the table,
|
||||
* but can be overridden here.
|
||||
*
|
||||
* The available options are described at https://lancedb.com/docs/storage/
|
||||
* The available options are described at https://docs.lancedb.com/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>;
|
||||
|
||||
@@ -78,7 +78,7 @@ export interface OpenTableOptions {
|
||||
* Options already set on the connection will be inherited by the table,
|
||||
* but can be overridden here.
|
||||
*
|
||||
* The available options are described at https://lancedb.com/docs/storage/
|
||||
* The available options are described at https://docs.lancedb.com/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>;
|
||||
/**
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.28.0-beta.1",
|
||||
"version": "0.28.0-beta.5",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.28.0-beta.1",
|
||||
"version": "0.28.0-beta.5",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.28.0-beta.1",
|
||||
"version": "0.28.0-beta.5",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.28.0-beta.1",
|
||||
"version": "0.28.0-beta.5",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.28.0-beta.1",
|
||||
"version": "0.28.0-beta.5",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.28.0-beta.1",
|
||||
"version": "0.28.0-beta.5",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.28.0-beta.1",
|
||||
"version": "0.28.0-beta.5",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.28.0-beta.1",
|
||||
"version": "0.28.0-beta.5",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.28.0-beta.1",
|
||||
"version": "0.28.0-beta.5",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.28.0-beta.1",
|
||||
"version": "0.28.0-beta.5",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -35,7 +35,7 @@ pub struct ConnectionOptions {
|
||||
pub read_consistency_interval: Option<f64>,
|
||||
/// (For LanceDB OSS only): configuration for object storage.
|
||||
///
|
||||
/// The available options are described at https://lancedb.com/docs/storage/
|
||||
/// The available options are described at https://docs.lancedb.com/storage/
|
||||
pub storage_options: Option<HashMap<String, String>>,
|
||||
/// (For LanceDB OSS only): the session to use for this connection. Holds
|
||||
/// shared caches and other session-specific state.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.31.0-beta.1"
|
||||
current_version = "0.31.0-beta.5"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.31.0-beta.1"
|
||||
version = "0.31.0-beta.5"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -83,7 +83,7 @@ embeddings = [
|
||||
"colpali-engine>=0.3.10",
|
||||
"huggingface_hub>=0.19.0",
|
||||
"InstructorEmbedding>=1.0.1",
|
||||
"google.generativeai>=0.3.0",
|
||||
"google-genai>=1.0.0",
|
||||
"boto3>=1.28.57",
|
||||
"awscli>=1.44.38",
|
||||
"botocore>=1.31.57",
|
||||
|
||||
@@ -110,7 +110,7 @@ def connect(
|
||||
default configuration is used.
|
||||
storage_options: dict, optional
|
||||
Additional options for the storage backend. See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
<https://docs.lancedb.com/storage/>
|
||||
session: Session, optional
|
||||
(For LanceDB OSS only)
|
||||
A session to use for this connection. Sessions allow you to configure
|
||||
@@ -215,6 +215,85 @@ def connect(
|
||||
)
|
||||
|
||||
|
||||
WORKER_PROPERTY_PREFIX = "_lancedb_worker_"
|
||||
|
||||
|
||||
def _apply_worker_overrides(props: dict[str, str]) -> dict[str, str]:
|
||||
"""Apply worker property overrides.
|
||||
|
||||
Any key starting with ``_lancedb_worker_`` is extracted, the prefix
|
||||
is stripped, and the resulting key-value pair is put back into the
|
||||
map (overriding the existing value if present). The original
|
||||
prefixed key is removed.
|
||||
"""
|
||||
worker_keys = [k for k in props if k.startswith(WORKER_PROPERTY_PREFIX)]
|
||||
if not worker_keys:
|
||||
return props
|
||||
result = dict(props)
|
||||
for key in worker_keys:
|
||||
value = result.pop(key)
|
||||
real_key = key[len(WORKER_PROPERTY_PREFIX) :]
|
||||
result[real_key] = value
|
||||
return result
|
||||
|
||||
|
||||
def deserialize_conn(
|
||||
data: str,
|
||||
*,
|
||||
for_worker: bool = False,
|
||||
) -> DBConnection:
|
||||
"""Reconstruct a DBConnection from a serialized string.
|
||||
|
||||
The string must have been produced by
|
||||
:meth:`DBConnection.serialize`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : str
|
||||
String produced by ``serialize()``.
|
||||
for_worker : bool, default False
|
||||
When ``True``, any namespace client property whose key starts
|
||||
with ``_lancedb_worker_`` has that prefix stripped and the
|
||||
value overrides the corresponding property. For example,
|
||||
``_lancedb_worker_uri`` replaces ``uri``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DBConnection
|
||||
A new connection matching the serialized state.
|
||||
"""
|
||||
import json
|
||||
|
||||
parsed = json.loads(data)
|
||||
connection_type = parsed.get("connection_type")
|
||||
|
||||
rci_secs = parsed.get("read_consistency_interval_seconds")
|
||||
rci = timedelta(seconds=rci_secs) if rci_secs is not None else None
|
||||
storage_options = parsed.get("storage_options")
|
||||
|
||||
if connection_type == "namespace":
|
||||
props = dict(parsed.get("namespace_client_properties") or {})
|
||||
if for_worker:
|
||||
props = _apply_worker_overrides(props)
|
||||
return connect_namespace(
|
||||
namespace_client_impl=parsed["namespace_client_impl"],
|
||||
namespace_client_properties=props,
|
||||
read_consistency_interval=rci,
|
||||
storage_options=storage_options,
|
||||
namespace_client_pushdown_operations=parsed.get(
|
||||
"namespace_client_pushdown_operations"
|
||||
),
|
||||
)
|
||||
elif connection_type == "local":
|
||||
return LanceDBConnection(
|
||||
parsed["uri"],
|
||||
read_consistency_interval=rci,
|
||||
storage_options=storage_options,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown connection_type: {connection_type}")
|
||||
|
||||
|
||||
async def connect_async(
|
||||
uri: URI,
|
||||
*,
|
||||
@@ -257,7 +336,7 @@ async def connect_async(
|
||||
default configuration is used.
|
||||
storage_options: dict, optional
|
||||
Additional options for the storage backend. See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
<https://docs.lancedb.com/storage/>
|
||||
session: Session, optional
|
||||
(For LanceDB OSS only)
|
||||
A session to use for this connection. Sessions allow you to configure
|
||||
|
||||
@@ -96,7 +96,7 @@ def data_to_reader(
|
||||
f"Unknown data type {type(data)}. "
|
||||
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
|
||||
"pyarrow Table/RecordBatch, or Pydantic models. "
|
||||
"See https://lancedb.com/docs/tables/ for examples."
|
||||
"See https://docs.lancedb.com/tables/ for examples."
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -282,7 +282,7 @@ class DBConnection(EnforceOverrides):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
<https://docs.lancedb.com/storage/>
|
||||
|
||||
To enable stable row IDs (row IDs remain stable after compaction,
|
||||
update, delete, and merges), set `new_table_enable_stable_row_ids`
|
||||
@@ -433,7 +433,7 @@ class DBConnection(EnforceOverrides):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
<https://docs.lancedb.com/storage/>
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -529,6 +529,19 @@ class DBConnection(EnforceOverrides):
|
||||
"namespace_client is not supported for this connection type"
|
||||
)
|
||||
|
||||
def serialize(self) -> str:
|
||||
"""Serialize this connection for reconstruction.
|
||||
|
||||
The returned string can be passed to :func:`lancedb.deserialize_conn`
|
||||
to recreate an equivalent connection, e.g. in a remote worker.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Serialized representation of this connection.
|
||||
"""
|
||||
raise NotImplementedError("serialize is not supported for this connection type")
|
||||
|
||||
|
||||
class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
@@ -581,6 +594,7 @@ class LanceDBConnection(DBConnection):
|
||||
):
|
||||
if _inner is not None:
|
||||
self._conn = _inner
|
||||
self._cached_namespace_client = None
|
||||
return
|
||||
|
||||
if not isinstance(uri, Path):
|
||||
@@ -628,6 +642,7 @@ class LanceDBConnection(DBConnection):
|
||||
# beyond _conn.
|
||||
self.storage_options = storage_options
|
||||
self._conn = AsyncConnection(LOOP.run(do_connect()))
|
||||
self._cached_namespace_client: Optional[LanceNamespace] = None
|
||||
|
||||
@property
|
||||
def read_consistency_interval(self) -> Optional[timedelta]:
|
||||
@@ -652,6 +667,22 @@ class LanceDBConnection(DBConnection):
|
||||
val += ")"
|
||||
return val
|
||||
|
||||
@override
|
||||
def serialize(self) -> str:
|
||||
import json
|
||||
|
||||
rci = self.read_consistency_interval
|
||||
return json.dumps(
|
||||
{
|
||||
"connection_type": "local",
|
||||
"uri": self.uri,
|
||||
"storage_options": self.storage_options,
|
||||
"read_consistency_interval_seconds": (
|
||||
rci.total_seconds() if rci else None
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
async def _async_get_table_names(self, start_after: Optional[str], limit: int):
|
||||
conn = AsyncConnection(await lancedb_connect(self.uri))
|
||||
return await conn.table_names(start_after=start_after, limit=limit)
|
||||
@@ -687,10 +718,10 @@ class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
return LOOP.run(
|
||||
self._conn.list_namespaces(
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
)
|
||||
return self._namespace_conn().list_namespaces(
|
||||
namespace_path=namespace_path,
|
||||
page_token=page_token,
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
@override
|
||||
@@ -700,27 +731,10 @@ class LanceDBConnection(DBConnection):
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
"""Create a new namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
or "overwrite" (replace if exists). Case insensitive.
|
||||
properties: Dict[str, str], optional
|
||||
Properties to set on the namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing the properties of the created namespace.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.create_namespace(
|
||||
namespace_path=namespace_path, mode=mode, properties=properties
|
||||
)
|
||||
return self._namespace_conn().create_namespace(
|
||||
namespace_path=namespace_path,
|
||||
mode=mode,
|
||||
properties=properties,
|
||||
)
|
||||
|
||||
@override
|
||||
@@ -730,46 +744,19 @@ class LanceDBConnection(DBConnection):
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
"""Drop a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior: str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.drop_namespace(
|
||||
namespace_path=namespace_path, mode=mode, behavior=behavior
|
||||
)
|
||||
return self._namespace_conn().drop_namespace(
|
||||
namespace_path=namespace_path,
|
||||
mode=mode,
|
||||
behavior=behavior,
|
||||
)
|
||||
|
||||
@override
|
||||
def describe_namespace(
|
||||
self, namespace_path: List[str]
|
||||
) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
return LOOP.run(self._conn.describe_namespace(namespace_path=namespace_path))
|
||||
return self._namespace_conn().describe_namespace(
|
||||
namespace_path=namespace_path,
|
||||
)
|
||||
|
||||
@override
|
||||
def list_tables(
|
||||
@@ -798,6 +785,12 @@ class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if namespace_path:
|
||||
return self._namespace_conn().list_tables(
|
||||
namespace_path=namespace_path,
|
||||
page_token=page_token,
|
||||
limit=limit,
|
||||
)
|
||||
return LOOP.run(
|
||||
self._conn.list_tables(
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
@@ -886,6 +879,22 @@ class LanceDBConnection(DBConnection):
|
||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||
validate_table_name(name)
|
||||
|
||||
if namespace_path:
|
||||
return self._namespace_conn().create_table(
|
||||
name,
|
||||
data=data,
|
||||
schema=schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
embedding_functions=embedding_functions,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
data_storage_version=data_storage_version,
|
||||
enable_v2_manifest_paths=enable_v2_manifest_paths,
|
||||
)
|
||||
|
||||
tbl = LanceTable.create(
|
||||
self,
|
||||
name,
|
||||
@@ -901,6 +910,19 @@ class LanceDBConnection(DBConnection):
|
||||
)
|
||||
return tbl
|
||||
|
||||
def _namespace_conn(self) -> DBConnection:
|
||||
"""Return a LanceNamespaceDBConnection backed by this connection's
|
||||
directory namespace. Used to delegate child-namespace operations."""
|
||||
from lancedb.namespace import LanceNamespaceDBConnection
|
||||
|
||||
return LanceNamespaceDBConnection(
|
||||
self.namespace_client(),
|
||||
read_consistency_interval=self.read_consistency_interval,
|
||||
storage_options=self.storage_options,
|
||||
namespace_client_impl=None,
|
||||
namespace_client_properties=None,
|
||||
)
|
||||
|
||||
@override
|
||||
def open_table(
|
||||
self,
|
||||
@@ -917,7 +939,8 @@ class LanceDBConnection(DBConnection):
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace_path: List[str], optional
|
||||
The namespace to open the table from.
|
||||
The namespace to open the table from. When non-empty, the
|
||||
table is resolved through the directory namespace client.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -936,6 +959,14 @@ class LanceDBConnection(DBConnection):
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
if namespace_path:
|
||||
return self._namespace_conn().open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
)
|
||||
|
||||
return LanceTable.open(
|
||||
self,
|
||||
name,
|
||||
@@ -1020,6 +1051,9 @@ class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if namespace_path:
|
||||
self._namespace_conn().drop_table(name, namespace_path=namespace_path)
|
||||
return
|
||||
LOOP.run(
|
||||
self._conn.drop_table(
|
||||
name, namespace_path=namespace_path, ignore_missing=ignore_missing
|
||||
@@ -1071,14 +1105,17 @@ class LanceDBConnection(DBConnection):
|
||||
"""Get the equivalent namespace client for this connection.
|
||||
|
||||
Returns a DirectoryNamespace pointing to the same root with the
|
||||
same storage options.
|
||||
same storage options. The result is cached for the lifetime of
|
||||
this connection.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceNamespace
|
||||
The namespace client for this connection.
|
||||
"""
|
||||
return LOOP.run(self._conn.namespace_client())
|
||||
if self._cached_namespace_client is None:
|
||||
self._cached_namespace_client = LOOP.run(self._conn.namespace_client())
|
||||
return self._cached_namespace_client
|
||||
|
||||
@deprecation.deprecated(
|
||||
deprecated_in="0.15.1",
|
||||
@@ -1397,7 +1434,7 @@ class AsyncConnection(object):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
<https://docs.lancedb.com/storage/>
|
||||
|
||||
To enable stable row IDs (row IDs remain stable after compaction,
|
||||
update, delete, and merges), set `new_table_enable_stable_row_ids`
|
||||
@@ -1588,7 +1625,7 @@ class AsyncConnection(object):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://lancedb.com/docs/storage/>
|
||||
<https://docs.lancedb.com/storage/>
|
||||
index_cache_size: int, default 256
|
||||
**Deprecated**: Use session-level cache configuration instead.
|
||||
Create a Session with custom cache sizes and pass it to lancedb.connect().
|
||||
|
||||
@@ -19,10 +19,10 @@ from .utils import TEXT, api_key_not_found_help
|
||||
@register("gemini-text")
|
||||
class GeminiText(TextEmbeddingFunction):
|
||||
"""
|
||||
An embedding function that uses the Google's Gemini API. Requires GOOGLE_API_KEY to
|
||||
An embedding function that uses Google's Gemini API. Requires GOOGLE_API_KEY to
|
||||
be set.
|
||||
|
||||
https://ai.google.dev/docs/embeddings_guide
|
||||
https://ai.google.dev/gemini-api/docs/embeddings
|
||||
|
||||
Supports various tasks types:
|
||||
| Task Type | Description |
|
||||
@@ -46,9 +46,12 @@ class GeminiText(TextEmbeddingFunction):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str, default "models/embedding-001"
|
||||
The name of the model to use. See the Gemini documentation for a list of
|
||||
available models.
|
||||
name: str, default "gemini-embedding-001"
|
||||
The name of the model to use. Supported models include:
|
||||
- "gemini-embedding-001" (768 dimensions)
|
||||
|
||||
Note: The legacy "models/embedding-001" format is also supported but
|
||||
"gemini-embedding-001" is recommended.
|
||||
|
||||
query_task_type: str, default "retrieval_query"
|
||||
Sets the task type for the queries.
|
||||
@@ -77,7 +80,7 @@ class GeminiText(TextEmbeddingFunction):
|
||||
|
||||
"""
|
||||
|
||||
name: str = "models/embedding-001"
|
||||
name: str = "gemini-embedding-001"
|
||||
query_task_type: str = "retrieval_query"
|
||||
source_task_type: str = "retrieval_document"
|
||||
|
||||
@@ -114,23 +117,48 @@ class GeminiText(TextEmbeddingFunction):
|
||||
texts: list[str] or np.ndarray (of str)
|
||||
The texts to embed
|
||||
"""
|
||||
if (
|
||||
kwargs.get("task_type") == "retrieval_document"
|
||||
): # Provide a title to use existing API design
|
||||
title = "Embedding of a document"
|
||||
kwargs["title"] = title
|
||||
from google.genai import types
|
||||
|
||||
return [
|
||||
self.client.embed_content(model=self.name, content=text, **kwargs)[
|
||||
"embedding"
|
||||
]
|
||||
for text in texts
|
||||
]
|
||||
task_type = kwargs.get("task_type")
|
||||
|
||||
# Build content objects for embed_content
|
||||
contents = []
|
||||
for text in texts:
|
||||
if task_type == "retrieval_document":
|
||||
# Provide a title for retrieval_document task
|
||||
contents.append(
|
||||
{"parts": [{"text": "Embedding of a document"}, {"text": text}]}
|
||||
)
|
||||
else:
|
||||
contents.append({"parts": [{"text": text}]})
|
||||
|
||||
# Build config
|
||||
config_kwargs = {}
|
||||
if task_type:
|
||||
config_kwargs["task_type"] = task_type.upper() # API expects uppercase
|
||||
|
||||
# Call embed_content for each content
|
||||
embeddings = []
|
||||
for content in contents:
|
||||
config = (
|
||||
types.EmbedContentConfig(**config_kwargs) if config_kwargs else None
|
||||
)
|
||||
response = self.client.models.embed_content(
|
||||
model=self.name,
|
||||
contents=content,
|
||||
config=config,
|
||||
)
|
||||
embeddings.append(response.embeddings[0].values)
|
||||
|
||||
return embeddings
|
||||
|
||||
@cached_property
|
||||
def client(self):
|
||||
genai = attempt_import_or_raise("google.generativeai", "google.generativeai")
|
||||
attempt_import_or_raise("google.genai", "google-genai")
|
||||
|
||||
if not os.environ.get("GOOGLE_API_KEY"):
|
||||
api_key_not_found_help("google")
|
||||
return genai
|
||||
|
||||
from google import genai as genai_module
|
||||
|
||||
return genai_module.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
|
||||
|
||||
@@ -381,6 +381,8 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
namespace_client_pushdown_operations: Optional[List[str]] = None,
|
||||
namespace_client_impl: Optional[str] = None,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
Initialize a namespace-based LanceDB connection.
|
||||
@@ -406,12 +408,43 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
namespace.create_table() instead of using declare_table + local write.
|
||||
|
||||
Default is None (no pushdown, all operations run locally).
|
||||
namespace_client_impl : Optional[str]
|
||||
The namespace implementation name used to create this connection.
|
||||
Stored for serialization purposes.
|
||||
namespace_client_properties : Optional[Dict[str, str]]
|
||||
The namespace properties used to create this connection.
|
||||
Stored for serialization purposes.
|
||||
"""
|
||||
self._namespace_client = namespace_client
|
||||
self.read_consistency_interval = read_consistency_interval
|
||||
self.storage_options = storage_options or {}
|
||||
self.session = session
|
||||
self._pushdown_operations = set(namespace_client_pushdown_operations or [])
|
||||
self._namespace_client_pushdown_operations = set(
|
||||
namespace_client_pushdown_operations or []
|
||||
)
|
||||
self._namespace_client_impl = namespace_client_impl
|
||||
self._namespace_client_properties = namespace_client_properties
|
||||
|
||||
@override
|
||||
def serialize(self) -> str:
|
||||
import json
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"connection_type": "namespace",
|
||||
"namespace_client_impl": self._namespace_client_impl,
|
||||
"namespace_client_properties": self._namespace_client_properties,
|
||||
"namespace_client_pushdown_operations": sorted(
|
||||
self._namespace_client_pushdown_operations
|
||||
),
|
||||
"storage_options": self.storage_options or None,
|
||||
"read_consistency_interval_seconds": (
|
||||
self.read_consistency_interval.total_seconds()
|
||||
if self.read_consistency_interval
|
||||
else None
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
@override
|
||||
def table_names(
|
||||
@@ -467,7 +500,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
|
||||
table_id = namespace_path + [name]
|
||||
|
||||
if "CreateTable" in self._pushdown_operations:
|
||||
if "CreateTable" in self._namespace_client_pushdown_operations:
|
||||
return self._create_table_server_side(
|
||||
name=name,
|
||||
data=data,
|
||||
@@ -549,7 +582,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
storage_options=merged_storage_options,
|
||||
location=location,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
)
|
||||
|
||||
return tbl
|
||||
@@ -580,10 +613,13 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
fill_value=fill_value,
|
||||
)
|
||||
|
||||
merged = dict(self.storage_options or {})
|
||||
if storage_options:
|
||||
merged.update(storage_options)
|
||||
request = CreateTableRequest(
|
||||
id=table_id,
|
||||
mode=_normalize_create_table_mode(mode),
|
||||
properties=self.storage_options if self.storage_options else None,
|
||||
properties=merged or None,
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -887,7 +923,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
location=table_uri,
|
||||
namespace_client=namespace_client,
|
||||
managed_versioning=managed_versioning,
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
)
|
||||
|
||||
@override
|
||||
@@ -951,7 +987,9 @@ class AsyncLanceNamespaceDBConnection:
|
||||
self.read_consistency_interval = read_consistency_interval
|
||||
self.storage_options = storage_options or {}
|
||||
self.session = session
|
||||
self._pushdown_operations = set(namespace_client_pushdown_operations or [])
|
||||
self._namespace_client_pushdown_operations = set(
|
||||
namespace_client_pushdown_operations or []
|
||||
)
|
||||
|
||||
async def table_names(
|
||||
self,
|
||||
@@ -1006,7 +1044,7 @@ class AsyncLanceNamespaceDBConnection:
|
||||
|
||||
table_id = namespace_path + [name]
|
||||
|
||||
if "CreateTable" in self._pushdown_operations:
|
||||
if "CreateTable" in self._namespace_client_pushdown_operations:
|
||||
return await self._create_table_server_side(
|
||||
name=name,
|
||||
data=data,
|
||||
@@ -1086,7 +1124,7 @@ class AsyncLanceNamespaceDBConnection:
|
||||
storage_options=merged_storage_options,
|
||||
location=location,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
)
|
||||
|
||||
lance_table = await asyncio.to_thread(_create_table)
|
||||
@@ -1120,10 +1158,13 @@ class AsyncLanceNamespaceDBConnection:
|
||||
fill_value=fill_value,
|
||||
)
|
||||
|
||||
merged = dict(self.storage_options or {})
|
||||
if storage_options:
|
||||
merged.update(storage_options)
|
||||
request = CreateTableRequest(
|
||||
id=table_id,
|
||||
mode=_normalize_create_table_mode(mode),
|
||||
properties=self.storage_options if self.storage_options else None,
|
||||
properties=merged or None,
|
||||
)
|
||||
|
||||
self._namespace_client.create_table(request, arrow_ipc_bytes)
|
||||
@@ -1190,7 +1231,7 @@ class AsyncLanceNamespaceDBConnection:
|
||||
location=response.location,
|
||||
namespace_client=self._namespace_client,
|
||||
managed_versioning=managed_versioning,
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
)
|
||||
|
||||
lance_table = await asyncio.to_thread(_open_table)
|
||||
@@ -1472,6 +1513,8 @@ def connect_namespace(
|
||||
storage_options=storage_options,
|
||||
session=session,
|
||||
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
|
||||
namespace_client_impl=namespace_client_impl,
|
||||
namespace_client_properties=namespace_client_properties,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -284,9 +284,8 @@ class Permutations:
|
||||
self.permutation_table = permutation_table
|
||||
|
||||
if permutation_table.schema.metadata is not None:
|
||||
split_names = permutation_table.schema.metadata.get(
|
||||
b"split_names", None
|
||||
).decode("utf-8")
|
||||
raw = permutation_table.schema.metadata.get(b"split_names")
|
||||
split_names = raw.decode("utf-8") if raw is not None else None
|
||||
if split_names is not None:
|
||||
self.split_names = json.loads(split_names)
|
||||
self.split_dict = {
|
||||
@@ -460,9 +459,8 @@ class Permutation:
|
||||
f"Cannot create a permutation on split `{split}`"
|
||||
" because no split names are defined in the permutation table"
|
||||
)
|
||||
split_names = permutation_table.schema.metadata.get(
|
||||
b"split_names", None
|
||||
).decode("utf-8")
|
||||
raw = permutation_table.schema.metadata.get(b"split_names")
|
||||
split_names = raw.decode("utf-8") if raw is not None else None
|
||||
if split_names is None:
|
||||
raise ValueError(
|
||||
f"Cannot create a permutation on split `{split}`"
|
||||
|
||||
@@ -191,7 +191,7 @@ def _into_pyarrow_reader(
|
||||
f"Unknown data type {type(data)}. "
|
||||
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
|
||||
"pyarrow Table/RecordBatch, or Pydantic models. "
|
||||
"See https://lancedb.com/docs/tables/ for examples."
|
||||
"See https://docs.lancedb.com/tables/ for examples."
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -897,42 +897,22 @@ def test_bypass_vector_index_sync(tmp_db: lancedb.DBConnection):
|
||||
|
||||
|
||||
def test_local_namespace_operations(tmp_path):
|
||||
"""Test that local mode namespace operations behave as expected."""
|
||||
# Create a local database connection
|
||||
"""Test that local mode namespace operations work via directory namespace."""
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
# Test list_namespaces returns empty list for root namespace
|
||||
namespaces = db.list_namespaces().namespaces
|
||||
assert namespaces == []
|
||||
# Root namespace starts empty
|
||||
assert db.list_namespaces().namespaces == []
|
||||
|
||||
# Test list_namespaces with non-empty namespace raises NotImplementedError
|
||||
with pytest.raises(
|
||||
NotImplementedError,
|
||||
match="Namespace operations are not supported for listing database",
|
||||
):
|
||||
db.list_namespaces(namespace_path=["test"])
|
||||
# Create and list child namespace
|
||||
db.create_namespace(["child"])
|
||||
assert "child" in db.list_namespaces().namespaces
|
||||
|
||||
# List namespaces under child
|
||||
assert db.list_namespaces(namespace_path=["child"]).namespaces == []
|
||||
|
||||
def test_local_create_namespace_not_supported(tmp_path):
|
||||
"""Test that create_namespace is not supported in local mode."""
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
with pytest.raises(
|
||||
NotImplementedError,
|
||||
match="Namespace operations are not supported for listing database",
|
||||
):
|
||||
db.create_namespace(["test_namespace"])
|
||||
|
||||
|
||||
def test_local_drop_namespace_not_supported(tmp_path):
|
||||
"""Test that drop_namespace is not supported in local mode."""
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
with pytest.raises(
|
||||
NotImplementedError,
|
||||
match="Namespace operations are not supported for listing database",
|
||||
):
|
||||
db.drop_namespace(["test_namespace"])
|
||||
# Drop namespace
|
||||
db.drop_namespace(["child"])
|
||||
assert db.list_namespaces().namespaces == []
|
||||
|
||||
|
||||
def test_clone_table_latest_version(tmp_path):
|
||||
|
||||
@@ -681,7 +681,7 @@ class TestPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["QueryTable"],
|
||||
)
|
||||
assert "QueryTable" in db._pushdown_operations
|
||||
assert "QueryTable" in db._namespace_client_pushdown_operations
|
||||
|
||||
def test_create_table_pushdown_stored(self):
|
||||
"""Test that CreateTable pushdown is stored on sync connection."""
|
||||
@@ -690,7 +690,7 @@ class TestPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["CreateTable"],
|
||||
)
|
||||
assert "CreateTable" in db._pushdown_operations
|
||||
assert "CreateTable" in db._namespace_client_pushdown_operations
|
||||
|
||||
def test_both_pushdowns_stored(self):
|
||||
"""Test that both pushdown operations can be set together."""
|
||||
@@ -699,13 +699,13 @@ class TestPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["QueryTable", "CreateTable"],
|
||||
)
|
||||
assert "QueryTable" in db._pushdown_operations
|
||||
assert "CreateTable" in db._pushdown_operations
|
||||
assert "QueryTable" in db._namespace_client_pushdown_operations
|
||||
assert "CreateTable" in db._namespace_client_pushdown_operations
|
||||
|
||||
def test_pushdown_defaults_to_empty(self):
|
||||
"""Test that pushdown operations default to empty."""
|
||||
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||
assert len(db._pushdown_operations) == 0
|
||||
assert len(db._namespace_client_pushdown_operations) == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -727,7 +727,7 @@ class TestAsyncPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["QueryTable"],
|
||||
)
|
||||
assert "QueryTable" in db._pushdown_operations
|
||||
assert "QueryTable" in db._namespace_client_pushdown_operations
|
||||
|
||||
async def test_async_create_table_pushdown_stored(self):
|
||||
"""Test that CreateTable pushdown is stored on async connection."""
|
||||
@@ -736,9 +736,9 @@ class TestAsyncPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["CreateTable"],
|
||||
)
|
||||
assert "CreateTable" in db._pushdown_operations
|
||||
assert "CreateTable" in db._namespace_client_pushdown_operations
|
||||
|
||||
async def test_async_pushdown_defaults_to_empty(self):
|
||||
"""Test that pushdown operations default to empty on async connection."""
|
||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||
assert len(db._pushdown_operations) == 0
|
||||
assert len(db._namespace_client_pushdown_operations) == 0
|
||||
|
||||
@@ -522,6 +522,50 @@ def test_no_split_names(some_table: Table):
|
||||
assert permutations[1].num_rows == 500
|
||||
|
||||
|
||||
def test_permutations_metadata_without_split_names_key(mem_db: DBConnection):
|
||||
"""Regression: schema metadata present but missing split_names key must not crash.
|
||||
|
||||
Previously, `.get(b"split_names", None).decode()` was called unconditionally,
|
||||
so any permutation table whose metadata dict had other keys but no split_names
|
||||
raised AttributeError: 'NoneType' has no attribute 'decode'.
|
||||
"""
|
||||
base = mem_db.create_table("base_nosplit", pa.table({"x": range(10)}))
|
||||
|
||||
# Build a permutation-like table that carries some metadata but NOT split_names.
|
||||
raw = pa.table(
|
||||
{
|
||||
"row_id": pa.array(range(10), type=pa.uint64()),
|
||||
"split_id": pa.array([0] * 10, type=pa.uint32()),
|
||||
}
|
||||
).replace_schema_metadata({b"other_key": b"other_value"})
|
||||
perm_tbl = mem_db.create_table("perm_nosplit", raw)
|
||||
|
||||
permutations = Permutations(base, perm_tbl)
|
||||
assert permutations.split_names == []
|
||||
assert permutations.split_dict == {}
|
||||
|
||||
|
||||
def test_from_tables_string_split_missing_names_key(mem_db: DBConnection):
|
||||
"""Regression: from_tables() with a string split must raise ValueError, not
|
||||
AttributeError.
|
||||
|
||||
Previously, `.get(b"split_names", None).decode()` crashed with AttributeError
|
||||
when the metadata dict existed but had no split_names key.
|
||||
"""
|
||||
base = mem_db.create_table("base_strsplit", pa.table({"x": range(10)}))
|
||||
|
||||
raw = pa.table(
|
||||
{
|
||||
"row_id": pa.array(range(10), type=pa.uint64()),
|
||||
"split_id": pa.array([0] * 10, type=pa.uint32()),
|
||||
}
|
||||
).replace_schema_metadata({b"other_key": b"other_value"})
|
||||
perm_tbl = mem_db.create_table("perm_strsplit", raw)
|
||||
|
||||
with pytest.raises(ValueError, match="no split names are defined"):
|
||||
Permutation.from_tables(base, perm_tbl, split="train")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def some_perm_table(some_table: Table) -> Table:
|
||||
return (
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
[toolchain]
|
||||
channel = "1.91.0"
|
||||
channel = "1.94.0"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.28.0-beta.1"
|
||||
version = "0.28.0-beta.5"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
@@ -171,7 +171,7 @@ impl OpenTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
@@ -188,7 +188,7 @@ impl OpenTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -582,6 +582,14 @@ pub struct ConnectRequest {
|
||||
/// Database specific options
|
||||
pub options: HashMap<String, String>,
|
||||
|
||||
/// Extra properties for the equivalent namespace client.
|
||||
///
|
||||
/// For a local [`ListingDatabase`], these are merged into the backing
|
||||
/// `DirectoryNamespace` properties. This is useful for namespace-specific
|
||||
/// settings such as `table_version_tracking_enabled` that are distinct from
|
||||
/// storage options.
|
||||
pub namespace_client_properties: HashMap<String, String>,
|
||||
|
||||
/// The interval at which to check for updates from other processes.
|
||||
///
|
||||
/// If None, then consistency is not checked. For performance
|
||||
@@ -621,6 +629,7 @@ impl ConnectBuilder {
|
||||
client_config: Default::default(),
|
||||
read_consistency_interval: None,
|
||||
options: HashMap::new(),
|
||||
namespace_client_properties: HashMap::new(),
|
||||
session: None,
|
||||
},
|
||||
embedding_registry: None,
|
||||
@@ -676,11 +685,6 @@ impl ConnectBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the WAL host override for routing merge_insert requests
|
||||
/// to a separate WAL/ingest service.
|
||||
///
|
||||
/// This option is only used when connecting to LanceDB Cloud (db:// URIs)
|
||||
/// and will be ignored for other URIs.
|
||||
/// Set the database specific options
|
||||
///
|
||||
/// See [crate::database::listing::ListingDatabaseOptions] for the options available for
|
||||
@@ -743,7 +747,7 @@ impl ConnectBuilder {
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.request.options.insert(key.into(), value.into());
|
||||
self
|
||||
@@ -751,7 +755,7 @@ impl ConnectBuilder {
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -762,6 +766,31 @@ impl ConnectBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set an additional property for the equivalent namespace client.
|
||||
pub fn namespace_client_property(
|
||||
mut self,
|
||||
key: impl Into<String>,
|
||||
value: impl Into<String>,
|
||||
) -> Self {
|
||||
self.request
|
||||
.namespace_client_properties
|
||||
.insert(key.into(), value.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set multiple additional properties for the equivalent namespace client.
|
||||
pub fn namespace_client_properties(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
) -> Self {
|
||||
for (key, value) in pairs {
|
||||
self.request
|
||||
.namespace_client_properties
|
||||
.insert(key.into(), value.into());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// The interval at which to check for updates from other processes. This
|
||||
/// only affects LanceDB OSS.
|
||||
///
|
||||
@@ -898,6 +927,7 @@ pub struct ConnectNamespaceBuilder {
|
||||
ns_impl: String,
|
||||
properties: HashMap<String, String>,
|
||||
storage_options: HashMap<String, String>,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
@@ -910,6 +940,7 @@ impl ConnectNamespaceBuilder {
|
||||
ns_impl: ns_impl.to_string(),
|
||||
properties,
|
||||
storage_options: HashMap::new(),
|
||||
namespace_client_properties: HashMap::new(),
|
||||
read_consistency_interval: None,
|
||||
embedding_registry: None,
|
||||
session: None,
|
||||
@@ -919,7 +950,7 @@ impl ConnectNamespaceBuilder {
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.storage_options.insert(key.into(), value.into());
|
||||
self
|
||||
@@ -927,7 +958,7 @@ impl ConnectNamespaceBuilder {
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -938,6 +969,29 @@ impl ConnectNamespaceBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set an additional namespace client property.
|
||||
pub fn namespace_client_property(
|
||||
mut self,
|
||||
key: impl Into<String>,
|
||||
value: impl Into<String>,
|
||||
) -> Self {
|
||||
self.namespace_client_properties
|
||||
.insert(key.into(), value.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set multiple additional namespace client properties.
|
||||
pub fn namespace_client_properties(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
) -> Self {
|
||||
for (key, value) in pairs {
|
||||
self.namespace_client_properties
|
||||
.insert(key.into(), value.into());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// The interval at which to check for updates from other processes.
|
||||
///
|
||||
/// If left unset, consistency is not checked. For maximum read
|
||||
@@ -999,10 +1053,13 @@ impl ConnectNamespaceBuilder {
|
||||
pub async fn execute(self) -> Result<Connection> {
|
||||
use crate::database::namespace::LanceNamespaceDatabase;
|
||||
|
||||
let mut properties = self.properties;
|
||||
properties.extend(self.namespace_client_properties);
|
||||
|
||||
let internal = Arc::new(
|
||||
LanceNamespaceDatabase::connect(
|
||||
&self.ns_impl,
|
||||
self.properties,
|
||||
properties,
|
||||
self.storage_options,
|
||||
self.read_consistency_interval,
|
||||
self.session,
|
||||
@@ -1122,6 +1179,31 @@ mod tests {
|
||||
assert_eq!(db.uri(), relative_uri.to_str().unwrap().to_string());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_connect_with_namespace_client_properties() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
|
||||
let db = connect(uri)
|
||||
.namespace_client_property("table_version_tracking_enabled", "true")
|
||||
.namespace_client_property("manifest_enabled", "true")
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let (ns_impl, properties) = db.namespace_client_config().await.unwrap();
|
||||
assert_eq!(ns_impl, "dir");
|
||||
assert_eq!(properties.get("root"), Some(&uri.to_string()));
|
||||
assert_eq!(
|
||||
properties.get("table_version_tracking_enabled"),
|
||||
Some(&"true".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
properties.get("manifest_enabled"),
|
||||
Some(&"true".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_table_names() {
|
||||
let tc = new_test_connection().await.unwrap();
|
||||
|
||||
@@ -55,7 +55,7 @@ impl CreateTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
@@ -73,7 +73,7 @@ impl CreateTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
|
||||
@@ -20,6 +20,7 @@ use snafu::ResultExt;
|
||||
|
||||
use crate::connection::ConnectRequest;
|
||||
use crate::database::ReadConsistency;
|
||||
use crate::database::namespace::LanceNamespaceDatabase;
|
||||
use crate::error::{CreateDirSnafu, Error, Result};
|
||||
use crate::io::object_store::MirroringObjectStoreWrapper;
|
||||
use crate::table::NativeTable;
|
||||
@@ -73,7 +74,7 @@ pub struct ListingDatabaseOptions {
|
||||
/// These are used to create/list tables and they are inherited by all tables
|
||||
/// opened by this database.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub storage_options: HashMap<String, String>,
|
||||
}
|
||||
|
||||
@@ -185,7 +186,7 @@ impl ListingDatabaseOptionsBuilder {
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.options
|
||||
.storage_options
|
||||
@@ -195,7 +196,7 @@ impl ListingDatabaseOptionsBuilder {
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -255,6 +256,9 @@ pub struct ListingDatabase {
|
||||
|
||||
// Session for object stores and caching
|
||||
session: Arc<lance::session::Session>,
|
||||
|
||||
// Namespace-backed database for child namespace operations
|
||||
namespace_database: Arc<LanceNamespaceDatabase>,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ListingDatabase {
|
||||
@@ -281,6 +285,44 @@ const MIRRORED_STORE: &str = "mirroredStore";
|
||||
|
||||
/// A connection to LanceDB
|
||||
impl ListingDatabase {
|
||||
fn build_namespace_client_properties(
|
||||
uri: &str,
|
||||
storage_options: &HashMap<String, String>,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
) -> HashMap<String, String> {
|
||||
let mut properties = namespace_client_properties;
|
||||
properties.insert("root".to_string(), uri.to_string());
|
||||
for (key, value) in storage_options {
|
||||
properties.insert(format!("storage.{}", key), value.clone());
|
||||
}
|
||||
properties
|
||||
}
|
||||
|
||||
async fn connect_namespace_database(
|
||||
uri: &str,
|
||||
storage_options: HashMap<String, String>,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
session: Arc<lance::session::Session>,
|
||||
) -> Result<Arc<LanceNamespaceDatabase>> {
|
||||
let ns_properties = Self::build_namespace_client_properties(
|
||||
uri,
|
||||
&storage_options,
|
||||
namespace_client_properties,
|
||||
);
|
||||
Ok(Arc::new(
|
||||
LanceNamespaceDatabase::connect(
|
||||
"dir",
|
||||
ns_properties,
|
||||
storage_options,
|
||||
read_consistency_interval,
|
||||
Some(session),
|
||||
HashSet::new(),
|
||||
)
|
||||
.await?,
|
||||
))
|
||||
}
|
||||
|
||||
/// Connect to a listing database
|
||||
///
|
||||
/// The URI should be a path to a directory where the tables are stored.
|
||||
@@ -300,6 +342,7 @@ impl ListingDatabase {
|
||||
uri,
|
||||
request.read_consistency_interval,
|
||||
options.new_table_config,
|
||||
request.namespace_client_properties.clone(),
|
||||
request.session.clone(),
|
||||
)
|
||||
.await
|
||||
@@ -387,6 +430,15 @@ impl ListingDatabase {
|
||||
None => None,
|
||||
};
|
||||
|
||||
let namespace_database = Self::connect_namespace_database(
|
||||
&table_base_uri,
|
||||
options.storage_options.clone(),
|
||||
request.namespace_client_properties.clone(),
|
||||
request.read_consistency_interval,
|
||||
session.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(Self {
|
||||
uri: table_base_uri,
|
||||
query_string,
|
||||
@@ -398,6 +450,7 @@ impl ListingDatabase {
|
||||
storage_options_provider: None,
|
||||
new_table_config: options.new_table_config,
|
||||
session,
|
||||
namespace_database,
|
||||
})
|
||||
}
|
||||
Err(_) => {
|
||||
@@ -405,6 +458,7 @@ impl ListingDatabase {
|
||||
uri,
|
||||
request.read_consistency_interval,
|
||||
options.new_table_config,
|
||||
request.namespace_client_properties.clone(),
|
||||
request.session.clone(),
|
||||
)
|
||||
.await
|
||||
@@ -416,6 +470,7 @@ impl ListingDatabase {
|
||||
path: &str,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
new_table_config: NewTableConfig,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
) -> Result<Self> {
|
||||
let session = session.unwrap_or_else(|| Arc::new(lance::session::Session::default()));
|
||||
@@ -429,6 +484,15 @@ impl ListingDatabase {
|
||||
Self::try_create_dir(path).context(CreateDirSnafu { path })?;
|
||||
}
|
||||
|
||||
let namespace_database = Self::connect_namespace_database(
|
||||
path,
|
||||
HashMap::new(),
|
||||
namespace_client_properties,
|
||||
read_consistency_interval,
|
||||
session.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(Self {
|
||||
uri: path.to_string(),
|
||||
query_string: None,
|
||||
@@ -440,6 +504,7 @@ impl ListingDatabase {
|
||||
storage_options_provider: None,
|
||||
new_table_config,
|
||||
session,
|
||||
namespace_database,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -497,6 +562,10 @@ impl ListingDatabase {
|
||||
Ok(uri)
|
||||
}
|
||||
|
||||
fn namespace_database(&self) -> Arc<LanceNamespaceDatabase> {
|
||||
self.namespace_database.clone()
|
||||
}
|
||||
|
||||
async fn drop_tables(&self, names: Vec<String>) -> Result<()> {
|
||||
let object_store_params = ObjectStoreParams {
|
||||
storage_options_accessor: if self.storage_options.is_empty() {
|
||||
@@ -696,16 +765,7 @@ impl Database for ListingDatabase {
|
||||
&self,
|
||||
request: ListNamespacesRequest,
|
||||
) -> Result<ListNamespacesResponse> {
|
||||
if request.id.as_ref().map(|v| !v.is_empty()).unwrap_or(false) {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(ListNamespacesResponse {
|
||||
namespaces: Vec::new(),
|
||||
page_token: None,
|
||||
})
|
||||
self.namespace_database().list_namespaces(request).await
|
||||
}
|
||||
|
||||
fn uri(&self) -> &str {
|
||||
@@ -726,36 +786,26 @@ impl Database for ListingDatabase {
|
||||
|
||||
async fn create_namespace(
|
||||
&self,
|
||||
_request: CreateNamespaceRequest,
|
||||
request: CreateNamespaceRequest,
|
||||
) -> Result<CreateNamespaceResponse> {
|
||||
Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
})
|
||||
self.namespace_database().create_namespace(request).await
|
||||
}
|
||||
|
||||
async fn drop_namespace(
|
||||
&self,
|
||||
_request: DropNamespaceRequest,
|
||||
) -> Result<DropNamespaceResponse> {
|
||||
Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
})
|
||||
async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<DropNamespaceResponse> {
|
||||
self.namespace_database().drop_namespace(request).await
|
||||
}
|
||||
|
||||
async fn describe_namespace(
|
||||
&self,
|
||||
_request: DescribeNamespaceRequest,
|
||||
request: DescribeNamespaceRequest,
|
||||
) -> Result<DescribeNamespaceResponse> {
|
||||
Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
})
|
||||
self.namespace_database().describe_namespace(request).await
|
||||
}
|
||||
|
||||
#[allow(deprecated)]
|
||||
async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>> {
|
||||
if !request.namespace_path.is_empty() {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
|
||||
});
|
||||
return self.namespace_database().table_names(request).await;
|
||||
}
|
||||
let mut f = self
|
||||
.object_store
|
||||
@@ -788,9 +838,7 @@ impl Database for ListingDatabase {
|
||||
|
||||
async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
|
||||
if request.id.as_ref().map(|v| !v.is_empty()).unwrap_or(false) {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
|
||||
});
|
||||
return self.namespace_database().list_tables(request).await;
|
||||
}
|
||||
let mut f = self
|
||||
.object_store
|
||||
@@ -838,11 +886,8 @@ impl Database for ListingDatabase {
|
||||
}
|
||||
|
||||
async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
// When namespace is not empty, location must be provided
|
||||
if !request.namespace_path.is_empty() && request.location.is_none() {
|
||||
return Err(Error::InvalidInput {
|
||||
message: "Location must be provided when namespace is not empty".into(),
|
||||
});
|
||||
if !request.namespace_path.is_empty() {
|
||||
return self.namespace_database().create_table(request).await;
|
||||
}
|
||||
// Use provided location if available, otherwise derive from table name
|
||||
let table_uri = request
|
||||
@@ -959,11 +1004,8 @@ impl Database for ListingDatabase {
|
||||
}
|
||||
|
||||
async fn open_table(&self, mut request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
// When namespace is not empty, location must be provided
|
||||
if !request.namespace_path.is_empty() && request.location.is_none() {
|
||||
return Err(Error::InvalidInput {
|
||||
message: "Location must be provided when namespace is not empty".into(),
|
||||
});
|
||||
if !request.namespace_path.is_empty() {
|
||||
return self.namespace_database().open_table(request).await;
|
||||
}
|
||||
// Use provided location if available, otherwise derive from table name
|
||||
let table_uri = request
|
||||
@@ -1059,9 +1101,10 @@ impl Database for ListingDatabase {
|
||||
|
||||
async fn drop_table(&self, name: &str, namespace_path: &[String]) -> Result<()> {
|
||||
if !namespace_path.is_empty() {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace parameter is not supported for listing database.".into(),
|
||||
});
|
||||
return self
|
||||
.namespace_database()
|
||||
.drop_table(name, namespace_path)
|
||||
.await;
|
||||
}
|
||||
self.drop_tables(vec![name.to_string()]).await
|
||||
}
|
||||
@@ -1070,9 +1113,10 @@ impl Database for ListingDatabase {
|
||||
async fn drop_all_tables(&self, namespace_path: &[String]) -> Result<()> {
|
||||
// Check if namespace parameter is provided
|
||||
if !namespace_path.is_empty() {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace parameter is not supported for listing database.".into(),
|
||||
});
|
||||
return self
|
||||
.namespace_database()
|
||||
.drop_all_tables(namespace_path)
|
||||
.await;
|
||||
}
|
||||
let tables = self.table_names(TableNamesRequest::default()).await?;
|
||||
self.drop_tables(tables).await
|
||||
@@ -1083,30 +1127,11 @@ impl Database for ListingDatabase {
|
||||
}
|
||||
|
||||
async fn namespace_client(&self) -> Result<Arc<dyn lance_namespace::LanceNamespace>> {
|
||||
// Create a DirectoryNamespace pointing to the same root with the same storage options
|
||||
let mut builder = lance_namespace_impls::DirectoryNamespaceBuilder::new(&self.uri);
|
||||
|
||||
// Add storage options
|
||||
if !self.storage_options.is_empty() {
|
||||
builder = builder.storage_options(self.storage_options.clone());
|
||||
}
|
||||
|
||||
// Use the same session
|
||||
builder = builder.session(self.session.clone());
|
||||
|
||||
let namespace = builder.build().await.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to create namespace client: {}", e),
|
||||
})?;
|
||||
Ok(Arc::new(namespace) as Arc<dyn lance_namespace::LanceNamespace>)
|
||||
self.namespace_database.namespace_client().await
|
||||
}
|
||||
|
||||
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)> {
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), self.uri.clone());
|
||||
for (key, value) in &self.storage_options {
|
||||
properties.insert(format!("storage.{}", key), value.clone());
|
||||
}
|
||||
Ok(("dir".to_string(), properties))
|
||||
self.namespace_database.namespace_client_config().await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1132,6 +1157,7 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options: Default::default(),
|
||||
namespace_client_properties: Default::default(),
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -1265,6 +1291,7 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options: options.clone(),
|
||||
namespace_client_properties: Default::default(),
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -1799,6 +1826,7 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
namespace_client_properties: Default::default(),
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -1904,6 +1932,7 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
namespace_client_properties: Default::default(),
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -1975,6 +2004,7 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
namespace_client_properties: Default::default(),
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -2108,4 +2138,208 @@ mod tests {
|
||||
assert!(tables.contains(&"table1".to_string()));
|
||||
assert!(tables.contains(&"table2".to_string()));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_listing_database_namespace_operations() {
|
||||
let (_tempdir, db) = setup_database().await;
|
||||
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["parent".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["parent".to_string(), "child".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let root_namespaces = db
|
||||
.list_namespaces(ListNamespacesRequest {
|
||||
id: Some(vec![]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(root_namespaces.namespaces.contains(&"parent".to_string()));
|
||||
|
||||
let child_namespaces = db
|
||||
.list_namespaces(ListNamespacesRequest {
|
||||
id: Some(vec!["parent".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(child_namespaces.namespaces.contains(&"child".to_string()));
|
||||
|
||||
db.describe_namespace(DescribeNamespaceRequest {
|
||||
id: Some(vec!["parent".to_string(), "child".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_listing_database_with_namespace_client_properties() {
|
||||
let tempdir = tempdir().unwrap();
|
||||
let uri = tempdir.path().to_str().unwrap();
|
||||
|
||||
let mut namespace_client_properties = HashMap::new();
|
||||
namespace_client_properties.insert(
|
||||
"table_version_tracking_enabled".to_string(),
|
||||
"true".to_string(),
|
||||
);
|
||||
namespace_client_properties.insert("manifest_enabled".to_string(), "true".to_string());
|
||||
|
||||
let request = ConnectRequest {
|
||||
uri: uri.to_string(),
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options: Default::default(),
|
||||
namespace_client_properties,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
|
||||
let db = ListingDatabase::connect_with_options(&request)
|
||||
.await
|
||||
.unwrap();
|
||||
let namespace_path = vec!["test_ns".to_string()];
|
||||
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(namespace_path.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("name", DataType::Utf8, false),
|
||||
]));
|
||||
|
||||
db.create_table(CreateTableRequest {
|
||||
name: "managed_table".to_string(),
|
||||
namespace_path: namespace_path.clone(),
|
||||
data: Box::new(RecordBatch::new_empty(schema)) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let namespace_client = db.namespace_client().await.unwrap();
|
||||
let describe = namespace_client
|
||||
.describe_table(lance_namespace::models::DescribeTableRequest {
|
||||
id: Some(vec!["test_ns".to_string(), "managed_table".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(describe.managed_versioning, Some(true));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_listing_database_nested_namespace_table_ops() {
|
||||
let (_tempdir, db) = setup_database().await;
|
||||
let namespace_path = vec!["parent".to_string(), "child".to_string()];
|
||||
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["parent".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(namespace_path.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("name", DataType::Utf8, false),
|
||||
]));
|
||||
|
||||
db.create_table(CreateTableRequest {
|
||||
name: "nested_table".to_string(),
|
||||
namespace_path: namespace_path.clone(),
|
||||
data: Box::new(RecordBatch::new_empty(schema)) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let namespace_client = db.namespace_client().await.unwrap();
|
||||
let describe = namespace_client
|
||||
.describe_table(lance_namespace::models::DescribeTableRequest {
|
||||
id: Some(vec![
|
||||
"parent".to_string(),
|
||||
"child".to_string(),
|
||||
"nested_table".to_string(),
|
||||
]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(describe.location.is_some());
|
||||
|
||||
let table = db
|
||||
.open_table(OpenTableRequest {
|
||||
name: "nested_table".to_string(),
|
||||
namespace_path: namespace_path.clone(),
|
||||
index_cache_size: None,
|
||||
lance_read_params: None,
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
managed_versioning: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(table.name(), "nested_table");
|
||||
|
||||
#[allow(deprecated)]
|
||||
let table_names = db
|
||||
.table_names(TableNamesRequest {
|
||||
namespace_path: namespace_path.clone(),
|
||||
start_after: None,
|
||||
limit: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(table_names, vec!["nested_table".to_string()]);
|
||||
|
||||
let list_tables = db
|
||||
.list_tables(ListTablesRequest {
|
||||
id: Some(namespace_path.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(list_tables.tables, vec!["nested_table".to_string()]);
|
||||
|
||||
db.drop_table("nested_table", &namespace_path)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let post_drop = db
|
||||
.list_tables(ListTablesRequest {
|
||||
id: Some(namespace_path),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(post_drop.tables.is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -450,6 +450,47 @@ mod tests {
|
||||
));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_connection_with_namespace_client_properties() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let conn = connect_namespace("dir", properties)
|
||||
.namespace_client_property("table_version_tracking_enabled", "true")
|
||||
.namespace_client_property("manifest_enabled", "true")
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to connect to namespace");
|
||||
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.expect("Failed to create namespace");
|
||||
|
||||
let test_data = create_test_data();
|
||||
conn.create_table("test_table", test_data)
|
||||
.namespace(vec!["test_ns".into()])
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create table");
|
||||
|
||||
let namespace_client = conn.namespace_client().await.unwrap();
|
||||
let describe = namespace_client
|
||||
.describe_table(DescribeTableRequest {
|
||||
id: Some(vec!["test_ns".into(), "test_table".into()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.expect("Failed to describe table");
|
||||
|
||||
assert_eq!(describe.managed_versioning, Some(true));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_create_table_basic() {
|
||||
// Setup: Create a temporary directory for the namespace
|
||||
|
||||
@@ -177,6 +177,7 @@ impl BedrockEmbeddingFunction {
|
||||
))
|
||||
.send()
|
||||
.await
|
||||
.map_err(Box::new)
|
||||
})
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@@ -69,7 +69,7 @@
|
||||
//! It treats [`FixedSizeList<Float16/Float32>`](https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeListArray.html)
|
||||
//! columns as vector columns.
|
||||
//!
|
||||
//! For more details, please refer to the [LanceDB documentation](https://lancedb.com/docs).
|
||||
//! For more details, please refer to the [LanceDB documentation](https://docs.lancedb.com).
|
||||
//!
|
||||
//! #### Create a table
|
||||
//!
|
||||
|
||||
@@ -527,12 +527,6 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
self.add_id_delimiter_query_param(builder)
|
||||
}
|
||||
|
||||
pub fn post_wal(&self, uri: &str) -> RequestBuilder {
|
||||
let full_uri = format!("{}{}", self.host, uri);
|
||||
let builder = self.client.post(full_uri).header("x-use-wal", "true");
|
||||
self.add_id_delimiter_query_param(builder)
|
||||
}
|
||||
|
||||
fn add_id_delimiter_query_param(&self, req: RequestBuilder) -> RequestBuilder {
|
||||
if self.id_delimiter != "$" {
|
||||
req.query(&[("delimiter", self.id_delimiter.clone())])
|
||||
@@ -1036,7 +1030,6 @@ mod tests {
|
||||
let client = RestfulLanceDbClient {
|
||||
client: reqwest::Client::new(),
|
||||
host: "https://example.com".to_string(),
|
||||
|
||||
retry_config: RetryConfig::default().try_into().unwrap(),
|
||||
sender: Sender,
|
||||
id_delimiter: "+".to_string(),
|
||||
@@ -1072,7 +1065,6 @@ mod tests {
|
||||
let client = RestfulLanceDbClient {
|
||||
client: reqwest::Client::new(),
|
||||
host: "https://example.com".to_string(),
|
||||
|
||||
retry_config: RetryConfig::default().try_into().unwrap(),
|
||||
sender: Sender,
|
||||
id_delimiter: "+".to_string(),
|
||||
@@ -1110,7 +1102,6 @@ mod tests {
|
||||
let client = RestfulLanceDbClient {
|
||||
client: reqwest::Client::new(),
|
||||
host: "https://example.com".to_string(),
|
||||
|
||||
retry_config: RetryConfig::default().try_into().unwrap(),
|
||||
sender: Sender,
|
||||
id_delimiter: "+".to_string(),
|
||||
|
||||
@@ -97,7 +97,7 @@ pub struct RemoteDatabaseOptions {
|
||||
pub host_override: Option<String>,
|
||||
/// Storage options configure the storage layer (e.g. S3, GCS, Azure, etc.)
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
///
|
||||
/// These options are only used for LanceDB Enterprise and only a subset of options
|
||||
/// are supported.
|
||||
@@ -185,7 +185,6 @@ impl RemoteDatabaseOptionsBuilder {
|
||||
self.options.host_override = Some(host_override);
|
||||
self
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
||||
@@ -1610,17 +1610,13 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
self.check_mutable().await?;
|
||||
|
||||
let timeout = params.timeout;
|
||||
let use_wal = params.use_wal;
|
||||
|
||||
let query = MergeInsertRequest::try_from(params)?;
|
||||
let path = format!("/v1/table/{}/merge_insert/", self.identifier);
|
||||
let mut request = if use_wal {
|
||||
self.client.post_wal(&path)
|
||||
} else {
|
||||
self.client.post(&path)
|
||||
}
|
||||
.query(&query)
|
||||
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
|
||||
let mut request = self
|
||||
.client
|
||||
.post(&format!("/v1/table/{}/merge_insert/", self.identifier))
|
||||
.query(&query)
|
||||
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
|
||||
|
||||
if let Some(timeout) = timeout {
|
||||
// (If it doesn't fit into u64, it's not worth sending anyways.)
|
||||
@@ -2709,43 +2705,6 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_merge_insert_use_wal() {
|
||||
let batch = RecordBatch::try_new(
|
||||
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
let data: Box<dyn RecordBatchReader + Send> = Box::new(RecordBatchIterator::new(
|
||||
[Ok(batch.clone())],
|
||||
batch.schema(),
|
||||
));
|
||||
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
if request.url().path() == "/v1/table/my_table/merge_insert/" {
|
||||
// Verify the x-use-wal header is set for router-based WAL routing
|
||||
assert_eq!(
|
||||
request.headers().get("x-use-wal").unwrap(),
|
||||
"true",
|
||||
"merge_insert with use_wal should set x-use-wal header"
|
||||
);
|
||||
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(r#"{"version": 1, "num_deleted_rows": 0, "num_inserted_rows": 3, "num_updated_rows": 0}"#)
|
||||
.unwrap()
|
||||
} else {
|
||||
panic!("Unexpected request path: {}", request.url().path());
|
||||
}
|
||||
});
|
||||
|
||||
let mut builder = table.merge_insert(&["some_col"]);
|
||||
builder.use_wal(true);
|
||||
let result = builder.execute(data).await.unwrap();
|
||||
|
||||
assert_eq!(result.num_inserted_rows, 3);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_merge_insert_retries_on_409() {
|
||||
let batch = RecordBatch::try_new(
|
||||
|
||||
@@ -55,7 +55,6 @@ pub struct MergeInsertBuilder {
|
||||
pub(crate) when_not_matched_by_source_delete_filt: Option<String>,
|
||||
pub(crate) timeout: Option<Duration>,
|
||||
pub(crate) use_index: bool,
|
||||
pub(crate) use_wal: bool,
|
||||
}
|
||||
|
||||
impl MergeInsertBuilder {
|
||||
@@ -70,7 +69,6 @@ impl MergeInsertBuilder {
|
||||
when_not_matched_by_source_delete_filt: None,
|
||||
timeout: None,
|
||||
use_index: true,
|
||||
use_wal: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -150,18 +148,6 @@ impl MergeInsertBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Controls whether to route the merge insert operation through the WAL.
|
||||
///
|
||||
/// When set to `true`, the request includes an `x-use-wal: true` header,
|
||||
/// which the router uses to forward the operation to wal-writer instances
|
||||
/// instead of Phalanx.
|
||||
///
|
||||
/// Defaults to `false`.
|
||||
pub fn use_wal(&mut self, use_wal: bool) -> &mut Self {
|
||||
self.use_wal = use_wal;
|
||||
self
|
||||
}
|
||||
|
||||
/// Executes the merge insert operation
|
||||
///
|
||||
/// Returns version and statistics about the merge operation including the number of rows
|
||||
|
||||
Reference in New Issue
Block a user