mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-23 15:00:39 +00:00
Compare commits
1 Commits
codex/upda
...
codex/upda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d1d7151824 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.24.0"
|
||||
current_version = "0.23.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -75,13 +75,6 @@ jobs:
|
||||
VERSION="${VERSION#v}"
|
||||
BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
|
||||
|
||||
# Use "chore" for beta/rc versions, "feat" for stable releases
|
||||
if [[ "${VERSION}" == *beta* ]] || [[ "${VERSION}" == *rc* ]]; then
|
||||
COMMIT_TYPE="chore"
|
||||
else
|
||||
COMMIT_TYPE="feat"
|
||||
fi
|
||||
|
||||
cat <<EOF >/tmp/codex-prompt.txt
|
||||
You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.
|
||||
|
||||
@@ -91,10 +84,10 @@ jobs:
|
||||
3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
|
||||
4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
|
||||
5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
|
||||
6. Stage all relevant files with "git add -A". Commit using the message "${COMMIT_TYPE}: update lance dependency to v${VERSION}".
|
||||
6. Stage all relevant files with "git add -A". Commit using the message "chore: update lance dependency to v${VERSION}".
|
||||
7. Push the branch to origin. If the branch already exists, force-push your changes.
|
||||
8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
|
||||
9. Create a pull request targeting "main" with title "${COMMIT_TYPE}: update lance dependency to v${VERSION}". First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'EOF' > /tmp/pr-body.md). The body should summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). Then run "gh pr create --body-file /tmp/pr-body.md".
|
||||
9. Create a pull request targeting "main" with title "chore: update lance dependency to v${VERSION}". In the body, summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}).
|
||||
10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
|
||||
|
||||
Constraints:
|
||||
|
||||
664
Cargo.lock
generated
664
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
58
Cargo.toml
58
Cargo.toml
@@ -15,37 +15,37 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.78.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=2.0.0-rc.2", default-features = false, "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=2.0.0-rc.2", "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=2.0.0-rc.2", "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=2.0.0-rc.2", "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=2.0.0-rc.2", default-features = false, "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=2.0.0-rc.2", "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=2.0.0-rc.2", "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=2.0.0-rc.2", "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=2.0.0-rc.2", default-features = false, "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=2.0.0-rc.2", "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=2.0.0-rc.2", "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=2.0.0-rc.2", "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=2.0.0-rc.2", "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=2.0.0-rc.2", "tag" = "v2.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=1.0.3-rc.1", default-features = false, "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=1.0.3-rc.1", default-features = false, "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=1.0.3-rc.1", default-features = false, "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=1.0.3-rc.1", "tag" = "v1.0.3-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "57.2", optional = false }
|
||||
arrow-array = "57.2"
|
||||
arrow-data = "57.2"
|
||||
arrow-ipc = "57.2"
|
||||
arrow-ord = "57.2"
|
||||
arrow-schema = "57.2"
|
||||
arrow-select = "57.2"
|
||||
arrow-cast = "57.2"
|
||||
arrow = { version = "56.1", optional = false }
|
||||
arrow-array = "56.1"
|
||||
arrow-data = "56.1"
|
||||
arrow-ipc = "56.1"
|
||||
arrow-ord = "56.1"
|
||||
arrow-schema = "56.1"
|
||||
arrow-select = "56.1"
|
||||
arrow-cast = "56.1"
|
||||
async-trait = "0"
|
||||
datafusion = { version = "51.0", default-features = false }
|
||||
datafusion-catalog = "51.0"
|
||||
datafusion-common = { version = "51.0", default-features = false }
|
||||
datafusion-execution = "51.0"
|
||||
datafusion-expr = "51.0"
|
||||
datafusion-physical-plan = "51.0"
|
||||
datafusion = { version = "50.0.0", default-features = false }
|
||||
datafusion-catalog = "50.0.0"
|
||||
datafusion-common = { version = "50.0.0", default-features = false }
|
||||
datafusion-execution = "50.0.0"
|
||||
datafusion-expr = "50.0.0"
|
||||
datafusion-physical-plan = "50.0.0"
|
||||
env_logger = "0.11"
|
||||
half = { "version" = "2.7.1", default-features = false, features = [
|
||||
"num-traits",
|
||||
@@ -59,7 +59,7 @@ rand = "0.9"
|
||||
snafu = "0.8"
|
||||
url = "2"
|
||||
num-traits = "0.2"
|
||||
regex = "1.10"
|
||||
regex = "1.12"
|
||||
lazy_static = "1"
|
||||
semver = "1.0.25"
|
||||
chrono = "0.4"
|
||||
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.24.0</version>
|
||||
<version>0.23.1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.24.0-final.0</version>
|
||||
<version>0.23.1-final.0</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.24.0-final.0</version>
|
||||
<version>0.23.1-final.0</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.24.0"
|
||||
version = "0.23.1"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -1520,9 +1520,9 @@ describe("when optimizing a dataset", () => {
|
||||
|
||||
it("delete unverified", async () => {
|
||||
const version = await table.version();
|
||||
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${String(
|
||||
18446744073709551615n - (BigInt(version) - 1n),
|
||||
).padStart(20, "0")}.manifest`;
|
||||
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
|
||||
version - 1
|
||||
}.manifest`;
|
||||
fs.rmSync(versionFile);
|
||||
|
||||
let stats = await table.optimize({ deleteUnverified: false });
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.24.0",
|
||||
"version": "0.23.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.24.0",
|
||||
"version": "0.23.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.24.0",
|
||||
"version": "0.23.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.24.0",
|
||||
"version": "0.23.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.24.0",
|
||||
"version": "0.23.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.24.0",
|
||||
"version": "0.23.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.24.0",
|
||||
"version": "0.23.1",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.24.0",
|
||||
"version": "0.23.1",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.24.0",
|
||||
"version": "0.23.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.24.0",
|
||||
"version": "0.23.1",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.24.0",
|
||||
"version": "0.23.1",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.27.0"
|
||||
current_version = "0.26.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.27.0"
|
||||
version = "0.26.1"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
@@ -14,15 +14,15 @@ name = "_lancedb"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
arrow = { version = "57.2", features = ["pyarrow"] }
|
||||
arrow = { version = "56.1", features = ["pyarrow"] }
|
||||
async-trait = "0.1"
|
||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||
lance-core.workspace = true
|
||||
lance-namespace.workspace = true
|
||||
lance-io.workspace = true
|
||||
env_logger.workspace = true
|
||||
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
|
||||
pyo3-async-runtimes = { version = "0.26", features = [
|
||||
pyo3 = { version = "0.25.1", features = ["extension-module", "abi3-py39"] }
|
||||
pyo3-async-runtimes = { version = "0.25.0", features = [
|
||||
"attributes",
|
||||
"tokio-runtime",
|
||||
] }
|
||||
@@ -32,7 +32,7 @@ snafu.workspace = true
|
||||
tokio = { version = "1.40", features = ["sync"] }
|
||||
|
||||
[build-dependencies]
|
||||
pyo3-build-config = { version = "0.26", features = [
|
||||
pyo3-build-config = { version = "0.25.0", features = [
|
||||
"extension-module",
|
||||
"abi3-py39",
|
||||
] }
|
||||
|
||||
@@ -2,27 +2,12 @@
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
from lancedb.db import AsyncConnection, DBConnection
|
||||
import lancedb
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
|
||||
def pandas_string_type():
|
||||
"""Return the PyArrow string type that pandas uses for string columns.
|
||||
|
||||
pandas 3.0+ uses large_string for string columns, pandas 2.x uses string.
|
||||
"""
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
|
||||
version = tuple(int(x) for x in pd.__version__.split(".")[:2])
|
||||
if version >= (3, 0):
|
||||
return pa.large_utf8()
|
||||
return pa.utf8()
|
||||
|
||||
|
||||
# Use an in-memory database for most tests.
|
||||
@pytest.fixture
|
||||
def mem_db() -> DBConnection:
|
||||
|
||||
@@ -268,8 +268,6 @@ async def test_create_table_from_iterator_async(mem_db_async: lancedb.AsyncConne
|
||||
|
||||
|
||||
def test_create_exist_ok(tmp_db: lancedb.DBConnection):
|
||||
from conftest import pandas_string_type
|
||||
|
||||
data = pd.DataFrame(
|
||||
{
|
||||
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
||||
@@ -288,11 +286,10 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
|
||||
assert tbl.schema == tbl2.schema
|
||||
assert len(tbl) == len(tbl2)
|
||||
|
||||
# pandas 3.0+ uses large_string, pandas 2.x uses string
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
|
||||
pa.field("item", pandas_string_type()),
|
||||
pa.field("item", pa.utf8()),
|
||||
pa.field("price", pa.float64()),
|
||||
]
|
||||
)
|
||||
@@ -302,7 +299,7 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
|
||||
bad_schema = pa.schema(
|
||||
[
|
||||
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
|
||||
pa.field("item", pandas_string_type()),
|
||||
pa.field("item", pa.utf8()),
|
||||
pa.field("price", pa.float64()),
|
||||
pa.field("extra", pa.float32()),
|
||||
]
|
||||
@@ -368,8 +365,6 @@ async def test_create_mode_async(tmp_db_async: lancedb.AsyncConnection):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
|
||||
from conftest import pandas_string_type
|
||||
|
||||
data = pd.DataFrame(
|
||||
{
|
||||
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
||||
@@ -387,11 +382,10 @@ async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
|
||||
assert tbl.name == tbl2.name
|
||||
assert await tbl.schema() == await tbl2.schema()
|
||||
|
||||
# pandas 3.0+ uses large_string, pandas 2.x uses string
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
|
||||
pa.field("item", pandas_string_type()),
|
||||
pa.field("item", pa.utf8()),
|
||||
pa.field("price", pa.float64()),
|
||||
]
|
||||
)
|
||||
@@ -601,8 +595,6 @@ def test_open_table_sync(tmp_db: lancedb.DBConnection):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_open_table(tmp_path):
|
||||
from conftest import pandas_string_type
|
||||
|
||||
db = await lancedb.connect_async(tmp_path)
|
||||
data = pd.DataFrame(
|
||||
{
|
||||
@@ -622,11 +614,10 @@ async def test_open_table(tmp_path):
|
||||
)
|
||||
is not None
|
||||
)
|
||||
# pandas 3.0+ uses large_string, pandas 2.x uses string
|
||||
assert await tbl.schema() == pa.schema(
|
||||
{
|
||||
"vector": pa.list_(pa.float32(), list_size=2),
|
||||
"item": pandas_string_type(),
|
||||
"item": pa.utf8(),
|
||||
"price": pa.float64(),
|
||||
}
|
||||
)
|
||||
|
||||
@@ -26,8 +26,6 @@ import pytest
|
||||
from lance_namespace import (
|
||||
CreateEmptyTableRequest,
|
||||
CreateEmptyTableResponse,
|
||||
DeclareTableRequest,
|
||||
DeclareTableResponse,
|
||||
DescribeTableRequest,
|
||||
DescribeTableResponse,
|
||||
LanceNamespace,
|
||||
@@ -162,19 +160,6 @@ class TrackingNamespace(LanceNamespace):
|
||||
|
||||
return modified
|
||||
|
||||
def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
|
||||
"""Track declare_table calls and inject rotating credentials."""
|
||||
with self.lock:
|
||||
self.create_call_count += 1
|
||||
count = self.create_call_count
|
||||
|
||||
response = self.inner.declare_table(request)
|
||||
response.storage_options = self._modify_storage_options(
|
||||
response.storage_options, count
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def create_empty_table(
|
||||
self, request: CreateEmptyTableRequest
|
||||
) -> CreateEmptyTableResponse:
|
||||
|
||||
@@ -601,6 +601,7 @@ def test_head():
|
||||
def test_query_sync_minimal():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 10,
|
||||
"prefilter": True,
|
||||
"refine_factor": None,
|
||||
@@ -684,6 +685,7 @@ def test_query_sync_maximal():
|
||||
def test_query_sync_nprobes():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 10,
|
||||
"prefilter": True,
|
||||
"fast_search": True,
|
||||
@@ -713,6 +715,7 @@ def test_query_sync_nprobes():
|
||||
def test_query_sync_no_max_nprobes():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 10,
|
||||
"prefilter": True,
|
||||
"fast_search": True,
|
||||
@@ -835,6 +838,7 @@ def test_query_sync_hybrid():
|
||||
else:
|
||||
# Vector query
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 42,
|
||||
"prefilter": True,
|
||||
"refine_factor": None,
|
||||
|
||||
@@ -1880,13 +1880,8 @@ async def test_optimize_delete_unverified(tmp_db_async: AsyncConnection, tmp_pat
|
||||
],
|
||||
)
|
||||
version = await table.version()
|
||||
assert version == 2
|
||||
|
||||
# By removing a manifest file, we make the data files we just inserted unverified
|
||||
version_name = 18446744073709551615 - (version - 1)
|
||||
path = tmp_path / "test.lance" / "_versions" / f"{version_name:020}.manifest"
|
||||
path = tmp_path / "test.lance" / "_versions" / f"{version - 1}.manifest"
|
||||
os.remove(path)
|
||||
|
||||
stats = await table.optimize(delete_unverified=False)
|
||||
assert stats.prune.old_versions_removed == 0
|
||||
stats = await table.optimize(
|
||||
|
||||
@@ -528,19 +528,12 @@ def test_sanitize_data(
|
||||
else:
|
||||
expected_schema = schema
|
||||
else:
|
||||
from conftest import pandas_string_type
|
||||
|
||||
# polars uses large_string, pandas 3.0+ uses large_string, others use string
|
||||
if isinstance(data, pl.DataFrame):
|
||||
text_type = pa.large_utf8()
|
||||
elif isinstance(data, pd.DataFrame):
|
||||
text_type = pandas_string_type()
|
||||
else:
|
||||
text_type = pa.string()
|
||||
expected_schema = pa.schema(
|
||||
{
|
||||
"id": pa.int64(),
|
||||
"text": text_type,
|
||||
"text": pa.large_utf8()
|
||||
if isinstance(data, pl.DataFrame)
|
||||
else pa.string(),
|
||||
"vector": pa.list_(pa.float32(), 10),
|
||||
}
|
||||
)
|
||||
|
||||
@@ -36,10 +36,7 @@ impl RecordBatchStream {
|
||||
impl RecordBatchStream {
|
||||
#[getter]
|
||||
pub fn schema(&self, py: Python) -> PyResult<Py<PyAny>> {
|
||||
(*self.schema)
|
||||
.clone()
|
||||
.into_pyarrow(py)
|
||||
.map(|obj| obj.unbind())
|
||||
(*self.schema).clone().into_pyarrow(py)
|
||||
}
|
||||
|
||||
pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> {
|
||||
@@ -55,12 +52,11 @@ impl RecordBatchStream {
|
||||
.next()
|
||||
.await
|
||||
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
|
||||
Python::attach(|py| {
|
||||
inner_next
|
||||
.infer_error()?
|
||||
.to_pyarrow(py)
|
||||
.map(|obj| obj.unbind())
|
||||
})
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
|
||||
inner_next.infer_error()?.to_pyarrow(py)
|
||||
})?;
|
||||
Ok(py_obj)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -307,7 +307,8 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.list_namespaces(request).await.infer_error()?;
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("namespaces", response.namespaces)?;
|
||||
dict.set_item("page_token", response.page_token)?;
|
||||
@@ -327,8 +328,7 @@ impl Connection {
|
||||
let py = self_.py();
|
||||
future_into_py(py, async move {
|
||||
use lance_namespace::models::CreateNamespaceRequest;
|
||||
// Mode is now a string field
|
||||
let mode_str = mode.and_then(|m| match m.to_lowercase().as_str() {
|
||||
let mode_enum = mode.and_then(|m| match m.to_lowercase().as_str() {
|
||||
"create" => Some("Create".to_string()),
|
||||
"exist_ok" => Some("ExistOk".to_string()),
|
||||
"overwrite" => Some("Overwrite".to_string()),
|
||||
@@ -340,12 +340,13 @@ impl Connection {
|
||||
} else {
|
||||
Some(namespace)
|
||||
},
|
||||
mode: mode_str,
|
||||
mode: mode_enum,
|
||||
properties,
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.create_namespace(request).await.infer_error()?;
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
Ok(dict.unbind())
|
||||
@@ -364,13 +365,12 @@ impl Connection {
|
||||
let py = self_.py();
|
||||
future_into_py(py, async move {
|
||||
use lance_namespace::models::DropNamespaceRequest;
|
||||
// Mode and Behavior are now string fields
|
||||
let mode_str = mode.and_then(|m| match m.to_uppercase().as_str() {
|
||||
let mode_enum = mode.and_then(|m| match m.to_uppercase().as_str() {
|
||||
"SKIP" => Some("Skip".to_string()),
|
||||
"FAIL" => Some("Fail".to_string()),
|
||||
_ => None,
|
||||
});
|
||||
let behavior_str = behavior.and_then(|b| match b.to_uppercase().as_str() {
|
||||
let behavior_enum = behavior.and_then(|b| match b.to_uppercase().as_str() {
|
||||
"RESTRICT" => Some("Restrict".to_string()),
|
||||
"CASCADE" => Some("Cascade".to_string()),
|
||||
_ => None,
|
||||
@@ -381,12 +381,13 @@ impl Connection {
|
||||
} else {
|
||||
Some(namespace)
|
||||
},
|
||||
mode: mode_str,
|
||||
behavior: behavior_str,
|
||||
mode: mode_enum,
|
||||
behavior: behavior_enum,
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.drop_namespace(request).await.infer_error()?;
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
dict.set_item("transaction_id", response.transaction_id)?;
|
||||
@@ -413,7 +414,8 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.describe_namespace(request).await.infer_error()?;
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
Ok(dict.unbind())
|
||||
@@ -443,7 +445,8 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.list_tables(request).await.infer_error()?;
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("tables", response.tables)?;
|
||||
dict.set_item("page_token", response.page_token)?;
|
||||
|
||||
@@ -40,31 +40,34 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
||||
request_id,
|
||||
source,
|
||||
status_code,
|
||||
} => Python::attach(|py| {
|
||||
let message = err.to_string();
|
||||
let http_err_cls = py
|
||||
.import(intern!(py, "lancedb.remote.errors"))?
|
||||
.getattr(intern!(py, "HttpError"))?;
|
||||
let err = http_err_cls.call1((
|
||||
message,
|
||||
request_id,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
))?;
|
||||
|
||||
if let Some(cause) = source.source() {
|
||||
// The HTTP error already includes the first cause. But
|
||||
// we can add the rest of the chain if there is any more.
|
||||
let cause_err = http_from_rust_error(
|
||||
py,
|
||||
cause,
|
||||
} => {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
let message = err.to_string();
|
||||
let http_err_cls = py
|
||||
.import(intern!(py, "lancedb.remote.errors"))?
|
||||
.getattr(intern!(py, "HttpError"))?;
|
||||
let err = http_err_cls.call1((
|
||||
message,
|
||||
request_id,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
)?;
|
||||
err.setattr(intern!(py, "__cause__"), cause_err)?;
|
||||
}
|
||||
))?;
|
||||
|
||||
Err(PyErr::from_value(err))
|
||||
}),
|
||||
if let Some(cause) = source.source() {
|
||||
// The HTTP error already includes the first cause. But
|
||||
// we can add the rest of the chain if there is any more.
|
||||
let cause_err = http_from_rust_error(
|
||||
py,
|
||||
cause,
|
||||
request_id,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
)?;
|
||||
err.setattr(intern!(py, "__cause__"), cause_err)?;
|
||||
}
|
||||
|
||||
Err(PyErr::from_value(err))
|
||||
})
|
||||
}
|
||||
LanceError::Retry {
|
||||
request_id,
|
||||
request_failures,
|
||||
@@ -75,33 +78,37 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
||||
max_read_failures,
|
||||
source,
|
||||
status_code,
|
||||
} => Python::attach(|py| {
|
||||
let cause_err = http_from_rust_error(
|
||||
py,
|
||||
source.as_ref(),
|
||||
request_id,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
)?;
|
||||
} =>
|
||||
{
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
let cause_err = http_from_rust_error(
|
||||
py,
|
||||
source.as_ref(),
|
||||
request_id,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
)?;
|
||||
|
||||
let message = err.to_string();
|
||||
let retry_error_cls = py
|
||||
.import(intern!(py, "lancedb.remote.errors"))?
|
||||
.getattr("RetryError")?;
|
||||
let err = retry_error_cls.call1((
|
||||
message,
|
||||
request_id,
|
||||
*request_failures,
|
||||
*connect_failures,
|
||||
*read_failures,
|
||||
*max_request_failures,
|
||||
*max_connect_failures,
|
||||
*max_read_failures,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
))?;
|
||||
let message = err.to_string();
|
||||
let retry_error_cls = py
|
||||
.import(intern!(py, "lancedb.remote.errors"))?
|
||||
.getattr("RetryError")?;
|
||||
let err = retry_error_cls.call1((
|
||||
message,
|
||||
request_id,
|
||||
*request_failures,
|
||||
*connect_failures,
|
||||
*read_failures,
|
||||
*max_request_failures,
|
||||
*max_connect_failures,
|
||||
*max_read_failures,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
))?;
|
||||
|
||||
err.setattr(intern!(py, "__cause__"), cause_err)?;
|
||||
Err(PyErr::from_value(err))
|
||||
}),
|
||||
err.setattr(intern!(py, "__cause__"), cause_err)?;
|
||||
Err(PyErr::from_value(err))
|
||||
})
|
||||
}
|
||||
_ => self.runtime_error(),
|
||||
},
|
||||
}
|
||||
|
||||
@@ -12,7 +12,8 @@ pub struct PyHeaderProvider {
|
||||
|
||||
impl Clone for PyHeaderProvider {
|
||||
fn clone(&self) -> Self {
|
||||
Python::attach(|py| Self {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| Self {
|
||||
provider: self.provider.clone_ref(py),
|
||||
})
|
||||
}
|
||||
@@ -25,7 +26,8 @@ impl PyHeaderProvider {
|
||||
|
||||
/// Get headers from the Python provider (internal implementation)
|
||||
fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
|
||||
Python::attach(|py| {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
// Call the get_headers method
|
||||
let result = self.provider.call_method0(py, "get_headers");
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ use pyo3::{
|
||||
exceptions::PyRuntimeError,
|
||||
pyclass, pymethods,
|
||||
types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
|
||||
Bound, PyAny, PyRef, PyRefMut, PyResult, Python,
|
||||
Bound, Py, PyAny, PyRef, PyRefMut, PyResult, Python,
|
||||
};
|
||||
use pyo3_async_runtimes::tokio::future_into_py;
|
||||
|
||||
@@ -281,7 +281,10 @@ impl PyPermutationReader {
|
||||
let reader = slf.reader.clone();
|
||||
future_into_py(slf.py(), async move {
|
||||
let schema = reader.output_schema(selection).await.infer_error()?;
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> =
|
||||
Python::with_gil(|py| -> PyResult<Py<PyAny>> { schema.to_pyarrow(py) })?;
|
||||
Ok(py_obj)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ use pyo3::types::PyList;
|
||||
use pyo3::types::{PyDict, PyString};
|
||||
use pyo3::Bound;
|
||||
use pyo3::IntoPyObject;
|
||||
use pyo3::Py;
|
||||
use pyo3::PyAny;
|
||||
use pyo3::PyRef;
|
||||
use pyo3::PyResult;
|
||||
@@ -453,7 +454,10 @@ impl Query {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> =
|
||||
Python::with_gil(|py| -> PyResult<Py<PyAny>> { schema.to_pyarrow(py) })?;
|
||||
Ok(py_obj)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -532,7 +536,10 @@ impl TakeQuery {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> =
|
||||
Python::with_gil(|py| -> PyResult<Py<PyAny>> { schema.to_pyarrow(py) })?;
|
||||
Ok(py_obj)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -627,7 +634,10 @@ impl FTSQuery {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> =
|
||||
Python::with_gil(|py| -> PyResult<Py<PyAny>> { schema.to_pyarrow(py) })?;
|
||||
Ok(py_obj)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -806,7 +816,10 @@ impl VectorQuery {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> =
|
||||
Python::with_gil(|py| -> PyResult<Py<PyAny>> { schema.to_pyarrow(py) })?;
|
||||
Ok(py_obj)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,8 @@ pub struct PyStorageOptionsProvider {
|
||||
|
||||
impl Clone for PyStorageOptionsProvider {
|
||||
fn clone(&self) -> Self {
|
||||
Python::attach(|py| Self {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| Self {
|
||||
inner: self.inner.clone_ref(py),
|
||||
})
|
||||
}
|
||||
@@ -30,14 +31,17 @@ impl Clone for PyStorageOptionsProvider {
|
||||
|
||||
impl PyStorageOptionsProvider {
|
||||
pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
|
||||
Python::attach(|py| {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
// Verify the object has a fetch_storage_options method
|
||||
if !obj.bind(py).hasattr("fetch_storage_options")? {
|
||||
return Err(pyo3::exceptions::PyTypeError::new_err(
|
||||
"StorageOptionsProvider must implement fetch_storage_options() method",
|
||||
));
|
||||
}
|
||||
Ok(Self { inner: obj })
|
||||
Ok(Self {
|
||||
inner: obj.clone_ref(py),
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -60,7 +64,8 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
||||
let py_provider = self.py_provider.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
Python::attach(|py| {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
// Call the Python fetch_storage_options method
|
||||
let result = py_provider
|
||||
.inner
|
||||
@@ -119,7 +124,8 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
||||
}
|
||||
|
||||
fn provider_id(&self) -> String {
|
||||
Python::attach(|py| {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
// Call provider_id() method on the Python object
|
||||
let obj = self.py_provider.inner.bind(py);
|
||||
obj.call_method0("provider_id")
|
||||
|
||||
@@ -21,7 +21,7 @@ use pyo3::{
|
||||
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
|
||||
pyclass, pymethods,
|
||||
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
|
||||
Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
|
||||
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
||||
};
|
||||
use pyo3_async_runtimes::tokio::future_into_py;
|
||||
|
||||
@@ -287,7 +287,10 @@ impl Table {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.schema().await.infer_error()?;
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> =
|
||||
Python::with_gil(|py| -> PyResult<Py<PyAny>> { schema.to_pyarrow(py) })?;
|
||||
Ok(py_obj)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -437,7 +440,8 @@ impl Table {
|
||||
future_into_py(self_.py(), async move {
|
||||
let stats = inner.index_stats(&index_name).await.infer_error()?;
|
||||
if let Some(stats) = stats {
|
||||
Python::attach(|py| {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
|
||||
dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
|
||||
@@ -467,7 +471,8 @@ impl Table {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let stats = inner.stats().await.infer_error()?;
|
||||
Python::attach(|py| {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("total_bytes", stats.total_bytes)?;
|
||||
dict.set_item("num_rows", stats.num_rows)?;
|
||||
@@ -521,7 +526,8 @@ impl Table {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let versions = inner.list_versions().await.infer_error()?;
|
||||
let versions_as_dict = Python::attach(|py| {
|
||||
#[allow(deprecated)]
|
||||
let versions_as_dict = Python::with_gil(|py| {
|
||||
versions
|
||||
.iter()
|
||||
.map(|v| {
|
||||
@@ -872,7 +878,8 @@ impl Tags {
|
||||
let tags = inner.tags().await.infer_error()?;
|
||||
let res = tags.list().await.infer_error()?;
|
||||
|
||||
Python::attach(|py| {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
let py_dict = PyDict::new(py);
|
||||
for (key, contents) in res {
|
||||
let value_dict = PyDict::new(py);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.24.0"
|
||||
version = "0.23.1"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
@@ -9,6 +9,7 @@ use std::sync::Arc;
|
||||
use arrow_array::RecordBatchReader;
|
||||
use arrow_schema::{Field, SchemaRef};
|
||||
use lance::dataset::ReadParams;
|
||||
use lance::io::ObjectStoreParams;
|
||||
use lance_namespace::models::{
|
||||
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
|
||||
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
|
||||
@@ -36,41 +37,20 @@ use crate::remote::{
|
||||
};
|
||||
use crate::table::{TableDefinition, WriteOptions};
|
||||
use crate::Table;
|
||||
use lance::io::ObjectStoreParams;
|
||||
pub use lance_encoding::version::LanceFileVersion;
|
||||
#[cfg(feature = "remote")]
|
||||
use lance_io::object_store::StorageOptions;
|
||||
use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
|
||||
|
||||
fn merge_storage_options(
|
||||
store_params: &mut ObjectStoreParams,
|
||||
pairs: impl IntoIterator<Item = (String, String)>,
|
||||
) {
|
||||
fn update_storage_options<F>(store_params: &mut ObjectStoreParams, update: F)
|
||||
where
|
||||
F: FnOnce(&mut HashMap<String, String>),
|
||||
{
|
||||
let mut options = store_params.storage_options().cloned().unwrap_or_default();
|
||||
for (key, value) in pairs {
|
||||
options.insert(key, value);
|
||||
}
|
||||
let provider = store_params
|
||||
.storage_options_accessor
|
||||
.as_ref()
|
||||
.and_then(|accessor| accessor.provider().cloned());
|
||||
let accessor = if let Some(provider) = provider {
|
||||
StorageOptionsAccessor::with_initial_and_provider(options, provider)
|
||||
} else {
|
||||
StorageOptionsAccessor::with_static_options(options)
|
||||
};
|
||||
store_params.storage_options_accessor = Some(Arc::new(accessor));
|
||||
}
|
||||
|
||||
fn set_storage_options_provider(
|
||||
store_params: &mut ObjectStoreParams,
|
||||
provider: Arc<dyn StorageOptionsProvider>,
|
||||
) {
|
||||
let accessor = match store_params.storage_options().cloned() {
|
||||
Some(options) => StorageOptionsAccessor::with_initial_and_provider(options, provider),
|
||||
None => StorageOptionsAccessor::with_provider(provider),
|
||||
};
|
||||
store_params.storage_options_accessor = Some(Arc::new(accessor));
|
||||
update(&mut options);
|
||||
store_params.storage_options_accessor = Some(Arc::new(
|
||||
StorageOptionsAccessor::with_static_options(options),
|
||||
));
|
||||
}
|
||||
|
||||
/// A builder for configuring a [`Connection::table_names`] operation
|
||||
@@ -285,7 +265,9 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
|
||||
.get_or_insert(Default::default())
|
||||
.store_params
|
||||
.get_or_insert(Default::default());
|
||||
merge_storage_options(store_params, [(key.into(), value.into())]);
|
||||
update_storage_options(store_params, |options| {
|
||||
options.insert(key.into(), value.into());
|
||||
});
|
||||
self
|
||||
}
|
||||
|
||||
@@ -306,10 +288,11 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
|
||||
.get_or_insert(Default::default())
|
||||
.store_params
|
||||
.get_or_insert(Default::default());
|
||||
let updates = pairs
|
||||
.into_iter()
|
||||
.map(|(key, value)| (key.into(), value.into()));
|
||||
merge_storage_options(store_params, updates);
|
||||
update_storage_options(store_params, |options| {
|
||||
for (key, value) in pairs {
|
||||
options.insert(key.into(), value.into());
|
||||
}
|
||||
});
|
||||
self
|
||||
}
|
||||
|
||||
@@ -353,15 +336,16 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
|
||||
.get_or_insert_with(Default::default)
|
||||
.store_params
|
||||
.get_or_insert_with(Default::default);
|
||||
let value = if use_v2_manifest_paths {
|
||||
"true".to_string()
|
||||
} else {
|
||||
"false".to_string()
|
||||
};
|
||||
merge_storage_options(
|
||||
store_params,
|
||||
[(OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(), value)],
|
||||
);
|
||||
update_storage_options(store_params, |options| {
|
||||
options.insert(
|
||||
OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(),
|
||||
if use_v2_manifest_paths {
|
||||
"true".to_string()
|
||||
} else {
|
||||
"false".to_string()
|
||||
},
|
||||
);
|
||||
});
|
||||
self
|
||||
}
|
||||
|
||||
@@ -377,13 +361,12 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
|
||||
.get_or_insert_with(Default::default)
|
||||
.store_params
|
||||
.get_or_insert_with(Default::default);
|
||||
merge_storage_options(
|
||||
store_params,
|
||||
[(
|
||||
update_storage_options(store_params, |options| {
|
||||
options.insert(
|
||||
OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
|
||||
data_storage_version.to_string(),
|
||||
)],
|
||||
);
|
||||
);
|
||||
});
|
||||
self
|
||||
}
|
||||
|
||||
@@ -414,7 +397,10 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
|
||||
.get_or_insert(Default::default())
|
||||
.store_params
|
||||
.get_or_insert(Default::default());
|
||||
set_storage_options_provider(store_params, provider);
|
||||
let initial = store_params.storage_options().cloned().unwrap_or_default();
|
||||
store_params.storage_options_accessor = Some(Arc::new(
|
||||
StorageOptionsAccessor::with_initial_and_provider(initial, provider),
|
||||
));
|
||||
self
|
||||
}
|
||||
}
|
||||
@@ -483,7 +469,9 @@ impl OpenTableBuilder {
|
||||
.get_or_insert(Default::default())
|
||||
.store_options
|
||||
.get_or_insert(Default::default());
|
||||
merge_storage_options(store_params, [(key.into(), value.into())]);
|
||||
update_storage_options(store_params, |options| {
|
||||
options.insert(key.into(), value.into());
|
||||
});
|
||||
self
|
||||
}
|
||||
|
||||
@@ -503,10 +491,11 @@ impl OpenTableBuilder {
|
||||
.get_or_insert(Default::default())
|
||||
.store_options
|
||||
.get_or_insert(Default::default());
|
||||
let updates = pairs
|
||||
.into_iter()
|
||||
.map(|(key, value)| (key.into(), value.into()));
|
||||
merge_storage_options(store_params, updates);
|
||||
update_storage_options(store_params, |options| {
|
||||
for (key, value) in pairs {
|
||||
options.insert(key.into(), value.into());
|
||||
}
|
||||
});
|
||||
self
|
||||
}
|
||||
|
||||
@@ -536,7 +525,10 @@ impl OpenTableBuilder {
|
||||
.get_or_insert(Default::default())
|
||||
.store_options
|
||||
.get_or_insert(Default::default());
|
||||
set_storage_options_provider(store_params, provider);
|
||||
let initial = store_params.storage_options().cloned().unwrap_or_default();
|
||||
store_params.storage_options_accessor = Some(Arc::new(
|
||||
StorageOptionsAccessor::with_initial_and_provider(initial, provider),
|
||||
));
|
||||
self
|
||||
}
|
||||
|
||||
@@ -892,10 +884,6 @@ pub struct ConnectBuilder {
|
||||
embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
|
||||
}
|
||||
|
||||
#[cfg(feature = "remote")]
|
||||
const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 1] =
|
||||
[("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name")];
|
||||
|
||||
impl ConnectBuilder {
|
||||
/// Create a new [`ConnectOptions`] with the given database URI.
|
||||
pub fn new(uri: &str) -> Self {
|
||||
@@ -1079,27 +1067,11 @@ impl ConnectBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
#[cfg(feature = "remote")]
|
||||
fn apply_env_defaults(
|
||||
env_var_to_remote_storage_option: &[(&str, &str)],
|
||||
options: &mut HashMap<String, String>,
|
||||
) {
|
||||
for (env_key, opt_key) in env_var_to_remote_storage_option {
|
||||
if let Ok(env_value) = std::env::var(env_key) {
|
||||
if !options.contains_key(*opt_key) {
|
||||
options.insert((*opt_key).to_string(), env_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "remote")]
|
||||
fn execute_remote(self) -> Result<Connection> {
|
||||
use crate::remote::db::RemoteDatabaseOptions;
|
||||
|
||||
let mut merged_options = self.request.options.clone();
|
||||
Self::apply_env_defaults(&ENV_VARS_TO_STORAGE_OPTS, &mut merged_options);
|
||||
let options = RemoteDatabaseOptions::parse_from_map(&merged_options)?;
|
||||
let options = RemoteDatabaseOptions::parse_from_map(&self.request.options)?;
|
||||
|
||||
let region = options.region.ok_or_else(|| Error::InvalidInput {
|
||||
message: "A region is required when connecting to LanceDb Cloud".to_string(),
|
||||
@@ -1321,6 +1293,8 @@ mod test_utils {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::fs::create_dir_all;
|
||||
|
||||
use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
|
||||
use crate::query::QueryBase;
|
||||
use crate::query::{ExecutableQuery, QueryExecutionOptions};
|
||||
@@ -1344,23 +1318,6 @@ mod tests {
|
||||
assert_eq!(tc.connection.uri(), tc.uri);
|
||||
}
|
||||
|
||||
#[cfg(feature = "remote")]
|
||||
#[test]
|
||||
fn test_apply_env_defaults() {
|
||||
let env_key = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_KEY";
|
||||
let env_val = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_VAL";
|
||||
let opts_key = "test_apply_env_defaults_environment_variable_opts_key";
|
||||
std::env::set_var(env_key, env_val);
|
||||
|
||||
let mut options = HashMap::new();
|
||||
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
|
||||
assert_eq!(Some(&env_val.to_string()), options.get(opts_key));
|
||||
|
||||
options.insert(opts_key.to_string(), "EXPLICIT-VALUE".to_string());
|
||||
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
|
||||
assert_eq!(Some(&"EXPLICIT-VALUE".to_string()), options.get(opts_key));
|
||||
}
|
||||
|
||||
#[cfg(not(windows))]
|
||||
#[tokio::test]
|
||||
async fn test_connect_relative() {
|
||||
@@ -1585,27 +1542,18 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn drop_table() {
|
||||
let tc = new_test_connection().await.unwrap();
|
||||
let db = tc.connection;
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
|
||||
if tc.is_remote {
|
||||
// All the typical endpoints such as s3:///, file-object-store:///, etc. treat drop_table
|
||||
// as idempotent.
|
||||
assert!(db.drop_table("invalid_table", &[]).await.is_ok());
|
||||
} else {
|
||||
// The behavior of drop_table when using a file:/// endpoint differs from all other
|
||||
// object providers, in that it returns an error when deleting a non-existent table.
|
||||
assert!(matches!(
|
||||
db.drop_table("invalid_table", &[]).await,
|
||||
Err(crate::Error::TableNotFound { .. }),
|
||||
));
|
||||
}
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let db = connect(uri).execute().await.unwrap();
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
|
||||
db.create_empty_table("table1", schema.clone())
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
// drop non-exist table
|
||||
assert!(matches!(
|
||||
db.drop_table("invalid_table", &[]).await,
|
||||
Err(crate::Error::TableNotFound { .. }),
|
||||
));
|
||||
|
||||
create_dir_all(tmp_dir.path().join("table1.lance")).unwrap();
|
||||
db.drop_table("table1", &[]).await.unwrap();
|
||||
|
||||
let tables = db.table_names().execute().await.unwrap();
|
||||
|
||||
@@ -356,13 +356,11 @@ impl ListingDatabase {
|
||||
.clone()
|
||||
.unwrap_or_else(|| Arc::new(lance::session::Session::default()));
|
||||
let os_params = ObjectStoreParams {
|
||||
storage_options_accessor: if options.storage_options.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Arc::new(StorageOptionsAccessor::with_static_options(
|
||||
storage_options_accessor: Some(Arc::new(
|
||||
StorageOptionsAccessor::with_static_options(
|
||||
options.storage_options.clone(),
|
||||
)))
|
||||
},
|
||||
),
|
||||
)),
|
||||
..Default::default()
|
||||
};
|
||||
let (object_store, base_path) = ObjectStore::from_uri_and_params(
|
||||
@@ -498,13 +496,9 @@ impl ListingDatabase {
|
||||
|
||||
async fn drop_tables(&self, names: Vec<String>) -> Result<()> {
|
||||
let object_store_params = ObjectStoreParams {
|
||||
storage_options_accessor: if self.storage_options.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Arc::new(StorageOptionsAccessor::with_static_options(
|
||||
self.storage_options.clone(),
|
||||
)))
|
||||
},
|
||||
storage_options_accessor: Some(Arc::new(StorageOptionsAccessor::with_static_options(
|
||||
self.storage_options.clone(),
|
||||
))),
|
||||
..Default::default()
|
||||
};
|
||||
let mut uri = self.uri.clone();
|
||||
@@ -604,20 +598,26 @@ impl ListingDatabase {
|
||||
// will cause a new connection to be created, and that connection will
|
||||
// be dropped from the cache when python GCs the table object, which
|
||||
// confounds reuse across tables.
|
||||
if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
|
||||
if !self.storage_options.is_empty() {
|
||||
let store_params = write_params
|
||||
.store_params
|
||||
.get_or_insert_with(Default::default);
|
||||
let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
|
||||
if !self.storage_options.is_empty() {
|
||||
self.inherit_storage_options(&mut storage_options);
|
||||
}
|
||||
let accessor = if let Some(ref provider) = self.storage_options_provider {
|
||||
StorageOptionsAccessor::with_initial_and_provider(storage_options, provider.clone())
|
||||
} else {
|
||||
StorageOptionsAccessor::with_static_options(storage_options)
|
||||
};
|
||||
store_params.storage_options_accessor = Some(Arc::new(accessor));
|
||||
self.inherit_storage_options(&mut storage_options);
|
||||
store_params.storage_options_accessor = Some(Arc::new(
|
||||
StorageOptionsAccessor::with_static_options(storage_options),
|
||||
));
|
||||
}
|
||||
|
||||
// Set storage options provider if available
|
||||
if let Some(provider) = self.storage_options_provider.clone() {
|
||||
let store_params = write_params
|
||||
.store_params
|
||||
.get_or_insert_with(Default::default);
|
||||
let initial = store_params.storage_options().cloned().unwrap_or_default();
|
||||
store_params.storage_options_accessor = Some(Arc::new(
|
||||
StorageOptionsAccessor::with_initial_and_provider(initial, provider),
|
||||
));
|
||||
}
|
||||
|
||||
write_params.data_storage_version = self
|
||||
@@ -903,13 +903,9 @@ impl Database for ListingDatabase {
|
||||
validate_table_name(&request.target_table_name)?;
|
||||
|
||||
let storage_params = ObjectStoreParams {
|
||||
storage_options_accessor: if self.storage_options.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Arc::new(StorageOptionsAccessor::with_static_options(
|
||||
self.storage_options.clone(),
|
||||
)))
|
||||
},
|
||||
storage_options_accessor: Some(Arc::new(StorageOptionsAccessor::with_static_options(
|
||||
self.storage_options.clone(),
|
||||
))),
|
||||
..Default::default()
|
||||
};
|
||||
let read_params = ReadParams {
|
||||
@@ -973,28 +969,30 @@ impl Database for ListingDatabase {
|
||||
// will cause a new connection to be created, and that connection will
|
||||
// be dropped from the cache when python GCs the table object, which
|
||||
// confounds reuse across tables.
|
||||
if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
|
||||
if !self.storage_options.is_empty() {
|
||||
let store_params = request
|
||||
.lance_read_params
|
||||
.get_or_insert_with(Default::default)
|
||||
.store_options
|
||||
.get_or_insert_with(Default::default);
|
||||
let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
|
||||
if !self.storage_options.is_empty() {
|
||||
self.inherit_storage_options(&mut storage_options);
|
||||
}
|
||||
// Preserve request-level provider if no connection-level provider exists
|
||||
let request_provider = store_params
|
||||
.storage_options_accessor
|
||||
.as_ref()
|
||||
.and_then(|a| a.provider().cloned());
|
||||
let provider = self.storage_options_provider.clone().or(request_provider);
|
||||
let accessor = if let Some(provider) = provider {
|
||||
StorageOptionsAccessor::with_initial_and_provider(storage_options, provider)
|
||||
} else {
|
||||
StorageOptionsAccessor::with_static_options(storage_options)
|
||||
};
|
||||
store_params.storage_options_accessor = Some(Arc::new(accessor));
|
||||
self.inherit_storage_options(&mut storage_options);
|
||||
store_params.storage_options_accessor = Some(Arc::new(
|
||||
StorageOptionsAccessor::with_static_options(storage_options),
|
||||
));
|
||||
}
|
||||
|
||||
// Set storage options provider if available
|
||||
if let Some(provider) = self.storage_options_provider.clone() {
|
||||
let store_params = request
|
||||
.lance_read_params
|
||||
.get_or_insert_with(Default::default)
|
||||
.store_options
|
||||
.get_or_insert_with(Default::default);
|
||||
let initial = store_params.storage_options().cloned().unwrap_or_default();
|
||||
store_params.storage_options_accessor = Some(Arc::new(
|
||||
StorageOptionsAccessor::with_initial_and_provider(initial, provider),
|
||||
));
|
||||
}
|
||||
|
||||
// Some ReadParams are exposed in the OpenTableBuilder, but we also
|
||||
|
||||
@@ -9,15 +9,14 @@ use std::sync::Arc;
|
||||
use async_trait::async_trait;
|
||||
use lance_namespace::{
|
||||
models::{
|
||||
CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
|
||||
DeclareTableRequest, DescribeNamespaceRequest, DescribeNamespaceResponse,
|
||||
DescribeTableRequest, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest,
|
||||
ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||
CreateNamespaceRequest, CreateNamespaceResponse, DeclareTableRequest,
|
||||
DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest,
|
||||
DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest,
|
||||
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||
},
|
||||
LanceNamespace,
|
||||
};
|
||||
use lance_namespace_impls::ConnectBuilder;
|
||||
use log::warn;
|
||||
|
||||
use crate::database::ReadConsistency;
|
||||
use crate::error::{Error, Result};
|
||||
@@ -155,6 +154,7 @@ impl Database for LanceNamespaceDatabase {
|
||||
table_id.push(request.name.clone());
|
||||
let describe_request = DescribeTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
version: None,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -205,53 +205,26 @@ impl Database for LanceNamespaceDatabase {
|
||||
let mut table_id = request.namespace.clone();
|
||||
table_id.push(request.name.clone());
|
||||
|
||||
// Try declare_table first, falling back to create_empty_table for backwards
|
||||
// compatibility with older namespace clients that don't support declare_table
|
||||
let declare_request = DeclareTableRequest {
|
||||
let create_empty_request = DeclareTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
location: None,
|
||||
vend_credentials: None,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let location = match self.namespace.declare_table(declare_request).await {
|
||||
Ok(response) => response.location.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from declare_table response".to_string(),
|
||||
})?,
|
||||
Err(e) => {
|
||||
// Check if the error is "not supported" and try create_empty_table as fallback
|
||||
let err_str = e.to_string().to_lowercase();
|
||||
if err_str.contains("not supported") || err_str.contains("not implemented") {
|
||||
warn!(
|
||||
"declare_table is not supported by the namespace client, \
|
||||
falling back to deprecated create_empty_table. \
|
||||
create_empty_table is deprecated and will be removed in Lance 3.0.0. \
|
||||
Please upgrade your namespace client to support declare_table."
|
||||
);
|
||||
#[allow(deprecated)]
|
||||
let create_empty_request = CreateEmptyTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
..Default::default()
|
||||
};
|
||||
let create_empty_response = self
|
||||
.namespace
|
||||
.declare_table(create_empty_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to declare table: {}", e),
|
||||
})?;
|
||||
|
||||
#[allow(deprecated)]
|
||||
let create_response = self
|
||||
.namespace
|
||||
.create_empty_table(create_empty_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to create empty table: {}", e),
|
||||
})?;
|
||||
|
||||
create_response.location.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from create_empty_table response"
|
||||
.to_string(),
|
||||
})?
|
||||
} else {
|
||||
return Err(Error::Runtime {
|
||||
message: format!("Failed to declare table: {}", e),
|
||||
});
|
||||
}
|
||||
}
|
||||
};
|
||||
let location = create_empty_response
|
||||
.location
|
||||
.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from create_empty_table response".to_string(),
|
||||
})?;
|
||||
|
||||
let native_table = NativeTable::create_from_namespace(
|
||||
self.namespace.clone(),
|
||||
@@ -466,6 +439,8 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -526,6 +501,8 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -589,6 +566,8 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -672,6 +651,8 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -727,6 +708,8 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -807,6 +790,8 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -840,6 +825,8 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
|
||||
@@ -51,19 +51,24 @@
|
||||
//! - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud object store
|
||||
//! - `db://dbname` - Lance Cloud
|
||||
//!
|
||||
//! You can also use [`ConnectBuilder`] to configure the connection to the database.
|
||||
//! You can also use [`ConnectOptions`] to configure the connection to the database.
|
||||
//!
|
||||
//! ```rust
|
||||
//! # #[cfg(feature = "aws")]
|
||||
//! # {
|
||||
//! use object_store::aws::AwsCredential;
|
||||
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
//! let db = lancedb::connect("data/sample-lancedb")
|
||||
//! .storage_options([
|
||||
//! ("aws_access_key_id", "some_key"),
|
||||
//! ("aws_secret_access_key", "some_secret"),
|
||||
//! ])
|
||||
//! .aws_creds(AwsCredential {
|
||||
//! key_id: "some_key".to_string(),
|
||||
//! secret_key: "some_secret".to_string(),
|
||||
//! token: None,
|
||||
//! })
|
||||
//! .execute()
|
||||
//! .await
|
||||
//! .unwrap();
|
||||
//! # });
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! LanceDB uses [arrow-rs](https://github.com/apache/arrow-rs) to define schema, data types and array itself.
|
||||
|
||||
@@ -1718,6 +1718,8 @@ mod tests {
|
||||
let namespace = vec!["test_ns".to_string()];
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(namespace.clone()),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -1743,6 +1745,8 @@ mod tests {
|
||||
let list_response = conn
|
||||
.list_tables(ListTablesRequest {
|
||||
id: Some(namespace.clone()),
|
||||
page_token: None,
|
||||
limit: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -1754,6 +1758,8 @@ mod tests {
|
||||
let list_response = namespace_client
|
||||
.list_tables(ListTablesRequest {
|
||||
id: Some(namespace.clone()),
|
||||
page_token: None,
|
||||
limit: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -1794,6 +1800,8 @@ mod tests {
|
||||
let namespace = vec!["multi_table_ns".to_string()];
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(namespace.clone()),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -1819,6 +1827,8 @@ mod tests {
|
||||
let list_response = conn
|
||||
.list_tables(ListTablesRequest {
|
||||
id: Some(namespace.clone()),
|
||||
page_token: None,
|
||||
limit: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
|
||||
@@ -468,9 +468,7 @@ impl<S: HttpSend> RemoteTable<S> {
|
||||
self.apply_query_params(&mut body, &query.base)?;
|
||||
|
||||
// Apply general parameters, before we dispatch based on number of query vectors.
|
||||
if let Some(distance_type) = query.distance_type {
|
||||
body["distance_type"] = serde_json::json!(distance_type);
|
||||
}
|
||||
body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
|
||||
// In 0.23.1 we migrated from `nprobes` to `minimum_nprobes` and `maximum_nprobes`.
|
||||
// Old client / new server: since minimum_nprobes is missing, fallback to nprobes
|
||||
// New client / old server: old server will only see nprobes, make sure to set both
|
||||
@@ -2232,6 +2230,7 @@ mod tests {
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
let mut expected_body = serde_json::json!({
|
||||
"prefilter": true,
|
||||
"distance_type": "l2",
|
||||
"nprobes": 20,
|
||||
"minimum_nprobes": 20,
|
||||
"maximum_nprobes": 20,
|
||||
|
||||
@@ -1425,9 +1425,7 @@ impl Table {
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let unioned = UnionExec::try_new(projected_plans).map_err(|err| Error::Runtime {
|
||||
message: err.to_string(),
|
||||
})?;
|
||||
let unioned = Arc::new(UnionExec::new(projected_plans));
|
||||
// We require 1 partition in the final output
|
||||
let repartitioned = RepartitionExec::try_new(
|
||||
unioned,
|
||||
@@ -1879,13 +1877,10 @@ impl NativeTable {
|
||||
let store_params = params
|
||||
.store_params
|
||||
.get_or_insert_with(ObjectStoreParams::default);
|
||||
let accessor = match store_params.storage_options().cloned() {
|
||||
Some(options) => {
|
||||
StorageOptionsAccessor::with_initial_and_provider(options, storage_options_provider)
|
||||
}
|
||||
None => StorageOptionsAccessor::with_provider(storage_options_provider),
|
||||
};
|
||||
store_params.storage_options_accessor = Some(Arc::new(accessor));
|
||||
let initial = store_params.storage_options().cloned().unwrap_or_default();
|
||||
store_params.storage_options_accessor = Some(Arc::new(
|
||||
StorageOptionsAccessor::with_initial_and_provider(initial, storage_options_provider),
|
||||
));
|
||||
|
||||
// Patch the params if we have a write store wrapper
|
||||
let params = match write_store_wrapper.clone() {
|
||||
@@ -2351,7 +2346,7 @@ impl NativeTable {
|
||||
};
|
||||
|
||||
// Convert select to columns list
|
||||
let columns = match &vq.base.select {
|
||||
let columns: Option<Box<QueryTableRequestColumns>> = match &vq.base.select {
|
||||
Select::All => None,
|
||||
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
|
||||
column_names: Some(cols.clone()),
|
||||
@@ -2409,6 +2404,7 @@ impl NativeTable {
|
||||
with_row_id: Some(vq.base.with_row_id),
|
||||
bypass_vector_index: Some(!vq.use_index),
|
||||
full_text_query,
|
||||
version: None,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
@@ -2427,7 +2423,7 @@ impl NativeTable {
|
||||
.map(|f| self.filter_to_sql(f))
|
||||
.transpose()?;
|
||||
|
||||
let columns = match &q.select {
|
||||
let columns: Option<Box<QueryTableRequestColumns>> = match &q.select {
|
||||
Select::All => None,
|
||||
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
|
||||
column_names: Some(cols.clone()),
|
||||
@@ -2471,10 +2467,18 @@ impl NativeTable {
|
||||
columns,
|
||||
prefilter: Some(q.prefilter),
|
||||
offset: q.offset.map(|o| o as i32),
|
||||
ef: None,
|
||||
refine_factor: None,
|
||||
distance_type: None,
|
||||
nprobes: None,
|
||||
vector_column: None, // No vector column for plain queries
|
||||
with_row_id: Some(q.with_row_id),
|
||||
bypass_vector_index: Some(true), // No vector index for plain queries
|
||||
full_text_query,
|
||||
version: None,
|
||||
fast_search: None,
|
||||
lower_bound: None,
|
||||
upper_bound: None,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
@@ -5147,16 +5151,15 @@ mod tests {
|
||||
let any_query = AnyQuery::VectorQuery(vq);
|
||||
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
|
||||
|
||||
let column_names = ns_request
|
||||
.columns
|
||||
.as_ref()
|
||||
.and_then(|cols| cols.column_names.clone());
|
||||
|
||||
assert_eq!(ns_request.k, 10);
|
||||
assert_eq!(ns_request.offset, Some(5));
|
||||
assert_eq!(ns_request.filter, Some("id > 0".to_string()));
|
||||
assert_eq!(
|
||||
ns_request
|
||||
.columns
|
||||
.as_ref()
|
||||
.and_then(|c| c.column_names.as_ref()),
|
||||
Some(&vec!["id".to_string()])
|
||||
);
|
||||
assert_eq!(column_names, Some(vec!["id".to_string()]));
|
||||
assert_eq!(ns_request.vector_column, Some("vector".to_string()));
|
||||
assert_eq!(ns_request.distance_type, Some("l2".to_string()));
|
||||
assert!(ns_request.vector.single_vector.is_some());
|
||||
@@ -5193,17 +5196,16 @@ mod tests {
|
||||
let any_query = AnyQuery::Query(q);
|
||||
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
|
||||
|
||||
let column_names = ns_request
|
||||
.columns
|
||||
.as_ref()
|
||||
.and_then(|cols| cols.column_names.clone());
|
||||
|
||||
// Plain queries should pass an empty vector
|
||||
assert_eq!(ns_request.k, 20);
|
||||
assert_eq!(ns_request.offset, Some(5));
|
||||
assert_eq!(ns_request.filter, Some("id > 5".to_string()));
|
||||
assert_eq!(
|
||||
ns_request
|
||||
.columns
|
||||
.as_ref()
|
||||
.and_then(|c| c.column_names.as_ref()),
|
||||
Some(&vec!["id".to_string()])
|
||||
);
|
||||
assert_eq!(column_names, Some(vec!["id".to_string()]));
|
||||
assert_eq!(ns_request.with_row_id, Some(true));
|
||||
assert_eq!(ns_request.bypass_vector_index, Some(true));
|
||||
assert!(ns_request.vector_column.is_none()); // No vector column for plain queries
|
||||
|
||||
@@ -100,8 +100,7 @@ impl DatasetRef {
|
||||
let should_checkout = match &target_ref {
|
||||
refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
|
||||
refs::Ref::Version(_, None) => true, // No specific version, always checkout
|
||||
refs::Ref::VersionNumber(target_ver) => version != target_ver,
|
||||
refs::Ref::Tag(_) => true, // Always checkout for tags
|
||||
refs::Ref::Tag(_) => true, // Always checkout for tags
|
||||
};
|
||||
|
||||
if should_checkout {
|
||||
|
||||
Reference in New Issue
Block a user