Compare commits

..

8 Commits

Author SHA1 Message Date
Lance Release
042bc22468 Bump version: 0.27.0-beta.1 → 0.27.0 2026-01-22 01:09:32 +00:00
Lance Release
68569906c6 Bump version: 0.27.0-beta.0 → 0.27.0-beta.1 2026-01-22 01:09:31 +00:00
LanceDB Robot
c71c1fc822 feat: update lance dependency to v1.0.3 (#2932)
## Summary
- bump Lance dependency to v1.0.3
- refresh Cargo metadata and lockfile

## Verification
- cargo clippy --workspace --tests --all-features -- -D warnings
- cargo fmt --all

## Release
- https://github.com/lance-format/lance/releases/tag/v1.0.3
2026-01-21 17:08:24 -08:00
Jack Ye
4a6a0c856e ci: fix codex version bump title and summary (#2931)
1. use feat for releases, chore for prereleases
2. do not have literal `\n` in summary
2026-01-21 15:45:28 -08:00
Jack Ye
f124c9d8d2 test: string type conversion in pandas 3.0+ (#2928)
In pandas 3.0+, string columns now convert to Arrow large_utf8. This PR mainly
makes sure our test accounts for this difference across pandas
versions when constructing the schema.
2026-01-21 13:40:48 -08:00
Jack Ye
4e65748abf chore: update lance dependency to v1.0.3-rc.1 (#2927)
Supersedes https://github.com/lancedb/lancedb/pull/2925

We accidentally upgraded lance to 2.0.0-beta.8. This PR reverts that
first and then bumps to 1.0.3-rc.1.

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 11:52:07 -08:00
Colin Patrick McCabe
e897f3edab test: assert remote behavior of drop_table (#2926)
Add support for testing remote connections in drop_table in
`rust/lancedb/src/connection.rs`.
2026-01-21 08:42:40 -08:00
Lance Release
790ba7115b Bump version: 0.23.1 → 0.24.0-beta.0 2026-01-21 12:21:53 +00:00
41 changed files with 921 additions and 753 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.23.1" current_version = "0.24.0-beta.0"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -75,6 +75,13 @@ jobs:
VERSION="${VERSION#v}" VERSION="${VERSION#v}"
BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}" BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
# Use "chore" for beta/rc versions, "feat" for stable releases
if [[ "${VERSION}" == *beta* ]] || [[ "${VERSION}" == *rc* ]]; then
COMMIT_TYPE="chore"
else
COMMIT_TYPE="feat"
fi
cat <<EOF >/tmp/codex-prompt.txt cat <<EOF >/tmp/codex-prompt.txt
You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review. You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.
@@ -84,10 +91,10 @@ jobs:
3. After clippy succeeds, run "cargo fmt --all" to format the workspace. 3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes. 4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary). 5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
6. Stage all relevant files with "git add -A". Commit using the message "chore: update lance dependency to v${VERSION}". 6. Stage all relevant files with "git add -A". Commit using the message "${COMMIT_TYPE}: update lance dependency to v${VERSION}".
7. Push the branch to origin. If the branch already exists, force-push your changes. 7. Push the branch to origin. If the branch already exists, force-push your changes.
8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request. 8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
9. Create a pull request targeting "main" with title "chore: update lance dependency to v${VERSION}". In the body, summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). 9. Create a pull request targeting "main" with title "${COMMIT_TYPE}: update lance dependency to v${VERSION}". First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'EOF' > /tmp/pr-body.md). The body should summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). Then run "gh pr create --body-file /tmp/pr-body.md".
10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results. 10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
Constraints: Constraints:

835
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -15,39 +15,39 @@ categories = ["database-implementations"]
rust-version = "1.78.0" rust-version = "1.78.0"
[workspace.dependencies] [workspace.dependencies]
lance = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance = { "version" = "=1.0.3", default-features = false, "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance-core = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance-datagen = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance-file = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance-io = { "version" = "=1.0.3", default-features = false, "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance-index = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance-linalg = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance-namespace = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance-namespace-impls = { "version" = "=1.0.3", default-features = false, "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance-table = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance-testing = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance-datafusion = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance-encoding = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } lance-arrow = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8" ahash = "0.8"
# Note that this one does not include pyarrow # Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false } arrow = { version = "56.2", optional = false }
arrow-array = "57.2" arrow-array = "56.2"
arrow-data = "57.2" arrow-data = "56.2"
arrow-ipc = "57.2" arrow-ipc = "56.2"
arrow-ord = "57.2" arrow-ord = "56.2"
arrow-schema = "57.2" arrow-schema = "56.2"
arrow-select = "57.2" arrow-select = "56.2"
arrow-cast = "57.2" arrow-cast = "56.2"
async-trait = "0" async-trait = "0"
datafusion = { version = "51.0", default-features = false } datafusion = { version = "50.1", default-features = false }
datafusion-catalog = "51.0" datafusion-catalog = "50.1"
datafusion-common = { version = "51.0", default-features = false } datafusion-common = { version = "50.1", default-features = false }
datafusion-execution = "51.0" datafusion-execution = "50.1"
datafusion-expr = "51.0" datafusion-expr = "50.1"
datafusion-physical-plan = "51.0" datafusion-physical-plan = "50.1"
env_logger = "0.11" env_logger = "0.11"
half = { "version" = "2.7.1", default-features = false, features = [ half = { "version" = "2.6.0", default-features = false, features = [
"num-traits", "num-traits",
] } ] }
futures = "0" futures = "0"
@@ -59,7 +59,7 @@ rand = "0.9"
snafu = "0.8" snafu = "0.8"
url = "2" url = "2"
num-traits = "0.2" num-traits = "0.2"
regex = "1.12" regex = "1.10"
lazy_static = "1" lazy_static = "1"
semver = "1.0.25" semver = "1.0.25"
chrono = "0.4" chrono = "0.4"

View File

@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
<dependency> <dependency>
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId> <artifactId>lancedb-core</artifactId>
<version>0.23.1</version> <version>0.24.0-beta.0</version>
</dependency> </dependency>
``` ```

View File

@@ -8,7 +8,7 @@
<parent> <parent>
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.23.1-final.0</version> <version>0.24.0-beta.0</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.23.1-final.0</version> <version>0.24.0-beta.0</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>${project.artifactId}</name> <name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description> <description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package] [package]
name = "lancedb-nodejs" name = "lancedb-nodejs"
edition.workspace = true edition.workspace = true
version = "0.23.1" version = "0.24.0-beta.0"
license.workspace = true license.workspace = true
description.workspace = true description.workspace = true
repository.workspace = true repository.workspace = true

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-arm64", "name": "@lancedb/lancedb-darwin-arm64",
"version": "0.23.1", "version": "0.24.0-beta.0",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node", "main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-x64", "name": "@lancedb/lancedb-darwin-x64",
"version": "0.23.1", "version": "0.24.0-beta.0",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.darwin-x64.node", "main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-gnu", "name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.23.1", "version": "0.24.0-beta.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node", "main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-musl", "name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.23.1", "version": "0.24.0-beta.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node", "main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-gnu", "name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.23.1", "version": "0.24.0-beta.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node", "main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-musl", "name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.23.1", "version": "0.24.0-beta.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node", "main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-arm64-msvc", "name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.23.1", "version": "0.24.0-beta.0",
"os": [ "os": [
"win32" "win32"
], ],

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-x64-msvc", "name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.23.1", "version": "0.24.0-beta.0",
"os": ["win32"], "os": ["win32"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node", "main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{ {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.23.1", "version": "0.24.0-beta.0",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.23.1", "version": "0.24.0-beta.0",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"

View File

@@ -11,7 +11,7 @@
"ann" "ann"
], ],
"private": false, "private": false,
"version": "0.23.1", "version": "0.24.0-beta.0",
"main": "dist/index.js", "main": "dist/index.js",
"exports": { "exports": {
".": "./dist/index.js", ".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.27.0-beta.0" current_version = "0.27.0"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-python" name = "lancedb-python"
version = "0.27.0-beta.0" version = "0.27.0"
edition.workspace = true edition.workspace = true
description = "Python bindings for LanceDB" description = "Python bindings for LanceDB"
license.workspace = true license.workspace = true
@@ -14,15 +14,15 @@ name = "_lancedb"
crate-type = ["cdylib"] crate-type = ["cdylib"]
[dependencies] [dependencies]
arrow = { version = "57.2", features = ["pyarrow"] } arrow = { version = "56.2", features = ["pyarrow"] }
async-trait = "0.1" async-trait = "0.1"
lancedb = { path = "../rust/lancedb", default-features = false } lancedb = { path = "../rust/lancedb", default-features = false }
lance-core.workspace = true lance-core.workspace = true
lance-namespace.workspace = true lance-namespace.workspace = true
lance-io.workspace = true lance-io.workspace = true
env_logger.workspace = true env_logger.workspace = true
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] } pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.26", features = [ pyo3-async-runtimes = { version = "0.25", features = [
"attributes", "attributes",
"tokio-runtime", "tokio-runtime",
] } ] }
@@ -32,7 +32,7 @@ snafu.workspace = true
tokio = { version = "1.40", features = ["sync"] } tokio = { version = "1.40", features = ["sync"] }
[build-dependencies] [build-dependencies]
pyo3-build-config = { version = "0.26", features = [ pyo3-build-config = { version = "0.25", features = [
"extension-module", "extension-module",
"abi3-py39", "abi3-py39",
] } ] }

View File

@@ -961,27 +961,22 @@ class LanceQueryBuilder(ABC):
>>> query = [100, 100] >>> query = [100, 100]
>>> plan = table.search(query).analyze_plan() >>> plan = table.search(query).analyze_plan()
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE >>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
AnalyzeExec verbose=true, elapsed=..., metrics=... AnalyzeExec verbose=true, metrics=[], cumulative_cpu=...
TracedExec, elapsed=..., metrics=... TracedExec, metrics=[], cumulative_cpu=...
ProjectionExec: elapsed=..., expr=[...], ProjectionExec: expr=[...], metrics=[...], cumulative_cpu=...
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...] GlobalLimitExec: skip=0, fetch=10, metrics=[...], cumulative_cpu=...
GlobalLimitExec: elapsed=..., skip=0, fetch=10, FilterExec: _distance@2 IS NOT NULL,
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...] metrics=[output_rows=..., elapsed_compute=...], cumulative_cpu=...
FilterExec: elapsed=..., _distance@2 IS NOT NULL, metrics=[...] SortExec: TopK(fetch=10), expr=[...],
SortExec: elapsed=..., TopK(fetch=10), expr=[...],
preserve_partitioning=[...], preserve_partitioning=[...],
metrics=[output_rows=..., elapsed_compute=..., metrics=[output_rows=..., elapsed_compute=..., row_replacements=...],
output_bytes=..., row_replacements=...] cumulative_cpu=...
KNNVectorDistance: elapsed=..., metric=l2, KNNVectorDistance: metric=l2,
metrics=[output_rows=..., elapsed_compute=..., metrics=[output_rows=..., elapsed_compute=..., output_batches=...],
output_bytes=..., output_batches=...] cumulative_cpu=...
LanceRead: elapsed=..., uri=..., projection=[vector], LanceRead: uri=..., projection=[vector], ...
num_fragments=..., range_before=None, range_after=None, metrics=[output_rows=..., elapsed_compute=...,
row_id=true, row_addr=false, bytes_read=..., iops=..., requests=...], cumulative_cpu=...
full_filter=--, refine_filter=--,
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...,
fragments_scanned=..., ranges_scanned=1, rows_scanned=1,
bytes_read=..., iops=..., requests=..., task_wait_time=...]
Returns Returns
------- -------

View File

@@ -2,12 +2,27 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors # SPDX-FileCopyrightText: Copyright The LanceDB Authors
from datetime import timedelta from datetime import timedelta
from lancedb.db import AsyncConnection, DBConnection from lancedb.db import AsyncConnection, DBConnection
import lancedb import lancedb
import pytest import pytest
import pytest_asyncio import pytest_asyncio
def pandas_string_type():
"""Return the PyArrow string type that pandas uses for string columns.
pandas 3.0+ uses large_string for string columns, pandas 2.x uses string.
"""
import pandas as pd
import pyarrow as pa
version = tuple(int(x) for x in pd.__version__.split(".")[:2])
if version >= (3, 0):
return pa.large_utf8()
return pa.utf8()
# Use an in-memory database for most tests. # Use an in-memory database for most tests.
@pytest.fixture @pytest.fixture
def mem_db() -> DBConnection: def mem_db() -> DBConnection:

View File

@@ -268,6 +268,8 @@ async def test_create_table_from_iterator_async(mem_db_async: lancedb.AsyncConne
def test_create_exist_ok(tmp_db: lancedb.DBConnection): def test_create_exist_ok(tmp_db: lancedb.DBConnection):
from conftest import pandas_string_type
data = pd.DataFrame( data = pd.DataFrame(
{ {
"vector": [[3.1, 4.1], [5.9, 26.5]], "vector": [[3.1, 4.1], [5.9, 26.5]],
@@ -286,10 +288,11 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
assert tbl.schema == tbl2.schema assert tbl.schema == tbl2.schema
assert len(tbl) == len(tbl2) assert len(tbl) == len(tbl2)
# pandas 3.0+ uses large_string, pandas 2.x uses string
schema = pa.schema( schema = pa.schema(
[ [
pa.field("vector", pa.list_(pa.float32(), list_size=2)), pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("item", pa.utf8()), pa.field("item", pandas_string_type()),
pa.field("price", pa.float64()), pa.field("price", pa.float64()),
] ]
) )
@@ -299,7 +302,7 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
bad_schema = pa.schema( bad_schema = pa.schema(
[ [
pa.field("vector", pa.list_(pa.float32(), list_size=2)), pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("item", pa.utf8()), pa.field("item", pandas_string_type()),
pa.field("price", pa.float64()), pa.field("price", pa.float64()),
pa.field("extra", pa.float32()), pa.field("extra", pa.float32()),
] ]
@@ -365,6 +368,8 @@ async def test_create_mode_async(tmp_db_async: lancedb.AsyncConnection):
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection): async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
from conftest import pandas_string_type
data = pd.DataFrame( data = pd.DataFrame(
{ {
"vector": [[3.1, 4.1], [5.9, 26.5]], "vector": [[3.1, 4.1], [5.9, 26.5]],
@@ -382,10 +387,11 @@ async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
assert tbl.name == tbl2.name assert tbl.name == tbl2.name
assert await tbl.schema() == await tbl2.schema() assert await tbl.schema() == await tbl2.schema()
# pandas 3.0+ uses large_string, pandas 2.x uses string
schema = pa.schema( schema = pa.schema(
[ [
pa.field("vector", pa.list_(pa.float32(), list_size=2)), pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("item", pa.utf8()), pa.field("item", pandas_string_type()),
pa.field("price", pa.float64()), pa.field("price", pa.float64()),
] ]
) )
@@ -595,6 +601,8 @@ def test_open_table_sync(tmp_db: lancedb.DBConnection):
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_open_table(tmp_path): async def test_open_table(tmp_path):
from conftest import pandas_string_type
db = await lancedb.connect_async(tmp_path) db = await lancedb.connect_async(tmp_path)
data = pd.DataFrame( data = pd.DataFrame(
{ {
@@ -614,10 +622,11 @@ async def test_open_table(tmp_path):
) )
is not None is not None
) )
# pandas 3.0+ uses large_string, pandas 2.x uses string
assert await tbl.schema() == pa.schema( assert await tbl.schema() == pa.schema(
{ {
"vector": pa.list_(pa.float32(), list_size=2), "vector": pa.list_(pa.float32(), list_size=2),
"item": pa.utf8(), "item": pandas_string_type(),
"price": pa.float64(), "price": pa.float64(),
} }
) )

View File

@@ -26,6 +26,8 @@ import pytest
from lance_namespace import ( from lance_namespace import (
CreateEmptyTableRequest, CreateEmptyTableRequest,
CreateEmptyTableResponse, CreateEmptyTableResponse,
DeclareTableRequest,
DeclareTableResponse,
DescribeTableRequest, DescribeTableRequest,
DescribeTableResponse, DescribeTableResponse,
LanceNamespace, LanceNamespace,
@@ -160,6 +162,19 @@ class TrackingNamespace(LanceNamespace):
return modified return modified
def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
"""Track declare_table calls and inject rotating credentials."""
with self.lock:
self.create_call_count += 1
count = self.create_call_count
response = self.inner.declare_table(request)
response.storage_options = self._modify_storage_options(
response.storage_options, count
)
return response
def create_empty_table( def create_empty_table(
self, request: CreateEmptyTableRequest self, request: CreateEmptyTableRequest
) -> CreateEmptyTableResponse: ) -> CreateEmptyTableResponse:

View File

@@ -528,12 +528,19 @@ def test_sanitize_data(
else: else:
expected_schema = schema expected_schema = schema
else: else:
from conftest import pandas_string_type
# polars uses large_string, pandas 3.0+ uses large_string, others use string
if isinstance(data, pl.DataFrame):
text_type = pa.large_utf8()
elif isinstance(data, pd.DataFrame):
text_type = pandas_string_type()
else:
text_type = pa.string()
expected_schema = pa.schema( expected_schema = pa.schema(
{ {
"id": pa.int64(), "id": pa.int64(),
"text": pa.large_utf8() "text": text_type,
if isinstance(data, pl.DataFrame)
else pa.string(),
"vector": pa.list_(pa.float32(), 10), "vector": pa.list_(pa.float32(), 10),
} }
) )

View File

@@ -10,7 +10,8 @@ use arrow::{
use futures::stream::StreamExt; use futures::stream::StreamExt;
use lancedb::arrow::SendableRecordBatchStream; use lancedb::arrow::SendableRecordBatchStream;
use pyo3::{ use pyo3::{
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python, exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult,
Python,
}; };
use pyo3_async_runtimes::tokio::future_into_py; use pyo3_async_runtimes::tokio::future_into_py;
@@ -35,11 +36,8 @@ impl RecordBatchStream {
#[pymethods] #[pymethods]
impl RecordBatchStream { impl RecordBatchStream {
#[getter] #[getter]
pub fn schema(&self, py: Python) -> PyResult<Py<PyAny>> { pub fn schema(&self, py: Python) -> PyResult<PyObject> {
(*self.schema) (*self.schema).clone().into_pyarrow(py)
.clone()
.into_pyarrow(py)
.map(|obj| obj.unbind())
} }
pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> { pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> {
@@ -55,12 +53,7 @@ impl RecordBatchStream {
.next() .next()
.await .await
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?; .ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
#[allow(deprecated)] Python::with_gil(|py| inner_next.infer_error()?.to_pyarrow(py))
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = inner_next.infer_error()?.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
}) })
} }
} }

View File

@@ -12,7 +12,7 @@ use pyo3::{
exceptions::{PyRuntimeError, PyValueError}, exceptions::{PyRuntimeError, PyValueError},
pyclass, pyfunction, pymethods, pyclass, pyfunction, pymethods,
types::{PyDict, PyDictMethods}, types::{PyDict, PyDictMethods},
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python, Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
}; };
use pyo3_async_runtimes::tokio::future_into_py; use pyo3_async_runtimes::tokio::future_into_py;
@@ -114,7 +114,7 @@ impl Connection {
data: Bound<'_, PyAny>, data: Bound<'_, PyAny>,
namespace: Vec<String>, namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>, storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<Py<PyAny>>, storage_options_provider: Option<PyObject>,
location: Option<String>, location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> { ) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone(); let inner = self_.get_inner()?.clone();
@@ -152,7 +152,7 @@ impl Connection {
schema: Bound<'_, PyAny>, schema: Bound<'_, PyAny>,
namespace: Vec<String>, namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>, storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<Py<PyAny>>, storage_options_provider: Option<PyObject>,
location: Option<String>, location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> { ) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone(); let inner = self_.get_inner()?.clone();
@@ -187,7 +187,7 @@ impl Connection {
name: String, name: String,
namespace: Vec<String>, namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>, storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<Py<PyAny>>, storage_options_provider: Option<PyObject>,
index_cache_size: Option<u32>, index_cache_size: Option<u32>,
location: Option<String>, location: Option<String>,
) -> PyResult<Bound<'_, PyAny>> { ) -> PyResult<Bound<'_, PyAny>> {
@@ -307,7 +307,6 @@ impl Connection {
..Default::default() ..Default::default()
}; };
let response = inner.list_namespaces(request).await.infer_error()?; let response = inner.list_namespaces(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> { Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py); let dict = PyDict::new(py);
dict.set_item("namespaces", response.namespaces)?; dict.set_item("namespaces", response.namespaces)?;
@@ -328,7 +327,8 @@ impl Connection {
let py = self_.py(); let py = self_.py();
future_into_py(py, async move { future_into_py(py, async move {
use lance_namespace::models::CreateNamespaceRequest; use lance_namespace::models::CreateNamespaceRequest;
let mode_enum = mode.and_then(|m| match m.to_lowercase().as_str() { // Mode is now a string field
let mode_str = mode.and_then(|m| match m.to_lowercase().as_str() {
"create" => Some("Create".to_string()), "create" => Some("Create".to_string()),
"exist_ok" => Some("ExistOk".to_string()), "exist_ok" => Some("ExistOk".to_string()),
"overwrite" => Some("Overwrite".to_string()), "overwrite" => Some("Overwrite".to_string()),
@@ -340,12 +340,11 @@ impl Connection {
} else { } else {
Some(namespace) Some(namespace)
}, },
mode: mode_enum, mode: mode_str,
properties, properties,
..Default::default() ..Default::default()
}; };
let response = inner.create_namespace(request).await.infer_error()?; let response = inner.create_namespace(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> { Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py); let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?; dict.set_item("properties", response.properties)?;
@@ -365,12 +364,13 @@ impl Connection {
let py = self_.py(); let py = self_.py();
future_into_py(py, async move { future_into_py(py, async move {
use lance_namespace::models::DropNamespaceRequest; use lance_namespace::models::DropNamespaceRequest;
let mode_enum = mode.and_then(|m| match m.to_uppercase().as_str() { // Mode and Behavior are now string fields
let mode_str = mode.and_then(|m| match m.to_uppercase().as_str() {
"SKIP" => Some("Skip".to_string()), "SKIP" => Some("Skip".to_string()),
"FAIL" => Some("Fail".to_string()), "FAIL" => Some("Fail".to_string()),
_ => None, _ => None,
}); });
let behavior_enum = behavior.and_then(|b| match b.to_uppercase().as_str() { let behavior_str = behavior.and_then(|b| match b.to_uppercase().as_str() {
"RESTRICT" => Some("Restrict".to_string()), "RESTRICT" => Some("Restrict".to_string()),
"CASCADE" => Some("Cascade".to_string()), "CASCADE" => Some("Cascade".to_string()),
_ => None, _ => None,
@@ -381,12 +381,11 @@ impl Connection {
} else { } else {
Some(namespace) Some(namespace)
}, },
mode: mode_enum, mode: mode_str,
behavior: behavior_enum, behavior: behavior_str,
..Default::default() ..Default::default()
}; };
let response = inner.drop_namespace(request).await.infer_error()?; let response = inner.drop_namespace(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> { Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py); let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?; dict.set_item("properties", response.properties)?;
@@ -414,7 +413,6 @@ impl Connection {
..Default::default() ..Default::default()
}; };
let response = inner.describe_namespace(request).await.infer_error()?; let response = inner.describe_namespace(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> { Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py); let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?; dict.set_item("properties", response.properties)?;
@@ -445,7 +443,6 @@ impl Connection {
..Default::default() ..Default::default()
}; };
let response = inner.list_tables(request).await.infer_error()?; let response = inner.list_tables(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> { Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py); let dict = PyDict::new(py);
dict.set_item("tables", response.tables)?; dict.set_item("tables", response.tables)?;

View File

@@ -40,34 +40,31 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
request_id, request_id,
source, source,
status_code, status_code,
} => { } => Python::with_gil(|py| {
#[allow(deprecated)] let message = err.to_string();
Python::with_gil(|py| { let http_err_cls = py
let message = err.to_string(); .import(intern!(py, "lancedb.remote.errors"))?
let http_err_cls = py .getattr(intern!(py, "HttpError"))?;
.import(intern!(py, "lancedb.remote.errors"))? let err = http_err_cls.call1((
.getattr(intern!(py, "HttpError"))?; message,
let err = http_err_cls.call1(( request_id,
message, status_code.map(|s| s.as_u16()),
))?;
if let Some(cause) = source.source() {
// The HTTP error already includes the first cause. But
// we can add the rest of the chain if there is any more.
let cause_err = http_from_rust_error(
py,
cause,
request_id, request_id,
status_code.map(|s| s.as_u16()), status_code.map(|s| s.as_u16()),
))?; )?;
err.setattr(intern!(py, "__cause__"), cause_err)?;
}
if let Some(cause) = source.source() { Err(PyErr::from_value(err))
// The HTTP error already includes the first cause. But }),
// we can add the rest of the chain if there is any more.
let cause_err = http_from_rust_error(
py,
cause,
request_id,
status_code.map(|s| s.as_u16()),
)?;
err.setattr(intern!(py, "__cause__"), cause_err)?;
}
Err(PyErr::from_value(err))
})
}
LanceError::Retry { LanceError::Retry {
request_id, request_id,
request_failures, request_failures,
@@ -78,37 +75,33 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
max_read_failures, max_read_failures,
source, source,
status_code, status_code,
} => } => Python::with_gil(|py| {
{ let cause_err = http_from_rust_error(
#[allow(deprecated)] py,
Python::with_gil(|py| { source.as_ref(),
let cause_err = http_from_rust_error( request_id,
py, status_code.map(|s| s.as_u16()),
source.as_ref(), )?;
request_id,
status_code.map(|s| s.as_u16()),
)?;
let message = err.to_string(); let message = err.to_string();
let retry_error_cls = py let retry_error_cls = py
.import(intern!(py, "lancedb.remote.errors"))? .import(intern!(py, "lancedb.remote.errors"))?
.getattr("RetryError")?; .getattr("RetryError")?;
let err = retry_error_cls.call1(( let err = retry_error_cls.call1((
message, message,
request_id, request_id,
*request_failures, *request_failures,
*connect_failures, *connect_failures,
*read_failures, *read_failures,
*max_request_failures, *max_request_failures,
*max_connect_failures, *max_connect_failures,
*max_read_failures, *max_read_failures,
status_code.map(|s| s.as_u16()), status_code.map(|s| s.as_u16()),
))?; ))?;
err.setattr(intern!(py, "__cause__"), cause_err)?; err.setattr(intern!(py, "__cause__"), cause_err)?;
Err(PyErr::from_value(err)) Err(PyErr::from_value(err))
}) }),
}
_ => self.runtime_error(), _ => self.runtime_error(),
}, },
} }

View File

@@ -12,7 +12,6 @@ pub struct PyHeaderProvider {
impl Clone for PyHeaderProvider { impl Clone for PyHeaderProvider {
fn clone(&self) -> Self { fn clone(&self) -> Self {
#[allow(deprecated)]
Python::with_gil(|py| Self { Python::with_gil(|py| Self {
provider: self.provider.clone_ref(py), provider: self.provider.clone_ref(py),
}) })
@@ -26,7 +25,6 @@ impl PyHeaderProvider {
/// Get headers from the Python provider (internal implementation) /// Get headers from the Python provider (internal implementation)
fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> { fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
#[allow(deprecated)]
Python::with_gil(|py| { Python::with_gil(|py| {
// Call the get_headers method // Call the get_headers method
let result = self.provider.call_method0(py, "get_headers"); let result = self.provider.call_method0(py, "get_headers");

View File

@@ -19,7 +19,7 @@ use pyo3::{
exceptions::PyRuntimeError, exceptions::PyRuntimeError,
pyclass, pymethods, pyclass, pymethods,
types::{PyAnyMethods, PyDict, PyDictMethods, PyType}, types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
Bound, Py, PyAny, PyRef, PyRefMut, PyResult, Python, Bound, PyAny, PyRef, PyRefMut, PyResult, Python,
}; };
use pyo3_async_runtimes::tokio::future_into_py; use pyo3_async_runtimes::tokio::future_into_py;
@@ -281,12 +281,7 @@ impl PyPermutationReader {
let reader = slf.reader.clone(); let reader = slf.reader.clone();
future_into_py(slf.py(), async move { future_into_py(slf.py(), async move {
let schema = reader.output_schema(selection).await.infer_error()?; let schema = reader.output_schema(selection).await.infer_error()?;
#[allow(deprecated)] Python::with_gil(|py| schema.to_pyarrow(py))
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
}) })
} }

View File

@@ -29,7 +29,6 @@ use pyo3::types::PyList;
use pyo3::types::{PyDict, PyString}; use pyo3::types::{PyDict, PyString};
use pyo3::Bound; use pyo3::Bound;
use pyo3::IntoPyObject; use pyo3::IntoPyObject;
use pyo3::Py;
use pyo3::PyAny; use pyo3::PyAny;
use pyo3::PyRef; use pyo3::PyRef;
use pyo3::PyResult; use pyo3::PyResult;
@@ -454,12 +453,7 @@ impl Query {
let inner = self_.inner.clone(); let inner = self_.inner.clone();
future_into_py(self_.py(), async move { future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?; let schema = inner.output_schema().await.infer_error()?;
#[allow(deprecated)] Python::with_gil(|py| schema.to_pyarrow(py))
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
}) })
} }
@@ -538,12 +532,7 @@ impl TakeQuery {
let inner = self_.inner.clone(); let inner = self_.inner.clone();
future_into_py(self_.py(), async move { future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?; let schema = inner.output_schema().await.infer_error()?;
#[allow(deprecated)] Python::with_gil(|py| schema.to_pyarrow(py))
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
}) })
} }
@@ -638,12 +627,7 @@ impl FTSQuery {
let inner = self_.inner.clone(); let inner = self_.inner.clone();
future_into_py(self_.py(), async move { future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?; let schema = inner.output_schema().await.infer_error()?;
#[allow(deprecated)] Python::with_gil(|py| schema.to_pyarrow(py))
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
}) })
} }
@@ -822,12 +806,7 @@ impl VectorQuery {
let inner = self_.inner.clone(); let inner = self_.inner.clone();
future_into_py(self_.py(), async move { future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?; let schema = inner.output_schema().await.infer_error()?;
#[allow(deprecated)] Python::with_gil(|py| schema.to_pyarrow(py))
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
}) })
} }

View File

@@ -17,12 +17,11 @@ use pyo3::types::PyDict;
/// Internal wrapper around a Python object implementing StorageOptionsProvider /// Internal wrapper around a Python object implementing StorageOptionsProvider
pub struct PyStorageOptionsProvider { pub struct PyStorageOptionsProvider {
/// The Python object implementing fetch_storage_options() /// The Python object implementing fetch_storage_options()
inner: Py<PyAny>, inner: PyObject,
} }
impl Clone for PyStorageOptionsProvider { impl Clone for PyStorageOptionsProvider {
fn clone(&self) -> Self { fn clone(&self) -> Self {
#[allow(deprecated)]
Python::with_gil(|py| Self { Python::with_gil(|py| Self {
inner: self.inner.clone_ref(py), inner: self.inner.clone_ref(py),
}) })
@@ -30,8 +29,7 @@ impl Clone for PyStorageOptionsProvider {
} }
impl PyStorageOptionsProvider { impl PyStorageOptionsProvider {
pub fn new(obj: Py<PyAny>) -> PyResult<Self> { pub fn new(obj: PyObject) -> PyResult<Self> {
#[allow(deprecated)]
Python::with_gil(|py| { Python::with_gil(|py| {
// Verify the object has a fetch_storage_options method // Verify the object has a fetch_storage_options method
if !obj.bind(py).hasattr("fetch_storage_options")? { if !obj.bind(py).hasattr("fetch_storage_options")? {
@@ -39,9 +37,7 @@ impl PyStorageOptionsProvider {
"StorageOptionsProvider must implement fetch_storage_options() method", "StorageOptionsProvider must implement fetch_storage_options() method",
)); ));
} }
Ok(Self { Ok(Self { inner: obj })
inner: obj.clone_ref(py),
})
}) })
} }
} }
@@ -64,7 +60,6 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
let py_provider = self.py_provider.clone(); let py_provider = self.py_provider.clone();
tokio::task::spawn_blocking(move || { tokio::task::spawn_blocking(move || {
#[allow(deprecated)]
Python::with_gil(|py| { Python::with_gil(|py| {
// Call the Python fetch_storage_options method // Call the Python fetch_storage_options method
let result = py_provider let result = py_provider
@@ -124,7 +119,6 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
} }
fn provider_id(&self) -> String { fn provider_id(&self) -> String {
#[allow(deprecated)]
Python::with_gil(|py| { Python::with_gil(|py| {
// Call provider_id() method on the Python object // Call provider_id() method on the Python object
let obj = self.py_provider.inner.bind(py); let obj = self.py_provider.inner.bind(py);
@@ -149,7 +143,7 @@ impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
/// This is the main entry point for converting Python StorageOptionsProvider objects /// This is the main entry point for converting Python StorageOptionsProvider objects
/// to Rust trait objects that can be used by the Lance ecosystem. /// to Rust trait objects that can be used by the Lance ecosystem.
pub fn py_object_to_storage_options_provider( pub fn py_object_to_storage_options_provider(
py_obj: Py<PyAny>, py_obj: PyObject,
) -> PyResult<Arc<dyn StorageOptionsProvider>> { ) -> PyResult<Arc<dyn StorageOptionsProvider>> {
let py_provider = PyStorageOptionsProvider::new(py_obj)?; let py_provider = PyStorageOptionsProvider::new(py_obj)?;
Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider))) Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider)))

View File

@@ -21,7 +21,7 @@ use pyo3::{
exceptions::{PyKeyError, PyRuntimeError, PyValueError}, exceptions::{PyKeyError, PyRuntimeError, PyValueError},
pyclass, pymethods, pyclass, pymethods,
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods}, types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python, Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
}; };
use pyo3_async_runtimes::tokio::future_into_py; use pyo3_async_runtimes::tokio::future_into_py;
@@ -287,12 +287,7 @@ impl Table {
let inner = self_.inner_ref()?.clone(); let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move { future_into_py(self_.py(), async move {
let schema = inner.schema().await.infer_error()?; let schema = inner.schema().await.infer_error()?;
#[allow(deprecated)] Python::with_gil(|py| schema.to_pyarrow(py))
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
}) })
} }
@@ -442,7 +437,6 @@ impl Table {
future_into_py(self_.py(), async move { future_into_py(self_.py(), async move {
let stats = inner.index_stats(&index_name).await.infer_error()?; let stats = inner.index_stats(&index_name).await.infer_error()?;
if let Some(stats) = stats { if let Some(stats) = stats {
#[allow(deprecated)]
Python::with_gil(|py| { Python::with_gil(|py| {
let dict = PyDict::new(py); let dict = PyDict::new(py);
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?; dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
@@ -473,7 +467,6 @@ impl Table {
let inner = self_.inner_ref()?.clone(); let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move { future_into_py(self_.py(), async move {
let stats = inner.stats().await.infer_error()?; let stats = inner.stats().await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| { Python::with_gil(|py| {
let dict = PyDict::new(py); let dict = PyDict::new(py);
dict.set_item("total_bytes", stats.total_bytes)?; dict.set_item("total_bytes", stats.total_bytes)?;
@@ -528,7 +521,6 @@ impl Table {
let inner = self_.inner_ref()?.clone(); let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move { future_into_py(self_.py(), async move {
let versions = inner.list_versions().await.infer_error()?; let versions = inner.list_versions().await.infer_error()?;
#[allow(deprecated)]
let versions_as_dict = Python::with_gil(|py| { let versions_as_dict = Python::with_gil(|py| {
versions versions
.iter() .iter()
@@ -880,7 +872,6 @@ impl Tags {
let tags = inner.tags().await.infer_error()?; let tags = inner.tags().await.infer_error()?;
let res = tags.list().await.infer_error()?; let res = tags.list().await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| { Python::with_gil(|py| {
let py_dict = PyDict::new(py); let py_dict = PyDict::new(py);
for (key, contents) in res { for (key, contents) in res {

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb" name = "lancedb"
version = "0.23.1" version = "0.24.0-beta.0"
edition.workspace = true edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications" description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true

View File

@@ -36,10 +36,42 @@ use crate::remote::{
}; };
use crate::table::{TableDefinition, WriteOptions}; use crate::table::{TableDefinition, WriteOptions};
use crate::Table; use crate::Table;
use lance::io::ObjectStoreParams;
pub use lance_encoding::version::LanceFileVersion; pub use lance_encoding::version::LanceFileVersion;
#[cfg(feature = "remote")] #[cfg(feature = "remote")]
use lance_io::object_store::StorageOptions; use lance_io::object_store::StorageOptions;
use lance_io::object_store::StorageOptionsProvider; use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
fn merge_storage_options(
store_params: &mut ObjectStoreParams,
pairs: impl IntoIterator<Item = (String, String)>,
) {
let mut options = store_params.storage_options().cloned().unwrap_or_default();
for (key, value) in pairs {
options.insert(key, value);
}
let provider = store_params
.storage_options_accessor
.as_ref()
.and_then(|accessor| accessor.provider().cloned());
let accessor = if let Some(provider) = provider {
StorageOptionsAccessor::with_initial_and_provider(options, provider)
} else {
StorageOptionsAccessor::with_static_options(options)
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
}
fn set_storage_options_provider(
store_params: &mut ObjectStoreParams,
provider: Arc<dyn StorageOptionsProvider>,
) {
let accessor = match store_params.storage_options().cloned() {
Some(options) => StorageOptionsAccessor::with_initial_and_provider(options, provider),
None => StorageOptionsAccessor::with_provider(provider),
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
}
/// A builder for configuring a [`Connection::table_names`] operation /// A builder for configuring a [`Connection::table_names`] operation
pub struct TableNamesBuilder { pub struct TableNamesBuilder {
@@ -246,16 +278,14 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// ///
/// See available options at <https://lancedb.com/docs/storage/> /// See available options at <https://lancedb.com/docs/storage/>
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self { pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
let store_options = self let store_params = self
.request .request
.write_options .write_options
.lance_write_params .lance_write_params
.get_or_insert(Default::default()) .get_or_insert(Default::default())
.store_params .store_params
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default()); .get_or_insert(Default::default());
store_options.insert(key.into(), value.into()); merge_storage_options(store_params, [(key.into(), value.into())]);
self self
} }
@@ -269,19 +299,17 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
mut self, mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>, pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
) -> Self { ) -> Self {
let store_options = self let store_params = self
.request .request
.write_options .write_options
.lance_write_params .lance_write_params
.get_or_insert(Default::default()) .get_or_insert(Default::default())
.store_params .store_params
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default()); .get_or_insert(Default::default());
let updates = pairs
for (key, value) in pairs { .into_iter()
store_options.insert(key.into(), value.into()); .map(|(key, value)| (key.into(), value.into()));
} merge_storage_options(store_params, updates);
self self
} }
@@ -318,23 +346,21 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// This has no effect in LanceDB Cloud. /// This has no effect in LanceDB Cloud.
#[deprecated(since = "0.15.1", note = "Use `database_options` instead")] #[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
pub fn enable_v2_manifest_paths(mut self, use_v2_manifest_paths: bool) -> Self { pub fn enable_v2_manifest_paths(mut self, use_v2_manifest_paths: bool) -> Self {
let storage_options = self let store_params = self
.request .request
.write_options .write_options
.lance_write_params .lance_write_params
.get_or_insert_with(Default::default) .get_or_insert_with(Default::default)
.store_params .store_params
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default); .get_or_insert_with(Default::default);
let value = if use_v2_manifest_paths {
storage_options.insert( "true".to_string()
OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(), } else {
if use_v2_manifest_paths { "false".to_string()
"true".to_string() };
} else { merge_storage_options(
"false".to_string() store_params,
}, [(OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(), value)],
); );
self self
} }
@@ -344,19 +370,19 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// The default is `LanceFileVersion::Stable`. /// The default is `LanceFileVersion::Stable`.
#[deprecated(since = "0.15.1", note = "Use `database_options` instead")] #[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
pub fn data_storage_version(mut self, data_storage_version: LanceFileVersion) -> Self { pub fn data_storage_version(mut self, data_storage_version: LanceFileVersion) -> Self {
let storage_options = self let store_params = self
.request .request
.write_options .write_options
.lance_write_params .lance_write_params
.get_or_insert_with(Default::default) .get_or_insert_with(Default::default)
.store_params .store_params
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default); .get_or_insert_with(Default::default);
merge_storage_options(
storage_options.insert( store_params,
OPT_NEW_TABLE_STORAGE_VERSION.to_string(), [(
data_storage_version.to_string(), OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
data_storage_version.to_string(),
)],
); );
self self
} }
@@ -381,13 +407,14 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// This allows tables to automatically refresh cloud storage credentials /// This allows tables to automatically refresh cloud storage credentials
/// when they expire, enabling long-running operations on remote storage. /// when they expire, enabling long-running operations on remote storage.
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self { pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
self.request let store_params = self
.request
.write_options .write_options
.lance_write_params .lance_write_params
.get_or_insert(Default::default()) .get_or_insert(Default::default())
.store_params .store_params
.get_or_insert(Default::default()) .get_or_insert(Default::default());
.storage_options_provider = Some(provider); set_storage_options_provider(store_params, provider);
self self
} }
} }
@@ -450,15 +477,13 @@ impl OpenTableBuilder {
/// ///
/// See available options at <https://lancedb.com/docs/storage/> /// See available options at <https://lancedb.com/docs/storage/>
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self { pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
let storage_options = self let store_params = self
.request .request
.lance_read_params .lance_read_params
.get_or_insert(Default::default()) .get_or_insert(Default::default())
.store_options .store_options
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default()); .get_or_insert(Default::default());
storage_options.insert(key.into(), value.into()); merge_storage_options(store_params, [(key.into(), value.into())]);
self self
} }
@@ -472,18 +497,16 @@ impl OpenTableBuilder {
mut self, mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>, pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
) -> Self { ) -> Self {
let storage_options = self let store_params = self
.request .request
.lance_read_params .lance_read_params
.get_or_insert(Default::default()) .get_or_insert(Default::default())
.store_options .store_options
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default()); .get_or_insert(Default::default());
let updates = pairs
for (key, value) in pairs { .into_iter()
storage_options.insert(key.into(), value.into()); .map(|(key, value)| (key.into(), value.into()));
} merge_storage_options(store_params, updates);
self self
} }
@@ -507,12 +530,13 @@ impl OpenTableBuilder {
/// This allows tables to automatically refresh cloud storage credentials /// This allows tables to automatically refresh cloud storage credentials
/// when they expire, enabling long-running operations on remote storage. /// when they expire, enabling long-running operations on remote storage.
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self { pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
self.request let store_params = self
.request
.lance_read_params .lance_read_params
.get_or_insert(Default::default()) .get_or_insert(Default::default())
.store_options .store_options
.get_or_insert(Default::default()) .get_or_insert(Default::default());
.storage_options_provider = Some(provider); set_storage_options_provider(store_params, provider);
self self
} }
@@ -1277,8 +1301,6 @@ mod test_utils {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::fs::create_dir_all;
use crate::database::listing::{ListingDatabaseOptions, NewTableConfig}; use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
use crate::query::QueryBase; use crate::query::QueryBase;
use crate::query::{ExecutableQuery, QueryExecutionOptions}; use crate::query::{ExecutableQuery, QueryExecutionOptions};
@@ -1526,18 +1548,27 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn drop_table() { async fn drop_table() {
let tmp_dir = tempdir().unwrap(); let tc = new_test_connection().await.unwrap();
let db = tc.connection;
let uri = tmp_dir.path().to_str().unwrap(); if tc.is_remote {
let db = connect(uri).execute().await.unwrap(); // All the typical endpoints such as s3:///, file-object-store:///, etc. treat drop_table
// as idempotent.
assert!(db.drop_table("invalid_table", &[]).await.is_ok());
} else {
// The behavior of drop_table when using a file:/// endpoint differs from all other
// object providers, in that it returns an error when deleting a non-existent table.
assert!(matches!(
db.drop_table("invalid_table", &[]).await,
Err(crate::Error::TableNotFound { .. }),
));
}
// drop non-exist table let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
assert!(matches!( db.create_empty_table("table1", schema.clone())
db.drop_table("invalid_table", &[]).await, .execute()
Err(crate::Error::TableNotFound { .. }), .await
)); .unwrap();
create_dir_all(tmp_dir.path().join("table1.lance")).unwrap();
db.drop_table("table1", &[]).await.unwrap(); db.drop_table("table1", &[]).await.unwrap();
let tables = db.table_names().execute().await.unwrap(); let tables = db.table_names().execute().await.unwrap();

View File

@@ -12,7 +12,7 @@ use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode};
use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore}; use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
use lance_datafusion::utils::StreamingWriteSource; use lance_datafusion::utils::StreamingWriteSource;
use lance_encoding::version::LanceFileVersion; use lance_encoding::version::LanceFileVersion;
use lance_io::object_store::StorageOptionsProvider; use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
use lance_table::io::commit::commit_handler_from_url; use lance_table::io::commit::commit_handler_from_url;
use object_store::local::LocalFileSystem; use object_store::local::LocalFileSystem;
use snafu::ResultExt; use snafu::ResultExt;
@@ -356,7 +356,13 @@ impl ListingDatabase {
.clone() .clone()
.unwrap_or_else(|| Arc::new(lance::session::Session::default())); .unwrap_or_else(|| Arc::new(lance::session::Session::default()));
let os_params = ObjectStoreParams { let os_params = ObjectStoreParams {
storage_options: Some(options.storage_options.clone()), storage_options_accessor: if options.storage_options.is_empty() {
None
} else {
Some(Arc::new(StorageOptionsAccessor::with_static_options(
options.storage_options.clone(),
)))
},
..Default::default() ..Default::default()
}; };
let (object_store, base_path) = ObjectStore::from_uri_and_params( let (object_store, base_path) = ObjectStore::from_uri_and_params(
@@ -492,7 +498,13 @@ impl ListingDatabase {
async fn drop_tables(&self, names: Vec<String>) -> Result<()> { async fn drop_tables(&self, names: Vec<String>) -> Result<()> {
let object_store_params = ObjectStoreParams { let object_store_params = ObjectStoreParams {
storage_options: Some(self.storage_options.clone()), storage_options_accessor: if self.storage_options.is_empty() {
None
} else {
Some(Arc::new(StorageOptionsAccessor::with_static_options(
self.storage_options.clone(),
)))
},
..Default::default() ..Default::default()
}; };
let mut uri = self.uri.clone(); let mut uri = self.uri.clone();
@@ -541,7 +553,7 @@ impl ListingDatabase {
.lance_write_params .lance_write_params
.as_ref() .as_ref()
.and_then(|p| p.store_params.as_ref()) .and_then(|p| p.store_params.as_ref())
.and_then(|sp| sp.storage_options.as_ref()); .and_then(|sp| sp.storage_options());
let storage_version_override = storage_options let storage_version_override = storage_options
.and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION)) .and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION))
@@ -592,21 +604,20 @@ impl ListingDatabase {
// will cause a new connection to be created, and that connection will // will cause a new connection to be created, and that connection will
// be dropped from the cache when python GCs the table object, which // be dropped from the cache when python GCs the table object, which
// confounds reuse across tables. // confounds reuse across tables.
if !self.storage_options.is_empty() { if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
let storage_options = write_params let store_params = write_params
.store_params .store_params
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default); .get_or_insert_with(Default::default);
self.inherit_storage_options(storage_options); let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
} if !self.storage_options.is_empty() {
self.inherit_storage_options(&mut storage_options);
// Set storage options provider if available }
if self.storage_options_provider.is_some() { let accessor = if let Some(ref provider) = self.storage_options_provider {
write_params StorageOptionsAccessor::with_initial_and_provider(storage_options, provider.clone())
.store_params } else {
.get_or_insert_with(Default::default) StorageOptionsAccessor::with_static_options(storage_options)
.storage_options_provider = self.storage_options_provider.clone(); };
store_params.storage_options_accessor = Some(Arc::new(accessor));
} }
write_params.data_storage_version = self write_params.data_storage_version = self
@@ -892,7 +903,13 @@ impl Database for ListingDatabase {
validate_table_name(&request.target_table_name)?; validate_table_name(&request.target_table_name)?;
let storage_params = ObjectStoreParams { let storage_params = ObjectStoreParams {
storage_options: Some(self.storage_options.clone()), storage_options_accessor: if self.storage_options.is_empty() {
None
} else {
Some(Arc::new(StorageOptionsAccessor::with_static_options(
self.storage_options.clone(),
)))
},
..Default::default() ..Default::default()
}; };
let read_params = ReadParams { let read_params = ReadParams {
@@ -956,25 +973,28 @@ impl Database for ListingDatabase {
// will cause a new connection to be created, and that connection will // will cause a new connection to be created, and that connection will
// be dropped from the cache when python GCs the table object, which // be dropped from the cache when python GCs the table object, which
// confounds reuse across tables. // confounds reuse across tables.
if !self.storage_options.is_empty() { if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
let storage_options = request let store_params = request
.lance_read_params .lance_read_params
.get_or_insert_with(Default::default) .get_or_insert_with(Default::default)
.store_options .store_options
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default); .get_or_insert_with(Default::default);
self.inherit_storage_options(storage_options); let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
} if !self.storage_options.is_empty() {
self.inherit_storage_options(&mut storage_options);
// Set storage options provider if available }
if self.storage_options_provider.is_some() { // Preserve request-level provider if no connection-level provider exists
request let request_provider = store_params
.lance_read_params .storage_options_accessor
.get_or_insert_with(Default::default) .as_ref()
.store_options .and_then(|a| a.provider().cloned());
.get_or_insert_with(Default::default) let provider = self.storage_options_provider.clone().or(request_provider);
.storage_options_provider = self.storage_options_provider.clone(); let accessor = if let Some(provider) = provider {
StorageOptionsAccessor::with_initial_and_provider(storage_options, provider)
} else {
StorageOptionsAccessor::with_static_options(storage_options)
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
} }
// Some ReadParams are exposed in the OpenTableBuilder, but we also // Some ReadParams are exposed in the OpenTableBuilder, but we also
@@ -1881,7 +1901,9 @@ mod tests {
let write_options = WriteOptions { let write_options = WriteOptions {
lance_write_params: Some(lance::dataset::WriteParams { lance_write_params: Some(lance::dataset::WriteParams {
store_params: Some(lance::io::ObjectStoreParams { store_params: Some(lance::io::ObjectStoreParams {
storage_options: Some(storage_options), storage_options_accessor: Some(Arc::new(
StorageOptionsAccessor::with_static_options(storage_options),
)),
..Default::default() ..Default::default()
}), }),
..Default::default() ..Default::default()
@@ -1955,7 +1977,9 @@ mod tests {
let write_options = WriteOptions { let write_options = WriteOptions {
lance_write_params: Some(lance::dataset::WriteParams { lance_write_params: Some(lance::dataset::WriteParams {
store_params: Some(lance::io::ObjectStoreParams { store_params: Some(lance::io::ObjectStoreParams {
storage_options: Some(storage_options), storage_options_accessor: Some(Arc::new(
StorageOptionsAccessor::with_static_options(storage_options),
)),
..Default::default() ..Default::default()
}), }),
..Default::default() ..Default::default()

View File

@@ -9,14 +9,15 @@ use std::sync::Arc;
use async_trait::async_trait; use async_trait::async_trait;
use lance_namespace::{ use lance_namespace::{
models::{ models::{
CreateNamespaceRequest, CreateNamespaceResponse, DeclareTableRequest, CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest, DeclareTableRequest, DescribeNamespaceRequest, DescribeNamespaceResponse,
DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest, DescribeTableRequest, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest,
ListNamespacesResponse, ListTablesRequest, ListTablesResponse, ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
}, },
LanceNamespace, LanceNamespace,
}; };
use lance_namespace_impls::ConnectBuilder; use lance_namespace_impls::ConnectBuilder;
use log::warn;
use crate::database::ReadConsistency; use crate::database::ReadConsistency;
use crate::error::{Error, Result}; use crate::error::{Error, Result};
@@ -154,7 +155,6 @@ impl Database for LanceNamespaceDatabase {
table_id.push(request.name.clone()); table_id.push(request.name.clone());
let describe_request = DescribeTableRequest { let describe_request = DescribeTableRequest {
id: Some(table_id.clone()), id: Some(table_id.clone()),
version: None,
..Default::default() ..Default::default()
}; };
@@ -205,26 +205,53 @@ impl Database for LanceNamespaceDatabase {
let mut table_id = request.namespace.clone(); let mut table_id = request.namespace.clone();
table_id.push(request.name.clone()); table_id.push(request.name.clone());
let create_empty_request = DeclareTableRequest { // Try declare_table first, falling back to create_empty_table for backwards
// compatibility with older namespace clients that don't support declare_table
let declare_request = DeclareTableRequest {
id: Some(table_id.clone()), id: Some(table_id.clone()),
location: None,
vend_credentials: None,
..Default::default() ..Default::default()
}; };
let create_empty_response = self let location = match self.namespace.declare_table(declare_request).await {
.namespace Ok(response) => response.location.ok_or_else(|| Error::Runtime {
.declare_table(create_empty_request) message: "Table location is missing from declare_table response".to_string(),
.await })?,
.map_err(|e| Error::Runtime { Err(e) => {
message: format!("Failed to declare table: {}", e), // Check if the error is "not supported" and try create_empty_table as fallback
})?; let err_str = e.to_string().to_lowercase();
if err_str.contains("not supported") || err_str.contains("not implemented") {
warn!(
"declare_table is not supported by the namespace client, \
falling back to deprecated create_empty_table. \
create_empty_table is deprecated and will be removed in Lance 3.0.0. \
Please upgrade your namespace client to support declare_table."
);
#[allow(deprecated)]
let create_empty_request = CreateEmptyTableRequest {
id: Some(table_id.clone()),
..Default::default()
};
let location = create_empty_response #[allow(deprecated)]
.location let create_response = self
.ok_or_else(|| Error::Runtime { .namespace
message: "Table location is missing from create_empty_table response".to_string(), .create_empty_table(create_empty_request)
})?; .await
.map_err(|e| Error::Runtime {
message: format!("Failed to create empty table: {}", e),
})?;
create_response.location.ok_or_else(|| Error::Runtime {
message: "Table location is missing from create_empty_table response"
.to_string(),
})?
} else {
return Err(Error::Runtime {
message: format!("Failed to declare table: {}", e),
});
}
}
};
let native_table = NativeTable::create_from_namespace( let native_table = NativeTable::create_from_namespace(
self.namespace.clone(), self.namespace.clone(),
@@ -439,8 +466,6 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default() ..Default::default()
}) })
.await .await
@@ -501,8 +526,6 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default() ..Default::default()
}) })
.await .await
@@ -566,8 +589,6 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default() ..Default::default()
}) })
.await .await
@@ -651,8 +672,6 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default() ..Default::default()
}) })
.await .await
@@ -708,8 +727,6 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default() ..Default::default()
}) })
.await .await
@@ -790,8 +807,6 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default() ..Default::default()
}) })
.await .await
@@ -825,8 +840,6 @@ mod tests {
// Create a child namespace first // Create a child namespace first
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]), id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default() ..Default::default()
}) })
.await .await

View File

@@ -54,8 +54,6 @@
//! You can also use [`ConnectOptions`] to configure the connection to the database. //! You can also use [`ConnectOptions`] to configure the connection to the database.
//! //!
//! ```rust //! ```rust
//! # #[cfg(feature = "aws")]
//! # {
//! use object_store::aws::AwsCredential; //! use object_store::aws::AwsCredential;
//! # tokio::runtime::Runtime::new().unwrap().block_on(async { //! # tokio::runtime::Runtime::new().unwrap().block_on(async {
//! let db = lancedb::connect("data/sample-lancedb") //! let db = lancedb::connect("data/sample-lancedb")
@@ -68,7 +66,6 @@
//! .await //! .await
//! .unwrap(); //! .unwrap();
//! # }); //! # });
//! # }
//! ``` //! ```
//! //!
//! LanceDB uses [arrow-rs](https://github.com/apache/arrow-rs) to define schema, data types and array itself. //! LanceDB uses [arrow-rs](https://github.com/apache/arrow-rs) to define schema, data types and array itself.

View File

@@ -1718,8 +1718,6 @@ mod tests {
let namespace = vec!["test_ns".to_string()]; let namespace = vec!["test_ns".to_string()];
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(namespace.clone()), id: Some(namespace.clone()),
mode: None,
properties: None,
..Default::default() ..Default::default()
}) })
.await .await
@@ -1745,8 +1743,6 @@ mod tests {
let list_response = conn let list_response = conn
.list_tables(ListTablesRequest { .list_tables(ListTablesRequest {
id: Some(namespace.clone()), id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default() ..Default::default()
}) })
.await .await
@@ -1758,8 +1754,6 @@ mod tests {
let list_response = namespace_client let list_response = namespace_client
.list_tables(ListTablesRequest { .list_tables(ListTablesRequest {
id: Some(namespace.clone()), id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default() ..Default::default()
}) })
.await .await
@@ -1800,8 +1794,6 @@ mod tests {
let namespace = vec!["multi_table_ns".to_string()]; let namespace = vec!["multi_table_ns".to_string()];
conn.create_namespace(CreateNamespaceRequest { conn.create_namespace(CreateNamespaceRequest {
id: Some(namespace.clone()), id: Some(namespace.clone()),
mode: None,
properties: None,
..Default::default() ..Default::default()
}) })
.await .await
@@ -1827,8 +1819,6 @@ mod tests {
let list_response = conn let list_response = conn
.list_tables(ListTablesRequest { .list_tables(ListTablesRequest {
id: Some(namespace.clone()), id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default() ..Default::default()
}) })
.await .await

View File

@@ -40,7 +40,7 @@ use lance_index::vector::pq::PQBuildParams;
use lance_index::vector::sq::builder::SQBuildParams; use lance_index::vector::sq::builder::SQBuildParams;
use lance_index::DatasetIndexExt; use lance_index::DatasetIndexExt;
use lance_index::IndexType; use lance_index::IndexType;
use lance_io::object_store::LanceNamespaceStorageOptionsProvider; use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsAccessor};
use lance_namespace::models::{ use lance_namespace::models::{
QueryTableRequest as NsQueryTableRequest, QueryTableRequestColumns, QueryTableRequest as NsQueryTableRequest, QueryTableRequestColumns,
QueryTableRequestFullTextQuery, QueryTableRequestVector, StringFtsQuery, QueryTableRequestFullTextQuery, QueryTableRequestVector, StringFtsQuery,
@@ -1425,9 +1425,7 @@ impl Table {
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let unioned = UnionExec::try_new(projected_plans).map_err(|e| Error::Runtime { let unioned = Arc::new(UnionExec::new(projected_plans));
message: format!("Failed to build union plan: {e}"),
})?;
// We require 1 partition in the final output // We require 1 partition in the final output
let repartitioned = RepartitionExec::try_new( let repartitioned = RepartitionExec::try_new(
unioned, unioned,
@@ -1668,18 +1666,14 @@ impl NativeTable {
// Use DatasetBuilder::from_namespace which automatically fetches location // Use DatasetBuilder::from_namespace which automatically fetches location
// and storage options from the namespace // and storage options from the namespace
let builder = DatasetBuilder::from_namespace( let builder = DatasetBuilder::from_namespace(namespace_client.clone(), table_id)
namespace_client.clone(), .await
table_id, .map_err(|e| match e {
false, // Don't ignore namespace storage options lance::Error::Namespace { source, .. } => Error::Runtime {
) message: format!("Failed to get table info from namespace: {:?}", source),
.await },
.map_err(|e| match e { source => Error::Lance { source },
lance::Error::Namespace { source, .. } => Error::Runtime { })?;
message: format!("Failed to get table info from namespace: {:?}", source),
},
source => Error::Lance { source },
})?;
let dataset = builder let dataset = builder
.with_read_params(params) .with_read_params(params)
@@ -1883,7 +1877,13 @@ impl NativeTable {
let store_params = params let store_params = params
.store_params .store_params
.get_or_insert_with(ObjectStoreParams::default); .get_or_insert_with(ObjectStoreParams::default);
store_params.storage_options_provider = Some(storage_options_provider); let accessor = match store_params.storage_options().cloned() {
Some(options) => {
StorageOptionsAccessor::with_initial_and_provider(options, storage_options_provider)
}
None => StorageOptionsAccessor::with_provider(storage_options_provider),
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
// Patch the params if we have a write store wrapper // Patch the params if we have a write store wrapper
let params = match write_store_wrapper.clone() { let params = match write_store_wrapper.clone() {
@@ -2349,7 +2349,7 @@ impl NativeTable {
}; };
// Convert select to columns list // Convert select to columns list
let columns: Option<Box<QueryTableRequestColumns>> = match &vq.base.select { let columns = match &vq.base.select {
Select::All => None, Select::All => None,
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns { Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
column_names: Some(cols.clone()), column_names: Some(cols.clone()),
@@ -2407,7 +2407,6 @@ impl NativeTable {
with_row_id: Some(vq.base.with_row_id), with_row_id: Some(vq.base.with_row_id),
bypass_vector_index: Some(!vq.use_index), bypass_vector_index: Some(!vq.use_index),
full_text_query, full_text_query,
version: None,
..Default::default() ..Default::default()
}) })
} }
@@ -2426,7 +2425,7 @@ impl NativeTable {
.map(|f| self.filter_to_sql(f)) .map(|f| self.filter_to_sql(f))
.transpose()?; .transpose()?;
let columns: Option<Box<QueryTableRequestColumns>> = match &q.select { let columns = match &q.select {
Select::All => None, Select::All => None,
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns { Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
column_names: Some(cols.clone()), column_names: Some(cols.clone()),
@@ -2470,18 +2469,10 @@ impl NativeTable {
columns, columns,
prefilter: Some(q.prefilter), prefilter: Some(q.prefilter),
offset: q.offset.map(|o| o as i32), offset: q.offset.map(|o| o as i32),
ef: None,
refine_factor: None,
distance_type: None,
nprobes: None,
vector_column: None, // No vector column for plain queries vector_column: None, // No vector column for plain queries
with_row_id: Some(q.with_row_id), with_row_id: Some(q.with_row_id),
bypass_vector_index: Some(true), // No vector index for plain queries bypass_vector_index: Some(true), // No vector index for plain queries
full_text_query, full_text_query,
version: None,
fast_search: None,
lower_bound: None,
upper_bound: None,
..Default::default() ..Default::default()
}) })
} }
@@ -3244,7 +3235,7 @@ impl BaseTable for NativeTable {
.get() .get()
.await .await
.ok() .ok()
.and_then(|dataset| dataset.storage_options().cloned()) .and_then(|dataset| dataset.initial_storage_options().cloned())
} }
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> { async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
@@ -5154,15 +5145,16 @@ mod tests {
let any_query = AnyQuery::VectorQuery(vq); let any_query = AnyQuery::VectorQuery(vq);
let ns_request = table.convert_to_namespace_query(&any_query).unwrap(); let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
let column_names = ns_request
.columns
.as_ref()
.and_then(|cols| cols.column_names.clone());
assert_eq!(ns_request.k, 10); assert_eq!(ns_request.k, 10);
assert_eq!(ns_request.offset, Some(5)); assert_eq!(ns_request.offset, Some(5));
assert_eq!(ns_request.filter, Some("id > 0".to_string())); assert_eq!(ns_request.filter, Some("id > 0".to_string()));
assert_eq!(column_names, Some(vec!["id".to_string()])); assert_eq!(
ns_request
.columns
.as_ref()
.and_then(|c| c.column_names.as_ref()),
Some(&vec!["id".to_string()])
);
assert_eq!(ns_request.vector_column, Some("vector".to_string())); assert_eq!(ns_request.vector_column, Some("vector".to_string()));
assert_eq!(ns_request.distance_type, Some("l2".to_string())); assert_eq!(ns_request.distance_type, Some("l2".to_string()));
assert!(ns_request.vector.single_vector.is_some()); assert!(ns_request.vector.single_vector.is_some());
@@ -5199,16 +5191,17 @@ mod tests {
let any_query = AnyQuery::Query(q); let any_query = AnyQuery::Query(q);
let ns_request = table.convert_to_namespace_query(&any_query).unwrap(); let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
let column_names = ns_request
.columns
.as_ref()
.and_then(|cols| cols.column_names.clone());
// Plain queries should pass an empty vector // Plain queries should pass an empty vector
assert_eq!(ns_request.k, 20); assert_eq!(ns_request.k, 20);
assert_eq!(ns_request.offset, Some(5)); assert_eq!(ns_request.offset, Some(5));
assert_eq!(ns_request.filter, Some("id > 5".to_string())); assert_eq!(ns_request.filter, Some("id > 5".to_string()));
assert_eq!(column_names, Some(vec!["id".to_string()])); assert_eq!(
ns_request
.columns
.as_ref()
.and_then(|c| c.column_names.as_ref()),
Some(&vec!["id".to_string()])
);
assert_eq!(ns_request.with_row_id, Some(true)); assert_eq!(ns_request.with_row_id, Some(true));
assert_eq!(ns_request.bypass_vector_index, Some(true)); assert_eq!(ns_request.bypass_vector_index, Some(true));
assert!(ns_request.vector_column.is_none()); // No vector column for plain queries assert!(ns_request.vector_column.is_none()); // No vector column for plain queries

View File

@@ -100,8 +100,7 @@ impl DatasetRef {
let should_checkout = match &target_ref { let should_checkout = match &target_ref {
refs::Ref::Version(_, Some(target_ver)) => version != target_ver, refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
refs::Ref::Version(_, None) => true, // No specific version, always checkout refs::Ref::Version(_, None) => true, // No specific version, always checkout
refs::Ref::VersionNumber(target_ver) => version != target_ver, refs::Ref::Tag(_) => true, // Always checkout for tags
refs::Ref::Tag(_) => true, // Always checkout for tags
}; };
if should_checkout { if should_checkout {