Compare commits

...

14 Commits

Author SHA1 Message Date
Dan Tasse
65c14f6b40 Avoid embedding warnings 2026-01-30 12:35:45 -05:00
Colin Patrick McCabe
5a7a8da567 feat: check AZURE_STORAGE_ACCOUNT_NAME in remote conns (#2918)
Unlike in Amazon S3, in Azure bucket names are not globally unique.
Instead, the combination of (storage_account_name, bucket_name) is
unique.

Therefore, when using Azure blob store, we always need a way to
configure the storage account name. One way is to use the
storage_options hash map and set azure_storage_account_name. Another way
is to set an environment variable, AZURE_STORAGE_ACCOUNT_NAME.

Prior to this PR, the second way (environment variable) did not work
with remote connections. This is because the existing code that checks
for these environment variables happens inside the Azure object store
implementation itself, which does not run locally when using remote
connections.

This PR addresses that situation by adding a check of the environment
variable. This functions as a default if the relevant storage option is
not set in the storage_options hash map.
2026-01-22 13:36:05 -08:00
Jack Ye
0db8176445 test: fix failing remote doctest reference to aws feature (#2935)
Closes https://github.com/lancedb/lancedb/issues/2933
2026-01-22 13:17:03 -08:00
LanceDB Robot
bd84bba14d chore: update lance dependency to v2.0.0-rc.1 (#2936)
## Summary
- bump Lance dependencies to v2.0.0-rc.1 (git tag)
- align Arrow/DataFusion/PyO3 versions for the new Lance release
- update Python bindings for PyO3 0.26 (attach API + Py<PyAny>)

## Verification
- `cargo clippy --workspace --tests --all-features -- -D warnings`
- `cargo fmt --all`

## Reference
- https://github.com/lance-format/lance/releases/tag/v2.0.0-rc.1

---------

Co-authored-by: Jack Ye <yezhaoqin@gmail.com>
Co-authored-by: Will Jones <willjones127@gmail.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: BubbleCal <bubble_cal@outlook.com>
2026-01-22 13:14:38 -08:00
Lance Release
ac07f8068c Bump version: 0.24.0-beta.1 → 0.24.0 2026-01-22 01:10:15 +00:00
Lance Release
bba362d372 Bump version: 0.24.0-beta.0 → 0.24.0-beta.1 2026-01-22 01:09:53 +00:00
Lance Release
042bc22468 Bump version: 0.27.0-beta.1 → 0.27.0 2026-01-22 01:09:32 +00:00
Lance Release
68569906c6 Bump version: 0.27.0-beta.0 → 0.27.0-beta.1 2026-01-22 01:09:31 +00:00
LanceDB Robot
c71c1fc822 feat: update lance dependency to v1.0.3 (#2932)
## Summary
- bump Lance dependency to v1.0.3
- refresh Cargo metadata and lockfile

## Verification
- cargo clippy --workspace --tests --all-features -- -D warnings
- cargo fmt --all

## Release
- https://github.com/lance-format/lance/releases/tag/v1.0.3
2026-01-21 17:08:24 -08:00
Jack Ye
4a6a0c856e ci: fix codex version bump title and summary (#2931)
1. use feat for releases, chore for prereleases
2. do not have literal `\n` in summary
2026-01-21 15:45:28 -08:00
Jack Ye
f124c9d8d2 test: string type conversion in pandas 3.0+ (#2928)
Pandas 3.0+ string now converts to Arrow large_utf8. This PR mainly
makes sure our test accounts for the difference across the pandas
versions when constructing schema.
2026-01-21 13:40:48 -08:00
Jack Ye
4e65748abf chore: update lance dependency to v1.0.3-rc.1 (#2927)
Supercedes https://github.com/lancedb/lancedb/pull/2925

We accidentally upgraded lance to 2.0.0-beta.8. This PR reverts that
first and then bump to 1.0.3-rc.1

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 11:52:07 -08:00
Colin Patrick McCabe
e897f3edab test: assert remote behavior of drop_table (#2926)
Add support for testing remote connections in drop_table in
`rust/lancedb/src/connection.rs`.
2026-01-21 08:42:40 -08:00
Lance Release
790ba7115b Bump version: 0.23.1 → 0.24.0-beta.0 2026-01-21 12:21:53 +00:00
45 changed files with 520 additions and 424 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.23.1"
current_version = "0.24.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -75,6 +75,13 @@ jobs:
VERSION="${VERSION#v}"
BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
# Use "chore" for beta/rc versions, "feat" for stable releases
if [[ "${VERSION}" == *beta* ]] || [[ "${VERSION}" == *rc* ]]; then
COMMIT_TYPE="chore"
else
COMMIT_TYPE="feat"
fi
cat <<EOF >/tmp/codex-prompt.txt
You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.
@@ -84,10 +91,10 @@ jobs:
3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
6. Stage all relevant files with "git add -A". Commit using the message "chore: update lance dependency to v${VERSION}".
6. Stage all relevant files with "git add -A". Commit using the message "${COMMIT_TYPE}: update lance dependency to v${VERSION}".
7. Push the branch to origin. If the branch already exists, force-push your changes.
8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
9. Create a pull request targeting "main" with title "chore: update lance dependency to v${VERSION}". In the body, summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}).
9. Create a pull request targeting "main" with title "${COMMIT_TYPE}: update lance dependency to v${VERSION}". First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'EOF' > /tmp/pr-body.md). The body should summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). Then run "gh pr create --body-file /tmp/pr-body.md".
10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
Constraints:

74
Cargo.lock generated
View File

@@ -3072,8 +3072,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4404,8 +4404,8 @@ dependencies = [
[[package]]
name = "lance"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow",
"arrow-arith",
@@ -4470,8 +4470,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4490,8 +4490,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrayref",
"paste",
@@ -4500,8 +4500,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4538,8 +4538,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow",
"arrow-array",
@@ -4569,8 +4569,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow",
"arrow-array",
@@ -4588,8 +4588,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4626,8 +4626,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4659,8 +4659,8 @@ dependencies = [
[[package]]
name = "lance-geo"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"datafusion",
"geo-traits",
@@ -4674,8 +4674,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow",
"arrow-arith",
@@ -4742,8 +4742,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow",
"arrow-arith",
@@ -4783,8 +4783,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4800,8 +4800,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow",
"async-trait",
@@ -4813,8 +4813,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4857,8 +4857,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow",
"arrow-array",
@@ -4897,8 +4897,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4909,7 +4909,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.23.1"
version = "0.24.0"
dependencies = [
"ahash",
"anyhow",
@@ -4988,7 +4988,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.23.1"
version = "0.24.0"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -5008,7 +5008,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.26.1"
version = "0.27.0"
dependencies = [
"arrow",
"async-trait",

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }
@@ -59,7 +59,7 @@ rand = "0.9"
snafu = "0.8"
url = "2"
num-traits = "0.2"
regex = "1.12"
regex = "1.10"
lazy_static = "1"
semver = "1.0.25"
chrono = "0.4"

View File

@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId>
<version>0.23.1</version>
<version>0.24.0</version>
</dependency>
```

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.23.1-final.0</version>
<version>0.24.0-final.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.23.1-final.0</version>
<version>0.24.0-final.0</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.23.1"
version = "0.24.0"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1520,9 +1520,9 @@ describe("when optimizing a dataset", () => {
it("delete unverified", async () => {
const version = await table.version();
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
version - 1
}.manifest`;
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${String(
18446744073709551615n - (BigInt(version) - 1n),
).padStart(20, "0")}.manifest`;
fs.rmSync(versionFile);
let stats = await table.optimize({ deleteUnverified: false });

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.23.1",
"version": "0.24.0",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.23.1",
"version": "0.24.0",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.23.1",
"version": "0.24.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.23.1",
"version": "0.24.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.23.1",
"version": "0.24.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.23.1",
"version": "0.24.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.23.1",
"version": "0.24.0",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.23.1",
"version": "0.24.0",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.23.1",
"version": "0.24.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.23.1",
"version": "0.24.0",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.23.1",
"version": "0.24.0",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.27.0-beta.0"
current_version = "0.27.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.27.0-beta.0"
version = "0.27.0"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -9,6 +9,8 @@ import numpy as np
import io
import warnings
from pydantic import Field
from ..util import attempt_import_or_raise
from .base import EmbeddingFunction
from .registry import register
@@ -26,7 +28,7 @@ class ColPaliEmbeddings(EmbeddingFunction):
Parameters
----------
model_name : str
colpali_model_name : str
The name of the model to use (e.g., "Metric-AI/ColQwen2.5-3b-multilingual-v1.0")
Supports models based on these engines:
- ColPali: "vidore/colpali-v1.3" and others
@@ -57,7 +59,10 @@ class ColPaliEmbeddings(EmbeddingFunction):
useful for large models that do not fit in memory.
"""
model_name: str = "Metric-AI/ColQwen2.5-3b-multilingual-v1.0"
colpali_model_name: str = Field(
default="Metric-AI/ColQwen2.5-3b-multilingual-v1.0",
validation_alias="model_name",
)
device: str = "auto"
dtype: str = "bfloat16"
use_token_pooling: bool = True
@@ -107,7 +112,7 @@ class ColPaliEmbeddings(EmbeddingFunction):
self._processor,
self._token_pooler,
) = self._load_model(
self.model_name,
self.colpali_model_name,
dtype,
device,
self.pooling_strategy,

View File

@@ -10,7 +10,7 @@ import urllib.parse as urlparse
import numpy as np
import pyarrow as pa
from tqdm import tqdm
from pydantic import PrivateAttr
from pydantic import Field, PrivateAttr
from ..util import attempt_import_or_raise
from .base import EmbeddingFunction
@@ -24,7 +24,10 @@ if TYPE_CHECKING:
@register("siglip")
class SigLipEmbeddings(EmbeddingFunction):
model_name: str = "google/siglip-base-patch16-224"
siglip_model_name: str = Field(
default="google/siglip-base-patch16-224",
validation_alias="model_name",
)
device: str = "cpu"
batch_size: int = 64
normalize: bool = True
@@ -39,8 +42,10 @@ class SigLipEmbeddings(EmbeddingFunction):
transformers = attempt_import_or_raise("transformers")
self._torch = attempt_import_or_raise("torch")
self._processor = transformers.AutoProcessor.from_pretrained(self.model_name)
self._model = transformers.SiglipModel.from_pretrained(self.model_name)
self._processor = transformers.AutoProcessor.from_pretrained(
self.siglip_model_name
)
self._model = transformers.SiglipModel.from_pretrained(self.siglip_model_name)
self._model.to(self.device)
self._model.eval()
self._ndims = None

View File

@@ -2,12 +2,27 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
from datetime import timedelta
from lancedb.db import AsyncConnection, DBConnection
import lancedb
import pytest
import pytest_asyncio
def pandas_string_type():
"""Return the PyArrow string type that pandas uses for string columns.
pandas 3.0+ uses large_string for string columns, pandas 2.x uses string.
"""
import pandas as pd
import pyarrow as pa
version = tuple(int(x) for x in pd.__version__.split(".")[:2])
if version >= (3, 0):
return pa.large_utf8()
return pa.utf8()
# Use an in-memory database for most tests.
@pytest.fixture
def mem_db() -> DBConnection:

View File

@@ -268,6 +268,8 @@ async def test_create_table_from_iterator_async(mem_db_async: lancedb.AsyncConne
def test_create_exist_ok(tmp_db: lancedb.DBConnection):
from conftest import pandas_string_type
data = pd.DataFrame(
{
"vector": [[3.1, 4.1], [5.9, 26.5]],
@@ -286,10 +288,11 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
assert tbl.schema == tbl2.schema
assert len(tbl) == len(tbl2)
# pandas 3.0+ uses large_string, pandas 2.x uses string
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("item", pa.utf8()),
pa.field("item", pandas_string_type()),
pa.field("price", pa.float64()),
]
)
@@ -299,7 +302,7 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
bad_schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("item", pa.utf8()),
pa.field("item", pandas_string_type()),
pa.field("price", pa.float64()),
pa.field("extra", pa.float32()),
]
@@ -365,6 +368,8 @@ async def test_create_mode_async(tmp_db_async: lancedb.AsyncConnection):
@pytest.mark.asyncio
async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
from conftest import pandas_string_type
data = pd.DataFrame(
{
"vector": [[3.1, 4.1], [5.9, 26.5]],
@@ -382,10 +387,11 @@ async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
assert tbl.name == tbl2.name
assert await tbl.schema() == await tbl2.schema()
# pandas 3.0+ uses large_string, pandas 2.x uses string
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("item", pa.utf8()),
pa.field("item", pandas_string_type()),
pa.field("price", pa.float64()),
]
)
@@ -595,6 +601,8 @@ def test_open_table_sync(tmp_db: lancedb.DBConnection):
@pytest.mark.asyncio
async def test_open_table(tmp_path):
from conftest import pandas_string_type
db = await lancedb.connect_async(tmp_path)
data = pd.DataFrame(
{
@@ -614,10 +622,11 @@ async def test_open_table(tmp_path):
)
is not None
)
# pandas 3.0+ uses large_string, pandas 2.x uses string
assert await tbl.schema() == pa.schema(
{
"vector": pa.list_(pa.float32(), list_size=2),
"item": pa.utf8(),
"item": pandas_string_type(),
"price": pa.float64(),
}
)

View File

@@ -26,6 +26,8 @@ import pytest
from lance_namespace import (
CreateEmptyTableRequest,
CreateEmptyTableResponse,
DeclareTableRequest,
DeclareTableResponse,
DescribeTableRequest,
DescribeTableResponse,
LanceNamespace,
@@ -160,6 +162,19 @@ class TrackingNamespace(LanceNamespace):
return modified
def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
"""Track declare_table calls and inject rotating credentials."""
with self.lock:
self.create_call_count += 1
count = self.create_call_count
response = self.inner.declare_table(request)
response.storage_options = self._modify_storage_options(
response.storage_options, count
)
return response
def create_empty_table(
self, request: CreateEmptyTableRequest
) -> CreateEmptyTableResponse:

View File

@@ -601,7 +601,6 @@ def test_head():
def test_query_sync_minimal():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"refine_factor": None,
@@ -685,7 +684,6 @@ def test_query_sync_maximal():
def test_query_sync_nprobes():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"fast_search": True,
@@ -715,7 +713,6 @@ def test_query_sync_nprobes():
def test_query_sync_no_max_nprobes():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"fast_search": True,
@@ -838,7 +835,6 @@ def test_query_sync_hybrid():
else:
# Vector query
assert body == {
"distance_type": "l2",
"k": 42,
"prefilter": True,
"refine_factor": None,

View File

@@ -1880,8 +1880,13 @@ async def test_optimize_delete_unverified(tmp_db_async: AsyncConnection, tmp_pat
],
)
version = await table.version()
path = tmp_path / "test.lance" / "_versions" / f"{version - 1}.manifest"
assert version == 2
# By removing a manifest file, we make the data files we just inserted unverified
version_name = 18446744073709551615 - (version - 1)
path = tmp_path / "test.lance" / "_versions" / f"{version_name:020}.manifest"
os.remove(path)
stats = await table.optimize(delete_unverified=False)
assert stats.prune.old_versions_removed == 0
stats = await table.optimize(

View File

@@ -528,12 +528,19 @@ def test_sanitize_data(
else:
expected_schema = schema
else:
from conftest import pandas_string_type
# polars uses large_string, pandas 3.0+ uses large_string, others use string
if isinstance(data, pl.DataFrame):
text_type = pa.large_utf8()
elif isinstance(data, pd.DataFrame):
text_type = pandas_string_type()
else:
text_type = pa.string()
expected_schema = pa.schema(
{
"id": pa.int64(),
"text": pa.large_utf8()
if isinstance(data, pl.DataFrame)
else pa.string(),
"text": text_type,
"vector": pa.list_(pa.float32(), 10),
}
)

View File

@@ -55,12 +55,12 @@ impl RecordBatchStream {
.next()
.await
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = inner_next.infer_error()?.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
Python::attach(|py| {
inner_next
.infer_error()?
.to_pyarrow(py)
.map(|obj| obj.unbind())
})
})
}
}

View File

@@ -307,8 +307,7 @@ impl Connection {
..Default::default()
};
let response = inner.list_namespaces(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
Python::attach(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("namespaces", response.namespaces)?;
dict.set_item("page_token", response.page_token)?;
@@ -328,7 +327,8 @@ impl Connection {
let py = self_.py();
future_into_py(py, async move {
use lance_namespace::models::CreateNamespaceRequest;
let mode_enum = mode.and_then(|m| match m.to_lowercase().as_str() {
// Mode is now a string field
let mode_str = mode.and_then(|m| match m.to_lowercase().as_str() {
"create" => Some("Create".to_string()),
"exist_ok" => Some("ExistOk".to_string()),
"overwrite" => Some("Overwrite".to_string()),
@@ -340,13 +340,12 @@ impl Connection {
} else {
Some(namespace)
},
mode: mode_enum,
mode: mode_str,
properties,
..Default::default()
};
let response = inner.create_namespace(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
Python::attach(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
Ok(dict.unbind())
@@ -365,12 +364,13 @@ impl Connection {
let py = self_.py();
future_into_py(py, async move {
use lance_namespace::models::DropNamespaceRequest;
let mode_enum = mode.and_then(|m| match m.to_uppercase().as_str() {
// Mode and Behavior are now string fields
let mode_str = mode.and_then(|m| match m.to_uppercase().as_str() {
"SKIP" => Some("Skip".to_string()),
"FAIL" => Some("Fail".to_string()),
_ => None,
});
let behavior_enum = behavior.and_then(|b| match b.to_uppercase().as_str() {
let behavior_str = behavior.and_then(|b| match b.to_uppercase().as_str() {
"RESTRICT" => Some("Restrict".to_string()),
"CASCADE" => Some("Cascade".to_string()),
_ => None,
@@ -381,13 +381,12 @@ impl Connection {
} else {
Some(namespace)
},
mode: mode_enum,
behavior: behavior_enum,
mode: mode_str,
behavior: behavior_str,
..Default::default()
};
let response = inner.drop_namespace(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
Python::attach(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
dict.set_item("transaction_id", response.transaction_id)?;
@@ -414,8 +413,7 @@ impl Connection {
..Default::default()
};
let response = inner.describe_namespace(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
Python::attach(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
Ok(dict.unbind())
@@ -445,8 +443,7 @@ impl Connection {
..Default::default()
};
let response = inner.list_tables(request).await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
Python::attach(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("tables", response.tables)?;
dict.set_item("page_token", response.page_token)?;

View File

@@ -40,34 +40,31 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
request_id,
source,
status_code,
} => {
#[allow(deprecated)]
Python::with_gil(|py| {
let message = err.to_string();
let http_err_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
.getattr(intern!(py, "HttpError"))?;
let err = http_err_cls.call1((
message,
} => Python::attach(|py| {
let message = err.to_string();
let http_err_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
.getattr(intern!(py, "HttpError"))?;
let err = http_err_cls.call1((
message,
request_id,
status_code.map(|s| s.as_u16()),
))?;
if let Some(cause) = source.source() {
// The HTTP error already includes the first cause. But
// we can add the rest of the chain if there is any more.
let cause_err = http_from_rust_error(
py,
cause,
request_id,
status_code.map(|s| s.as_u16()),
))?;
)?;
err.setattr(intern!(py, "__cause__"), cause_err)?;
}
if let Some(cause) = source.source() {
// The HTTP error already includes the first cause. But
// we can add the rest of the chain if there is any more.
let cause_err = http_from_rust_error(
py,
cause,
request_id,
status_code.map(|s| s.as_u16()),
)?;
err.setattr(intern!(py, "__cause__"), cause_err)?;
}
Err(PyErr::from_value(err))
})
}
Err(PyErr::from_value(err))
}),
LanceError::Retry {
request_id,
request_failures,
@@ -78,37 +75,33 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
max_read_failures,
source,
status_code,
} =>
{
#[allow(deprecated)]
Python::with_gil(|py| {
let cause_err = http_from_rust_error(
py,
source.as_ref(),
request_id,
status_code.map(|s| s.as_u16()),
)?;
} => Python::attach(|py| {
let cause_err = http_from_rust_error(
py,
source.as_ref(),
request_id,
status_code.map(|s| s.as_u16()),
)?;
let message = err.to_string();
let retry_error_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
.getattr("RetryError")?;
let err = retry_error_cls.call1((
message,
request_id,
*request_failures,
*connect_failures,
*read_failures,
*max_request_failures,
*max_connect_failures,
*max_read_failures,
status_code.map(|s| s.as_u16()),
))?;
let message = err.to_string();
let retry_error_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
.getattr("RetryError")?;
let err = retry_error_cls.call1((
message,
request_id,
*request_failures,
*connect_failures,
*read_failures,
*max_request_failures,
*max_connect_failures,
*max_read_failures,
status_code.map(|s| s.as_u16()),
))?;
err.setattr(intern!(py, "__cause__"), cause_err)?;
Err(PyErr::from_value(err))
})
}
err.setattr(intern!(py, "__cause__"), cause_err)?;
Err(PyErr::from_value(err))
}),
_ => self.runtime_error(),
},
}

View File

@@ -12,8 +12,7 @@ pub struct PyHeaderProvider {
impl Clone for PyHeaderProvider {
fn clone(&self) -> Self {
#[allow(deprecated)]
Python::with_gil(|py| Self {
Python::attach(|py| Self {
provider: self.provider.clone_ref(py),
})
}
@@ -26,8 +25,7 @@ impl PyHeaderProvider {
/// Get headers from the Python provider (internal implementation)
fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
#[allow(deprecated)]
Python::with_gil(|py| {
Python::attach(|py| {
// Call the get_headers method
let result = self.provider.call_method0(py, "get_headers");

View File

@@ -19,7 +19,7 @@ use pyo3::{
exceptions::PyRuntimeError,
pyclass, pymethods,
types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
Bound, Py, PyAny, PyRef, PyRefMut, PyResult, Python,
Bound, PyAny, PyRef, PyRefMut, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -281,12 +281,7 @@ impl PyPermutationReader {
let reader = slf.reader.clone();
future_into_py(slf.py(), async move {
let schema = reader.output_schema(selection).await.infer_error()?;
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
})
}

View File

@@ -29,7 +29,6 @@ use pyo3::types::PyList;
use pyo3::types::{PyDict, PyString};
use pyo3::Bound;
use pyo3::IntoPyObject;
use pyo3::Py;
use pyo3::PyAny;
use pyo3::PyRef;
use pyo3::PyResult;
@@ -454,12 +453,7 @@ impl Query {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
})
}
@@ -538,12 +532,7 @@ impl TakeQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
})
}
@@ -638,12 +627,7 @@ impl FTSQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
})
}
@@ -822,12 +806,7 @@ impl VectorQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
})
}

View File

@@ -22,8 +22,7 @@ pub struct PyStorageOptionsProvider {
impl Clone for PyStorageOptionsProvider {
fn clone(&self) -> Self {
#[allow(deprecated)]
Python::with_gil(|py| Self {
Python::attach(|py| Self {
inner: self.inner.clone_ref(py),
})
}
@@ -31,17 +30,14 @@ impl Clone for PyStorageOptionsProvider {
impl PyStorageOptionsProvider {
pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
#[allow(deprecated)]
Python::with_gil(|py| {
Python::attach(|py| {
// Verify the object has a fetch_storage_options method
if !obj.bind(py).hasattr("fetch_storage_options")? {
return Err(pyo3::exceptions::PyTypeError::new_err(
"StorageOptionsProvider must implement fetch_storage_options() method",
));
}
Ok(Self {
inner: obj.clone_ref(py),
})
Ok(Self { inner: obj })
})
}
}
@@ -64,8 +60,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
let py_provider = self.py_provider.clone();
tokio::task::spawn_blocking(move || {
#[allow(deprecated)]
Python::with_gil(|py| {
Python::attach(|py| {
// Call the Python fetch_storage_options method
let result = py_provider
.inner
@@ -124,8 +119,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
}
fn provider_id(&self) -> String {
#[allow(deprecated)]
Python::with_gil(|py| {
Python::attach(|py| {
// Call provider_id() method on the Python object
let obj = self.py_provider.inner.bind(py);
obj.call_method0("provider_id")

View File

@@ -21,7 +21,7 @@ use pyo3::{
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
pyclass, pymethods,
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -287,12 +287,7 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let schema = inner.schema().await.infer_error()?;
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
})
}
@@ -442,8 +437,7 @@ impl Table {
future_into_py(self_.py(), async move {
let stats = inner.index_stats(&index_name).await.infer_error()?;
if let Some(stats) = stats {
#[allow(deprecated)]
Python::with_gil(|py| {
Python::attach(|py| {
let dict = PyDict::new(py);
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
@@ -473,8 +467,7 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let stats = inner.stats().await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| {
Python::attach(|py| {
let dict = PyDict::new(py);
dict.set_item("total_bytes", stats.total_bytes)?;
dict.set_item("num_rows", stats.num_rows)?;
@@ -528,8 +521,7 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let versions = inner.list_versions().await.infer_error()?;
#[allow(deprecated)]
let versions_as_dict = Python::with_gil(|py| {
let versions_as_dict = Python::attach(|py| {
versions
.iter()
.map(|v| {
@@ -880,8 +872,7 @@ impl Tags {
let tags = inner.tags().await.infer_error()?;
let res = tags.list().await.infer_error()?;
#[allow(deprecated)]
Python::with_gil(|py| {
Python::attach(|py| {
let py_dict = PyDict::new(py);
for (key, contents) in res {
let value_dict = PyDict::new(py);

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.23.1"
version = "0.24.0"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -36,10 +36,42 @@ use crate::remote::{
};
use crate::table::{TableDefinition, WriteOptions};
use crate::Table;
use lance::io::ObjectStoreParams;
pub use lance_encoding::version::LanceFileVersion;
#[cfg(feature = "remote")]
use lance_io::object_store::StorageOptions;
use lance_io::object_store::StorageOptionsProvider;
use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
fn merge_storage_options(
store_params: &mut ObjectStoreParams,
pairs: impl IntoIterator<Item = (String, String)>,
) {
let mut options = store_params.storage_options().cloned().unwrap_or_default();
for (key, value) in pairs {
options.insert(key, value);
}
let provider = store_params
.storage_options_accessor
.as_ref()
.and_then(|accessor| accessor.provider().cloned());
let accessor = if let Some(provider) = provider {
StorageOptionsAccessor::with_initial_and_provider(options, provider)
} else {
StorageOptionsAccessor::with_static_options(options)
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
}
fn set_storage_options_provider(
store_params: &mut ObjectStoreParams,
provider: Arc<dyn StorageOptionsProvider>,
) {
let accessor = match store_params.storage_options().cloned() {
Some(options) => StorageOptionsAccessor::with_initial_and_provider(options, provider),
None => StorageOptionsAccessor::with_provider(provider),
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
}
/// A builder for configuring a [`Connection::table_names`] operation
pub struct TableNamesBuilder {
@@ -246,16 +278,14 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
///
/// See available options at <https://lancedb.com/docs/storage/>
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
let store_options = self
let store_params = self
.request
.write_options
.lance_write_params
.get_or_insert(Default::default())
.store_params
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default());
store_options.insert(key.into(), value.into());
merge_storage_options(store_params, [(key.into(), value.into())]);
self
}
@@ -269,19 +299,17 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
) -> Self {
let store_options = self
let store_params = self
.request
.write_options
.lance_write_params
.get_or_insert(Default::default())
.store_params
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default());
for (key, value) in pairs {
store_options.insert(key.into(), value.into());
}
let updates = pairs
.into_iter()
.map(|(key, value)| (key.into(), value.into()));
merge_storage_options(store_params, updates);
self
}
@@ -318,23 +346,21 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// This has no effect in LanceDB Cloud.
#[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
pub fn enable_v2_manifest_paths(mut self, use_v2_manifest_paths: bool) -> Self {
let storage_options = self
let store_params = self
.request
.write_options
.lance_write_params
.get_or_insert_with(Default::default)
.store_params
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
storage_options.insert(
OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(),
if use_v2_manifest_paths {
"true".to_string()
} else {
"false".to_string()
},
let value = if use_v2_manifest_paths {
"true".to_string()
} else {
"false".to_string()
};
merge_storage_options(
store_params,
[(OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(), value)],
);
self
}
@@ -344,19 +370,19 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// The default is `LanceFileVersion::Stable`.
#[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
pub fn data_storage_version(mut self, data_storage_version: LanceFileVersion) -> Self {
let storage_options = self
let store_params = self
.request
.write_options
.lance_write_params
.get_or_insert_with(Default::default)
.store_params
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
storage_options.insert(
OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
data_storage_version.to_string(),
merge_storage_options(
store_params,
[(
OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
data_storage_version.to_string(),
)],
);
self
}
@@ -381,13 +407,14 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// This allows tables to automatically refresh cloud storage credentials
/// when they expire, enabling long-running operations on remote storage.
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
self.request
let store_params = self
.request
.write_options
.lance_write_params
.get_or_insert(Default::default())
.store_params
.get_or_insert(Default::default())
.storage_options_provider = Some(provider);
.get_or_insert(Default::default());
set_storage_options_provider(store_params, provider);
self
}
}
@@ -450,15 +477,13 @@ impl OpenTableBuilder {
///
/// See available options at <https://lancedb.com/docs/storage/>
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
let storage_options = self
let store_params = self
.request
.lance_read_params
.get_or_insert(Default::default())
.store_options
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default());
storage_options.insert(key.into(), value.into());
merge_storage_options(store_params, [(key.into(), value.into())]);
self
}
@@ -472,18 +497,16 @@ impl OpenTableBuilder {
mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
) -> Self {
let storage_options = self
let store_params = self
.request
.lance_read_params
.get_or_insert(Default::default())
.store_options
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default());
for (key, value) in pairs {
storage_options.insert(key.into(), value.into());
}
let updates = pairs
.into_iter()
.map(|(key, value)| (key.into(), value.into()));
merge_storage_options(store_params, updates);
self
}
@@ -507,12 +530,13 @@ impl OpenTableBuilder {
/// This allows tables to automatically refresh cloud storage credentials
/// when they expire, enabling long-running operations on remote storage.
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
self.request
let store_params = self
.request
.lance_read_params
.get_or_insert(Default::default())
.store_options
.get_or_insert(Default::default())
.storage_options_provider = Some(provider);
.get_or_insert(Default::default());
set_storage_options_provider(store_params, provider);
self
}
@@ -868,6 +892,9 @@ pub struct ConnectBuilder {
embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
}
const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 1] =
[("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name")];
impl ConnectBuilder {
/// Create a new [`ConnectOptions`] with the given database URI.
pub fn new(uri: &str) -> Self {
@@ -1051,11 +1078,27 @@ impl ConnectBuilder {
self
}
#[cfg(feature = "remote")]
fn apply_env_defaults(
env_var_to_remote_storage_option: &[(&str, &str)],
options: &mut HashMap<String, String>,
) {
for (env_key, opt_key) in env_var_to_remote_storage_option {
if let Ok(env_value) = std::env::var(env_key) {
if !options.contains_key(*opt_key) {
options.insert((*opt_key).to_string(), env_value);
}
}
}
}
#[cfg(feature = "remote")]
fn execute_remote(self) -> Result<Connection> {
use crate::remote::db::RemoteDatabaseOptions;
let options = RemoteDatabaseOptions::parse_from_map(&self.request.options)?;
let mut merged_options = self.request.options.clone();
Self::apply_env_defaults(&ENV_VARS_TO_STORAGE_OPTS, &mut merged_options);
let options = RemoteDatabaseOptions::parse_from_map(&merged_options)?;
let region = options.region.ok_or_else(|| Error::InvalidInput {
message: "A region is required when connecting to LanceDb Cloud".to_string(),
@@ -1277,8 +1320,6 @@ mod test_utils {
#[cfg(test)]
mod tests {
use std::fs::create_dir_all;
use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
use crate::query::QueryBase;
use crate::query::{ExecutableQuery, QueryExecutionOptions};
@@ -1302,6 +1343,23 @@ mod tests {
assert_eq!(tc.connection.uri(), tc.uri);
}
#[cfg(feature = "remote")]
#[test]
fn test_apply_env_defaults() {
let env_key = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_KEY";
let env_val = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_VAL";
let opts_key = "test_apply_env_defaults_environment_variable_opts_key";
std::env::set_var(env_key, env_val);
let mut options = HashMap::new();
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
assert_eq!(Some(&env_val.to_string()), options.get(opts_key));
options.insert(opts_key.to_string(), "EXPLICIT-VALUE".to_string());
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
assert_eq!(Some(&"EXPLICIT-VALUE".to_string()), options.get(opts_key));
}
#[cfg(not(windows))]
#[tokio::test]
async fn test_connect_relative() {
@@ -1526,18 +1584,27 @@ mod tests {
#[tokio::test]
async fn drop_table() {
let tmp_dir = tempdir().unwrap();
let tc = new_test_connection().await.unwrap();
let db = tc.connection;
let uri = tmp_dir.path().to_str().unwrap();
let db = connect(uri).execute().await.unwrap();
if tc.is_remote {
// All the typical endpoints such as s3:///, file-object-store:///, etc. treat drop_table
// as idempotent.
assert!(db.drop_table("invalid_table", &[]).await.is_ok());
} else {
// The behavior of drop_table when using a file:/// endpoint differs from all other
// object providers, in that it returns an error when deleting a non-existent table.
assert!(matches!(
db.drop_table("invalid_table", &[]).await,
Err(crate::Error::TableNotFound { .. }),
));
}
// drop non-exist table
assert!(matches!(
db.drop_table("invalid_table", &[]).await,
Err(crate::Error::TableNotFound { .. }),
));
create_dir_all(tmp_dir.path().join("table1.lance")).unwrap();
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
db.create_empty_table("table1", schema.clone())
.execute()
.await
.unwrap();
db.drop_table("table1", &[]).await.unwrap();
let tables = db.table_names().execute().await.unwrap();

View File

@@ -12,7 +12,7 @@ use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode};
use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
use lance_datafusion::utils::StreamingWriteSource;
use lance_encoding::version::LanceFileVersion;
use lance_io::object_store::StorageOptionsProvider;
use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
use lance_table::io::commit::commit_handler_from_url;
use object_store::local::LocalFileSystem;
use snafu::ResultExt;
@@ -356,7 +356,13 @@ impl ListingDatabase {
.clone()
.unwrap_or_else(|| Arc::new(lance::session::Session::default()));
let os_params = ObjectStoreParams {
storage_options: Some(options.storage_options.clone()),
storage_options_accessor: if options.storage_options.is_empty() {
None
} else {
Some(Arc::new(StorageOptionsAccessor::with_static_options(
options.storage_options.clone(),
)))
},
..Default::default()
};
let (object_store, base_path) = ObjectStore::from_uri_and_params(
@@ -492,7 +498,13 @@ impl ListingDatabase {
async fn drop_tables(&self, names: Vec<String>) -> Result<()> {
let object_store_params = ObjectStoreParams {
storage_options: Some(self.storage_options.clone()),
storage_options_accessor: if self.storage_options.is_empty() {
None
} else {
Some(Arc::new(StorageOptionsAccessor::with_static_options(
self.storage_options.clone(),
)))
},
..Default::default()
};
let mut uri = self.uri.clone();
@@ -541,7 +553,7 @@ impl ListingDatabase {
.lance_write_params
.as_ref()
.and_then(|p| p.store_params.as_ref())
.and_then(|sp| sp.storage_options.as_ref());
.and_then(|sp| sp.storage_options());
let storage_version_override = storage_options
.and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION))
@@ -592,21 +604,20 @@ impl ListingDatabase {
// will cause a new connection to be created, and that connection will
// be dropped from the cache when python GCs the table object, which
// confounds reuse across tables.
if !self.storage_options.is_empty() {
let storage_options = write_params
if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
let store_params = write_params
.store_params
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
self.inherit_storage_options(storage_options);
}
// Set storage options provider if available
if self.storage_options_provider.is_some() {
write_params
.store_params
.get_or_insert_with(Default::default)
.storage_options_provider = self.storage_options_provider.clone();
let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
if !self.storage_options.is_empty() {
self.inherit_storage_options(&mut storage_options);
}
let accessor = if let Some(ref provider) = self.storage_options_provider {
StorageOptionsAccessor::with_initial_and_provider(storage_options, provider.clone())
} else {
StorageOptionsAccessor::with_static_options(storage_options)
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
}
write_params.data_storage_version = self
@@ -892,7 +903,13 @@ impl Database for ListingDatabase {
validate_table_name(&request.target_table_name)?;
let storage_params = ObjectStoreParams {
storage_options: Some(self.storage_options.clone()),
storage_options_accessor: if self.storage_options.is_empty() {
None
} else {
Some(Arc::new(StorageOptionsAccessor::with_static_options(
self.storage_options.clone(),
)))
},
..Default::default()
};
let read_params = ReadParams {
@@ -956,25 +973,28 @@ impl Database for ListingDatabase {
// will cause a new connection to be created, and that connection will
// be dropped from the cache when python GCs the table object, which
// confounds reuse across tables.
if !self.storage_options.is_empty() {
let storage_options = request
if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
let store_params = request
.lance_read_params
.get_or_insert_with(Default::default)
.store_options
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
self.inherit_storage_options(storage_options);
}
// Set storage options provider if available
if self.storage_options_provider.is_some() {
request
.lance_read_params
.get_or_insert_with(Default::default)
.store_options
.get_or_insert_with(Default::default)
.storage_options_provider = self.storage_options_provider.clone();
let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
if !self.storage_options.is_empty() {
self.inherit_storage_options(&mut storage_options);
}
// Preserve request-level provider if no connection-level provider exists
let request_provider = store_params
.storage_options_accessor
.as_ref()
.and_then(|a| a.provider().cloned());
let provider = self.storage_options_provider.clone().or(request_provider);
let accessor = if let Some(provider) = provider {
StorageOptionsAccessor::with_initial_and_provider(storage_options, provider)
} else {
StorageOptionsAccessor::with_static_options(storage_options)
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
}
// Some ReadParams are exposed in the OpenTableBuilder, but we also
@@ -1881,7 +1901,9 @@ mod tests {
let write_options = WriteOptions {
lance_write_params: Some(lance::dataset::WriteParams {
store_params: Some(lance::io::ObjectStoreParams {
storage_options: Some(storage_options),
storage_options_accessor: Some(Arc::new(
StorageOptionsAccessor::with_static_options(storage_options),
)),
..Default::default()
}),
..Default::default()
@@ -1955,7 +1977,9 @@ mod tests {
let write_options = WriteOptions {
lance_write_params: Some(lance::dataset::WriteParams {
store_params: Some(lance::io::ObjectStoreParams {
storage_options: Some(storage_options),
storage_options_accessor: Some(Arc::new(
StorageOptionsAccessor::with_static_options(storage_options),
)),
..Default::default()
}),
..Default::default()

View File

@@ -9,14 +9,15 @@ use std::sync::Arc;
use async_trait::async_trait;
use lance_namespace::{
models::{
CreateNamespaceRequest, CreateNamespaceResponse, DeclareTableRequest,
DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest,
DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest,
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
DeclareTableRequest, DescribeNamespaceRequest, DescribeNamespaceResponse,
DescribeTableRequest, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest,
ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
},
LanceNamespace,
};
use lance_namespace_impls::ConnectBuilder;
use log::warn;
use crate::database::ReadConsistency;
use crate::error::{Error, Result};
@@ -154,7 +155,6 @@ impl Database for LanceNamespaceDatabase {
table_id.push(request.name.clone());
let describe_request = DescribeTableRequest {
id: Some(table_id.clone()),
version: None,
..Default::default()
};
@@ -205,26 +205,53 @@ impl Database for LanceNamespaceDatabase {
let mut table_id = request.namespace.clone();
table_id.push(request.name.clone());
let create_empty_request = DeclareTableRequest {
// Try declare_table first, falling back to create_empty_table for backwards
// compatibility with older namespace clients that don't support declare_table
let declare_request = DeclareTableRequest {
id: Some(table_id.clone()),
location: None,
vend_credentials: None,
..Default::default()
};
let create_empty_response = self
.namespace
.declare_table(create_empty_request)
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to declare table: {}", e),
})?;
let location = match self.namespace.declare_table(declare_request).await {
Ok(response) => response.location.ok_or_else(|| Error::Runtime {
message: "Table location is missing from declare_table response".to_string(),
})?,
Err(e) => {
// Check if the error is "not supported" and try create_empty_table as fallback
let err_str = e.to_string().to_lowercase();
if err_str.contains("not supported") || err_str.contains("not implemented") {
warn!(
"declare_table is not supported by the namespace client, \
falling back to deprecated create_empty_table. \
create_empty_table is deprecated and will be removed in Lance 3.0.0. \
Please upgrade your namespace client to support declare_table."
);
#[allow(deprecated)]
let create_empty_request = CreateEmptyTableRequest {
id: Some(table_id.clone()),
..Default::default()
};
let location = create_empty_response
.location
.ok_or_else(|| Error::Runtime {
message: "Table location is missing from create_empty_table response".to_string(),
})?;
#[allow(deprecated)]
let create_response = self
.namespace
.create_empty_table(create_empty_request)
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to create empty table: {}", e),
})?;
create_response.location.ok_or_else(|| Error::Runtime {
message: "Table location is missing from create_empty_table response"
.to_string(),
})?
} else {
return Err(Error::Runtime {
message: format!("Failed to declare table: {}", e),
});
}
}
};
let native_table = NativeTable::create_from_namespace(
self.namespace.clone(),
@@ -439,8 +466,6 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -501,8 +526,6 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -566,8 +589,6 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -651,8 +672,6 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -708,8 +727,6 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -790,8 +807,6 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -825,8 +840,6 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await

View File

@@ -51,24 +51,19 @@
//! - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud object store
//! - `db://dbname` - Lance Cloud
//!
//! You can also use [`ConnectOptions`] to configure the connection to the database.
//! You can also use [`ConnectBuilder`] to configure the connection to the database.
//!
//! ```rust
//! # #[cfg(feature = "aws")]
//! # {
//! use object_store::aws::AwsCredential;
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
//! let db = lancedb::connect("data/sample-lancedb")
//! .aws_creds(AwsCredential {
//! key_id: "some_key".to_string(),
//! secret_key: "some_secret".to_string(),
//! token: None,
//! })
//! .storage_options([
//! ("aws_access_key_id", "some_key"),
//! ("aws_secret_access_key", "some_secret"),
//! ])
//! .execute()
//! .await
//! .unwrap();
//! # });
//! # }
//! ```
//!
//! LanceDB uses [arrow-rs](https://github.com/apache/arrow-rs) to define schema, data types and array itself.

View File

@@ -1718,8 +1718,6 @@ mod tests {
let namespace = vec!["test_ns".to_string()];
conn.create_namespace(CreateNamespaceRequest {
id: Some(namespace.clone()),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -1745,8 +1743,6 @@ mod tests {
let list_response = conn
.list_tables(ListTablesRequest {
id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default()
})
.await
@@ -1758,8 +1754,6 @@ mod tests {
let list_response = namespace_client
.list_tables(ListTablesRequest {
id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default()
})
.await
@@ -1800,8 +1794,6 @@ mod tests {
let namespace = vec!["multi_table_ns".to_string()];
conn.create_namespace(CreateNamespaceRequest {
id: Some(namespace.clone()),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -1827,8 +1819,6 @@ mod tests {
let list_response = conn
.list_tables(ListTablesRequest {
id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default()
})
.await

View File

@@ -468,7 +468,9 @@ impl<S: HttpSend> RemoteTable<S> {
self.apply_query_params(&mut body, &query.base)?;
// Apply general parameters, before we dispatch based on number of query vectors.
body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
if let Some(distance_type) = query.distance_type {
body["distance_type"] = serde_json::json!(distance_type);
}
// In 0.23.1 we migrated from `nprobes` to `minimum_nprobes` and `maximum_nprobes`.
// Old client / new server: since minimum_nprobes is missing, fallback to nprobes
// New client / old server: old server will only see nprobes, make sure to set both
@@ -2230,7 +2232,6 @@ mod tests {
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
let mut expected_body = serde_json::json!({
"prefilter": true,
"distance_type": "l2",
"nprobes": 20,
"minimum_nprobes": 20,
"maximum_nprobes": 20,

View File

@@ -40,7 +40,7 @@ use lance_index::vector::pq::PQBuildParams;
use lance_index::vector::sq::builder::SQBuildParams;
use lance_index::DatasetIndexExt;
use lance_index::IndexType;
use lance_io::object_store::LanceNamespaceStorageOptionsProvider;
use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsAccessor};
use lance_namespace::models::{
QueryTableRequest as NsQueryTableRequest, QueryTableRequestColumns,
QueryTableRequestFullTextQuery, QueryTableRequestVector, StringFtsQuery,
@@ -1425,8 +1425,8 @@ impl Table {
})
.collect::<Vec<_>>();
let unioned = UnionExec::try_new(projected_plans).map_err(|e| Error::Runtime {
message: format!("Failed to build union plan: {e}"),
let unioned = UnionExec::try_new(projected_plans).map_err(|err| Error::Runtime {
message: err.to_string(),
})?;
// We require 1 partition in the final output
let repartitioned = RepartitionExec::try_new(
@@ -1668,18 +1668,14 @@ impl NativeTable {
// Use DatasetBuilder::from_namespace which automatically fetches location
// and storage options from the namespace
let builder = DatasetBuilder::from_namespace(
namespace_client.clone(),
table_id,
false, // Don't ignore namespace storage options
)
.await
.map_err(|e| match e {
lance::Error::Namespace { source, .. } => Error::Runtime {
message: format!("Failed to get table info from namespace: {:?}", source),
},
source => Error::Lance { source },
})?;
let builder = DatasetBuilder::from_namespace(namespace_client.clone(), table_id)
.await
.map_err(|e| match e {
lance::Error::Namespace { source, .. } => Error::Runtime {
message: format!("Failed to get table info from namespace: {:?}", source),
},
source => Error::Lance { source },
})?;
let dataset = builder
.with_read_params(params)
@@ -1883,7 +1879,13 @@ impl NativeTable {
let store_params = params
.store_params
.get_or_insert_with(ObjectStoreParams::default);
store_params.storage_options_provider = Some(storage_options_provider);
let accessor = match store_params.storage_options().cloned() {
Some(options) => {
StorageOptionsAccessor::with_initial_and_provider(options, storage_options_provider)
}
None => StorageOptionsAccessor::with_provider(storage_options_provider),
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
// Patch the params if we have a write store wrapper
let params = match write_store_wrapper.clone() {
@@ -2349,7 +2351,7 @@ impl NativeTable {
};
// Convert select to columns list
let columns: Option<Box<QueryTableRequestColumns>> = match &vq.base.select {
let columns = match &vq.base.select {
Select::All => None,
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
column_names: Some(cols.clone()),
@@ -2407,7 +2409,6 @@ impl NativeTable {
with_row_id: Some(vq.base.with_row_id),
bypass_vector_index: Some(!vq.use_index),
full_text_query,
version: None,
..Default::default()
})
}
@@ -2426,7 +2427,7 @@ impl NativeTable {
.map(|f| self.filter_to_sql(f))
.transpose()?;
let columns: Option<Box<QueryTableRequestColumns>> = match &q.select {
let columns = match &q.select {
Select::All => None,
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
column_names: Some(cols.clone()),
@@ -2470,18 +2471,10 @@ impl NativeTable {
columns,
prefilter: Some(q.prefilter),
offset: q.offset.map(|o| o as i32),
ef: None,
refine_factor: None,
distance_type: None,
nprobes: None,
vector_column: None, // No vector column for plain queries
with_row_id: Some(q.with_row_id),
bypass_vector_index: Some(true), // No vector index for plain queries
full_text_query,
version: None,
fast_search: None,
lower_bound: None,
upper_bound: None,
..Default::default()
})
}
@@ -3244,7 +3237,7 @@ impl BaseTable for NativeTable {
.get()
.await
.ok()
.and_then(|dataset| dataset.storage_options().cloned())
.and_then(|dataset| dataset.initial_storage_options().cloned())
}
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
@@ -5154,15 +5147,16 @@ mod tests {
let any_query = AnyQuery::VectorQuery(vq);
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
let column_names = ns_request
.columns
.as_ref()
.and_then(|cols| cols.column_names.clone());
assert_eq!(ns_request.k, 10);
assert_eq!(ns_request.offset, Some(5));
assert_eq!(ns_request.filter, Some("id > 0".to_string()));
assert_eq!(column_names, Some(vec!["id".to_string()]));
assert_eq!(
ns_request
.columns
.as_ref()
.and_then(|c| c.column_names.as_ref()),
Some(&vec!["id".to_string()])
);
assert_eq!(ns_request.vector_column, Some("vector".to_string()));
assert_eq!(ns_request.distance_type, Some("l2".to_string()));
assert!(ns_request.vector.single_vector.is_some());
@@ -5199,16 +5193,17 @@ mod tests {
let any_query = AnyQuery::Query(q);
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
let column_names = ns_request
.columns
.as_ref()
.and_then(|cols| cols.column_names.clone());
// Plain queries should pass an empty vector
assert_eq!(ns_request.k, 20);
assert_eq!(ns_request.offset, Some(5));
assert_eq!(ns_request.filter, Some("id > 5".to_string()));
assert_eq!(column_names, Some(vec!["id".to_string()]));
assert_eq!(
ns_request
.columns
.as_ref()
.and_then(|c| c.column_names.as_ref()),
Some(&vec!["id".to_string()])
);
assert_eq!(ns_request.with_row_id, Some(true));
assert_eq!(ns_request.bypass_vector_index, Some(true));
assert!(ns_request.vector_column.is_none()); // No vector column for plain queries