Compare commits

..

1 Commits

Author SHA1 Message Date
Lance Release
a544af7c03 Bump version: 0.23.1 → 0.24.0-beta.0 2026-01-21 12:21:32 +00:00
45 changed files with 424 additions and 520 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.24.0"
current_version = "0.24.0-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -75,13 +75,6 @@ jobs:
VERSION="${VERSION#v}"
BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
# Use "chore" for beta/rc versions, "feat" for stable releases
if [[ "${VERSION}" == *beta* ]] || [[ "${VERSION}" == *rc* ]]; then
COMMIT_TYPE="chore"
else
COMMIT_TYPE="feat"
fi
cat <<EOF >/tmp/codex-prompt.txt
You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.
@@ -91,10 +84,10 @@ jobs:
3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
6. Stage all relevant files with "git add -A". Commit using the message "${COMMIT_TYPE}: update lance dependency to v${VERSION}".
6. Stage all relevant files with "git add -A". Commit using the message "chore: update lance dependency to v${VERSION}".
7. Push the branch to origin. If the branch already exists, force-push your changes.
8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
9. Create a pull request targeting "main" with title "${COMMIT_TYPE}: update lance dependency to v${VERSION}". First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'EOF' > /tmp/pr-body.md). The body should summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). Then run "gh pr create --body-file /tmp/pr-body.md".
9. Create a pull request targeting "main" with title "chore: update lance dependency to v${VERSION}". In the body, summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}).
10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
Constraints:

74
Cargo.lock generated
View File

@@ -3072,8 +3072,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4404,8 +4404,8 @@ dependencies = [
[[package]]
name = "lance"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-arith",
@@ -4470,8 +4470,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4490,8 +4490,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrayref",
"paste",
@@ -4500,8 +4500,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4538,8 +4538,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-array",
@@ -4569,8 +4569,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-array",
@@ -4588,8 +4588,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4626,8 +4626,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4659,8 +4659,8 @@ dependencies = [
[[package]]
name = "lance-geo"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"datafusion",
"geo-traits",
@@ -4674,8 +4674,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-arith",
@@ -4742,8 +4742,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-arith",
@@ -4783,8 +4783,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4800,8 +4800,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"async-trait",
@@ -4813,8 +4813,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4857,8 +4857,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow",
"arrow-array",
@@ -4897,8 +4897,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "2.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
version = "2.0.0-beta.8"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4909,7 +4909,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.24.0"
version = "0.23.1"
dependencies = [
"ahash",
"anyhow",
@@ -4988,7 +4988,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.24.0"
version = "0.23.1"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -5008,7 +5008,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.27.0"
version = "0.26.1"
dependencies = [
"arrow",
"async-trait",

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }
@@ -59,7 +59,7 @@ rand = "0.9"
snafu = "0.8"
url = "2"
num-traits = "0.2"
regex = "1.10"
regex = "1.12"
lazy_static = "1"
semver = "1.0.25"
chrono = "0.4"

View File

@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId>
<version>0.24.0</version>
<version>0.24.0-beta.0</version>
</dependency>
```

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.24.0-final.0</version>
<version>0.24.0-beta.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.24.0-final.0</version>
<version>0.24.0-beta.0</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.24.0"
version = "0.24.0-beta.0"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1520,9 +1520,9 @@ describe("when optimizing a dataset", () => {
it("delete unverified", async () => {
const version = await table.version();
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${String(
18446744073709551615n - (BigInt(version) - 1n),
).padStart(20, "0")}.manifest`;
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
version - 1
}.manifest`;
fs.rmSync(versionFile);
let stats = await table.optimize({ deleteUnverified: false });

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.24.0",
"version": "0.24.0-beta.0",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.24.0",
"version": "0.24.0-beta.0",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.24.0",
"version": "0.24.0-beta.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.24.0",
"version": "0.24.0-beta.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.24.0",
"version": "0.24.0-beta.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.24.0",
"version": "0.24.0-beta.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.24.0",
"version": "0.24.0-beta.0",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.24.0",
"version": "0.24.0-beta.0",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.24.0",
"version": "0.23.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.24.0",
"version": "0.23.1",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.24.0",
"version": "0.24.0-beta.0",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.27.0"
current_version = "0.27.0-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.27.0"
version = "0.27.0-beta.0"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -9,8 +9,6 @@ import numpy as np
import io
import warnings
from pydantic import Field
from ..util import attempt_import_or_raise
from .base import EmbeddingFunction
from .registry import register
@@ -28,7 +26,7 @@ class ColPaliEmbeddings(EmbeddingFunction):
Parameters
----------
colpali_model_name : str
model_name : str
The name of the model to use (e.g., "Metric-AI/ColQwen2.5-3b-multilingual-v1.0")
Supports models based on these engines:
- ColPali: "vidore/colpali-v1.3" and others
@@ -59,10 +57,7 @@ class ColPaliEmbeddings(EmbeddingFunction):
useful for large models that do not fit in memory.
"""
colpali_model_name: str = Field(
default="Metric-AI/ColQwen2.5-3b-multilingual-v1.0",
validation_alias="model_name",
)
model_name: str = "Metric-AI/ColQwen2.5-3b-multilingual-v1.0"
device: str = "auto"
dtype: str = "bfloat16"
use_token_pooling: bool = True
@@ -112,7 +107,7 @@ class ColPaliEmbeddings(EmbeddingFunction):
self._processor,
self._token_pooler,
) = self._load_model(
self.colpali_model_name,
self.model_name,
dtype,
device,
self.pooling_strategy,

View File

@@ -10,7 +10,7 @@ import urllib.parse as urlparse
import numpy as np
import pyarrow as pa
from tqdm import tqdm
from pydantic import Field, PrivateAttr
from pydantic import PrivateAttr
from ..util import attempt_import_or_raise
from .base import EmbeddingFunction
@@ -24,10 +24,7 @@ if TYPE_CHECKING:
@register("siglip")
class SigLipEmbeddings(EmbeddingFunction):
siglip_model_name: str = Field(
default="google/siglip-base-patch16-224",
validation_alias="model_name",
)
model_name: str = "google/siglip-base-patch16-224"
device: str = "cpu"
batch_size: int = 64
normalize: bool = True
@@ -42,10 +39,8 @@ class SigLipEmbeddings(EmbeddingFunction):
transformers = attempt_import_or_raise("transformers")
self._torch = attempt_import_or_raise("torch")
self._processor = transformers.AutoProcessor.from_pretrained(
self.siglip_model_name
)
self._model = transformers.SiglipModel.from_pretrained(self.siglip_model_name)
self._processor = transformers.AutoProcessor.from_pretrained(self.model_name)
self._model = transformers.SiglipModel.from_pretrained(self.model_name)
self._model.to(self.device)
self._model.eval()
self._ndims = None

View File

@@ -2,27 +2,12 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
from datetime import timedelta
from lancedb.db import AsyncConnection, DBConnection
import lancedb
import pytest
import pytest_asyncio
def pandas_string_type():
"""Return the PyArrow string type that pandas uses for string columns.
pandas 3.0+ uses large_string for string columns, pandas 2.x uses string.
"""
import pandas as pd
import pyarrow as pa
version = tuple(int(x) for x in pd.__version__.split(".")[:2])
if version >= (3, 0):
return pa.large_utf8()
return pa.utf8()
# Use an in-memory database for most tests.
@pytest.fixture
def mem_db() -> DBConnection:

View File

@@ -268,8 +268,6 @@ async def test_create_table_from_iterator_async(mem_db_async: lancedb.AsyncConne
def test_create_exist_ok(tmp_db: lancedb.DBConnection):
from conftest import pandas_string_type
data = pd.DataFrame(
{
"vector": [[3.1, 4.1], [5.9, 26.5]],
@@ -288,11 +286,10 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
assert tbl.schema == tbl2.schema
assert len(tbl) == len(tbl2)
# pandas 3.0+ uses large_string, pandas 2.x uses string
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("item", pandas_string_type()),
pa.field("item", pa.utf8()),
pa.field("price", pa.float64()),
]
)
@@ -302,7 +299,7 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
bad_schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("item", pandas_string_type()),
pa.field("item", pa.utf8()),
pa.field("price", pa.float64()),
pa.field("extra", pa.float32()),
]
@@ -368,8 +365,6 @@ async def test_create_mode_async(tmp_db_async: lancedb.AsyncConnection):
@pytest.mark.asyncio
async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
from conftest import pandas_string_type
data = pd.DataFrame(
{
"vector": [[3.1, 4.1], [5.9, 26.5]],
@@ -387,11 +382,10 @@ async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
assert tbl.name == tbl2.name
assert await tbl.schema() == await tbl2.schema()
# pandas 3.0+ uses large_string, pandas 2.x uses string
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("item", pandas_string_type()),
pa.field("item", pa.utf8()),
pa.field("price", pa.float64()),
]
)
@@ -601,8 +595,6 @@ def test_open_table_sync(tmp_db: lancedb.DBConnection):
@pytest.mark.asyncio
async def test_open_table(tmp_path):
from conftest import pandas_string_type
db = await lancedb.connect_async(tmp_path)
data = pd.DataFrame(
{
@@ -622,11 +614,10 @@ async def test_open_table(tmp_path):
)
is not None
)
# pandas 3.0+ uses large_string, pandas 2.x uses string
assert await tbl.schema() == pa.schema(
{
"vector": pa.list_(pa.float32(), list_size=2),
"item": pandas_string_type(),
"item": pa.utf8(),
"price": pa.float64(),
}
)

View File

@@ -26,8 +26,6 @@ import pytest
from lance_namespace import (
CreateEmptyTableRequest,
CreateEmptyTableResponse,
DeclareTableRequest,
DeclareTableResponse,
DescribeTableRequest,
DescribeTableResponse,
LanceNamespace,
@@ -162,19 +160,6 @@ class TrackingNamespace(LanceNamespace):
return modified
def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
"""Track declare_table calls and inject rotating credentials."""
with self.lock:
self.create_call_count += 1
count = self.create_call_count
response = self.inner.declare_table(request)
response.storage_options = self._modify_storage_options(
response.storage_options, count
)
return response
def create_empty_table(
self, request: CreateEmptyTableRequest
) -> CreateEmptyTableResponse:

View File

@@ -601,6 +601,7 @@ def test_head():
def test_query_sync_minimal():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"refine_factor": None,
@@ -684,6 +685,7 @@ def test_query_sync_maximal():
def test_query_sync_nprobes():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"fast_search": True,
@@ -713,6 +715,7 @@ def test_query_sync_nprobes():
def test_query_sync_no_max_nprobes():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"fast_search": True,
@@ -835,6 +838,7 @@ def test_query_sync_hybrid():
else:
# Vector query
assert body == {
"distance_type": "l2",
"k": 42,
"prefilter": True,
"refine_factor": None,

View File

@@ -1880,13 +1880,8 @@ async def test_optimize_delete_unverified(tmp_db_async: AsyncConnection, tmp_pat
],
)
version = await table.version()
assert version == 2
# By removing a manifest file, we make the data files we just inserted unverified
version_name = 18446744073709551615 - (version - 1)
path = tmp_path / "test.lance" / "_versions" / f"{version_name:020}.manifest"
path = tmp_path / "test.lance" / "_versions" / f"{version - 1}.manifest"
os.remove(path)
stats = await table.optimize(delete_unverified=False)
assert stats.prune.old_versions_removed == 0
stats = await table.optimize(

View File

@@ -528,19 +528,12 @@ def test_sanitize_data(
else:
expected_schema = schema
else:
from conftest import pandas_string_type
# polars uses large_string, pandas 3.0+ uses large_string, others use string
if isinstance(data, pl.DataFrame):
text_type = pa.large_utf8()
elif isinstance(data, pd.DataFrame):
text_type = pandas_string_type()
else:
text_type = pa.string()
expected_schema = pa.schema(
{
"id": pa.int64(),
"text": text_type,
"text": pa.large_utf8()
if isinstance(data, pl.DataFrame)
else pa.string(),
"vector": pa.list_(pa.float32(), 10),
}
)

View File

@@ -55,12 +55,12 @@ impl RecordBatchStream {
.next()
.await
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
Python::attach(|py| {
inner_next
.infer_error()?
.to_pyarrow(py)
.map(|obj| obj.unbind())
})
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = inner_next.infer_error()?.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
})
}
}

View File

@@ -307,7 +307,8 @@ impl Connection {
..Default::default()
};
let response = inner.list_namespaces(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("namespaces", response.namespaces)?;
dict.set_item("page_token", response.page_token)?;
@@ -327,8 +328,7 @@ impl Connection {
let py = self_.py();
future_into_py(py, async move {
use lance_namespace::models::CreateNamespaceRequest;
// Mode is now a string field
let mode_str = mode.and_then(|m| match m.to_lowercase().as_str() {
let mode_enum = mode.and_then(|m| match m.to_lowercase().as_str() {
"create" => Some("Create".to_string()),
"exist_ok" => Some("ExistOk".to_string()),
"overwrite" => Some("Overwrite".to_string()),
@@ -340,12 +340,13 @@ impl Connection {
} else {
Some(namespace)
},
mode: mode_str,
mode: mode_enum,
properties,
..Default::default()
};
let response = inner.create_namespace(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
Ok(dict.unbind())
@@ -364,13 +365,12 @@ impl Connection {
let py = self_.py();
future_into_py(py, async move {
use lance_namespace::models::DropNamespaceRequest;
// Mode and Behavior are now string fields
let mode_str = mode.and_then(|m| match m.to_uppercase().as_str() {
let mode_enum = mode.and_then(|m| match m.to_uppercase().as_str() {
"SKIP" => Some("Skip".to_string()),
"FAIL" => Some("Fail".to_string()),
_ => None,
});
let behavior_str = behavior.and_then(|b| match b.to_uppercase().as_str() {
let behavior_enum = behavior.and_then(|b| match b.to_uppercase().as_str() {
"RESTRICT" => Some("Restrict".to_string()),
"CASCADE" => Some("Cascade".to_string()),
_ => None,
@@ -381,12 +381,13 @@ impl Connection {
} else {
Some(namespace)
},
mode: mode_str,
behavior: behavior_str,
mode: mode_enum,
behavior: behavior_enum,
..Default::default()
};
let response = inner.drop_namespace(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
dict.set_item("transaction_id", response.transaction_id)?;
@@ -413,7 +414,8 @@ impl Connection {
..Default::default()
};
let response = inner.describe_namespace(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
Ok(dict.unbind())
@@ -443,7 +445,8 @@ impl Connection {
..Default::default()
};
let response = inner.list_tables(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
#[allow(deprecated)]
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("tables", response.tables)?;
dict.set_item("page_token", response.page_token)?;

View File

@@ -40,31 +40,34 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
request_id,
source,
status_code,
} => Python::attach(|py| {
let message = err.to_string();
let http_err_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
.getattr(intern!(py, "HttpError"))?;
let err = http_err_cls.call1((
message,
request_id,
status_code.map(|s| s.as_u16()),
))?;
if let Some(cause) = source.source() {
// The HTTP error already includes the first cause. But
// we can add the rest of the chain if there is any more.
let cause_err = http_from_rust_error(
py,
cause,
} => {
#[allow(deprecated)]
Python::with_gil(|py| {
let message = err.to_string();
let http_err_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
.getattr(intern!(py, "HttpError"))?;
let err = http_err_cls.call1((
message,
request_id,
status_code.map(|s| s.as_u16()),
)?;
err.setattr(intern!(py, "__cause__"), cause_err)?;
}
))?;
Err(PyErr::from_value(err))
}),
if let Some(cause) = source.source() {
// The HTTP error already includes the first cause. But
// we can add the rest of the chain if there is any more.
let cause_err = http_from_rust_error(
py,
cause,
request_id,
status_code.map(|s| s.as_u16()),
)?;
err.setattr(intern!(py, "__cause__"), cause_err)?;
}
Err(PyErr::from_value(err))
})
}
LanceError::Retry {
request_id,
request_failures,
@@ -75,33 +78,37 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
max_read_failures,
source,
status_code,
} => Python::attach(|py| {
let cause_err = http_from_rust_error(
py,
source.as_ref(),
request_id,
status_code.map(|s| s.as_u16()),
)?;
} =>
{
#[allow(deprecated)]
Python::with_gil(|py| {
let cause_err = http_from_rust_error(
py,
source.as_ref(),
request_id,
status_code.map(|s| s.as_u16()),
)?;
let message = err.to_string();
let retry_error_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
.getattr("RetryError")?;
let err = retry_error_cls.call1((
message,
request_id,
*request_failures,
*connect_failures,
*read_failures,
*max_request_failures,
*max_connect_failures,
*max_read_failures,
status_code.map(|s| s.as_u16()),
))?;
let message = err.to_string();
let retry_error_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
.getattr("RetryError")?;
let err = retry_error_cls.call1((
message,
request_id,
*request_failures,
*connect_failures,
*read_failures,
*max_request_failures,
*max_connect_failures,
*max_read_failures,
status_code.map(|s| s.as_u16()),
))?;
err.setattr(intern!(py, "__cause__"), cause_err)?;
Err(PyErr::from_value(err))
}),
err.setattr(intern!(py, "__cause__"), cause_err)?;
Err(PyErr::from_value(err))
})
}
_ => self.runtime_error(),
},
}

View File

@@ -12,7 +12,8 @@ pub struct PyHeaderProvider {
impl Clone for PyHeaderProvider {
fn clone(&self) -> Self {
Python::attach(|py| Self {
#[allow(deprecated)]
Python::with_gil(|py| Self {
provider: self.provider.clone_ref(py),
})
}
@@ -25,7 +26,8 @@ impl PyHeaderProvider {
/// Get headers from the Python provider (internal implementation)
fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
Python::attach(|py| {
#[allow(deprecated)]
Python::with_gil(|py| {
// Call the get_headers method
let result = self.provider.call_method0(py, "get_headers");

View File

@@ -19,7 +19,7 @@ use pyo3::{
exceptions::PyRuntimeError,
pyclass, pymethods,
types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
Bound, PyAny, PyRef, PyRefMut, PyResult, Python,
Bound, Py, PyAny, PyRef, PyRefMut, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -281,7 +281,12 @@ impl PyPermutationReader {
let reader = slf.reader.clone();
future_into_py(slf.py(), async move {
let schema = reader.output_schema(selection).await.infer_error()?;
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
})
}

View File

@@ -29,6 +29,7 @@ use pyo3::types::PyList;
use pyo3::types::{PyDict, PyString};
use pyo3::Bound;
use pyo3::IntoPyObject;
use pyo3::Py;
use pyo3::PyAny;
use pyo3::PyRef;
use pyo3::PyResult;
@@ -453,7 +454,12 @@ impl Query {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
})
}
@@ -532,7 +538,12 @@ impl TakeQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
})
}
@@ -627,7 +638,12 @@ impl FTSQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
})
}
@@ -806,7 +822,12 @@ impl VectorQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
})
}

View File

@@ -22,7 +22,8 @@ pub struct PyStorageOptionsProvider {
impl Clone for PyStorageOptionsProvider {
fn clone(&self) -> Self {
Python::attach(|py| Self {
#[allow(deprecated)]
Python::with_gil(|py| Self {
inner: self.inner.clone_ref(py),
})
}
@@ -30,14 +31,17 @@ impl Clone for PyStorageOptionsProvider {
impl PyStorageOptionsProvider {
pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
Python::attach(|py| {
#[allow(deprecated)]
Python::with_gil(|py| {
// Verify the object has a fetch_storage_options method
if !obj.bind(py).hasattr("fetch_storage_options")? {
return Err(pyo3::exceptions::PyTypeError::new_err(
"StorageOptionsProvider must implement fetch_storage_options() method",
));
}
Ok(Self { inner: obj })
Ok(Self {
inner: obj.clone_ref(py),
})
})
}
}
@@ -60,7 +64,8 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
let py_provider = self.py_provider.clone();
tokio::task::spawn_blocking(move || {
Python::attach(|py| {
#[allow(deprecated)]
Python::with_gil(|py| {
// Call the Python fetch_storage_options method
let result = py_provider
.inner
@@ -119,7 +124,8 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
}
fn provider_id(&self) -> String {
Python::attach(|py| {
#[allow(deprecated)]
Python::with_gil(|py| {
// Call provider_id() method on the Python object
let obj = self.py_provider.inner.bind(py);
obj.call_method0("provider_id")

View File

@@ -21,7 +21,7 @@ use pyo3::{
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
pyclass, pymethods,
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -287,7 +287,12 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let schema = inner.schema().await.infer_error()?;
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
#[allow(deprecated)]
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
let bound = schema.to_pyarrow(py)?;
Ok(bound.unbind())
})?;
Ok(py_obj)
})
}
@@ -437,7 +442,8 @@ impl Table {
future_into_py(self_.py(), async move {
let stats = inner.index_stats(&index_name).await.infer_error()?;
if let Some(stats) = stats {
Python::attach(|py| {
#[allow(deprecated)]
Python::with_gil(|py| {
let dict = PyDict::new(py);
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
@@ -467,7 +473,8 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let stats = inner.stats().await.infer_error()?;
Python::attach(|py| {
#[allow(deprecated)]
Python::with_gil(|py| {
let dict = PyDict::new(py);
dict.set_item("total_bytes", stats.total_bytes)?;
dict.set_item("num_rows", stats.num_rows)?;
@@ -521,7 +528,8 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let versions = inner.list_versions().await.infer_error()?;
let versions_as_dict = Python::attach(|py| {
#[allow(deprecated)]
let versions_as_dict = Python::with_gil(|py| {
versions
.iter()
.map(|v| {
@@ -872,7 +880,8 @@ impl Tags {
let tags = inner.tags().await.infer_error()?;
let res = tags.list().await.infer_error()?;
Python::attach(|py| {
#[allow(deprecated)]
Python::with_gil(|py| {
let py_dict = PyDict::new(py);
for (key, contents) in res {
let value_dict = PyDict::new(py);

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.24.0"
version = "0.24.0-beta.0"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -36,42 +36,10 @@ use crate::remote::{
};
use crate::table::{TableDefinition, WriteOptions};
use crate::Table;
use lance::io::ObjectStoreParams;
pub use lance_encoding::version::LanceFileVersion;
#[cfg(feature = "remote")]
use lance_io::object_store::StorageOptions;
use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
/// Merges `pairs` into the storage options carried by `store_params`.
///
/// Starts from a clone of the options currently reported by
/// `store_params.storage_options()` (or the default value when there are none),
/// inserts every `(key, value)` pair in iteration order, and installs a freshly
/// built accessor on `store_params`. If the accessor being replaced exposes a
/// provider, that provider is cloned into the new accessor; otherwise the new
/// accessor is built from the merged options alone.
fn merge_storage_options(
    store_params: &mut ObjectStoreParams,
    pairs: impl IntoIterator<Item = (String, String)>,
) {
    let mut merged = store_params.storage_options().cloned().unwrap_or_default();
    merged.extend(pairs);

    // Keep whatever provider the current accessor (if any) was using.
    let kept_provider = store_params
        .storage_options_accessor
        .as_ref()
        .and_then(|current| current.provider().cloned());

    let rebuilt = match kept_provider {
        Some(provider) => StorageOptionsAccessor::with_initial_and_provider(merged, provider),
        None => StorageOptionsAccessor::with_static_options(merged),
    };
    store_params.storage_options_accessor = Some(Arc::new(rebuilt));
}
/// Attaches `provider` to `store_params` by replacing its storage-options accessor.
///
/// Options currently reported by `store_params.storage_options()` are cloned and
/// passed to the new accessor as its initial options; when there are none, the
/// accessor is built from the provider alone.
fn set_storage_options_provider(
    store_params: &mut ObjectStoreParams,
    provider: Arc<dyn StorageOptionsProvider>,
) {
    // Take an owned snapshot first so the read of `store_params` is finished
    // before the accessor field is overwritten below.
    let initial_options = store_params.storage_options().cloned();

    let rebuilt = if let Some(initial) = initial_options {
        StorageOptionsAccessor::with_initial_and_provider(initial, provider)
    } else {
        StorageOptionsAccessor::with_provider(provider)
    };
    store_params.storage_options_accessor = Some(Arc::new(rebuilt));
}
use lance_io::object_store::StorageOptionsProvider;
/// A builder for configuring a [`Connection::table_names`] operation
pub struct TableNamesBuilder {
@@ -278,14 +246,16 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
///
/// See available options at <https://lancedb.com/docs/storage/>
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
let store_params = self
let store_options = self
.request
.write_options
.lance_write_params
.get_or_insert(Default::default())
.store_params
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default());
merge_storage_options(store_params, [(key.into(), value.into())]);
store_options.insert(key.into(), value.into());
self
}
@@ -299,17 +269,19 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
) -> Self {
let store_params = self
let store_options = self
.request
.write_options
.lance_write_params
.get_or_insert(Default::default())
.store_params
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default());
let updates = pairs
.into_iter()
.map(|(key, value)| (key.into(), value.into()));
merge_storage_options(store_params, updates);
for (key, value) in pairs {
store_options.insert(key.into(), value.into());
}
self
}
@@ -346,21 +318,23 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// This has no effect in LanceDB Cloud.
#[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
pub fn enable_v2_manifest_paths(mut self, use_v2_manifest_paths: bool) -> Self {
let store_params = self
let storage_options = self
.request
.write_options
.lance_write_params
.get_or_insert_with(Default::default)
.store_params
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
let value = if use_v2_manifest_paths {
"true".to_string()
} else {
"false".to_string()
};
merge_storage_options(
store_params,
[(OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(), value)],
storage_options.insert(
OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(),
if use_v2_manifest_paths {
"true".to_string()
} else {
"false".to_string()
},
);
self
}
@@ -370,19 +344,19 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// The default is `LanceFileVersion::Stable`.
#[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
pub fn data_storage_version(mut self, data_storage_version: LanceFileVersion) -> Self {
let store_params = self
let storage_options = self
.request
.write_options
.lance_write_params
.get_or_insert_with(Default::default)
.store_params
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
merge_storage_options(
store_params,
[(
OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
data_storage_version.to_string(),
)],
storage_options.insert(
OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
data_storage_version.to_string(),
);
self
}
@@ -407,14 +381,13 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
/// This allows tables to automatically refresh cloud storage credentials
/// when they expire, enabling long-running operations on remote storage.
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
let store_params = self
.request
self.request
.write_options
.lance_write_params
.get_or_insert(Default::default())
.store_params
.get_or_insert(Default::default());
set_storage_options_provider(store_params, provider);
.get_or_insert(Default::default())
.storage_options_provider = Some(provider);
self
}
}
@@ -477,13 +450,15 @@ impl OpenTableBuilder {
///
/// See available options at <https://lancedb.com/docs/storage/>
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
let store_params = self
let storage_options = self
.request
.lance_read_params
.get_or_insert(Default::default())
.store_options
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default());
merge_storage_options(store_params, [(key.into(), value.into())]);
storage_options.insert(key.into(), value.into());
self
}
@@ -497,16 +472,18 @@ impl OpenTableBuilder {
mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
) -> Self {
let store_params = self
let storage_options = self
.request
.lance_read_params
.get_or_insert(Default::default())
.store_options
.get_or_insert(Default::default())
.storage_options
.get_or_insert(Default::default());
let updates = pairs
.into_iter()
.map(|(key, value)| (key.into(), value.into()));
merge_storage_options(store_params, updates);
for (key, value) in pairs {
storage_options.insert(key.into(), value.into());
}
self
}
@@ -530,13 +507,12 @@ impl OpenTableBuilder {
/// This allows tables to automatically refresh cloud storage credentials
/// when they expire, enabling long-running operations on remote storage.
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
let store_params = self
.request
self.request
.lance_read_params
.get_or_insert(Default::default())
.store_options
.get_or_insert(Default::default());
set_storage_options_provider(store_params, provider);
.get_or_insert(Default::default())
.storage_options_provider = Some(provider);
self
}
@@ -892,9 +868,6 @@ pub struct ConnectBuilder {
embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
}
const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 1] =
[("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name")];
impl ConnectBuilder {
/// Create a new [`ConnectOptions`] with the given database URI.
pub fn new(uri: &str) -> Self {
@@ -1078,27 +1051,11 @@ impl ConnectBuilder {
self
}
#[cfg(feature = "remote")]
/// Fills in options from environment variables without overriding explicit values.
///
/// For each `(env_key, opt_key)` pair in `env_var_to_remote_storage_option`, if
/// `std::env::var(env_key)` returns `Ok`, the value is stored under `opt_key` in
/// `options` unless `options` already has an entry for that key. Variables for
/// which `std::env::var` returns an error are skipped.
fn apply_env_defaults(
    env_var_to_remote_storage_option: &[(&str, &str)],
    options: &mut HashMap<String, String>,
) {
    for (env_key, opt_key) in env_var_to_remote_storage_option {
        if let Ok(env_value) = std::env::var(env_key) {
            // `entry` performs the presence check and the insert with one lookup;
            // an existing (explicit) value is left untouched.
            options.entry((*opt_key).to_string()).or_insert(env_value);
        }
    }
}
#[cfg(feature = "remote")]
fn execute_remote(self) -> Result<Connection> {
use crate::remote::db::RemoteDatabaseOptions;
let mut merged_options = self.request.options.clone();
Self::apply_env_defaults(&ENV_VARS_TO_STORAGE_OPTS, &mut merged_options);
let options = RemoteDatabaseOptions::parse_from_map(&merged_options)?;
let options = RemoteDatabaseOptions::parse_from_map(&self.request.options)?;
let region = options.region.ok_or_else(|| Error::InvalidInput {
message: "A region is required when connecting to LanceDb Cloud".to_string(),
@@ -1320,6 +1277,8 @@ mod test_utils {
#[cfg(test)]
mod tests {
use std::fs::create_dir_all;
use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
use crate::query::QueryBase;
use crate::query::{ExecutableQuery, QueryExecutionOptions};
@@ -1343,23 +1302,6 @@ mod tests {
assert_eq!(tc.connection.uri(), tc.uri);
}
#[cfg(feature = "remote")]
#[test]
fn test_apply_env_defaults() {
    // Checks both directions of `apply_env_defaults`: an environment value is
    // copied into an empty map, and an explicit entry is not overwritten.
    const ENV_KEY: &str = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_KEY";
    const ENV_VAL: &str = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_VAL";
    const OPTS_KEY: &str = "test_apply_env_defaults_environment_variable_opts_key";
    const EXPLICIT: &str = "EXPLICIT-VALUE";
    let mapping = [(ENV_KEY, OPTS_KEY)];

    std::env::set_var(ENV_KEY, ENV_VAL);

    // Missing option: the environment value is used.
    let mut options = HashMap::new();
    ConnectBuilder::apply_env_defaults(&mapping, &mut options);
    assert_eq!(Some(ENV_VAL), options.get(OPTS_KEY).map(String::as_str));

    // Explicit option: the environment value must not replace it.
    options.insert(OPTS_KEY.to_string(), EXPLICIT.to_string());
    ConnectBuilder::apply_env_defaults(&mapping, &mut options);
    assert_eq!(Some(EXPLICIT), options.get(OPTS_KEY).map(String::as_str));
}
#[cfg(not(windows))]
#[tokio::test]
async fn test_connect_relative() {
@@ -1584,27 +1526,18 @@ mod tests {
#[tokio::test]
async fn drop_table() {
let tc = new_test_connection().await.unwrap();
let db = tc.connection;
let tmp_dir = tempdir().unwrap();
if tc.is_remote {
// All the typical endpoints such as s3:///, file-object-store:///, etc. treat drop_table
// as idempotent.
assert!(db.drop_table("invalid_table", &[]).await.is_ok());
} else {
// The behavior of drop_table when using a file:/// endpoint differs from all other
// object providers, in that it returns an error when deleting a non-existent table.
assert!(matches!(
db.drop_table("invalid_table", &[]).await,
Err(crate::Error::TableNotFound { .. }),
));
}
let uri = tmp_dir.path().to_str().unwrap();
let db = connect(uri).execute().await.unwrap();
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
db.create_empty_table("table1", schema.clone())
.execute()
.await
.unwrap();
// drop a non-existent table
assert!(matches!(
db.drop_table("invalid_table", &[]).await,
Err(crate::Error::TableNotFound { .. }),
));
create_dir_all(tmp_dir.path().join("table1.lance")).unwrap();
db.drop_table("table1", &[]).await.unwrap();
let tables = db.table_names().execute().await.unwrap();

View File

@@ -12,7 +12,7 @@ use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode};
use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
use lance_datafusion::utils::StreamingWriteSource;
use lance_encoding::version::LanceFileVersion;
use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
use lance_io::object_store::StorageOptionsProvider;
use lance_table::io::commit::commit_handler_from_url;
use object_store::local::LocalFileSystem;
use snafu::ResultExt;
@@ -356,13 +356,7 @@ impl ListingDatabase {
.clone()
.unwrap_or_else(|| Arc::new(lance::session::Session::default()));
let os_params = ObjectStoreParams {
storage_options_accessor: if options.storage_options.is_empty() {
None
} else {
Some(Arc::new(StorageOptionsAccessor::with_static_options(
options.storage_options.clone(),
)))
},
storage_options: Some(options.storage_options.clone()),
..Default::default()
};
let (object_store, base_path) = ObjectStore::from_uri_and_params(
@@ -498,13 +492,7 @@ impl ListingDatabase {
async fn drop_tables(&self, names: Vec<String>) -> Result<()> {
let object_store_params = ObjectStoreParams {
storage_options_accessor: if self.storage_options.is_empty() {
None
} else {
Some(Arc::new(StorageOptionsAccessor::with_static_options(
self.storage_options.clone(),
)))
},
storage_options: Some(self.storage_options.clone()),
..Default::default()
};
let mut uri = self.uri.clone();
@@ -553,7 +541,7 @@ impl ListingDatabase {
.lance_write_params
.as_ref()
.and_then(|p| p.store_params.as_ref())
.and_then(|sp| sp.storage_options());
.and_then(|sp| sp.storage_options.as_ref());
let storage_version_override = storage_options
.and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION))
@@ -604,20 +592,21 @@ impl ListingDatabase {
// will cause a new connection to be created, and that connection will
// be dropped from the cache when python GCs the table object, which
// confounds reuse across tables.
if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
let store_params = write_params
if !self.storage_options.is_empty() {
let storage_options = write_params
.store_params
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
if !self.storage_options.is_empty() {
self.inherit_storage_options(&mut storage_options);
}
let accessor = if let Some(ref provider) = self.storage_options_provider {
StorageOptionsAccessor::with_initial_and_provider(storage_options, provider.clone())
} else {
StorageOptionsAccessor::with_static_options(storage_options)
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
self.inherit_storage_options(storage_options);
}
// Set storage options provider if available
if self.storage_options_provider.is_some() {
write_params
.store_params
.get_or_insert_with(Default::default)
.storage_options_provider = self.storage_options_provider.clone();
}
write_params.data_storage_version = self
@@ -903,13 +892,7 @@ impl Database for ListingDatabase {
validate_table_name(&request.target_table_name)?;
let storage_params = ObjectStoreParams {
storage_options_accessor: if self.storage_options.is_empty() {
None
} else {
Some(Arc::new(StorageOptionsAccessor::with_static_options(
self.storage_options.clone(),
)))
},
storage_options: Some(self.storage_options.clone()),
..Default::default()
};
let read_params = ReadParams {
@@ -973,28 +956,25 @@ impl Database for ListingDatabase {
// will cause a new connection to be created, and that connection will
// be dropped from the cache when python GCs the table object, which
// confounds reuse across tables.
if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
let store_params = request
if !self.storage_options.is_empty() {
let storage_options = request
.lance_read_params
.get_or_insert_with(Default::default)
.store_options
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
if !self.storage_options.is_empty() {
self.inherit_storage_options(&mut storage_options);
}
// Preserve request-level provider if no connection-level provider exists
let request_provider = store_params
.storage_options_accessor
.as_ref()
.and_then(|a| a.provider().cloned());
let provider = self.storage_options_provider.clone().or(request_provider);
let accessor = if let Some(provider) = provider {
StorageOptionsAccessor::with_initial_and_provider(storage_options, provider)
} else {
StorageOptionsAccessor::with_static_options(storage_options)
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
self.inherit_storage_options(storage_options);
}
// Set storage options provider if available
if self.storage_options_provider.is_some() {
request
.lance_read_params
.get_or_insert_with(Default::default)
.store_options
.get_or_insert_with(Default::default)
.storage_options_provider = self.storage_options_provider.clone();
}
// Some ReadParams are exposed in the OpenTableBuilder, but we also
@@ -1901,9 +1881,7 @@ mod tests {
let write_options = WriteOptions {
lance_write_params: Some(lance::dataset::WriteParams {
store_params: Some(lance::io::ObjectStoreParams {
storage_options_accessor: Some(Arc::new(
StorageOptionsAccessor::with_static_options(storage_options),
)),
storage_options: Some(storage_options),
..Default::default()
}),
..Default::default()
@@ -1977,9 +1955,7 @@ mod tests {
let write_options = WriteOptions {
lance_write_params: Some(lance::dataset::WriteParams {
store_params: Some(lance::io::ObjectStoreParams {
storage_options_accessor: Some(Arc::new(
StorageOptionsAccessor::with_static_options(storage_options),
)),
storage_options: Some(storage_options),
..Default::default()
}),
..Default::default()

View File

@@ -9,15 +9,14 @@ use std::sync::Arc;
use async_trait::async_trait;
use lance_namespace::{
models::{
CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
DeclareTableRequest, DescribeNamespaceRequest, DescribeNamespaceResponse,
DescribeTableRequest, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest,
ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
CreateNamespaceRequest, CreateNamespaceResponse, DeclareTableRequest,
DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest,
DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest,
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
},
LanceNamespace,
};
use lance_namespace_impls::ConnectBuilder;
use log::warn;
use crate::database::ReadConsistency;
use crate::error::{Error, Result};
@@ -155,6 +154,7 @@ impl Database for LanceNamespaceDatabase {
table_id.push(request.name.clone());
let describe_request = DescribeTableRequest {
id: Some(table_id.clone()),
version: None,
..Default::default()
};
@@ -205,53 +205,26 @@ impl Database for LanceNamespaceDatabase {
let mut table_id = request.namespace.clone();
table_id.push(request.name.clone());
// Try declare_table first, falling back to create_empty_table for backwards
// compatibility with older namespace clients that don't support declare_table
let declare_request = DeclareTableRequest {
let create_empty_request = DeclareTableRequest {
id: Some(table_id.clone()),
location: None,
vend_credentials: None,
..Default::default()
};
let location = match self.namespace.declare_table(declare_request).await {
Ok(response) => response.location.ok_or_else(|| Error::Runtime {
message: "Table location is missing from declare_table response".to_string(),
})?,
Err(e) => {
// Check if the error is "not supported" and try create_empty_table as fallback
let err_str = e.to_string().to_lowercase();
if err_str.contains("not supported") || err_str.contains("not implemented") {
warn!(
"declare_table is not supported by the namespace client, \
falling back to deprecated create_empty_table. \
create_empty_table is deprecated and will be removed in Lance 3.0.0. \
Please upgrade your namespace client to support declare_table."
);
#[allow(deprecated)]
let create_empty_request = CreateEmptyTableRequest {
id: Some(table_id.clone()),
..Default::default()
};
let create_empty_response = self
.namespace
.declare_table(create_empty_request)
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to declare table: {}", e),
})?;
#[allow(deprecated)]
let create_response = self
.namespace
.create_empty_table(create_empty_request)
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to create empty table: {}", e),
})?;
create_response.location.ok_or_else(|| Error::Runtime {
message: "Table location is missing from create_empty_table response"
.to_string(),
})?
} else {
return Err(Error::Runtime {
message: format!("Failed to declare table: {}", e),
});
}
}
};
let location = create_empty_response
.location
.ok_or_else(|| Error::Runtime {
message: "Table location is missing from create_empty_table response".to_string(),
})?;
let native_table = NativeTable::create_from_namespace(
self.namespace.clone(),
@@ -466,6 +439,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -526,6 +501,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -589,6 +566,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -672,6 +651,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -727,6 +708,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -807,6 +790,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -840,6 +825,8 @@ mod tests {
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
id: Some(vec!["test_ns".into()]),
mode: None,
properties: None,
..Default::default()
})
.await

View File

@@ -51,19 +51,24 @@
//! - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud object store
//! - `db://dbname` - Lance Cloud
//!
//! You can also use [`ConnectBuilder`] to configure the connection to the database.
//! You can also use [`ConnectOptions`] to configure the connection to the database.
//!
//! ```rust
//! # #[cfg(feature = "aws")]
//! # {
//! use object_store::aws::AwsCredential;
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
//! let db = lancedb::connect("data/sample-lancedb")
//! .storage_options([
//! ("aws_access_key_id", "some_key"),
//! ("aws_secret_access_key", "some_secret"),
//! ])
//! .aws_creds(AwsCredential {
//! key_id: "some_key".to_string(),
//! secret_key: "some_secret".to_string(),
//! token: None,
//! })
//! .execute()
//! .await
//! .unwrap();
//! # });
//! # }
//! ```
//!
//! LanceDB uses [arrow-rs](https://github.com/apache/arrow-rs) to define schema, data types and array itself.

View File

@@ -1718,6 +1718,8 @@ mod tests {
let namespace = vec!["test_ns".to_string()];
conn.create_namespace(CreateNamespaceRequest {
id: Some(namespace.clone()),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -1743,6 +1745,8 @@ mod tests {
let list_response = conn
.list_tables(ListTablesRequest {
id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default()
})
.await
@@ -1754,6 +1758,8 @@ mod tests {
let list_response = namespace_client
.list_tables(ListTablesRequest {
id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default()
})
.await
@@ -1794,6 +1800,8 @@ mod tests {
let namespace = vec!["multi_table_ns".to_string()];
conn.create_namespace(CreateNamespaceRequest {
id: Some(namespace.clone()),
mode: None,
properties: None,
..Default::default()
})
.await
@@ -1819,6 +1827,8 @@ mod tests {
let list_response = conn
.list_tables(ListTablesRequest {
id: Some(namespace.clone()),
page_token: None,
limit: None,
..Default::default()
})
.await

View File

@@ -468,9 +468,7 @@ impl<S: HttpSend> RemoteTable<S> {
self.apply_query_params(&mut body, &query.base)?;
// Apply general parameters, before we dispatch based on number of query vectors.
if let Some(distance_type) = query.distance_type {
body["distance_type"] = serde_json::json!(distance_type);
}
body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
// In 0.23.1 we migrated from `nprobes` to `minimum_nprobes` and `maximum_nprobes`.
// Old client / new server: since minimum_nprobes is missing, fallback to nprobes
// New client / old server: old server will only see nprobes, make sure to set both
@@ -2232,6 +2230,7 @@ mod tests {
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
let mut expected_body = serde_json::json!({
"prefilter": true,
"distance_type": "l2",
"nprobes": 20,
"minimum_nprobes": 20,
"maximum_nprobes": 20,

View File

@@ -40,7 +40,7 @@ use lance_index::vector::pq::PQBuildParams;
use lance_index::vector::sq::builder::SQBuildParams;
use lance_index::DatasetIndexExt;
use lance_index::IndexType;
use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsAccessor};
use lance_io::object_store::LanceNamespaceStorageOptionsProvider;
use lance_namespace::models::{
QueryTableRequest as NsQueryTableRequest, QueryTableRequestColumns,
QueryTableRequestFullTextQuery, QueryTableRequestVector, StringFtsQuery,
@@ -1425,8 +1425,8 @@ impl Table {
})
.collect::<Vec<_>>();
let unioned = UnionExec::try_new(projected_plans).map_err(|err| Error::Runtime {
message: err.to_string(),
let unioned = UnionExec::try_new(projected_plans).map_err(|e| Error::Runtime {
message: format!("Failed to build union plan: {e}"),
})?;
// We require 1 partition in the final output
let repartitioned = RepartitionExec::try_new(
@@ -1668,14 +1668,18 @@ impl NativeTable {
// Use DatasetBuilder::from_namespace which automatically fetches location
// and storage options from the namespace
let builder = DatasetBuilder::from_namespace(namespace_client.clone(), table_id)
.await
.map_err(|e| match e {
lance::Error::Namespace { source, .. } => Error::Runtime {
message: format!("Failed to get table info from namespace: {:?}", source),
},
source => Error::Lance { source },
})?;
let builder = DatasetBuilder::from_namespace(
namespace_client.clone(),
table_id,
false, // Don't ignore namespace storage options
)
.await
.map_err(|e| match e {
lance::Error::Namespace { source, .. } => Error::Runtime {
message: format!("Failed to get table info from namespace: {:?}", source),
},
source => Error::Lance { source },
})?;
let dataset = builder
.with_read_params(params)
@@ -1879,13 +1883,7 @@ impl NativeTable {
let store_params = params
.store_params
.get_or_insert_with(ObjectStoreParams::default);
let accessor = match store_params.storage_options().cloned() {
Some(options) => {
StorageOptionsAccessor::with_initial_and_provider(options, storage_options_provider)
}
None => StorageOptionsAccessor::with_provider(storage_options_provider),
};
store_params.storage_options_accessor = Some(Arc::new(accessor));
store_params.storage_options_provider = Some(storage_options_provider);
// Patch the params if we have a write store wrapper
let params = match write_store_wrapper.clone() {
@@ -2351,7 +2349,7 @@ impl NativeTable {
};
// Convert select to columns list
let columns = match &vq.base.select {
let columns: Option<Box<QueryTableRequestColumns>> = match &vq.base.select {
Select::All => None,
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
column_names: Some(cols.clone()),
@@ -2409,6 +2407,7 @@ impl NativeTable {
with_row_id: Some(vq.base.with_row_id),
bypass_vector_index: Some(!vq.use_index),
full_text_query,
version: None,
..Default::default()
})
}
@@ -2427,7 +2426,7 @@ impl NativeTable {
.map(|f| self.filter_to_sql(f))
.transpose()?;
let columns = match &q.select {
let columns: Option<Box<QueryTableRequestColumns>> = match &q.select {
Select::All => None,
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
column_names: Some(cols.clone()),
@@ -2471,10 +2470,18 @@ impl NativeTable {
columns,
prefilter: Some(q.prefilter),
offset: q.offset.map(|o| o as i32),
ef: None,
refine_factor: None,
distance_type: None,
nprobes: None,
vector_column: None, // No vector column for plain queries
with_row_id: Some(q.with_row_id),
bypass_vector_index: Some(true), // No vector index for plain queries
full_text_query,
version: None,
fast_search: None,
lower_bound: None,
upper_bound: None,
..Default::default()
})
}
@@ -3237,7 +3244,7 @@ impl BaseTable for NativeTable {
.get()
.await
.ok()
.and_then(|dataset| dataset.initial_storage_options().cloned())
.and_then(|dataset| dataset.storage_options().cloned())
}
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
@@ -5147,16 +5154,15 @@ mod tests {
let any_query = AnyQuery::VectorQuery(vq);
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
let column_names = ns_request
.columns
.as_ref()
.and_then(|cols| cols.column_names.clone());
assert_eq!(ns_request.k, 10);
assert_eq!(ns_request.offset, Some(5));
assert_eq!(ns_request.filter, Some("id > 0".to_string()));
assert_eq!(
ns_request
.columns
.as_ref()
.and_then(|c| c.column_names.as_ref()),
Some(&vec!["id".to_string()])
);
assert_eq!(column_names, Some(vec!["id".to_string()]));
assert_eq!(ns_request.vector_column, Some("vector".to_string()));
assert_eq!(ns_request.distance_type, Some("l2".to_string()));
assert!(ns_request.vector.single_vector.is_some());
@@ -5193,17 +5199,16 @@ mod tests {
let any_query = AnyQuery::Query(q);
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
let column_names = ns_request
.columns
.as_ref()
.and_then(|cols| cols.column_names.clone());
// Plain queries should pass an empty vector
assert_eq!(ns_request.k, 20);
assert_eq!(ns_request.offset, Some(5));
assert_eq!(ns_request.filter, Some("id > 5".to_string()));
assert_eq!(
ns_request
.columns
.as_ref()
.and_then(|c| c.column_names.as_ref()),
Some(&vec!["id".to_string()])
);
assert_eq!(column_names, Some(vec!["id".to_string()]));
assert_eq!(ns_request.with_row_id, Some(true));
assert_eq!(ns_request.bypass_vector_index, Some(true));
assert!(ns_request.vector_column.is_none()); // No vector column for plain queries