mirror of
https://github.com/lancedb/lancedb.git
synced 2026-03-26 02:20:40 +00:00
Compare commits
14 Commits
python-v0.
...
dantasse/e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
65c14f6b40 | ||
|
|
5a7a8da567 | ||
|
|
0db8176445 | ||
|
|
bd84bba14d | ||
|
|
ac07f8068c | ||
|
|
bba362d372 | ||
|
|
042bc22468 | ||
|
|
68569906c6 | ||
|
|
c71c1fc822 | ||
|
|
4a6a0c856e | ||
|
|
f124c9d8d2 | ||
|
|
4e65748abf | ||
|
|
e897f3edab | ||
|
|
790ba7115b |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.23.1"
|
||||
current_version = "0.24.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -75,6 +75,13 @@ jobs:
|
||||
VERSION="${VERSION#v}"
|
||||
BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
|
||||
|
||||
# Use "chore" for beta/rc versions, "feat" for stable releases
|
||||
if [[ "${VERSION}" == *beta* ]] || [[ "${VERSION}" == *rc* ]]; then
|
||||
COMMIT_TYPE="chore"
|
||||
else
|
||||
COMMIT_TYPE="feat"
|
||||
fi
|
||||
|
||||
cat <<EOF >/tmp/codex-prompt.txt
|
||||
You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.
|
||||
|
||||
@@ -84,10 +91,10 @@ jobs:
|
||||
3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
|
||||
4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
|
||||
5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
|
||||
6. Stage all relevant files with "git add -A". Commit using the message "chore: update lance dependency to v${VERSION}".
|
||||
6. Stage all relevant files with "git add -A". Commit using the message "${COMMIT_TYPE}: update lance dependency to v${VERSION}".
|
||||
7. Push the branch to origin. If the branch already exists, force-push your changes.
|
||||
8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
|
||||
9. Create a pull request targeting "main" with title "chore: update lance dependency to v${VERSION}". In the body, summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}).
|
||||
9. Create a pull request targeting "main" with title "${COMMIT_TYPE}: update lance dependency to v${VERSION}". First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'EOF' > /tmp/pr-body.md). The body should summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). Then run "gh pr create --body-file /tmp/pr-body.md".
|
||||
10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
|
||||
|
||||
Constraints:
|
||||
|
||||
74
Cargo.lock
generated
74
Cargo.lock
generated
@@ -3072,8 +3072,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "fsst"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"rand 0.9.2",
|
||||
@@ -4404,8 +4404,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4470,8 +4470,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-arrow"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4490,8 +4490,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-bitpacking"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"paste",
|
||||
@@ -4500,8 +4500,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-core"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4538,8 +4538,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datafusion"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4569,8 +4569,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datagen"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4588,8 +4588,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-encoding"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4626,8 +4626,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-file"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4659,8 +4659,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-geo"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"datafusion",
|
||||
"geo-traits",
|
||||
@@ -4674,8 +4674,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-index"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4742,8 +4742,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-io"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4783,8 +4783,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-linalg"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4800,8 +4800,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
@@ -4813,8 +4813,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace-impls"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-ipc",
|
||||
@@ -4857,8 +4857,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-table"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4897,8 +4897,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-testing"
|
||||
version = "2.0.0-beta.8"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7"
|
||||
version = "2.0.0-rc.1"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.1#751de457b1d44ff957931bedaeb62a6b06ad38d4"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-schema",
|
||||
@@ -4909,7 +4909,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb"
|
||||
version = "0.23.1"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
@@ -4988,7 +4988,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-nodejs"
|
||||
version = "0.23.1"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-ipc",
|
||||
@@ -5008,7 +5008,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-python"
|
||||
version = "0.26.1"
|
||||
version = "0.27.0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
|
||||
30
Cargo.toml
30
Cargo.toml
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.78.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=2.0.0-rc.1", "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "57.2", optional = false }
|
||||
@@ -59,7 +59,7 @@ rand = "0.9"
|
||||
snafu = "0.8"
|
||||
url = "2"
|
||||
num-traits = "0.2"
|
||||
regex = "1.12"
|
||||
regex = "1.10"
|
||||
lazy_static = "1"
|
||||
semver = "1.0.25"
|
||||
chrono = "0.4"
|
||||
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.23.1</version>
|
||||
<version>0.24.0</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.23.1-final.0</version>
|
||||
<version>0.24.0-final.0</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.23.1-final.0</version>
|
||||
<version>0.24.0-final.0</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.23.1"
|
||||
version = "0.24.0"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -1520,9 +1520,9 @@ describe("when optimizing a dataset", () => {
|
||||
|
||||
it("delete unverified", async () => {
|
||||
const version = await table.version();
|
||||
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
|
||||
version - 1
|
||||
}.manifest`;
|
||||
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${String(
|
||||
18446744073709551615n - (BigInt(version) - 1n),
|
||||
).padStart(20, "0")}.manifest`;
|
||||
fs.rmSync(versionFile);
|
||||
|
||||
let stats = await table.optimize({ deleteUnverified: false });
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.23.1",
|
||||
"version": "0.24.0",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.23.1",
|
||||
"version": "0.24.0",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.23.1",
|
||||
"version": "0.24.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.23.1",
|
||||
"version": "0.24.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.23.1",
|
||||
"version": "0.24.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.23.1",
|
||||
"version": "0.24.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.23.1",
|
||||
"version": "0.24.0",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.23.1",
|
||||
"version": "0.24.0",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.23.1",
|
||||
"version": "0.24.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.23.1",
|
||||
"version": "0.24.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.23.1",
|
||||
"version": "0.24.0",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.27.0-beta.0"
|
||||
current_version = "0.27.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.27.0-beta.0"
|
||||
version = "0.27.0"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -9,6 +9,8 @@ import numpy as np
|
||||
import io
|
||||
import warnings
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from ..util import attempt_import_or_raise
|
||||
from .base import EmbeddingFunction
|
||||
from .registry import register
|
||||
@@ -26,7 +28,7 @@ class ColPaliEmbeddings(EmbeddingFunction):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
model_name : str
|
||||
colpali_model_name : str
|
||||
The name of the model to use (e.g., "Metric-AI/ColQwen2.5-3b-multilingual-v1.0")
|
||||
Supports models based on these engines:
|
||||
- ColPali: "vidore/colpali-v1.3" and others
|
||||
@@ -57,7 +59,10 @@ class ColPaliEmbeddings(EmbeddingFunction):
|
||||
useful for large models that do not fit in memory.
|
||||
"""
|
||||
|
||||
model_name: str = "Metric-AI/ColQwen2.5-3b-multilingual-v1.0"
|
||||
colpali_model_name: str = Field(
|
||||
default="Metric-AI/ColQwen2.5-3b-multilingual-v1.0",
|
||||
validation_alias="model_name",
|
||||
)
|
||||
device: str = "auto"
|
||||
dtype: str = "bfloat16"
|
||||
use_token_pooling: bool = True
|
||||
@@ -107,7 +112,7 @@ class ColPaliEmbeddings(EmbeddingFunction):
|
||||
self._processor,
|
||||
self._token_pooler,
|
||||
) = self._load_model(
|
||||
self.model_name,
|
||||
self.colpali_model_name,
|
||||
dtype,
|
||||
device,
|
||||
self.pooling_strategy,
|
||||
|
||||
@@ -10,7 +10,7 @@ import urllib.parse as urlparse
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
from tqdm import tqdm
|
||||
from pydantic import PrivateAttr
|
||||
from pydantic import Field, PrivateAttr
|
||||
|
||||
from ..util import attempt_import_or_raise
|
||||
from .base import EmbeddingFunction
|
||||
@@ -24,7 +24,10 @@ if TYPE_CHECKING:
|
||||
|
||||
@register("siglip")
|
||||
class SigLipEmbeddings(EmbeddingFunction):
|
||||
model_name: str = "google/siglip-base-patch16-224"
|
||||
siglip_model_name: str = Field(
|
||||
default="google/siglip-base-patch16-224",
|
||||
validation_alias="model_name",
|
||||
)
|
||||
device: str = "cpu"
|
||||
batch_size: int = 64
|
||||
normalize: bool = True
|
||||
@@ -39,8 +42,10 @@ class SigLipEmbeddings(EmbeddingFunction):
|
||||
transformers = attempt_import_or_raise("transformers")
|
||||
self._torch = attempt_import_or_raise("torch")
|
||||
|
||||
self._processor = transformers.AutoProcessor.from_pretrained(self.model_name)
|
||||
self._model = transformers.SiglipModel.from_pretrained(self.model_name)
|
||||
self._processor = transformers.AutoProcessor.from_pretrained(
|
||||
self.siglip_model_name
|
||||
)
|
||||
self._model = transformers.SiglipModel.from_pretrained(self.siglip_model_name)
|
||||
self._model.to(self.device)
|
||||
self._model.eval()
|
||||
self._ndims = None
|
||||
|
||||
@@ -2,12 +2,27 @@
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
from lancedb.db import AsyncConnection, DBConnection
|
||||
import lancedb
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
|
||||
def pandas_string_type():
|
||||
"""Return the PyArrow string type that pandas uses for string columns.
|
||||
|
||||
pandas 3.0+ uses large_string for string columns, pandas 2.x uses string.
|
||||
"""
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
|
||||
version = tuple(int(x) for x in pd.__version__.split(".")[:2])
|
||||
if version >= (3, 0):
|
||||
return pa.large_utf8()
|
||||
return pa.utf8()
|
||||
|
||||
|
||||
# Use an in-memory database for most tests.
|
||||
@pytest.fixture
|
||||
def mem_db() -> DBConnection:
|
||||
|
||||
@@ -268,6 +268,8 @@ async def test_create_table_from_iterator_async(mem_db_async: lancedb.AsyncConne
|
||||
|
||||
|
||||
def test_create_exist_ok(tmp_db: lancedb.DBConnection):
|
||||
from conftest import pandas_string_type
|
||||
|
||||
data = pd.DataFrame(
|
||||
{
|
||||
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
||||
@@ -286,10 +288,11 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
|
||||
assert tbl.schema == tbl2.schema
|
||||
assert len(tbl) == len(tbl2)
|
||||
|
||||
# pandas 3.0+ uses large_string, pandas 2.x uses string
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
|
||||
pa.field("item", pa.utf8()),
|
||||
pa.field("item", pandas_string_type()),
|
||||
pa.field("price", pa.float64()),
|
||||
]
|
||||
)
|
||||
@@ -299,7 +302,7 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
|
||||
bad_schema = pa.schema(
|
||||
[
|
||||
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
|
||||
pa.field("item", pa.utf8()),
|
||||
pa.field("item", pandas_string_type()),
|
||||
pa.field("price", pa.float64()),
|
||||
pa.field("extra", pa.float32()),
|
||||
]
|
||||
@@ -365,6 +368,8 @@ async def test_create_mode_async(tmp_db_async: lancedb.AsyncConnection):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
|
||||
from conftest import pandas_string_type
|
||||
|
||||
data = pd.DataFrame(
|
||||
{
|
||||
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
||||
@@ -382,10 +387,11 @@ async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
|
||||
assert tbl.name == tbl2.name
|
||||
assert await tbl.schema() == await tbl2.schema()
|
||||
|
||||
# pandas 3.0+ uses large_string, pandas 2.x uses string
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
|
||||
pa.field("item", pa.utf8()),
|
||||
pa.field("item", pandas_string_type()),
|
||||
pa.field("price", pa.float64()),
|
||||
]
|
||||
)
|
||||
@@ -595,6 +601,8 @@ def test_open_table_sync(tmp_db: lancedb.DBConnection):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_open_table(tmp_path):
|
||||
from conftest import pandas_string_type
|
||||
|
||||
db = await lancedb.connect_async(tmp_path)
|
||||
data = pd.DataFrame(
|
||||
{
|
||||
@@ -614,10 +622,11 @@ async def test_open_table(tmp_path):
|
||||
)
|
||||
is not None
|
||||
)
|
||||
# pandas 3.0+ uses large_string, pandas 2.x uses string
|
||||
assert await tbl.schema() == pa.schema(
|
||||
{
|
||||
"vector": pa.list_(pa.float32(), list_size=2),
|
||||
"item": pa.utf8(),
|
||||
"item": pandas_string_type(),
|
||||
"price": pa.float64(),
|
||||
}
|
||||
)
|
||||
|
||||
@@ -26,6 +26,8 @@ import pytest
|
||||
from lance_namespace import (
|
||||
CreateEmptyTableRequest,
|
||||
CreateEmptyTableResponse,
|
||||
DeclareTableRequest,
|
||||
DeclareTableResponse,
|
||||
DescribeTableRequest,
|
||||
DescribeTableResponse,
|
||||
LanceNamespace,
|
||||
@@ -160,6 +162,19 @@ class TrackingNamespace(LanceNamespace):
|
||||
|
||||
return modified
|
||||
|
||||
def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
|
||||
"""Track declare_table calls and inject rotating credentials."""
|
||||
with self.lock:
|
||||
self.create_call_count += 1
|
||||
count = self.create_call_count
|
||||
|
||||
response = self.inner.declare_table(request)
|
||||
response.storage_options = self._modify_storage_options(
|
||||
response.storage_options, count
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def create_empty_table(
|
||||
self, request: CreateEmptyTableRequest
|
||||
) -> CreateEmptyTableResponse:
|
||||
|
||||
@@ -601,7 +601,6 @@ def test_head():
|
||||
def test_query_sync_minimal():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 10,
|
||||
"prefilter": True,
|
||||
"refine_factor": None,
|
||||
@@ -685,7 +684,6 @@ def test_query_sync_maximal():
|
||||
def test_query_sync_nprobes():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 10,
|
||||
"prefilter": True,
|
||||
"fast_search": True,
|
||||
@@ -715,7 +713,6 @@ def test_query_sync_nprobes():
|
||||
def test_query_sync_no_max_nprobes():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 10,
|
||||
"prefilter": True,
|
||||
"fast_search": True,
|
||||
@@ -838,7 +835,6 @@ def test_query_sync_hybrid():
|
||||
else:
|
||||
# Vector query
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 42,
|
||||
"prefilter": True,
|
||||
"refine_factor": None,
|
||||
|
||||
@@ -1880,8 +1880,13 @@ async def test_optimize_delete_unverified(tmp_db_async: AsyncConnection, tmp_pat
|
||||
],
|
||||
)
|
||||
version = await table.version()
|
||||
path = tmp_path / "test.lance" / "_versions" / f"{version - 1}.manifest"
|
||||
assert version == 2
|
||||
|
||||
# By removing a manifest file, we make the data files we just inserted unverified
|
||||
version_name = 18446744073709551615 - (version - 1)
|
||||
path = tmp_path / "test.lance" / "_versions" / f"{version_name:020}.manifest"
|
||||
os.remove(path)
|
||||
|
||||
stats = await table.optimize(delete_unverified=False)
|
||||
assert stats.prune.old_versions_removed == 0
|
||||
stats = await table.optimize(
|
||||
|
||||
@@ -528,12 +528,19 @@ def test_sanitize_data(
|
||||
else:
|
||||
expected_schema = schema
|
||||
else:
|
||||
from conftest import pandas_string_type
|
||||
|
||||
# polars uses large_string, pandas 3.0+ uses large_string, others use string
|
||||
if isinstance(data, pl.DataFrame):
|
||||
text_type = pa.large_utf8()
|
||||
elif isinstance(data, pd.DataFrame):
|
||||
text_type = pandas_string_type()
|
||||
else:
|
||||
text_type = pa.string()
|
||||
expected_schema = pa.schema(
|
||||
{
|
||||
"id": pa.int64(),
|
||||
"text": pa.large_utf8()
|
||||
if isinstance(data, pl.DataFrame)
|
||||
else pa.string(),
|
||||
"text": text_type,
|
||||
"vector": pa.list_(pa.float32(), 10),
|
||||
}
|
||||
)
|
||||
|
||||
@@ -55,12 +55,12 @@ impl RecordBatchStream {
|
||||
.next()
|
||||
.await
|
||||
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
|
||||
let bound = inner_next.infer_error()?.to_pyarrow(py)?;
|
||||
Ok(bound.unbind())
|
||||
})?;
|
||||
Ok(py_obj)
|
||||
Python::attach(|py| {
|
||||
inner_next
|
||||
.infer_error()?
|
||||
.to_pyarrow(py)
|
||||
.map(|obj| obj.unbind())
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -307,8 +307,7 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.list_namespaces(request).await.infer_error()?;
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("namespaces", response.namespaces)?;
|
||||
dict.set_item("page_token", response.page_token)?;
|
||||
@@ -328,7 +327,8 @@ impl Connection {
|
||||
let py = self_.py();
|
||||
future_into_py(py, async move {
|
||||
use lance_namespace::models::CreateNamespaceRequest;
|
||||
let mode_enum = mode.and_then(|m| match m.to_lowercase().as_str() {
|
||||
// Mode is now a string field
|
||||
let mode_str = mode.and_then(|m| match m.to_lowercase().as_str() {
|
||||
"create" => Some("Create".to_string()),
|
||||
"exist_ok" => Some("ExistOk".to_string()),
|
||||
"overwrite" => Some("Overwrite".to_string()),
|
||||
@@ -340,13 +340,12 @@ impl Connection {
|
||||
} else {
|
||||
Some(namespace)
|
||||
},
|
||||
mode: mode_enum,
|
||||
mode: mode_str,
|
||||
properties,
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.create_namespace(request).await.infer_error()?;
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
Ok(dict.unbind())
|
||||
@@ -365,12 +364,13 @@ impl Connection {
|
||||
let py = self_.py();
|
||||
future_into_py(py, async move {
|
||||
use lance_namespace::models::DropNamespaceRequest;
|
||||
let mode_enum = mode.and_then(|m| match m.to_uppercase().as_str() {
|
||||
// Mode and Behavior are now string fields
|
||||
let mode_str = mode.and_then(|m| match m.to_uppercase().as_str() {
|
||||
"SKIP" => Some("Skip".to_string()),
|
||||
"FAIL" => Some("Fail".to_string()),
|
||||
_ => None,
|
||||
});
|
||||
let behavior_enum = behavior.and_then(|b| match b.to_uppercase().as_str() {
|
||||
let behavior_str = behavior.and_then(|b| match b.to_uppercase().as_str() {
|
||||
"RESTRICT" => Some("Restrict".to_string()),
|
||||
"CASCADE" => Some("Cascade".to_string()),
|
||||
_ => None,
|
||||
@@ -381,13 +381,12 @@ impl Connection {
|
||||
} else {
|
||||
Some(namespace)
|
||||
},
|
||||
mode: mode_enum,
|
||||
behavior: behavior_enum,
|
||||
mode: mode_str,
|
||||
behavior: behavior_str,
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.drop_namespace(request).await.infer_error()?;
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
dict.set_item("transaction_id", response.transaction_id)?;
|
||||
@@ -414,8 +413,7 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.describe_namespace(request).await.infer_error()?;
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
Ok(dict.unbind())
|
||||
@@ -445,8 +443,7 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.list_tables(request).await.infer_error()?;
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("tables", response.tables)?;
|
||||
dict.set_item("page_token", response.page_token)?;
|
||||
|
||||
@@ -40,34 +40,31 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
||||
request_id,
|
||||
source,
|
||||
status_code,
|
||||
} => {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
let message = err.to_string();
|
||||
let http_err_cls = py
|
||||
.import(intern!(py, "lancedb.remote.errors"))?
|
||||
.getattr(intern!(py, "HttpError"))?;
|
||||
let err = http_err_cls.call1((
|
||||
message,
|
||||
} => Python::attach(|py| {
|
||||
let message = err.to_string();
|
||||
let http_err_cls = py
|
||||
.import(intern!(py, "lancedb.remote.errors"))?
|
||||
.getattr(intern!(py, "HttpError"))?;
|
||||
let err = http_err_cls.call1((
|
||||
message,
|
||||
request_id,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
))?;
|
||||
|
||||
if let Some(cause) = source.source() {
|
||||
// The HTTP error already includes the first cause. But
|
||||
// we can add the rest of the chain if there is any more.
|
||||
let cause_err = http_from_rust_error(
|
||||
py,
|
||||
cause,
|
||||
request_id,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
))?;
|
||||
)?;
|
||||
err.setattr(intern!(py, "__cause__"), cause_err)?;
|
||||
}
|
||||
|
||||
if let Some(cause) = source.source() {
|
||||
// The HTTP error already includes the first cause. But
|
||||
// we can add the rest of the chain if there is any more.
|
||||
let cause_err = http_from_rust_error(
|
||||
py,
|
||||
cause,
|
||||
request_id,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
)?;
|
||||
err.setattr(intern!(py, "__cause__"), cause_err)?;
|
||||
}
|
||||
|
||||
Err(PyErr::from_value(err))
|
||||
})
|
||||
}
|
||||
Err(PyErr::from_value(err))
|
||||
}),
|
||||
LanceError::Retry {
|
||||
request_id,
|
||||
request_failures,
|
||||
@@ -78,37 +75,33 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
||||
max_read_failures,
|
||||
source,
|
||||
status_code,
|
||||
} =>
|
||||
{
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
let cause_err = http_from_rust_error(
|
||||
py,
|
||||
source.as_ref(),
|
||||
request_id,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
)?;
|
||||
} => Python::attach(|py| {
|
||||
let cause_err = http_from_rust_error(
|
||||
py,
|
||||
source.as_ref(),
|
||||
request_id,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
)?;
|
||||
|
||||
let message = err.to_string();
|
||||
let retry_error_cls = py
|
||||
.import(intern!(py, "lancedb.remote.errors"))?
|
||||
.getattr("RetryError")?;
|
||||
let err = retry_error_cls.call1((
|
||||
message,
|
||||
request_id,
|
||||
*request_failures,
|
||||
*connect_failures,
|
||||
*read_failures,
|
||||
*max_request_failures,
|
||||
*max_connect_failures,
|
||||
*max_read_failures,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
))?;
|
||||
let message = err.to_string();
|
||||
let retry_error_cls = py
|
||||
.import(intern!(py, "lancedb.remote.errors"))?
|
||||
.getattr("RetryError")?;
|
||||
let err = retry_error_cls.call1((
|
||||
message,
|
||||
request_id,
|
||||
*request_failures,
|
||||
*connect_failures,
|
||||
*read_failures,
|
||||
*max_request_failures,
|
||||
*max_connect_failures,
|
||||
*max_read_failures,
|
||||
status_code.map(|s| s.as_u16()),
|
||||
))?;
|
||||
|
||||
err.setattr(intern!(py, "__cause__"), cause_err)?;
|
||||
Err(PyErr::from_value(err))
|
||||
})
|
||||
}
|
||||
err.setattr(intern!(py, "__cause__"), cause_err)?;
|
||||
Err(PyErr::from_value(err))
|
||||
}),
|
||||
_ => self.runtime_error(),
|
||||
},
|
||||
}
|
||||
|
||||
@@ -12,8 +12,7 @@ pub struct PyHeaderProvider {
|
||||
|
||||
impl Clone for PyHeaderProvider {
|
||||
fn clone(&self) -> Self {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| Self {
|
||||
Python::attach(|py| Self {
|
||||
provider: self.provider.clone_ref(py),
|
||||
})
|
||||
}
|
||||
@@ -26,8 +25,7 @@ impl PyHeaderProvider {
|
||||
|
||||
/// Get headers from the Python provider (internal implementation)
|
||||
fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
// Call the get_headers method
|
||||
let result = self.provider.call_method0(py, "get_headers");
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ use pyo3::{
|
||||
exceptions::PyRuntimeError,
|
||||
pyclass, pymethods,
|
||||
types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
|
||||
Bound, Py, PyAny, PyRef, PyRefMut, PyResult, Python,
|
||||
Bound, PyAny, PyRef, PyRefMut, PyResult, Python,
|
||||
};
|
||||
use pyo3_async_runtimes::tokio::future_into_py;
|
||||
|
||||
@@ -281,12 +281,7 @@ impl PyPermutationReader {
|
||||
let reader = slf.reader.clone();
|
||||
future_into_py(slf.py(), async move {
|
||||
let schema = reader.output_schema(selection).await.infer_error()?;
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
|
||||
let bound = schema.to_pyarrow(py)?;
|
||||
Ok(bound.unbind())
|
||||
})?;
|
||||
Ok(py_obj)
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -29,7 +29,6 @@ use pyo3::types::PyList;
|
||||
use pyo3::types::{PyDict, PyString};
|
||||
use pyo3::Bound;
|
||||
use pyo3::IntoPyObject;
|
||||
use pyo3::Py;
|
||||
use pyo3::PyAny;
|
||||
use pyo3::PyRef;
|
||||
use pyo3::PyResult;
|
||||
@@ -454,12 +453,7 @@ impl Query {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
|
||||
let bound = schema.to_pyarrow(py)?;
|
||||
Ok(bound.unbind())
|
||||
})?;
|
||||
Ok(py_obj)
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -538,12 +532,7 @@ impl TakeQuery {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
|
||||
let bound = schema.to_pyarrow(py)?;
|
||||
Ok(bound.unbind())
|
||||
})?;
|
||||
Ok(py_obj)
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -638,12 +627,7 @@ impl FTSQuery {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
|
||||
let bound = schema.to_pyarrow(py)?;
|
||||
Ok(bound.unbind())
|
||||
})?;
|
||||
Ok(py_obj)
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -822,12 +806,7 @@ impl VectorQuery {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
|
||||
let bound = schema.to_pyarrow(py)?;
|
||||
Ok(bound.unbind())
|
||||
})?;
|
||||
Ok(py_obj)
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -22,8 +22,7 @@ pub struct PyStorageOptionsProvider {
|
||||
|
||||
impl Clone for PyStorageOptionsProvider {
|
||||
fn clone(&self) -> Self {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| Self {
|
||||
Python::attach(|py| Self {
|
||||
inner: self.inner.clone_ref(py),
|
||||
})
|
||||
}
|
||||
@@ -31,17 +30,14 @@ impl Clone for PyStorageOptionsProvider {
|
||||
|
||||
impl PyStorageOptionsProvider {
|
||||
pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
// Verify the object has a fetch_storage_options method
|
||||
if !obj.bind(py).hasattr("fetch_storage_options")? {
|
||||
return Err(pyo3::exceptions::PyTypeError::new_err(
|
||||
"StorageOptionsProvider must implement fetch_storage_options() method",
|
||||
));
|
||||
}
|
||||
Ok(Self {
|
||||
inner: obj.clone_ref(py),
|
||||
})
|
||||
Ok(Self { inner: obj })
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -64,8 +60,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
||||
let py_provider = self.py_provider.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
// Call the Python fetch_storage_options method
|
||||
let result = py_provider
|
||||
.inner
|
||||
@@ -124,8 +119,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
||||
}
|
||||
|
||||
fn provider_id(&self) -> String {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
// Call provider_id() method on the Python object
|
||||
let obj = self.py_provider.inner.bind(py);
|
||||
obj.call_method0("provider_id")
|
||||
|
||||
@@ -21,7 +21,7 @@ use pyo3::{
|
||||
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
|
||||
pyclass, pymethods,
|
||||
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
|
||||
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
||||
Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
|
||||
};
|
||||
use pyo3_async_runtimes::tokio::future_into_py;
|
||||
|
||||
@@ -287,12 +287,7 @@ impl Table {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.schema().await.infer_error()?;
|
||||
#[allow(deprecated)]
|
||||
let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
|
||||
let bound = schema.to_pyarrow(py)?;
|
||||
Ok(bound.unbind())
|
||||
})?;
|
||||
Ok(py_obj)
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -442,8 +437,7 @@ impl Table {
|
||||
future_into_py(self_.py(), async move {
|
||||
let stats = inner.index_stats(&index_name).await.infer_error()?;
|
||||
if let Some(stats) = stats {
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
|
||||
dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
|
||||
@@ -473,8 +467,7 @@ impl Table {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let stats = inner.stats().await.infer_error()?;
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("total_bytes", stats.total_bytes)?;
|
||||
dict.set_item("num_rows", stats.num_rows)?;
|
||||
@@ -528,8 +521,7 @@ impl Table {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let versions = inner.list_versions().await.infer_error()?;
|
||||
#[allow(deprecated)]
|
||||
let versions_as_dict = Python::with_gil(|py| {
|
||||
let versions_as_dict = Python::attach(|py| {
|
||||
versions
|
||||
.iter()
|
||||
.map(|v| {
|
||||
@@ -880,8 +872,7 @@ impl Tags {
|
||||
let tags = inner.tags().await.infer_error()?;
|
||||
let res = tags.list().await.infer_error()?;
|
||||
|
||||
#[allow(deprecated)]
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
let py_dict = PyDict::new(py);
|
||||
for (key, contents) in res {
|
||||
let value_dict = PyDict::new(py);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.23.1"
|
||||
version = "0.24.0"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
@@ -36,10 +36,42 @@ use crate::remote::{
|
||||
};
|
||||
use crate::table::{TableDefinition, WriteOptions};
|
||||
use crate::Table;
|
||||
use lance::io::ObjectStoreParams;
|
||||
pub use lance_encoding::version::LanceFileVersion;
|
||||
#[cfg(feature = "remote")]
|
||||
use lance_io::object_store::StorageOptions;
|
||||
use lance_io::object_store::StorageOptionsProvider;
|
||||
use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
|
||||
|
||||
fn merge_storage_options(
|
||||
store_params: &mut ObjectStoreParams,
|
||||
pairs: impl IntoIterator<Item = (String, String)>,
|
||||
) {
|
||||
let mut options = store_params.storage_options().cloned().unwrap_or_default();
|
||||
for (key, value) in pairs {
|
||||
options.insert(key, value);
|
||||
}
|
||||
let provider = store_params
|
||||
.storage_options_accessor
|
||||
.as_ref()
|
||||
.and_then(|accessor| accessor.provider().cloned());
|
||||
let accessor = if let Some(provider) = provider {
|
||||
StorageOptionsAccessor::with_initial_and_provider(options, provider)
|
||||
} else {
|
||||
StorageOptionsAccessor::with_static_options(options)
|
||||
};
|
||||
store_params.storage_options_accessor = Some(Arc::new(accessor));
|
||||
}
|
||||
|
||||
fn set_storage_options_provider(
|
||||
store_params: &mut ObjectStoreParams,
|
||||
provider: Arc<dyn StorageOptionsProvider>,
|
||||
) {
|
||||
let accessor = match store_params.storage_options().cloned() {
|
||||
Some(options) => StorageOptionsAccessor::with_initial_and_provider(options, provider),
|
||||
None => StorageOptionsAccessor::with_provider(provider),
|
||||
};
|
||||
store_params.storage_options_accessor = Some(Arc::new(accessor));
|
||||
}
|
||||
|
||||
/// A builder for configuring a [`Connection::table_names`] operation
|
||||
pub struct TableNamesBuilder {
|
||||
@@ -246,16 +278,14 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
let store_options = self
|
||||
let store_params = self
|
||||
.request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.get_or_insert(Default::default())
|
||||
.store_params
|
||||
.get_or_insert(Default::default())
|
||||
.storage_options
|
||||
.get_or_insert(Default::default());
|
||||
store_options.insert(key.into(), value.into());
|
||||
merge_storage_options(store_params, [(key.into(), value.into())]);
|
||||
self
|
||||
}
|
||||
|
||||
@@ -269,19 +299,17 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
) -> Self {
|
||||
let store_options = self
|
||||
let store_params = self
|
||||
.request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.get_or_insert(Default::default())
|
||||
.store_params
|
||||
.get_or_insert(Default::default())
|
||||
.storage_options
|
||||
.get_or_insert(Default::default());
|
||||
|
||||
for (key, value) in pairs {
|
||||
store_options.insert(key.into(), value.into());
|
||||
}
|
||||
let updates = pairs
|
||||
.into_iter()
|
||||
.map(|(key, value)| (key.into(), value.into()));
|
||||
merge_storage_options(store_params, updates);
|
||||
self
|
||||
}
|
||||
|
||||
@@ -318,23 +346,21 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
|
||||
/// This has no effect in LanceDB Cloud.
|
||||
#[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
|
||||
pub fn enable_v2_manifest_paths(mut self, use_v2_manifest_paths: bool) -> Self {
|
||||
let storage_options = self
|
||||
let store_params = self
|
||||
.request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.get_or_insert_with(Default::default)
|
||||
.store_params
|
||||
.get_or_insert_with(Default::default)
|
||||
.storage_options
|
||||
.get_or_insert_with(Default::default);
|
||||
|
||||
storage_options.insert(
|
||||
OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(),
|
||||
if use_v2_manifest_paths {
|
||||
"true".to_string()
|
||||
} else {
|
||||
"false".to_string()
|
||||
},
|
||||
let value = if use_v2_manifest_paths {
|
||||
"true".to_string()
|
||||
} else {
|
||||
"false".to_string()
|
||||
};
|
||||
merge_storage_options(
|
||||
store_params,
|
||||
[(OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(), value)],
|
||||
);
|
||||
self
|
||||
}
|
||||
@@ -344,19 +370,19 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
|
||||
/// The default is `LanceFileVersion::Stable`.
|
||||
#[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
|
||||
pub fn data_storage_version(mut self, data_storage_version: LanceFileVersion) -> Self {
|
||||
let storage_options = self
|
||||
let store_params = self
|
||||
.request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.get_or_insert_with(Default::default)
|
||||
.store_params
|
||||
.get_or_insert_with(Default::default)
|
||||
.storage_options
|
||||
.get_or_insert_with(Default::default);
|
||||
|
||||
storage_options.insert(
|
||||
OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
|
||||
data_storage_version.to_string(),
|
||||
merge_storage_options(
|
||||
store_params,
|
||||
[(
|
||||
OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
|
||||
data_storage_version.to_string(),
|
||||
)],
|
||||
);
|
||||
self
|
||||
}
|
||||
@@ -381,13 +407,14 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
|
||||
/// This allows tables to automatically refresh cloud storage credentials
|
||||
/// when they expire, enabling long-running operations on remote storage.
|
||||
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
|
||||
self.request
|
||||
let store_params = self
|
||||
.request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.get_or_insert(Default::default())
|
||||
.store_params
|
||||
.get_or_insert(Default::default())
|
||||
.storage_options_provider = Some(provider);
|
||||
.get_or_insert(Default::default());
|
||||
set_storage_options_provider(store_params, provider);
|
||||
self
|
||||
}
|
||||
}
|
||||
@@ -450,15 +477,13 @@ impl OpenTableBuilder {
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
let storage_options = self
|
||||
let store_params = self
|
||||
.request
|
||||
.lance_read_params
|
||||
.get_or_insert(Default::default())
|
||||
.store_options
|
||||
.get_or_insert(Default::default())
|
||||
.storage_options
|
||||
.get_or_insert(Default::default());
|
||||
storage_options.insert(key.into(), value.into());
|
||||
merge_storage_options(store_params, [(key.into(), value.into())]);
|
||||
self
|
||||
}
|
||||
|
||||
@@ -472,18 +497,16 @@ impl OpenTableBuilder {
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
) -> Self {
|
||||
let storage_options = self
|
||||
let store_params = self
|
||||
.request
|
||||
.lance_read_params
|
||||
.get_or_insert(Default::default())
|
||||
.store_options
|
||||
.get_or_insert(Default::default())
|
||||
.storage_options
|
||||
.get_or_insert(Default::default());
|
||||
|
||||
for (key, value) in pairs {
|
||||
storage_options.insert(key.into(), value.into());
|
||||
}
|
||||
let updates = pairs
|
||||
.into_iter()
|
||||
.map(|(key, value)| (key.into(), value.into()));
|
||||
merge_storage_options(store_params, updates);
|
||||
self
|
||||
}
|
||||
|
||||
@@ -507,12 +530,13 @@ impl OpenTableBuilder {
|
||||
/// This allows tables to automatically refresh cloud storage credentials
|
||||
/// when they expire, enabling long-running operations on remote storage.
|
||||
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
|
||||
self.request
|
||||
let store_params = self
|
||||
.request
|
||||
.lance_read_params
|
||||
.get_or_insert(Default::default())
|
||||
.store_options
|
||||
.get_or_insert(Default::default())
|
||||
.storage_options_provider = Some(provider);
|
||||
.get_or_insert(Default::default());
|
||||
set_storage_options_provider(store_params, provider);
|
||||
self
|
||||
}
|
||||
|
||||
@@ -868,6 +892,9 @@ pub struct ConnectBuilder {
|
||||
embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
|
||||
}
|
||||
|
||||
const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 1] =
|
||||
[("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name")];
|
||||
|
||||
impl ConnectBuilder {
|
||||
/// Create a new [`ConnectOptions`] with the given database URI.
|
||||
pub fn new(uri: &str) -> Self {
|
||||
@@ -1051,11 +1078,27 @@ impl ConnectBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
#[cfg(feature = "remote")]
|
||||
/// Fills in storage options from environment variables without overriding
/// values that are already present.
///
/// Each `(env_key, opt_key)` pair in `env_var_to_remote_storage_option` is
/// processed in order: when the environment variable `env_key` can be read
/// and `options` has no entry for `opt_key`, the variable's value is stored
/// under `opt_key`. Pairs whose variable cannot be read (`std::env::var`
/// returns an error) are skipped, and existing entries are never replaced.
fn apply_env_defaults(
    env_var_to_remote_storage_option: &[(&str, &str)],
    options: &mut HashMap<String, String>,
) {
    for (env_key, opt_key) in env_var_to_remote_storage_option {
        if let Ok(env_value) = std::env::var(env_key) {
            // One entry lookup both checks for an existing value and inserts
            // the default, instead of hashing the key twice.
            options.entry((*opt_key).to_string()).or_insert(env_value);
        }
    }
}
|
||||
|
||||
#[cfg(feature = "remote")]
|
||||
fn execute_remote(self) -> Result<Connection> {
|
||||
use crate::remote::db::RemoteDatabaseOptions;
|
||||
|
||||
let options = RemoteDatabaseOptions::parse_from_map(&self.request.options)?;
|
||||
let mut merged_options = self.request.options.clone();
|
||||
Self::apply_env_defaults(&ENV_VARS_TO_STORAGE_OPTS, &mut merged_options);
|
||||
let options = RemoteDatabaseOptions::parse_from_map(&merged_options)?;
|
||||
|
||||
let region = options.region.ok_or_else(|| Error::InvalidInput {
|
||||
message: "A region is required when connecting to LanceDb Cloud".to_string(),
|
||||
@@ -1277,8 +1320,6 @@ mod test_utils {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::fs::create_dir_all;
|
||||
|
||||
use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
|
||||
use crate::query::QueryBase;
|
||||
use crate::query::{ExecutableQuery, QueryExecutionOptions};
|
||||
@@ -1302,6 +1343,23 @@ mod tests {
|
||||
assert_eq!(tc.connection.uri(), tc.uri);
|
||||
}
|
||||
|
||||
#[cfg(feature = "remote")]
|
||||
#[test]
|
||||
fn test_apply_env_defaults() {
|
||||
let env_key = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_KEY";
|
||||
let env_val = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_VAL";
|
||||
let opts_key = "test_apply_env_defaults_environment_variable_opts_key";
|
||||
std::env::set_var(env_key, env_val);
|
||||
|
||||
let mut options = HashMap::new();
|
||||
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
|
||||
assert_eq!(Some(&env_val.to_string()), options.get(opts_key));
|
||||
|
||||
options.insert(opts_key.to_string(), "EXPLICIT-VALUE".to_string());
|
||||
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
|
||||
assert_eq!(Some(&"EXPLICIT-VALUE".to_string()), options.get(opts_key));
|
||||
}
|
||||
|
||||
#[cfg(not(windows))]
|
||||
#[tokio::test]
|
||||
async fn test_connect_relative() {
|
||||
@@ -1526,18 +1584,27 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn drop_table() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let tc = new_test_connection().await.unwrap();
|
||||
let db = tc.connection;
|
||||
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let db = connect(uri).execute().await.unwrap();
|
||||
if tc.is_remote {
|
||||
// All the typical endpoints such as s3:///, file-object-store:///, etc. treat drop_table
|
||||
// as idempotent.
|
||||
assert!(db.drop_table("invalid_table", &[]).await.is_ok());
|
||||
} else {
|
||||
// The behavior of drop_table when using a file:/// endpoint differs from all other
|
||||
// object providers, in that it returns an error when deleting a non-existent table.
|
||||
assert!(matches!(
|
||||
db.drop_table("invalid_table", &[]).await,
|
||||
Err(crate::Error::TableNotFound { .. }),
|
||||
));
|
||||
}
|
||||
|
||||
// drop non-exist table
|
||||
assert!(matches!(
|
||||
db.drop_table("invalid_table", &[]).await,
|
||||
Err(crate::Error::TableNotFound { .. }),
|
||||
));
|
||||
|
||||
create_dir_all(tmp_dir.path().join("table1.lance")).unwrap();
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
|
||||
db.create_empty_table("table1", schema.clone())
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
db.drop_table("table1", &[]).await.unwrap();
|
||||
|
||||
let tables = db.table_names().execute().await.unwrap();
|
||||
|
||||
@@ -12,7 +12,7 @@ use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode};
|
||||
use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
|
||||
use lance_datafusion::utils::StreamingWriteSource;
|
||||
use lance_encoding::version::LanceFileVersion;
|
||||
use lance_io::object_store::StorageOptionsProvider;
|
||||
use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
|
||||
use lance_table::io::commit::commit_handler_from_url;
|
||||
use object_store::local::LocalFileSystem;
|
||||
use snafu::ResultExt;
|
||||
@@ -356,7 +356,13 @@ impl ListingDatabase {
|
||||
.clone()
|
||||
.unwrap_or_else(|| Arc::new(lance::session::Session::default()));
|
||||
let os_params = ObjectStoreParams {
|
||||
storage_options: Some(options.storage_options.clone()),
|
||||
storage_options_accessor: if options.storage_options.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Arc::new(StorageOptionsAccessor::with_static_options(
|
||||
options.storage_options.clone(),
|
||||
)))
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
let (object_store, base_path) = ObjectStore::from_uri_and_params(
|
||||
@@ -492,7 +498,13 @@ impl ListingDatabase {
|
||||
|
||||
async fn drop_tables(&self, names: Vec<String>) -> Result<()> {
|
||||
let object_store_params = ObjectStoreParams {
|
||||
storage_options: Some(self.storage_options.clone()),
|
||||
storage_options_accessor: if self.storage_options.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Arc::new(StorageOptionsAccessor::with_static_options(
|
||||
self.storage_options.clone(),
|
||||
)))
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
let mut uri = self.uri.clone();
|
||||
@@ -541,7 +553,7 @@ impl ListingDatabase {
|
||||
.lance_write_params
|
||||
.as_ref()
|
||||
.and_then(|p| p.store_params.as_ref())
|
||||
.and_then(|sp| sp.storage_options.as_ref());
|
||||
.and_then(|sp| sp.storage_options());
|
||||
|
||||
let storage_version_override = storage_options
|
||||
.and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION))
|
||||
@@ -592,21 +604,20 @@ impl ListingDatabase {
|
||||
// will cause a new connection to be created, and that connection will
|
||||
// be dropped from the cache when python GCs the table object, which
|
||||
// confounds reuse across tables.
|
||||
if !self.storage_options.is_empty() {
|
||||
let storage_options = write_params
|
||||
if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
|
||||
let store_params = write_params
|
||||
.store_params
|
||||
.get_or_insert_with(Default::default)
|
||||
.storage_options
|
||||
.get_or_insert_with(Default::default);
|
||||
self.inherit_storage_options(storage_options);
|
||||
}
|
||||
|
||||
// Set storage options provider if available
|
||||
if self.storage_options_provider.is_some() {
|
||||
write_params
|
||||
.store_params
|
||||
.get_or_insert_with(Default::default)
|
||||
.storage_options_provider = self.storage_options_provider.clone();
|
||||
let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
|
||||
if !self.storage_options.is_empty() {
|
||||
self.inherit_storage_options(&mut storage_options);
|
||||
}
|
||||
let accessor = if let Some(ref provider) = self.storage_options_provider {
|
||||
StorageOptionsAccessor::with_initial_and_provider(storage_options, provider.clone())
|
||||
} else {
|
||||
StorageOptionsAccessor::with_static_options(storage_options)
|
||||
};
|
||||
store_params.storage_options_accessor = Some(Arc::new(accessor));
|
||||
}
|
||||
|
||||
write_params.data_storage_version = self
|
||||
@@ -892,7 +903,13 @@ impl Database for ListingDatabase {
|
||||
validate_table_name(&request.target_table_name)?;
|
||||
|
||||
let storage_params = ObjectStoreParams {
|
||||
storage_options: Some(self.storage_options.clone()),
|
||||
storage_options_accessor: if self.storage_options.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Arc::new(StorageOptionsAccessor::with_static_options(
|
||||
self.storage_options.clone(),
|
||||
)))
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
let read_params = ReadParams {
|
||||
@@ -956,25 +973,28 @@ impl Database for ListingDatabase {
|
||||
// will cause a new connection to be created, and that connection will
|
||||
// be dropped from the cache when python GCs the table object, which
|
||||
// confounds reuse across tables.
|
||||
if !self.storage_options.is_empty() {
|
||||
let storage_options = request
|
||||
if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
|
||||
let store_params = request
|
||||
.lance_read_params
|
||||
.get_or_insert_with(Default::default)
|
||||
.store_options
|
||||
.get_or_insert_with(Default::default)
|
||||
.storage_options
|
||||
.get_or_insert_with(Default::default);
|
||||
self.inherit_storage_options(storage_options);
|
||||
}
|
||||
|
||||
// Set storage options provider if available
|
||||
if self.storage_options_provider.is_some() {
|
||||
request
|
||||
.lance_read_params
|
||||
.get_or_insert_with(Default::default)
|
||||
.store_options
|
||||
.get_or_insert_with(Default::default)
|
||||
.storage_options_provider = self.storage_options_provider.clone();
|
||||
let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
|
||||
if !self.storage_options.is_empty() {
|
||||
self.inherit_storage_options(&mut storage_options);
|
||||
}
|
||||
// Preserve request-level provider if no connection-level provider exists
|
||||
let request_provider = store_params
|
||||
.storage_options_accessor
|
||||
.as_ref()
|
||||
.and_then(|a| a.provider().cloned());
|
||||
let provider = self.storage_options_provider.clone().or(request_provider);
|
||||
let accessor = if let Some(provider) = provider {
|
||||
StorageOptionsAccessor::with_initial_and_provider(storage_options, provider)
|
||||
} else {
|
||||
StorageOptionsAccessor::with_static_options(storage_options)
|
||||
};
|
||||
store_params.storage_options_accessor = Some(Arc::new(accessor));
|
||||
}
|
||||
|
||||
// Some ReadParams are exposed in the OpenTableBuilder, but we also
|
||||
@@ -1881,7 +1901,9 @@ mod tests {
|
||||
let write_options = WriteOptions {
|
||||
lance_write_params: Some(lance::dataset::WriteParams {
|
||||
store_params: Some(lance::io::ObjectStoreParams {
|
||||
storage_options: Some(storage_options),
|
||||
storage_options_accessor: Some(Arc::new(
|
||||
StorageOptionsAccessor::with_static_options(storage_options),
|
||||
)),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
@@ -1955,7 +1977,9 @@ mod tests {
|
||||
let write_options = WriteOptions {
|
||||
lance_write_params: Some(lance::dataset::WriteParams {
|
||||
store_params: Some(lance::io::ObjectStoreParams {
|
||||
storage_options: Some(storage_options),
|
||||
storage_options_accessor: Some(Arc::new(
|
||||
StorageOptionsAccessor::with_static_options(storage_options),
|
||||
)),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
|
||||
@@ -9,14 +9,15 @@ use std::sync::Arc;
|
||||
use async_trait::async_trait;
|
||||
use lance_namespace::{
|
||||
models::{
|
||||
CreateNamespaceRequest, CreateNamespaceResponse, DeclareTableRequest,
|
||||
DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest,
|
||||
DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest,
|
||||
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||
CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
|
||||
DeclareTableRequest, DescribeNamespaceRequest, DescribeNamespaceResponse,
|
||||
DescribeTableRequest, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest,
|
||||
ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||
},
|
||||
LanceNamespace,
|
||||
};
|
||||
use lance_namespace_impls::ConnectBuilder;
|
||||
use log::warn;
|
||||
|
||||
use crate::database::ReadConsistency;
|
||||
use crate::error::{Error, Result};
|
||||
@@ -154,7 +155,6 @@ impl Database for LanceNamespaceDatabase {
|
||||
table_id.push(request.name.clone());
|
||||
let describe_request = DescribeTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
version: None,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -205,26 +205,53 @@ impl Database for LanceNamespaceDatabase {
|
||||
let mut table_id = request.namespace.clone();
|
||||
table_id.push(request.name.clone());
|
||||
|
||||
let create_empty_request = DeclareTableRequest {
|
||||
// Try declare_table first, falling back to create_empty_table for backwards
|
||||
// compatibility with older namespace clients that don't support declare_table
|
||||
let declare_request = DeclareTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
location: None,
|
||||
vend_credentials: None,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let create_empty_response = self
|
||||
.namespace
|
||||
.declare_table(create_empty_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to declare table: {}", e),
|
||||
})?;
|
||||
let location = match self.namespace.declare_table(declare_request).await {
|
||||
Ok(response) => response.location.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from declare_table response".to_string(),
|
||||
})?,
|
||||
Err(e) => {
|
||||
// Check if the error is "not supported" and try create_empty_table as fallback
|
||||
let err_str = e.to_string().to_lowercase();
|
||||
if err_str.contains("not supported") || err_str.contains("not implemented") {
|
||||
warn!(
|
||||
"declare_table is not supported by the namespace client, \
|
||||
falling back to deprecated create_empty_table. \
|
||||
create_empty_table is deprecated and will be removed in Lance 3.0.0. \
|
||||
Please upgrade your namespace client to support declare_table."
|
||||
);
|
||||
#[allow(deprecated)]
|
||||
let create_empty_request = CreateEmptyTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let location = create_empty_response
|
||||
.location
|
||||
.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from create_empty_table response".to_string(),
|
||||
})?;
|
||||
#[allow(deprecated)]
|
||||
let create_response = self
|
||||
.namespace
|
||||
.create_empty_table(create_empty_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to create empty table: {}", e),
|
||||
})?;
|
||||
|
||||
create_response.location.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from create_empty_table response"
|
||||
.to_string(),
|
||||
})?
|
||||
} else {
|
||||
return Err(Error::Runtime {
|
||||
message: format!("Failed to declare table: {}", e),
|
||||
});
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let native_table = NativeTable::create_from_namespace(
|
||||
self.namespace.clone(),
|
||||
@@ -439,8 +466,6 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -501,8 +526,6 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -566,8 +589,6 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -651,8 +672,6 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -708,8 +727,6 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -790,8 +807,6 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -825,8 +840,6 @@ mod tests {
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
|
||||
@@ -51,24 +51,19 @@
|
||||
//! - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud object store
|
||||
//! - `db://dbname` - Lance Cloud
|
||||
//!
|
||||
//! You can also use [`ConnectOptions`] to configure the connection to the database.
|
||||
//! You can also use [`ConnectBuilder`] to configure the connection to the database.
|
||||
//!
|
||||
//! ```rust
|
||||
//! # #[cfg(feature = "aws")]
|
||||
//! # {
|
||||
//! use object_store::aws::AwsCredential;
|
||||
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
//! let db = lancedb::connect("data/sample-lancedb")
|
||||
//! .aws_creds(AwsCredential {
|
||||
//! key_id: "some_key".to_string(),
|
||||
//! secret_key: "some_secret".to_string(),
|
||||
//! token: None,
|
||||
//! })
|
||||
//! .storage_options([
|
||||
//! ("aws_access_key_id", "some_key"),
|
||||
//! ("aws_secret_access_key", "some_secret"),
|
||||
//! ])
|
||||
//! .execute()
|
||||
//! .await
|
||||
//! .unwrap();
|
||||
//! # });
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! LanceDB uses [arrow-rs](https://github.com/apache/arrow-rs) to define schema, data types and array itself.
|
||||
|
||||
@@ -1718,8 +1718,6 @@ mod tests {
|
||||
let namespace = vec!["test_ns".to_string()];
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(namespace.clone()),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -1745,8 +1743,6 @@ mod tests {
|
||||
let list_response = conn
|
||||
.list_tables(ListTablesRequest {
|
||||
id: Some(namespace.clone()),
|
||||
page_token: None,
|
||||
limit: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -1758,8 +1754,6 @@ mod tests {
|
||||
let list_response = namespace_client
|
||||
.list_tables(ListTablesRequest {
|
||||
id: Some(namespace.clone()),
|
||||
page_token: None,
|
||||
limit: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -1800,8 +1794,6 @@ mod tests {
|
||||
let namespace = vec!["multi_table_ns".to_string()];
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(namespace.clone()),
|
||||
mode: None,
|
||||
properties: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
@@ -1827,8 +1819,6 @@ mod tests {
|
||||
let list_response = conn
|
||||
.list_tables(ListTablesRequest {
|
||||
id: Some(namespace.clone()),
|
||||
page_token: None,
|
||||
limit: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
|
||||
@@ -468,7 +468,9 @@ impl<S: HttpSend> RemoteTable<S> {
|
||||
self.apply_query_params(&mut body, &query.base)?;
|
||||
|
||||
// Apply general parameters, before we dispatch based on number of query vectors.
|
||||
body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
|
||||
if let Some(distance_type) = query.distance_type {
|
||||
body["distance_type"] = serde_json::json!(distance_type);
|
||||
}
|
||||
// In 0.23.1 we migrated from `nprobes` to `minimum_nprobes` and `maximum_nprobes`.
|
||||
// Old client / new server: since minimum_nprobes is missing, fallback to nprobes
|
||||
// New client / old server: old server will only see nprobes, make sure to set both
|
||||
@@ -2230,7 +2232,6 @@ mod tests {
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
let mut expected_body = serde_json::json!({
|
||||
"prefilter": true,
|
||||
"distance_type": "l2",
|
||||
"nprobes": 20,
|
||||
"minimum_nprobes": 20,
|
||||
"maximum_nprobes": 20,
|
||||
|
||||
@@ -40,7 +40,7 @@ use lance_index::vector::pq::PQBuildParams;
|
||||
use lance_index::vector::sq::builder::SQBuildParams;
|
||||
use lance_index::DatasetIndexExt;
|
||||
use lance_index::IndexType;
|
||||
use lance_io::object_store::LanceNamespaceStorageOptionsProvider;
|
||||
use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsAccessor};
|
||||
use lance_namespace::models::{
|
||||
QueryTableRequest as NsQueryTableRequest, QueryTableRequestColumns,
|
||||
QueryTableRequestFullTextQuery, QueryTableRequestVector, StringFtsQuery,
|
||||
@@ -1425,8 +1425,8 @@ impl Table {
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let unioned = UnionExec::try_new(projected_plans).map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to build union plan: {e}"),
|
||||
let unioned = UnionExec::try_new(projected_plans).map_err(|err| Error::Runtime {
|
||||
message: err.to_string(),
|
||||
})?;
|
||||
// We require 1 partition in the final output
|
||||
let repartitioned = RepartitionExec::try_new(
|
||||
@@ -1668,18 +1668,14 @@ impl NativeTable {
|
||||
|
||||
// Use DatasetBuilder::from_namespace which automatically fetches location
|
||||
// and storage options from the namespace
|
||||
let builder = DatasetBuilder::from_namespace(
|
||||
namespace_client.clone(),
|
||||
table_id,
|
||||
false, // Don't ignore namespace storage options
|
||||
)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
lance::Error::Namespace { source, .. } => Error::Runtime {
|
||||
message: format!("Failed to get table info from namespace: {:?}", source),
|
||||
},
|
||||
source => Error::Lance { source },
|
||||
})?;
|
||||
let builder = DatasetBuilder::from_namespace(namespace_client.clone(), table_id)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
lance::Error::Namespace { source, .. } => Error::Runtime {
|
||||
message: format!("Failed to get table info from namespace: {:?}", source),
|
||||
},
|
||||
source => Error::Lance { source },
|
||||
})?;
|
||||
|
||||
let dataset = builder
|
||||
.with_read_params(params)
|
||||
@@ -1883,7 +1879,13 @@ impl NativeTable {
|
||||
let store_params = params
|
||||
.store_params
|
||||
.get_or_insert_with(ObjectStoreParams::default);
|
||||
store_params.storage_options_provider = Some(storage_options_provider);
|
||||
let accessor = match store_params.storage_options().cloned() {
|
||||
Some(options) => {
|
||||
StorageOptionsAccessor::with_initial_and_provider(options, storage_options_provider)
|
||||
}
|
||||
None => StorageOptionsAccessor::with_provider(storage_options_provider),
|
||||
};
|
||||
store_params.storage_options_accessor = Some(Arc::new(accessor));
|
||||
|
||||
// Patch the params if we have a write store wrapper
|
||||
let params = match write_store_wrapper.clone() {
|
||||
@@ -2349,7 +2351,7 @@ impl NativeTable {
|
||||
};
|
||||
|
||||
// Convert select to columns list
|
||||
let columns: Option<Box<QueryTableRequestColumns>> = match &vq.base.select {
|
||||
let columns = match &vq.base.select {
|
||||
Select::All => None,
|
||||
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
|
||||
column_names: Some(cols.clone()),
|
||||
@@ -2407,7 +2409,6 @@ impl NativeTable {
|
||||
with_row_id: Some(vq.base.with_row_id),
|
||||
bypass_vector_index: Some(!vq.use_index),
|
||||
full_text_query,
|
||||
version: None,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
@@ -2426,7 +2427,7 @@ impl NativeTable {
|
||||
.map(|f| self.filter_to_sql(f))
|
||||
.transpose()?;
|
||||
|
||||
let columns: Option<Box<QueryTableRequestColumns>> = match &q.select {
|
||||
let columns = match &q.select {
|
||||
Select::All => None,
|
||||
Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
|
||||
column_names: Some(cols.clone()),
|
||||
@@ -2470,18 +2471,10 @@ impl NativeTable {
|
||||
columns,
|
||||
prefilter: Some(q.prefilter),
|
||||
offset: q.offset.map(|o| o as i32),
|
||||
ef: None,
|
||||
refine_factor: None,
|
||||
distance_type: None,
|
||||
nprobes: None,
|
||||
vector_column: None, // No vector column for plain queries
|
||||
with_row_id: Some(q.with_row_id),
|
||||
bypass_vector_index: Some(true), // No vector index for plain queries
|
||||
full_text_query,
|
||||
version: None,
|
||||
fast_search: None,
|
||||
lower_bound: None,
|
||||
upper_bound: None,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
@@ -3244,7 +3237,7 @@ impl BaseTable for NativeTable {
|
||||
.get()
|
||||
.await
|
||||
.ok()
|
||||
.and_then(|dataset| dataset.storage_options().cloned())
|
||||
.and_then(|dataset| dataset.initial_storage_options().cloned())
|
||||
}
|
||||
|
||||
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
|
||||
@@ -5154,15 +5147,16 @@ mod tests {
|
||||
let any_query = AnyQuery::VectorQuery(vq);
|
||||
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
|
||||
|
||||
let column_names = ns_request
|
||||
.columns
|
||||
.as_ref()
|
||||
.and_then(|cols| cols.column_names.clone());
|
||||
|
||||
assert_eq!(ns_request.k, 10);
|
||||
assert_eq!(ns_request.offset, Some(5));
|
||||
assert_eq!(ns_request.filter, Some("id > 0".to_string()));
|
||||
assert_eq!(column_names, Some(vec!["id".to_string()]));
|
||||
assert_eq!(
|
||||
ns_request
|
||||
.columns
|
||||
.as_ref()
|
||||
.and_then(|c| c.column_names.as_ref()),
|
||||
Some(&vec!["id".to_string()])
|
||||
);
|
||||
assert_eq!(ns_request.vector_column, Some("vector".to_string()));
|
||||
assert_eq!(ns_request.distance_type, Some("l2".to_string()));
|
||||
assert!(ns_request.vector.single_vector.is_some());
|
||||
@@ -5199,16 +5193,17 @@ mod tests {
|
||||
let any_query = AnyQuery::Query(q);
|
||||
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
|
||||
|
||||
let column_names = ns_request
|
||||
.columns
|
||||
.as_ref()
|
||||
.and_then(|cols| cols.column_names.clone());
|
||||
|
||||
// Plain queries should pass an empty vector
|
||||
assert_eq!(ns_request.k, 20);
|
||||
assert_eq!(ns_request.offset, Some(5));
|
||||
assert_eq!(ns_request.filter, Some("id > 5".to_string()));
|
||||
assert_eq!(column_names, Some(vec!["id".to_string()]));
|
||||
assert_eq!(
|
||||
ns_request
|
||||
.columns
|
||||
.as_ref()
|
||||
.and_then(|c| c.column_names.as_ref()),
|
||||
Some(&vec!["id".to_string()])
|
||||
);
|
||||
assert_eq!(ns_request.with_row_id, Some(true));
|
||||
assert_eq!(ns_request.bypass_vector_index, Some(true));
|
||||
assert!(ns_request.vector_column.is_none()); // No vector column for plain queries
|
||||
|
||||
Reference in New Issue
Block a user