mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-23 06:50:40 +00:00
Compare commits
13 Commits
codex/upda
...
codex/upda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dd8defa664 | ||
|
|
59db036118 | ||
|
|
c091243d5b | ||
|
|
a2aea7b4e5 | ||
|
|
4a5341edb1 | ||
|
|
25dfe2cfd4 | ||
|
|
4dcd7f4314 | ||
|
|
2e36cd9dad | ||
|
|
f31e27768a | ||
|
|
b84150a53e | ||
|
|
d135c18db6 | ||
|
|
ef399de092 | ||
|
|
0d767abd0e |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.28.0-beta.9"
|
||||
current_version = "0.28.0-beta.11"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
18
.github/dependabot.yml
vendored
Normal file
18
.github/dependabot.yml
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
version: 2
|
||||
|
||||
# Scope: the root Cargo workspace, which produces the Rust binaries we
|
||||
# ship to users (the Node.js and Python native extensions). The
|
||||
# `rust/lancedb` library crate shares the same lockfile; its consumers
|
||||
# pick their own dependency versions, but bumping transitive deps here
|
||||
# keeps the binaries we ship current.
|
||||
updates:
|
||||
- package-ecosystem: cargo
|
||||
directory: /
|
||||
schedule:
|
||||
interval: weekly
|
||||
open-pull-requests-limit: 10
|
||||
groups:
|
||||
rust-minor-patch:
|
||||
update-types:
|
||||
- minor
|
||||
- patch
|
||||
12
.github/workflows/pypi-publish.yml
vendored
12
.github/workflows/pypi-publish.yml
vendored
@@ -21,6 +21,9 @@ jobs:
|
||||
linux:
|
||||
name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }}
|
||||
timeout-minutes: 60
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
@@ -60,10 +63,12 @@ jobs:
|
||||
- uses: ./.github/workflows/upload_wheel
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
with:
|
||||
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||
fury_token: ${{ secrets.FURY_TOKEN }}
|
||||
mac:
|
||||
timeout-minutes: 90
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
runs-on: ${{ matrix.config.runner }}
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -88,10 +93,12 @@ jobs:
|
||||
- uses: ./.github/workflows/upload_wheel
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
with:
|
||||
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||
fury_token: ${{ secrets.FURY_TOKEN }}
|
||||
windows:
|
||||
timeout-minutes: 60
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -110,7 +117,6 @@ jobs:
|
||||
- uses: ./.github/workflows/upload_wheel
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
with:
|
||||
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||
fury_token: ${{ secrets.FURY_TOKEN }}
|
||||
gh-release:
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
|
||||
14
.github/workflows/rust.yml
vendored
14
.github/workflows/rust.yml
vendored
@@ -9,7 +9,10 @@ on:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- deny.toml
|
||||
- rust/**
|
||||
- nodejs/Cargo.toml
|
||||
- python/Cargo.toml
|
||||
- .github/workflows/rust.yml
|
||||
|
||||
permissions:
|
||||
@@ -56,6 +59,17 @@ jobs:
|
||||
- name: Run clippy (without remote feature)
|
||||
run: cargo clippy --profile ci --workspace --tests -- -D warnings
|
||||
|
||||
deny:
|
||||
# Supply-chain checks: advisories, licenses, banned crates, and source
|
||||
# restrictions. Configuration lives in `deny.toml` at the workspace root.
|
||||
timeout-minutes: 10
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: EmbarkStudios/cargo-deny-action@v2
|
||||
with:
|
||||
command: check advisories bans licenses sources
|
||||
|
||||
build-no-lock:
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 30
|
||||
|
||||
31
.github/workflows/upload_wheel/action.yml
vendored
31
.github/workflows/upload_wheel/action.yml
vendored
@@ -2,9 +2,6 @@ name: upload-wheel
|
||||
|
||||
description: "Upload wheels to Pypi"
|
||||
inputs:
|
||||
pypi_token:
|
||||
required: true
|
||||
description: "release token for the repo"
|
||||
fury_token:
|
||||
required: true
|
||||
description: "release token for the fury repo"
|
||||
@@ -12,12 +9,6 @@ inputs:
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install twine
|
||||
python3 -m pip install --upgrade pkginfo
|
||||
- name: Choose repo
|
||||
shell: bash
|
||||
id: choose_repo
|
||||
@@ -27,19 +18,17 @@ runs:
|
||||
else
|
||||
echo "repo=pypi" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
- name: Publish to PyPI
|
||||
- name: Publish to Fury
|
||||
if: steps.choose_repo.outputs.repo == 'fury'
|
||||
shell: bash
|
||||
env:
|
||||
FURY_TOKEN: ${{ inputs.fury_token }}
|
||||
PYPI_TOKEN: ${{ inputs.pypi_token }}
|
||||
run: |
|
||||
if [[ ${{ steps.choose_repo.outputs.repo }} == fury ]]; then
|
||||
WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
|
||||
echo "Uploading $WHEEL to Fury"
|
||||
curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
|
||||
else
|
||||
twine upload --repository ${{ steps.choose_repo.outputs.repo }} \
|
||||
--username __token__ \
|
||||
--password $PYPI_TOKEN \
|
||||
target/wheels/lancedb-*.whl
|
||||
fi
|
||||
WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
|
||||
echo "Uploading $WHEEL to Fury"
|
||||
curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
|
||||
- name: Publish to PyPI
|
||||
if: steps.choose_repo.outputs.repo == 'pypi'
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
packages-dir: target/wheels/
|
||||
|
||||
512
Cargo.lock
generated
512
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
64
Cargo.toml
64
Cargo.toml
@@ -1,7 +1,5 @@
|
||||
[workspace]
|
||||
members = ["rust/lancedb", "nodejs", "python"]
|
||||
# Python package needs to be built by maturin.
|
||||
exclude = ["python"]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
@@ -15,40 +13,40 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=6.0.0-beta.2", default-features = false, "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=6.0.0-beta.2", "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=6.0.0-beta.2", "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=6.0.0-beta.2", "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=6.0.0-beta.2", default-features = false, "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=6.0.0-beta.2", "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=6.0.0-beta.2", "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=6.0.0-beta.2", "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=6.0.0-beta.2", default-features = false, "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=6.0.0-beta.2", "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=6.0.0-beta.2", "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=6.0.0-beta.2", "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=6.0.0-beta.2", "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=6.0.0-beta.2", "tag" = "v6.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=7.0.0-beta.1", default-features = false, "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=7.0.0-beta.1", "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=7.0.0-beta.1", "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=7.0.0-beta.1", "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=7.0.0-beta.1", default-features = false, "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=7.0.0-beta.1", "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=7.0.0-beta.1", "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=7.0.0-beta.1", "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=7.0.0-beta.1", default-features = false, "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=7.0.0-beta.1", "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=7.0.0-beta.1", "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=7.0.0-beta.1", "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=7.0.0-beta.1", "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=7.0.0-beta.1", "tag" = "v7.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "57.2", optional = false }
|
||||
arrow-array = "57.2"
|
||||
arrow-data = "57.2"
|
||||
arrow-ipc = "57.2"
|
||||
arrow-ord = "57.2"
|
||||
arrow-schema = "57.2"
|
||||
arrow-select = "57.2"
|
||||
arrow-cast = "57.2"
|
||||
arrow = { version = "58.0.0", optional = false }
|
||||
arrow-array = "58.0.0"
|
||||
arrow-data = "58.0.0"
|
||||
arrow-ipc = "58.0.0"
|
||||
arrow-ord = "58.0.0"
|
||||
arrow-schema = "58.0.0"
|
||||
arrow-select = "58.0.0"
|
||||
arrow-cast = "58.0.0"
|
||||
async-trait = "0"
|
||||
datafusion = { version = "52.1", default-features = false }
|
||||
datafusion-catalog = "52.1"
|
||||
datafusion-common = { version = "52.1", default-features = false }
|
||||
datafusion-execution = "52.1"
|
||||
datafusion-expr = "52.1"
|
||||
datafusion-functions = "52.1"
|
||||
datafusion-physical-plan = "52.1"
|
||||
datafusion-physical-expr = "52.1"
|
||||
datafusion-sql = "52.1"
|
||||
datafusion = { version = "53.0.0", default-features = false }
|
||||
datafusion-catalog = "53.0.0"
|
||||
datafusion-common = { version = "53.0.0", default-features = false }
|
||||
datafusion-execution = "53.0.0"
|
||||
datafusion-expr = "53.0.0"
|
||||
datafusion-functions = "53.0.0"
|
||||
datafusion-physical-plan = "53.0.0"
|
||||
datafusion-physical-expr = "53.0.0"
|
||||
datafusion-sql = "53.0.0"
|
||||
env_logger = "0.11"
|
||||
half = { "version" = "2.7.1", default-features = false, features = [
|
||||
"num-traits",
|
||||
|
||||
172
deny.toml
Normal file
172
deny.toml
Normal file
@@ -0,0 +1,172 @@
|
||||
# cargo-deny configuration for LanceDB.
|
||||
#
|
||||
# Run locally with `cargo deny check`. See
|
||||
# https://embarkstudios.github.io/cargo-deny/ for the full reference.
|
||||
|
||||
# The set of target triples we care about. cargo-deny will only consider
|
||||
# dependencies that are used on at least one of these targets. Keeping this
|
||||
# explicit avoids noise from platform-specific crates (e.g. wasm, android,
|
||||
# ios) that we never actually ship.
|
||||
[graph]
|
||||
targets = [
|
||||
"x86_64-unknown-linux-gnu",
|
||||
"aarch64-unknown-linux-gnu",
|
||||
"x86_64-apple-darwin",
|
||||
"aarch64-apple-darwin",
|
||||
"x86_64-pc-windows-msvc",
|
||||
"aarch64-pc-windows-msvc",
|
||||
]
|
||||
all-features = true
|
||||
|
||||
[output]
|
||||
feature-depth = 1
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Advisories: security vulnerabilities and yanked crates.
|
||||
# ---------------------------------------------------------------------------
|
||||
[advisories]
|
||||
version = 2
|
||||
# Fail the check if any crate in the lockfile has been yanked from crates.io.
|
||||
# Yanked crates are a signal the author retracted the release (often due to
|
||||
# bugs or security issues) and should not be depended on.
|
||||
yanked = "deny"
|
||||
# Advisory IDs we have explicitly reviewed and chosen to accept. Every
|
||||
# entry must include a rationale and, where possible, an upstream issue
|
||||
# pointing to a fix. Revisit this list whenever dependencies are updated.
|
||||
ignore = [
|
||||
# rsa: Marvin Attack timing side-channel in PKCS#1 v1.5 decryption.
|
||||
# Reached only through opendal → reqsign → rsa. We do not use RSA
|
||||
# decryption in LanceDB ourselves; this is dormant in the signing path.
|
||||
# No fixed release exists upstream as of this writing.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2023-0071
|
||||
{ id = "RUSTSEC-2023-0071", reason = "rsa crate via opendal/reqsign; no fixed upstream release" },
|
||||
|
||||
# instant: unmaintained. Pulled in via backoff → instant. Upstream
|
||||
# recommends switching to `web-time`; fix has to come from backoff.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2024-0384
|
||||
{ id = "RUSTSEC-2024-0384", reason = "transitive via backoff; waiting on backoff replacement" },
|
||||
|
||||
# paste: unmaintained (author archived the repo). Used transitively by
|
||||
# datafusion and the arrow ecosystem; widespread, no drop-in replacement.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2024-0436
|
||||
{ id = "RUSTSEC-2024-0436", reason = "transitive via datafusion; awaiting ecosystem migration" },
|
||||
|
||||
# tantivy: segfault on malformed input due to missing bounds check.
|
||||
# Pulled in via lance for full-text search. We only feed tantivy
|
||||
# documents we construct ourselves, not attacker-controlled bytes.
|
||||
# Tracked for a lance dependency bump.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0003
|
||||
{ id = "RUSTSEC-2025-0003", reason = "tantivy via lance; inputs are internally produced, not user-supplied bytes" },
|
||||
|
||||
# backoff: unmaintained. Reached only via async-openai. Replacement
|
||||
# requires async-openai to migrate (or us to drop async-openai).
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0012
|
||||
{ id = "RUSTSEC-2025-0012", reason = "transitive via async-openai; waiting on upstream migration" },
|
||||
|
||||
# number_prefix: unmaintained. Transitive via indicatif → hf-hub.
|
||||
# No security impact, just maintenance status.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0119
|
||||
{ id = "RUSTSEC-2025-0119", reason = "transitive via hf-hub/indicatif; cosmetic formatting crate" },
|
||||
|
||||
# rustls-pemfile: unmaintained. Reached from two separate chains:
|
||||
# rustls-native-certs 0.6 (via hyper-rustls 0.24) and object_store 0.12.
|
||||
# Both upstream dependencies need to move before we can drop it.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0134
|
||||
{ id = "RUSTSEC-2025-0134", reason = "transitive via rustls-native-certs/object_store; waiting on upstream migration" },
|
||||
|
||||
# rustls-webpki 0.101.7 (old major line): name-constraint checks for
|
||||
# URI / wildcard names. Pulled in only via the legacy rustls 0.21 chain
|
||||
# from aws-smithy-http-client. The 0.103 line we actively use is patched.
|
||||
# Clearing the 0.101 copy requires the aws-sdk chain to migrate off
|
||||
# rustls 0.21.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0098
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0099
|
||||
{ id = "RUSTSEC-2026-0098", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
|
||||
{ id = "RUSTSEC-2026-0099", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
|
||||
|
||||
# rustls-webpki 0.101.7: reachable panic in CRL parsing. Same legacy
|
||||
# rustls 0.21 chain from aws-smithy-http-client as above. The 0.103 line
|
||||
# we actively use is upgraded to 0.103.13 which contains the fix.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0104
|
||||
{ id = "RUSTSEC-2026-0104", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
|
||||
]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Licenses: only allow licenses we've reviewed as compatible with Apache-2.0.
|
||||
# ---------------------------------------------------------------------------
|
||||
[licenses]
|
||||
version = 2
|
||||
# SPDX identifiers for licenses that are compatible with our Apache-2.0
|
||||
# distribution. Additions require legal review.
|
||||
allow = [
|
||||
"Apache-2.0",
|
||||
"Apache-2.0 WITH LLVM-exception",
|
||||
"MIT",
|
||||
"BSD-2-Clause",
|
||||
"BSD-3-Clause",
|
||||
"ISC",
|
||||
"Unicode-3.0",
|
||||
"Unicode-DFS-2016",
|
||||
"Zlib",
|
||||
"CC0-1.0",
|
||||
"MPL-2.0",
|
||||
"BSL-1.0",
|
||||
"OpenSSL",
|
||||
# 0BSD ("BSD Zero Clause") is effectively public domain — no attribution
|
||||
# required. Pulled in by `mock_instant`.
|
||||
"0BSD",
|
||||
# bzip2-1.0.6 is the permissive upstream bzip2 license (BSD-like). Pulled
|
||||
# in by `libbz2-rs-sys`, the pure-Rust bzip2 implementation.
|
||||
"bzip2-1.0.6",
|
||||
# CDLA-Permissive-2.0 is a permissive data license used by `webpki-roots`
|
||||
# for the Mozilla CA root bundle. Data-only, distribution-compatible.
|
||||
"CDLA-Permissive-2.0",
|
||||
]
|
||||
confidence-threshold = 0.8
|
||||
# Crates whose license cannot be determined from Cargo metadata but whose
|
||||
# license we've manually confirmed from upstream. Keep this list minimal.
|
||||
[[licenses.clarify]]
|
||||
# polars-arrow-format omits the `license` field in its Cargo.toml, but the
|
||||
# upstream repo (pola-rs/polars-arrow-format) is dual-licensed Apache-2.0 OR
|
||||
# MIT. See https://github.com/pola-rs/polars-arrow-format/blob/main/LICENSE
|
||||
crate = "polars-arrow-format"
|
||||
expression = "Apache-2.0 OR MIT"
|
||||
license-files = []
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bans: disallow specific crates and flag dependency hygiene issues.
|
||||
# ---------------------------------------------------------------------------
|
||||
[bans]
|
||||
# Warn (not deny) on duplicate versions of the same crate. In a large
|
||||
# workspace like this one, duplicates are common and often unavoidable
|
||||
# transitively. We surface them to discourage growth, but don't fail CI.
|
||||
multiple-versions = "warn"
|
||||
# Wildcard version requirements (`foo = "*"`) are a footgun — they let any
|
||||
# future release in without review. Ban them outright.
|
||||
wildcards = "deny"
|
||||
# Internal workspace crates reference each other via `path = "..."`, which
|
||||
# cargo-deny sees as a wildcard version. That's fine for private workspace
|
||||
# members (not published to crates.io), so allow it specifically for paths.
|
||||
allow-wildcard-paths = true
|
||||
# Features that, if enabled, should cause the check to fail.
|
||||
deny = []
|
||||
# Crates to skip when checking for duplicate versions.
|
||||
skip = []
|
||||
# Similar to `skip`, but also skips the entire transitive subtree.
|
||||
skip-tree = []
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sources: restrict where crates can come from.
|
||||
# ---------------------------------------------------------------------------
|
||||
[sources]
|
||||
# Deny any registry other than the ones explicitly listed below.
|
||||
unknown-registry = "deny"
|
||||
# Deny any git dependency whose host isn't in the allow-list below. This
|
||||
# prevents accidental pulls from arbitrary forks.
|
||||
unknown-git = "deny"
|
||||
allow-registry = ["https://github.com/rust-lang/crates.io-index"]
|
||||
# Lance is developed in a sibling repo and pulled as a git dependency until
|
||||
# releases are cut to crates.io. Allow that specific host.
|
||||
allow-git = [
|
||||
"https://github.com/lance-format/lance",
|
||||
]
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.28.0-beta.9</version>
|
||||
<version>0.28.0-beta.11</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -41,6 +41,29 @@ for testing purposes.
|
||||
|
||||
***
|
||||
|
||||
### manifestEnabled?
|
||||
|
||||
```ts
|
||||
optional manifestEnabled: boolean;
|
||||
```
|
||||
|
||||
(For LanceDB OSS only): use directory namespace manifests as the source
|
||||
of truth for table metadata. Existing directory-listed root tables are
|
||||
migrated into the manifest on access.
|
||||
|
||||
***
|
||||
|
||||
### namespaceClientProperties?
|
||||
|
||||
```ts
|
||||
optional namespaceClientProperties: Record<string, string>;
|
||||
```
|
||||
|
||||
(For LanceDB OSS only): extra properties for the backing namespace
|
||||
client used by manifest-enabled native connections.
|
||||
|
||||
***
|
||||
|
||||
### readConsistencyInterval?
|
||||
|
||||
```ts
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.28.0-beta.9</version>
|
||||
<version>0.28.0-beta.11</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.28.0-beta.9</version>
|
||||
<version>0.28.0-beta.11</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>6.0.0-beta.2</lance-core.version>
|
||||
<lance-core.version>7.0.0-beta.1</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.28.0-beta.9"
|
||||
version = "0.28.0-beta.11"
|
||||
publish = false
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
@@ -15,7 +16,7 @@ crate-type = ["cdylib"]
|
||||
async-trait.workspace = true
|
||||
arrow-ipc.workspace = true
|
||||
arrow-array.workspace = true
|
||||
arrow-buffer = "57.2"
|
||||
arrow-buffer = "58.0.0"
|
||||
half.workspace = true
|
||||
arrow-schema.workspace = true
|
||||
env_logger.workspace = true
|
||||
@@ -31,8 +32,8 @@ lzma-sys = { version = "0.1", features = ["static"] }
|
||||
log.workspace = true
|
||||
|
||||
# Pin to resolve build failures; update periodically for security patches.
|
||||
aws-lc-sys = "=0.38.0"
|
||||
aws-lc-rs = "=1.16.1"
|
||||
aws-lc-sys = "=0.40.0"
|
||||
aws-lc-rs = "=1.16.3"
|
||||
|
||||
[build-dependencies]
|
||||
napi-build = "2.3.1"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.28.0-beta.9",
|
||||
"version": "0.28.0-beta.11",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.28.0-beta.9",
|
||||
"version": "0.28.0-beta.11",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.28.0-beta.9",
|
||||
"version": "0.28.0-beta.11",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.28.0-beta.9",
|
||||
"version": "0.28.0-beta.11",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.28.0-beta.9",
|
||||
"version": "0.28.0-beta.11",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.28.0-beta.9",
|
||||
"version": "0.28.0-beta.11",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.28.0-beta.9",
|
||||
"version": "0.28.0-beta.11",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.28.0-beta.9",
|
||||
"version": "0.28.0-beta.11",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.28.0-beta.9",
|
||||
"version": "0.28.0-beta.11",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.28.0-beta.9",
|
||||
"version": "0.28.0-beta.11",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -67,6 +67,12 @@ impl Connection {
|
||||
builder = builder.storage_option(key, value);
|
||||
}
|
||||
}
|
||||
if let Some(manifest_enabled) = options.manifest_enabled {
|
||||
builder = builder.manifest_enabled(manifest_enabled);
|
||||
}
|
||||
if let Some(namespace_client_properties) = options.namespace_client_properties {
|
||||
builder = builder.namespace_client_properties(namespace_client_properties);
|
||||
}
|
||||
|
||||
// Create client config, optionally with header provider
|
||||
let client_config = options.client_config.unwrap_or_default();
|
||||
|
||||
@@ -37,6 +37,13 @@ pub struct ConnectionOptions {
|
||||
///
|
||||
/// The available options are described at https://docs.lancedb.com/storage/
|
||||
pub storage_options: Option<HashMap<String, String>>,
|
||||
/// (For LanceDB OSS only): use directory namespace manifests as the source
|
||||
/// of truth for table metadata. Existing directory-listed root tables are
|
||||
/// migrated into the manifest on access.
|
||||
pub manifest_enabled: Option<bool>,
|
||||
/// (For LanceDB OSS only): extra properties for the backing namespace
|
||||
/// client used by manifest-enabled native connections.
|
||||
pub namespace_client_properties: Option<HashMap<String, String>>,
|
||||
/// (For LanceDB OSS only): the session to use for this connection. Holds
|
||||
/// shared caches and other session-specific state.
|
||||
pub session: Option<session::Session>,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.31.0-beta.9"
|
||||
current_version = "0.31.0-beta.11"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.31.0-beta.9"
|
||||
version = "0.31.0-beta.11"
|
||||
publish = false
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
@@ -14,7 +15,7 @@ name = "_lancedb"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
arrow = { version = "57.2", features = ["pyarrow"] }
|
||||
arrow = { version = "58.0.0", features = ["pyarrow"] }
|
||||
async-trait = "0.1"
|
||||
bytes = "1"
|
||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||
@@ -24,8 +25,8 @@ lance-namespace-impls.workspace = true
|
||||
lance-io.workspace = true
|
||||
env_logger.workspace = true
|
||||
log.workspace = true
|
||||
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
|
||||
pyo3-async-runtimes = { version = "0.26", features = [
|
||||
pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39"] }
|
||||
pyo3-async-runtimes = { version = "0.28", features = [
|
||||
"attributes",
|
||||
"tokio-runtime",
|
||||
] }
|
||||
@@ -37,7 +38,7 @@ snafu.workspace = true
|
||||
tokio = { version = "1.40", features = ["sync"] }
|
||||
|
||||
[build-dependencies]
|
||||
pyo3-build-config = { version = "0.26", features = [
|
||||
pyo3-build-config = { version = "0.28", features = [
|
||||
"extension-module",
|
||||
"abi3-py39",
|
||||
] }
|
||||
|
||||
@@ -73,6 +73,7 @@ def connect(
|
||||
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_impl: Optional[str] = None,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
namespace_client_pushdown_operations: Optional[List[str]] = None,
|
||||
@@ -111,6 +112,10 @@ def connect(
|
||||
storage_options: dict, optional
|
||||
Additional options for the storage backend. See available options at
|
||||
<https://docs.lancedb.com/storage/>
|
||||
manifest_enabled : bool, default False
|
||||
When true for local/native connections, use directory namespace
|
||||
manifests as the source of truth for table metadata. Existing
|
||||
directory-listed root tables are migrated into the manifest on access.
|
||||
session: Session, optional
|
||||
(For LanceDB OSS only)
|
||||
A session to use for this connection. Sessions allow you to configure
|
||||
@@ -158,11 +163,11 @@ def connect(
|
||||
conn : DBConnection
|
||||
A connection to a LanceDB database.
|
||||
"""
|
||||
if namespace_client_impl is not None or namespace_client_properties is not None:
|
||||
if namespace_client_impl is None or namespace_client_properties is None:
|
||||
if namespace_client_impl is not None:
|
||||
if namespace_client_properties is None:
|
||||
raise ValueError(
|
||||
"Both namespace_client_impl and "
|
||||
"namespace_client_properties must be provided"
|
||||
"namespace_client_properties must be provided when "
|
||||
"namespace_client_impl is set"
|
||||
)
|
||||
if kwargs:
|
||||
raise ValueError(f"Unknown keyword arguments: {kwargs}")
|
||||
@@ -175,6 +180,12 @@ def connect(
|
||||
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
|
||||
)
|
||||
|
||||
if namespace_client_properties is not None and not manifest_enabled:
|
||||
raise ValueError(
|
||||
"namespace_client_impl must be provided when using "
|
||||
"namespace_client_properties unless manifest_enabled=True"
|
||||
)
|
||||
|
||||
if namespace_client_pushdown_operations is not None:
|
||||
raise ValueError(
|
||||
"namespace_client_pushdown_operations is only valid when "
|
||||
@@ -212,6 +223,8 @@ def connect(
|
||||
read_consistency_interval=read_consistency_interval,
|
||||
storage_options=storage_options,
|
||||
session=session,
|
||||
manifest_enabled=manifest_enabled,
|
||||
namespace_client_properties=namespace_client_properties,
|
||||
)
|
||||
|
||||
|
||||
@@ -289,6 +302,8 @@ def deserialize_conn(
|
||||
parsed["uri"],
|
||||
read_consistency_interval=rci,
|
||||
storage_options=storage_options,
|
||||
manifest_enabled=parsed.get("manifest_enabled", False),
|
||||
namespace_client_properties=parsed.get("namespace_client_properties"),
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown connection_type: {connection_type}")
|
||||
@@ -304,6 +319,8 @@ async def connect_async(
|
||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
) -> AsyncConnection:
|
||||
"""Connect to a LanceDB database.
|
||||
|
||||
@@ -343,6 +360,13 @@ async def connect_async(
|
||||
cache sizes for index and metadata caches, which can significantly
|
||||
impact memory use and performance. They can also be re-used across
|
||||
multiple connections to share the same cache state.
|
||||
manifest_enabled : bool, default False
|
||||
When true for local/native connections, use directory namespace
|
||||
manifests as the source of truth for table metadata. Existing
|
||||
directory-listed root tables are migrated into the manifest on access.
|
||||
namespace_client_properties : dict, optional
|
||||
Additional directory namespace client properties to use with
|
||||
``manifest_enabled=True``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
@@ -385,6 +409,8 @@ async def connect_async(
|
||||
client_config,
|
||||
storage_options,
|
||||
session,
|
||||
manifest_enabled,
|
||||
namespace_client_properties,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -242,6 +242,8 @@ async def connect(
|
||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
|
||||
storage_options: Optional[Dict[str, str]],
|
||||
session: Optional[Session],
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
) -> Connection: ...
|
||||
|
||||
class RecordBatchStream:
|
||||
|
||||
@@ -590,8 +590,13 @@ class LanceDBConnection(DBConnection):
|
||||
read_consistency_interval: Optional[timedelta] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
_inner: Optional[LanceDbConnection] = None,
|
||||
):
|
||||
self.storage_options = storage_options
|
||||
self._manifest_enabled = manifest_enabled
|
||||
self._namespace_client_properties = namespace_client_properties
|
||||
if _inner is not None:
|
||||
self._conn = _inner
|
||||
self._cached_namespace_client = None
|
||||
@@ -633,6 +638,8 @@ class LanceDBConnection(DBConnection):
|
||||
None,
|
||||
storage_options,
|
||||
session,
|
||||
manifest_enabled,
|
||||
namespace_client_properties,
|
||||
)
|
||||
|
||||
# TODO: It would be nice if we didn't store self.storage_options but it is
|
||||
@@ -640,7 +647,6 @@ class LanceDBConnection(DBConnection):
|
||||
# work because some paths like LanceDBConnection.from_inner will lose the
|
||||
# storage_options. Also, this class really shouldn't be holding any state
|
||||
# beyond _conn.
|
||||
self.storage_options = storage_options
|
||||
self._conn = AsyncConnection(LOOP.run(do_connect()))
|
||||
self._cached_namespace_client: Optional[LanceNamespace] = None
|
||||
|
||||
@@ -677,6 +683,8 @@ class LanceDBConnection(DBConnection):
|
||||
"connection_type": "local",
|
||||
"uri": self.uri,
|
||||
"storage_options": self.storage_options,
|
||||
"manifest_enabled": self._manifest_enabled,
|
||||
"namespace_client_properties": self._namespace_client_properties,
|
||||
"read_consistency_interval_seconds": (
|
||||
rci.total_seconds() if rci else None
|
||||
),
|
||||
|
||||
@@ -1643,7 +1643,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
def _validate_query(self, query, vector=None, text=None):
|
||||
if query is not None and (vector is not None or text is not None):
|
||||
raise ValueError(
|
||||
"You can either provide a string query in search() method"
|
||||
"You can either provide a string query in search() method "
|
||||
"or set `vector()` and `text()` explicitly for hybrid search."
|
||||
"But not both."
|
||||
)
|
||||
|
||||
@@ -525,7 +525,7 @@ impl Connection {
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
|
||||
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None, manifest_enabled=false, namespace_client_properties=None))]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn connect(
|
||||
py: Python<'_>,
|
||||
@@ -537,6 +537,8 @@ pub fn connect(
|
||||
client_config: Option<PyClientConfig>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
session: Option<crate::session::Session>,
|
||||
manifest_enabled: bool,
|
||||
namespace_client_properties: Option<HashMap<String, String>>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
future_into_py(py, async move {
|
||||
let mut builder = lancedb::connect(&uri);
|
||||
@@ -556,6 +558,12 @@ pub fn connect(
|
||||
if let Some(storage_options) = storage_options {
|
||||
builder = builder.storage_options(storage_options);
|
||||
}
|
||||
if manifest_enabled {
|
||||
builder = builder.manifest_enabled(true);
|
||||
}
|
||||
if let Some(namespace_client_properties) = namespace_client_properties {
|
||||
builder = builder.namespace_client_properties(namespace_client_properties);
|
||||
}
|
||||
#[cfg(feature = "remote")]
|
||||
if let Some(client_config) = client_config {
|
||||
builder = builder.client_config(client_config.into());
|
||||
|
||||
@@ -17,7 +17,7 @@ use pyo3::{Bound, PyAny, PyResult, exceptions::PyValueError, prelude::*, pyfunct
|
||||
/// [`expr_lit`] and combined with the methods on this struct. On the Python
|
||||
/// side a thin wrapper class (`lancedb.expr.Expr`) delegates to these methods
|
||||
/// and adds Python operator overloads.
|
||||
#[pyclass(name = "PyExpr")]
|
||||
#[pyclass(name = "PyExpr", from_py_object)]
|
||||
#[derive(Clone)]
|
||||
pub struct PyExpr(pub DfExpr);
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ impl PyHeaderProvider {
|
||||
Ok(headers_py) => {
|
||||
// Convert Python dict to Rust HashMap
|
||||
let bound_headers = headers_py.bind(py);
|
||||
let dict: &Bound<PyDict> = bound_headers.downcast().map_err(|e| {
|
||||
let dict: &Bound<PyDict> = bound_headers.cast().map_err(|e| {
|
||||
format!("HeaderProvider.get_headers must return a dict: {}", e)
|
||||
})?;
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ use pyo3::{
|
||||
Bound, FromPyObject, PyAny, PyResult, Python,
|
||||
exceptions::{PyKeyError, PyValueError},
|
||||
intern, pyclass, pymethods,
|
||||
types::PyAnyMethods,
|
||||
types::{PyAnyMethods, PyString},
|
||||
};
|
||||
|
||||
use crate::util::parse_distance_type;
|
||||
@@ -22,7 +22,7 @@ pub fn class_name(ob: &'_ Bound<'_, PyAny>) -> PyResult<String> {
|
||||
let full_name = ob
|
||||
.getattr(intern!(ob.py(), "__class__"))?
|
||||
.getattr(intern!(ob.py(), "__name__"))?;
|
||||
let full_name = full_name.downcast()?.to_string_lossy();
|
||||
let full_name = full_name.cast::<PyString>()?.to_string_lossy();
|
||||
|
||||
match full_name.rsplit_once('.') {
|
||||
Some((_, name)) => Ok(name.to_string()),
|
||||
|
||||
@@ -183,7 +183,7 @@ async fn call_py_method_primitive<Req, Resp>(
|
||||
) -> lance_core::Result<Resp>
|
||||
where
|
||||
Req: serde::Serialize + Send + 'static,
|
||||
Resp: for<'py> pyo3::FromPyObject<'py> + Send + 'static,
|
||||
Resp: for<'a, 'py> pyo3::FromPyObject<'a, 'py> + Send + 'static,
|
||||
{
|
||||
let request_json = serde_json::to_string(&request).map_err(|e| {
|
||||
lance_core::Error::io(format!(
|
||||
@@ -203,7 +203,7 @@ where
|
||||
|
||||
// Call the Python method
|
||||
let result = py_namespace.call_method1(py, method_name, (request_arg,))?;
|
||||
let value: Resp = result.extract(py)?;
|
||||
let value: Resp = result.extract(py).map_err(Into::into)?;
|
||||
Ok::<_, PyErr>(value)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -25,12 +25,12 @@ use pyo3_async_runtimes::tokio::future_into_py;
|
||||
|
||||
fn table_from_py<'a>(table: Bound<'a, PyAny>) -> PyResult<Bound<'a, Table>> {
|
||||
if table.hasattr("_inner")? {
|
||||
Ok(table.getattr("_inner")?.downcast_into::<Table>()?)
|
||||
Ok(table.getattr("_inner")?.cast_into::<Table>()?)
|
||||
} else if table.hasattr("_table")? {
|
||||
Ok(table
|
||||
.getattr("_table")?
|
||||
.getattr("_inner")?
|
||||
.downcast_into::<Table>()?)
|
||||
.cast_into::<Table>()?)
|
||||
} else {
|
||||
Err(PyRuntimeError::new_err(
|
||||
"Provided table does not appear to be a Table or RemoteTable instance",
|
||||
@@ -90,9 +90,9 @@ impl PyAsyncPermutationBuilder {
|
||||
database
|
||||
.getattr("_conn")?
|
||||
.getattr("_inner")?
|
||||
.downcast_into::<Connection>()?
|
||||
.cast_into::<Connection>()?
|
||||
} else {
|
||||
database.getattr("_inner")?.downcast_into::<Connection>()?
|
||||
database.getattr("_inner")?.cast_into::<Connection>()?
|
||||
};
|
||||
let database = conn.borrow().database()?;
|
||||
slf.modify(|builder| builder.persist(database, table_name))
|
||||
@@ -243,7 +243,7 @@ impl PyPermutationReader {
|
||||
let Some(selection) = selection else {
|
||||
return Ok(Select::All);
|
||||
};
|
||||
let selection = selection.downcast_into::<PyDict>()?;
|
||||
let selection = selection.cast_into::<PyDict>()?;
|
||||
let selection = selection
|
||||
.iter()
|
||||
.map(|(key, value)| {
|
||||
|
||||
@@ -33,7 +33,7 @@ use pyo3::pyfunction;
|
||||
use pyo3::pymethods;
|
||||
use pyo3::types::PyList;
|
||||
use pyo3::types::{PyDict, PyString};
|
||||
use pyo3::{FromPyObject, exceptions::PyRuntimeError};
|
||||
use pyo3::{Borrowed, FromPyObject, exceptions::PyRuntimeError};
|
||||
use pyo3::{PyErr, pyclass};
|
||||
use pyo3::{exceptions::PyValueError, intern};
|
||||
use pyo3_async_runtimes::tokio::future_into_py;
|
||||
@@ -43,9 +43,12 @@ use crate::util::parse_distance_type;
|
||||
use crate::{arrow::RecordBatchStream, util::PyLanceDB};
|
||||
use crate::{error::PythonErrorExt, index::class_name};
|
||||
|
||||
impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
|
||||
fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult<Self> {
|
||||
match class_name(ob)?.as_str() {
|
||||
impl<'a, 'py> FromPyObject<'a, 'py> for PyLanceDB<FtsQuery> {
|
||||
type Error = PyErr;
|
||||
|
||||
fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
|
||||
let ob = ob.to_owned();
|
||||
match class_name(&ob)?.as_str() {
|
||||
"MatchQuery" => {
|
||||
let query = ob.getattr("query")?.extract()?;
|
||||
let column = ob.getattr("column")?.extract()?;
|
||||
@@ -424,7 +427,7 @@ impl Query {
|
||||
"Query text is required for nearest_to_text",
|
||||
))?;
|
||||
|
||||
let query = if let Ok(query_text) = fts_query.downcast::<PyString>() {
|
||||
let query = if let Ok(query_text) = fts_query.cast::<PyString>() {
|
||||
let mut query_text = query_text.to_string();
|
||||
let columns = query
|
||||
.get_item("columns")?
|
||||
@@ -606,7 +609,7 @@ impl TakeQuery {
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass]
|
||||
#[pyclass(from_py_object)]
|
||||
#[derive(Clone)]
|
||||
pub struct FTSQuery {
|
||||
inner: LanceDbQuery,
|
||||
@@ -735,7 +738,7 @@ impl FTSQuery {
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass]
|
||||
#[pyclass(from_py_object)]
|
||||
#[derive(Clone)]
|
||||
pub struct VectorQuery {
|
||||
inner: LanceDbVectorQuery,
|
||||
|
||||
@@ -11,7 +11,7 @@ use pyo3::{PyResult, pyclass, pymethods};
|
||||
/// Sessions allow you to configure cache sizes for index and metadata caches,
|
||||
/// which can significantly impact memory use and performance. They can
|
||||
/// also be re-used across multiple connections to share the same cache state.
|
||||
#[pyclass]
|
||||
#[pyclass(from_py_object)]
|
||||
#[derive(Clone)]
|
||||
pub struct Session {
|
||||
pub(crate) inner: Arc<LanceSession>,
|
||||
|
||||
@@ -29,7 +29,7 @@ use pyo3_async_runtimes::tokio::future_into_py;
|
||||
mod scannable;
|
||||
|
||||
/// Statistics about a compaction operation.
|
||||
#[pyclass(get_all)]
|
||||
#[pyclass(get_all, from_py_object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CompactionStats {
|
||||
/// The number of fragments removed
|
||||
@@ -43,7 +43,7 @@ pub struct CompactionStats {
|
||||
}
|
||||
|
||||
/// Statistics about a cleanup operation
|
||||
#[pyclass(get_all)]
|
||||
#[pyclass(get_all, from_py_object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RemovalStats {
|
||||
/// The number of bytes removed
|
||||
@@ -53,7 +53,7 @@ pub struct RemovalStats {
|
||||
}
|
||||
|
||||
/// Statistics about an optimize operation
|
||||
#[pyclass(get_all)]
|
||||
#[pyclass(get_all, from_py_object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct OptimizeStats {
|
||||
/// Statistics about the compaction operation
|
||||
@@ -62,7 +62,7 @@ pub struct OptimizeStats {
|
||||
pub prune: RemovalStats,
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[pyclass(get_all, from_py_object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct UpdateResult {
|
||||
pub rows_updated: u64,
|
||||
@@ -88,7 +88,7 @@ impl From<lancedb::table::UpdateResult> for UpdateResult {
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[pyclass(get_all, from_py_object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AddResult {
|
||||
pub version: u64,
|
||||
@@ -109,7 +109,7 @@ impl From<lancedb::table::AddResult> for AddResult {
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[pyclass(get_all, from_py_object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DeleteResult {
|
||||
pub num_deleted_rows: u64,
|
||||
@@ -135,7 +135,7 @@ impl From<lancedb::table::DeleteResult> for DeleteResult {
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[pyclass(get_all, from_py_object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct MergeResult {
|
||||
pub version: u64,
|
||||
@@ -171,7 +171,7 @@ impl From<lancedb::table::MergeResult> for MergeResult {
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[pyclass(get_all, from_py_object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AddColumnsResult {
|
||||
pub version: u64,
|
||||
@@ -192,7 +192,7 @@ impl From<lancedb::table::AddColumnsResult> for AddColumnsResult {
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[pyclass(get_all, from_py_object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AlterColumnsResult {
|
||||
pub version: u64,
|
||||
@@ -213,7 +213,7 @@ impl From<lancedb::table::AlterColumnsResult> for AlterColumnsResult {
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(get_all)]
|
||||
#[pyclass(get_all, from_py_object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DropColumnsResult {
|
||||
pub version: u64,
|
||||
|
||||
@@ -126,8 +126,11 @@ impl Scannable for PyScannable {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'py> FromPyObject<'py> for PyScannable {
|
||||
fn extract_bound(ob: &pyo3::Bound<'py, PyAny>) -> pyo3::PyResult<Self> {
|
||||
impl<'a, 'py> FromPyObject<'a, 'py> for PyScannable {
|
||||
type Error = pyo3::PyErr;
|
||||
|
||||
fn extract(ob: pyo3::Borrowed<'a, 'py, PyAny>) -> pyo3::PyResult<Self> {
|
||||
let ob = ob.to_owned();
|
||||
// Convert from Scannable dataclass.
|
||||
let schema: PyArrowType<Schema> = ob.getattr("schema")?.extract()?;
|
||||
let schema = Arc::new(schema.0);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.28.0-beta.9"
|
||||
version = "0.28.0-beta.11"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
@@ -111,7 +111,12 @@ default = []
|
||||
aws = ["lance/aws", "lance-io/aws", "lance-namespace-impls/dir-aws"]
|
||||
oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"]
|
||||
gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"]
|
||||
azure = ["lance/azure", "lance-io/azure", "lance-namespace-impls/dir-azure"]
|
||||
azure = [
|
||||
"lance/azure",
|
||||
"lance-io/azure",
|
||||
"lance-namespace-impls/dir-azure",
|
||||
"lance-namespace-impls/credential-vendor-azure",
|
||||
]
|
||||
huggingface = [
|
||||
"lance/huggingface",
|
||||
"lance-io/huggingface",
|
||||
|
||||
@@ -590,6 +590,15 @@ pub struct ConnectRequest {
|
||||
/// storage options.
|
||||
pub namespace_client_properties: HashMap<String, String>,
|
||||
|
||||
/// Use directory namespace manifests as the source of truth for native
|
||||
/// LanceDB table metadata.
|
||||
///
|
||||
/// When enabled for a local/native connection, LanceDB returns a
|
||||
/// namespace-backed database directly. Directory listing fallback remains
|
||||
/// enabled for migration, and directory-listing-to-manifest migration is
|
||||
/// forced on.
|
||||
pub manifest_enabled: bool,
|
||||
|
||||
/// The interval at which to check for updates from other processes.
|
||||
///
|
||||
/// If None, then consistency is not checked. For performance
|
||||
@@ -630,6 +639,7 @@ impl ConnectBuilder {
|
||||
read_consistency_interval: None,
|
||||
options: HashMap::new(),
|
||||
namespace_client_properties: HashMap::new(),
|
||||
manifest_enabled: false,
|
||||
session: None,
|
||||
},
|
||||
embedding_registry: None,
|
||||
@@ -791,6 +801,17 @@ impl ConnectBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable or disable manifest-backed directory namespace mode for local
|
||||
/// native connections.
|
||||
///
|
||||
/// When enabled, the connection uses the directory namespace database
|
||||
/// directly for all table operations and forces
|
||||
/// `dir_listing_to_manifest_migration_enabled=true`.
|
||||
pub fn manifest_enabled(mut self, enabled: bool) -> Self {
|
||||
self.request.manifest_enabled = enabled;
|
||||
self
|
||||
}
|
||||
|
||||
/// The interval at which to check for updates from other processes. This
|
||||
/// only affects LanceDB OSS.
|
||||
///
|
||||
@@ -886,6 +907,16 @@ impl ConnectBuilder {
|
||||
pub async fn execute(self) -> Result<Connection> {
|
||||
if self.request.uri.starts_with("db") {
|
||||
self.execute_remote()
|
||||
} else if self.request.manifest_enabled {
|
||||
let internal = Arc::new(
|
||||
ListingDatabase::connect_manifest_enabled_namespace_database(&self.request).await?,
|
||||
);
|
||||
Ok(Connection {
|
||||
internal,
|
||||
embedding_registry: self
|
||||
.embedding_registry
|
||||
.unwrap_or_else(|| Arc::new(MemoryRegistry::new())),
|
||||
})
|
||||
} else {
|
||||
let internal = Arc::new(ListingDatabase::connect_with_options(&self.request).await?);
|
||||
Ok(Connection {
|
||||
@@ -1132,6 +1163,9 @@ mod tests {
|
||||
use lance_testing::datagen::{BatchGenerator, IncrementingInt32};
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::database::listing::{ListingDatabaseOptions, OPT_NEW_TABLE_V2_MANIFEST_PATHS};
|
||||
use crate::database::namespace::LanceNamespaceDatabase;
|
||||
use crate::table::NativeTable;
|
||||
use crate::test_utils::connection::new_test_connection;
|
||||
|
||||
use super::*;
|
||||
@@ -1204,6 +1238,147 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_connect_with_manifest_enabled_uses_directory_namespace() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
|
||||
let db = connect(uri)
|
||||
.manifest_enabled(true)
|
||||
.storage_option("timeout", "30s")
|
||||
.namespace_client_property("manifest_enabled", "false")
|
||||
.namespace_client_property("dir_listing_to_manifest_migration_enabled", "false")
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert!(
|
||||
db.database()
|
||||
.as_any()
|
||||
.downcast_ref::<LanceNamespaceDatabase>()
|
||||
.is_some()
|
||||
);
|
||||
assert_eq!(db.uri(), uri);
|
||||
|
||||
let (ns_impl, properties) = db.namespace_client_config().await.unwrap();
|
||||
assert_eq!(ns_impl, "dir");
|
||||
assert_eq!(properties.get("root"), Some(&uri.to_string()));
|
||||
assert_eq!(
|
||||
properties.get("manifest_enabled"),
|
||||
Some(&"true".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
properties.get("dir_listing_to_manifest_migration_enabled"),
|
||||
Some(&"true".to_string())
|
||||
);
|
||||
assert_eq!(properties.get("storage.timeout"), Some(&"30s".to_string()));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_manifest_enabled_rejects_commit_engine_uri() {
|
||||
let Err(err) = connect("s3+ddb://bucket/db?ddbTableName=manifest")
|
||||
.manifest_enabled(true)
|
||||
.execute()
|
||||
.await
|
||||
else {
|
||||
panic!("expected manifest-enabled s3+ddb connection to fail");
|
||||
};
|
||||
assert!(
|
||||
matches!(err, Error::NotSupported { message } if message.contains("commit engine URI schemes"))
|
||||
);
|
||||
|
||||
let Err(err) = connect("s3://bucket/db?engine=ddb&ddbTableName=manifest")
|
||||
.manifest_enabled(true)
|
||||
.execute()
|
||||
.await
|
||||
else {
|
||||
panic!("expected manifest-enabled engine query connection to fail");
|
||||
};
|
||||
assert!(
|
||||
matches!(err, Error::NotSupported { message } if message.contains("commit engine"))
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_manifest_enabled_connection_migrates_root_listing_table() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
|
||||
|
||||
connect(uri)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
.create_empty_table("legacy", schema)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let db = connect(uri).manifest_enabled(true).execute().await.unwrap();
|
||||
let tables = db.table_names().execute().await.unwrap();
|
||||
assert_eq!(tables, vec!["legacy".to_string()]);
|
||||
db.open_table("legacy").execute().await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_manifest_enabled_preserves_new_table_options() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let options = ListingDatabaseOptions::builder()
|
||||
.enable_v2_manifest_paths(true)
|
||||
.build();
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
|
||||
|
||||
let table = connect(uri)
|
||||
.manifest_enabled(true)
|
||||
.database_options(&options)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
.create_empty_table("v1_manifest", schema)
|
||||
.storage_option(OPT_NEW_TABLE_V2_MANIFEST_PATHS, "false")
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let native_table = table
|
||||
.base_table()
|
||||
.as_any()
|
||||
.downcast_ref::<NativeTable>()
|
||||
.unwrap();
|
||||
assert!(!native_table.uses_v2_manifest_paths().await.unwrap());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_manifest_enabled_vend_input_storage_options() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
|
||||
|
||||
let table = connect(uri)
|
||||
.manifest_enabled(true)
|
||||
.storage_option("test_storage_option", "test_value")
|
||||
.namespace_client_property("vend_input_storage_options", "true")
|
||||
.namespace_client_property(
|
||||
"vend_input_storage_options_refresh_interval_millis",
|
||||
"60000",
|
||||
)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
.create_empty_table("vended", schema)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let storage_options = table.latest_storage_options().await.unwrap().unwrap();
|
||||
assert_eq!(
|
||||
storage_options.get("test_storage_option"),
|
||||
Some(&"test_value".to_string())
|
||||
);
|
||||
assert!(storage_options.contains_key("expires_at_millis"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_table_names() {
|
||||
let tc = new_test_connection().await.unwrap();
|
||||
|
||||
@@ -285,7 +285,7 @@ const MIRRORED_STORE: &str = "mirroredStore";
|
||||
|
||||
/// A connection to LanceDB
|
||||
impl ListingDatabase {
|
||||
fn build_namespace_client_properties(
|
||||
pub(crate) fn build_namespace_client_properties(
|
||||
uri: &str,
|
||||
storage_options: &HashMap<String, String>,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
@@ -298,6 +298,24 @@ impl ListingDatabase {
|
||||
properties
|
||||
}
|
||||
|
||||
pub(crate) fn build_manifest_enabled_namespace_client_properties(
|
||||
uri: &str,
|
||||
storage_options: &HashMap<String, String>,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
) -> HashMap<String, String> {
|
||||
let mut properties = Self::build_namespace_client_properties(
|
||||
uri,
|
||||
storage_options,
|
||||
namespace_client_properties,
|
||||
);
|
||||
properties.insert("manifest_enabled".to_string(), "true".to_string());
|
||||
properties.insert(
|
||||
"dir_listing_to_manifest_migration_enabled".to_string(),
|
||||
"true".to_string(),
|
||||
);
|
||||
properties
|
||||
}
|
||||
|
||||
async fn connect_namespace_database(
|
||||
uri: &str,
|
||||
storage_options: HashMap<String, String>,
|
||||
@@ -323,6 +341,119 @@ impl ListingDatabase {
|
||||
))
|
||||
}
|
||||
|
||||
async fn prepare_namespace_root(
|
||||
uri: &str,
|
||||
storage_options: &HashMap<String, String>,
|
||||
session: Arc<lance::session::Session>,
|
||||
) -> Result<String> {
|
||||
match url::Url::parse(uri) {
|
||||
Ok(url) if url.scheme().len() == 1 && cfg!(windows) => {
|
||||
let (object_store, _) = ObjectStore::from_uri_and_params(
|
||||
session.store_registry(),
|
||||
uri,
|
||||
&ObjectStoreParams::default(),
|
||||
)
|
||||
.await?;
|
||||
if object_store.is_local() {
|
||||
Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?;
|
||||
}
|
||||
Ok(uri.to_string())
|
||||
}
|
||||
Ok(mut url) => {
|
||||
if url.scheme().contains('+') {
|
||||
return Err(Error::NotSupported {
|
||||
message: "commit engine URI schemes are not supported for manifest-enabled namespace connections".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
for (key, value) in url.query_pairs() {
|
||||
if key == ENGINE {
|
||||
return Err(Error::NotSupported {
|
||||
message: format!(
|
||||
"commit engine '{}' is not supported for manifest-enabled namespace connections",
|
||||
value
|
||||
),
|
||||
});
|
||||
} else if key == MIRRORED_STORE {
|
||||
return Err(Error::NotSupported {
|
||||
message: "mirrored store is not supported for manifest-enabled namespace connections"
|
||||
.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
url.set_query(None);
|
||||
let plain_uri = url.to_string();
|
||||
|
||||
let os_params = ObjectStoreParams {
|
||||
storage_options_accessor: if storage_options.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Arc::new(StorageOptionsAccessor::with_static_options(
|
||||
storage_options.clone(),
|
||||
)))
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
let (object_store, _) = ObjectStore::from_uri_and_params(
|
||||
session.store_registry(),
|
||||
&plain_uri,
|
||||
&os_params,
|
||||
)
|
||||
.await?;
|
||||
if object_store.is_local() {
|
||||
Self::try_create_dir(&plain_uri).context(CreateDirSnafu {
|
||||
path: plain_uri.clone(),
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(plain_uri)
|
||||
}
|
||||
Err(_) => {
|
||||
let (object_store, _) = ObjectStore::from_uri_and_params(
|
||||
session.store_registry(),
|
||||
uri,
|
||||
&ObjectStoreParams::default(),
|
||||
)
|
||||
.await?;
|
||||
if object_store.is_local() {
|
||||
Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?;
|
||||
}
|
||||
Ok(uri.to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn connect_manifest_enabled_namespace_database(
|
||||
request: &ConnectRequest,
|
||||
) -> Result<LanceNamespaceDatabase> {
|
||||
let options = ListingDatabaseOptions::parse_from_map(&request.options)?;
|
||||
let session = request
|
||||
.session
|
||||
.clone()
|
||||
.unwrap_or_else(|| Arc::new(lance::session::Session::default()));
|
||||
let namespace_root =
|
||||
Self::prepare_namespace_root(&request.uri, &options.storage_options, session.clone())
|
||||
.await?;
|
||||
let ns_properties = Self::build_manifest_enabled_namespace_client_properties(
|
||||
&namespace_root,
|
||||
&options.storage_options,
|
||||
request.namespace_client_properties.clone(),
|
||||
);
|
||||
|
||||
LanceNamespaceDatabase::connect_with_new_table_config(
|
||||
"dir",
|
||||
ns_properties,
|
||||
options.storage_options,
|
||||
request.read_consistency_interval,
|
||||
Some(session),
|
||||
HashSet::new(),
|
||||
options.new_table_config,
|
||||
)
|
||||
.await
|
||||
.map(|db| db.with_uri(request.uri.clone()))
|
||||
}
|
||||
|
||||
/// Connect to a listing database
|
||||
///
|
||||
/// The URI should be a path to a directory where the tables are stored.
|
||||
@@ -690,15 +821,12 @@ impl ListingDatabase {
|
||||
store_params.storage_options_accessor = Some(Arc::new(accessor));
|
||||
}
|
||||
|
||||
write_params.data_storage_version = self
|
||||
.new_table_config
|
||||
.data_storage_version
|
||||
.or(storage_version_override);
|
||||
write_params.data_storage_version = storage_version_override
|
||||
.or(write_params.data_storage_version)
|
||||
.or(self.new_table_config.data_storage_version);
|
||||
|
||||
if let Some(enable_v2_manifest_paths) = self
|
||||
.new_table_config
|
||||
.enable_v2_manifest_paths
|
||||
.or(v2_manifest_override)
|
||||
if let Some(enable_v2_manifest_paths) =
|
||||
v2_manifest_override.or(self.new_table_config.enable_v2_manifest_paths)
|
||||
{
|
||||
write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
|
||||
}
|
||||
@@ -1158,6 +1286,7 @@ mod tests {
|
||||
client_config: Default::default(),
|
||||
options: Default::default(),
|
||||
namespace_client_properties: Default::default(),
|
||||
manifest_enabled: false,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -1292,6 +1421,7 @@ mod tests {
|
||||
client_config: Default::default(),
|
||||
options: options.clone(),
|
||||
namespace_client_properties: Default::default(),
|
||||
manifest_enabled: false,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -1827,6 +1957,7 @@ mod tests {
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
namespace_client_properties: Default::default(),
|
||||
manifest_enabled: false,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -1933,6 +2064,7 @@ mod tests {
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
namespace_client_properties: Default::default(),
|
||||
manifest_enabled: false,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -2005,6 +2137,7 @@ mod tests {
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
namespace_client_properties: Default::default(),
|
||||
manifest_enabled: false,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -2202,6 +2335,7 @@ mod tests {
|
||||
client_config: Default::default(),
|
||||
options: Default::default(),
|
||||
namespace_client_properties,
|
||||
manifest_enabled: false,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
|
||||
@@ -8,9 +8,7 @@ use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use lance::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore;
|
||||
use lance_io::object_store::{
|
||||
ObjectStore as LanceObjectStore, ObjectStoreParams, ObjectStoreRegistry, StorageOptionsAccessor,
|
||||
};
|
||||
use lance_io::object_store::{ObjectStoreParams, StorageOptionsAccessor};
|
||||
use lance_namespace::{
|
||||
LanceNamespace,
|
||||
models::{
|
||||
@@ -26,6 +24,10 @@ use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler;
|
||||
|
||||
use crate::connection::NamespaceClientPushdownOperation;
|
||||
use crate::database::ReadConsistency;
|
||||
use crate::database::listing::{
|
||||
NewTableConfig, OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS, OPT_NEW_TABLE_STORAGE_VERSION,
|
||||
OPT_NEW_TABLE_V2_MANIFEST_PATHS,
|
||||
};
|
||||
use crate::error::{Error, Result};
|
||||
use crate::table::NativeTable;
|
||||
use lance::dataset::WriteMode;
|
||||
@@ -52,6 +54,8 @@ pub struct LanceNamespaceDatabase {
|
||||
ns_impl: String,
|
||||
// Namespace properties used to construct the namespace client
|
||||
ns_properties: HashMap<String, String>,
|
||||
// Options for tables created by this connection
|
||||
new_table_config: NewTableConfig,
|
||||
}
|
||||
|
||||
impl LanceNamespaceDatabase {
|
||||
@@ -73,9 +77,15 @@ impl LanceNamespaceDatabase {
|
||||
pushdown_operations: namespace_client_pushdown_operations,
|
||||
ns_impl: namespace_client_impl,
|
||||
ns_properties: namespace_client_properties,
|
||||
new_table_config: NewTableConfig::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn with_uri(mut self, uri: impl Into<String>) -> Self {
|
||||
self.uri = uri.into();
|
||||
self
|
||||
}
|
||||
|
||||
pub async fn connect(
|
||||
ns_impl: &str,
|
||||
ns_properties: HashMap<String, String>,
|
||||
@@ -83,6 +93,27 @@ impl LanceNamespaceDatabase {
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
) -> Result<Self> {
|
||||
Self::connect_with_new_table_config(
|
||||
ns_impl,
|
||||
ns_properties,
|
||||
storage_options,
|
||||
read_consistency_interval,
|
||||
session,
|
||||
pushdown_operations,
|
||||
NewTableConfig::default(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub(crate) async fn connect_with_new_table_config(
|
||||
ns_impl: &str,
|
||||
ns_properties: HashMap<String, String>,
|
||||
storage_options: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
new_table_config: NewTableConfig,
|
||||
) -> Result<Self> {
|
||||
let mut builder = ConnectBuilder::new(ns_impl);
|
||||
for (key, value) in ns_properties.clone() {
|
||||
@@ -104,8 +135,79 @@ impl LanceNamespaceDatabase {
|
||||
pushdown_operations,
|
||||
ns_impl: ns_impl.to_string(),
|
||||
ns_properties,
|
||||
new_table_config,
|
||||
})
|
||||
}
|
||||
|
||||
fn extract_storage_overrides(
|
||||
&self,
|
||||
request: &DbCreateTableRequest,
|
||||
) -> Result<(
|
||||
Option<lance_encoding::version::LanceFileVersion>,
|
||||
Option<bool>,
|
||||
Option<bool>,
|
||||
)> {
|
||||
let storage_options = request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.as_ref()
|
||||
.and_then(|p| p.store_params.as_ref())
|
||||
.and_then(|sp| sp.storage_options());
|
||||
|
||||
let storage_version_override = storage_options
|
||||
.and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION))
|
||||
.map(|s| s.parse::<lance_encoding::version::LanceFileVersion>())
|
||||
.transpose()?;
|
||||
|
||||
let v2_manifest_override = storage_options
|
||||
.and_then(|opts| opts.get(OPT_NEW_TABLE_V2_MANIFEST_PATHS))
|
||||
.map(|s| s.parse::<bool>())
|
||||
.transpose()
|
||||
.map_err(|_| Error::InvalidInput {
|
||||
message: "enable_v2_manifest_paths must be a boolean".to_string(),
|
||||
})?;
|
||||
|
||||
let stable_row_ids_override = storage_options
|
||||
.and_then(|opts| opts.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS))
|
||||
.map(|s| s.parse::<bool>())
|
||||
.transpose()
|
||||
.map_err(|_| Error::InvalidInput {
|
||||
message: "enable_stable_row_ids must be a boolean".to_string(),
|
||||
})?;
|
||||
|
||||
Ok((
|
||||
storage_version_override,
|
||||
v2_manifest_override,
|
||||
stable_row_ids_override,
|
||||
))
|
||||
}
|
||||
|
||||
fn apply_new_table_config(
|
||||
&self,
|
||||
params: &mut lance::dataset::WriteParams,
|
||||
request: &DbCreateTableRequest,
|
||||
) -> Result<()> {
|
||||
let (storage_version_override, v2_manifest_override, stable_row_ids_override) =
|
||||
self.extract_storage_overrides(request)?;
|
||||
|
||||
params.data_storage_version = storage_version_override
|
||||
.or(params.data_storage_version)
|
||||
.or(self.new_table_config.data_storage_version);
|
||||
|
||||
if let Some(enable_v2_manifest_paths) =
|
||||
v2_manifest_override.or(self.new_table_config.enable_v2_manifest_paths)
|
||||
{
|
||||
params.enable_v2_manifest_paths = enable_v2_manifest_paths;
|
||||
}
|
||||
|
||||
if let Some(enable_stable_row_ids) =
|
||||
stable_row_ids_override.or(self.new_table_config.enable_stable_row_ids)
|
||||
{
|
||||
params.enable_stable_row_ids = enable_stable_row_ids;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for LanceNamespaceDatabase {
|
||||
@@ -301,7 +403,12 @@ impl Database for LanceNamespaceDatabase {
|
||||
};
|
||||
|
||||
// Build write params with storage options and commit handler
|
||||
let mut params = request.write_options.lance_write_params.unwrap_or_default();
|
||||
let mut params = request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.clone()
|
||||
.unwrap_or_default();
|
||||
self.apply_new_table_config(&mut params, &request)?;
|
||||
|
||||
if matches!(request.mode, CreateTableMode::Overwrite) {
|
||||
params.mode = WriteMode::Overwrite;
|
||||
@@ -343,20 +450,6 @@ impl Database for LanceNamespaceDatabase {
|
||||
)
|
||||
.await?;
|
||||
|
||||
// lance >= v6.0.0-beta.2: declare_table creates a `.lance-reserved` marker file to
|
||||
// prevent directory scanning from seeing an incomplete table. After writing data we
|
||||
// must remove it so the table appears in directory listings.
|
||||
if let Ok(dataset) = native_table.dataset.get().await {
|
||||
let registry = Arc::new(ObjectStoreRegistry::default());
|
||||
if let Ok(table_path) = LanceObjectStore::extract_path_from_uri(registry, dataset.uri())
|
||||
{
|
||||
let _ = dataset
|
||||
.object_store
|
||||
.delete(&table_path.child(".lance-reserved"))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Arc::new(native_table))
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ pub struct RemoteInsertExec<S: HttpSend = Sender> {
|
||||
client: RestfulLanceDbClient<S>,
|
||||
input: Arc<dyn ExecutionPlan>,
|
||||
overwrite: bool,
|
||||
properties: PlanProperties,
|
||||
properties: Arc<PlanProperties>,
|
||||
add_result: Arc<Mutex<Option<AddResult>>>,
|
||||
metrics: ExecutionPlanMetricsSet,
|
||||
upload_id: Option<String>,
|
||||
@@ -118,7 +118,7 @@ impl<S: HttpSend + 'static> RemoteInsertExec<S> {
|
||||
client,
|
||||
input,
|
||||
overwrite,
|
||||
properties,
|
||||
properties: Arc::new(properties),
|
||||
add_result: Arc::new(Mutex::new(None)),
|
||||
metrics: ExecutionPlanMetricsSet::new(),
|
||||
upload_id,
|
||||
@@ -232,7 +232,7 @@ impl<S: HttpSend + 'static> ExecutionPlan for RemoteInsertExec<S> {
|
||||
self
|
||||
}
|
||||
|
||||
fn properties(&self) -> &PlanProperties {
|
||||
fn properties(&self) -> &Arc<PlanProperties> {
|
||||
&self.properties
|
||||
}
|
||||
|
||||
|
||||
@@ -39,21 +39,26 @@ use lance_index::scalar::FullTextSearchQuery;
|
||||
struct MetadataEraserExec {
|
||||
input: Arc<dyn ExecutionPlan>,
|
||||
schema: Arc<ArrowSchema>,
|
||||
properties: PlanProperties,
|
||||
properties: Arc<PlanProperties>,
|
||||
}
|
||||
|
||||
impl MetadataEraserExec {
|
||||
fn compute_properties_from_input(
|
||||
input: &Arc<dyn ExecutionPlan>,
|
||||
schema: &Arc<ArrowSchema>,
|
||||
) -> PlanProperties {
|
||||
) -> Arc<PlanProperties> {
|
||||
let input_properties = input.properties();
|
||||
let eq_properties = input_properties
|
||||
.eq_properties
|
||||
.clone()
|
||||
.with_new_schema(schema.clone())
|
||||
.unwrap();
|
||||
input_properties.clone().with_eq_properties(eq_properties)
|
||||
Arc::new(
|
||||
input_properties
|
||||
.as_ref()
|
||||
.clone()
|
||||
.with_eq_properties(eq_properties),
|
||||
)
|
||||
}
|
||||
|
||||
fn new(input: Arc<dyn ExecutionPlan>) -> Self {
|
||||
@@ -87,7 +92,7 @@ impl ExecutionPlan for MetadataEraserExec {
|
||||
self
|
||||
}
|
||||
|
||||
fn properties(&self) -> &PlanProperties {
|
||||
fn properties(&self) -> &Arc<PlanProperties> {
|
||||
&self.properties
|
||||
}
|
||||
|
||||
|
||||
@@ -81,7 +81,7 @@ pub struct InsertExec {
|
||||
dataset: Arc<Dataset>,
|
||||
input: Arc<dyn ExecutionPlan>,
|
||||
write_params: WriteParams,
|
||||
properties: PlanProperties,
|
||||
properties: Arc<PlanProperties>,
|
||||
partial_transactions: Arc<Mutex<Vec<Transaction>>>,
|
||||
metrics: ExecutionPlanMetricsSet,
|
||||
}
|
||||
@@ -107,7 +107,7 @@ impl InsertExec {
|
||||
dataset,
|
||||
input,
|
||||
write_params,
|
||||
properties,
|
||||
properties: Arc::new(properties),
|
||||
partial_transactions: Arc::new(Mutex::new(Vec::with_capacity(num_partitions))),
|
||||
metrics: ExecutionPlanMetricsSet::new(),
|
||||
}
|
||||
@@ -136,7 +136,7 @@ impl ExecutionPlan for InsertExec {
|
||||
self
|
||||
}
|
||||
|
||||
fn properties(&self) -> &PlanProperties {
|
||||
fn properties(&self) -> &Arc<PlanProperties> {
|
||||
&self.properties
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ pub(crate) struct ScannableExec {
|
||||
// We don't require Scannable to be Sync, so we wrap it in a Mutex to allow safe concurrent access.
|
||||
source: Mutex<Box<dyn Scannable>>,
|
||||
num_rows: Option<usize>,
|
||||
properties: PlanProperties,
|
||||
properties: Arc<PlanProperties>,
|
||||
tracker: Option<Arc<WriteProgressTracker>>,
|
||||
}
|
||||
|
||||
@@ -49,7 +49,7 @@ impl ScannableExec {
|
||||
Self {
|
||||
source,
|
||||
num_rows,
|
||||
properties,
|
||||
properties: Arc::new(properties),
|
||||
tracker,
|
||||
}
|
||||
}
|
||||
@@ -70,7 +70,7 @@ impl ExecutionPlan for ScannableExec {
|
||||
self
|
||||
}
|
||||
|
||||
fn properties(&self) -> &PlanProperties {
|
||||
fn properties(&self) -> &Arc<PlanProperties> {
|
||||
&self.properties
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user