mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-13 01:50:42 +00:00
Compare commits
1 Commits
ticket/324
...
v0.28.0-be
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
78cdb12732 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.28.0-beta.10"
|
||||
current_version = "0.28.0-beta.4"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
2
.github/ISSUE_TEMPLATE/documentation.yml
vendored
2
.github/ISSUE_TEMPLATE/documentation.yml
vendored
@@ -18,6 +18,6 @@ body:
|
||||
label: Link
|
||||
description: >
|
||||
Provide a link to the existing documentation, if applicable.
|
||||
placeholder: ex. https://docs.lancedb.com/tables/...
|
||||
placeholder: ex. https://lancedb.com/docs/tables/...
|
||||
validations:
|
||||
required: false
|
||||
|
||||
18
.github/dependabot.yml
vendored
18
.github/dependabot.yml
vendored
@@ -1,18 +0,0 @@
|
||||
version: 2
|
||||
|
||||
# Scope: the root Cargo workspace, which produces the Rust binaries we
|
||||
# ship to users (the Node.js and Python native extensions). The
|
||||
# `rust/lancedb` library crate shares the same lockfile; its consumers
|
||||
# pick their own dependency versions, but bumping transitive deps here
|
||||
# keeps the binaries we ship current.
|
||||
updates:
|
||||
- package-ecosystem: cargo
|
||||
directory: /
|
||||
schedule:
|
||||
interval: weekly
|
||||
open-pull-requests-limit: 10
|
||||
groups:
|
||||
rust-minor-patch:
|
||||
update-types:
|
||||
- minor
|
||||
- patch
|
||||
3
.github/workflows/dev.yml
vendored
3
.github/workflows/dev.yml
vendored
@@ -8,9 +8,6 @@ concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
labeler:
|
||||
permissions:
|
||||
|
||||
3
.github/workflows/java-publish.yml
vendored
3
.github/workflows/java-publish.yml
vendored
@@ -19,9 +19,6 @@ on:
|
||||
paths:
|
||||
- .github/workflows/java-publish.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
name: Build and Publish
|
||||
|
||||
3
.github/workflows/java.yml
vendored
3
.github/workflows/java.yml
vendored
@@ -24,9 +24,6 @@ on:
|
||||
- java/**
|
||||
- .github/workflows/java.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
build-java:
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
4
.github/workflows/license-header-check.yml
vendored
4
.github/workflows/license-header-check.yml
vendored
@@ -10,10 +10,6 @@ on:
|
||||
- nodejs/**
|
||||
- java/**
|
||||
- .github/workflows/license-header-check.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
check-licenses:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
3
.github/workflows/nodejs.yml
vendored
3
.github/workflows/nodejs.yml
vendored
@@ -15,9 +15,6 @@ on:
|
||||
- .github/workflows/nodejs.yml
|
||||
- docker-compose.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
15
.github/workflows/pypi-publish.yml
vendored
15
.github/workflows/pypi-publish.yml
vendored
@@ -14,16 +14,10 @@ on:
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
linux:
|
||||
name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }}
|
||||
timeout-minutes: 60
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
@@ -63,12 +57,10 @@ jobs:
|
||||
- uses: ./.github/workflows/upload_wheel
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
with:
|
||||
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||
fury_token: ${{ secrets.FURY_TOKEN }}
|
||||
mac:
|
||||
timeout-minutes: 90
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
runs-on: ${{ matrix.config.runner }}
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -93,12 +85,10 @@ jobs:
|
||||
- uses: ./.github/workflows/upload_wheel
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
with:
|
||||
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||
fury_token: ${{ secrets.FURY_TOKEN }}
|
||||
windows:
|
||||
timeout-minutes: 60
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -117,6 +107,7 @@ jobs:
|
||||
- uses: ./.github/workflows/upload_wheel
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
with:
|
||||
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||
fury_token: ${{ secrets.FURY_TOKEN }}
|
||||
gh-release:
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
|
||||
5
.github/workflows/python.yml
vendored
5
.github/workflows/python.yml
vendored
@@ -17,9 +17,6 @@ on:
|
||||
- .github/workflows/build_windows_wheel/**
|
||||
- .github/workflows/run_tests/**
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
@@ -111,6 +108,7 @@ jobs:
|
||||
- name: Install
|
||||
run: |
|
||||
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
|
||||
pip install tantivy
|
||||
pip install mlx
|
||||
- name: Doctest
|
||||
run: pytest --doctest-modules python/lancedb
|
||||
@@ -229,5 +227,6 @@ jobs:
|
||||
pip install "pydantic<2"
|
||||
pip install pyarrow==16
|
||||
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
||||
pip install tantivy
|
||||
- name: Run tests
|
||||
run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests
|
||||
|
||||
17
.github/workflows/rust.yml
vendored
17
.github/workflows/rust.yml
vendored
@@ -9,15 +9,9 @@ on:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- deny.toml
|
||||
- rust/**
|
||||
- nodejs/Cargo.toml
|
||||
- python/Cargo.toml
|
||||
- .github/workflows/rust.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
@@ -59,17 +53,6 @@ jobs:
|
||||
- name: Run clippy (without remote feature)
|
||||
run: cargo clippy --profile ci --workspace --tests -- -D warnings
|
||||
|
||||
deny:
|
||||
# Supply-chain checks: advisories, licenses, banned crates, and source
|
||||
# restrictions. Configuration lives in `deny.toml` at the workspace root.
|
||||
timeout-minutes: 10
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: EmbarkStudios/cargo-deny-action@v2
|
||||
with:
|
||||
command: check advisories bans licenses sources
|
||||
|
||||
build-no-lock:
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 30
|
||||
|
||||
@@ -3,9 +3,6 @@ name: Update package-lock.json
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -3,9 +3,6 @@ name: Update NodeJs package-lock.json
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
31
.github/workflows/upload_wheel/action.yml
vendored
31
.github/workflows/upload_wheel/action.yml
vendored
@@ -2,6 +2,9 @@ name: upload-wheel
|
||||
|
||||
description: "Upload wheels to Pypi"
|
||||
inputs:
|
||||
pypi_token:
|
||||
required: true
|
||||
description: "release token for the repo"
|
||||
fury_token:
|
||||
required: true
|
||||
description: "release token for the fury repo"
|
||||
@@ -9,6 +12,12 @@ inputs:
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install twine
|
||||
python3 -m pip install --upgrade pkginfo
|
||||
- name: Choose repo
|
||||
shell: bash
|
||||
id: choose_repo
|
||||
@@ -18,17 +27,19 @@ runs:
|
||||
else
|
||||
echo "repo=pypi" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
- name: Publish to Fury
|
||||
if: steps.choose_repo.outputs.repo == 'fury'
|
||||
- name: Publish to PyPI
|
||||
shell: bash
|
||||
env:
|
||||
FURY_TOKEN: ${{ inputs.fury_token }}
|
||||
PYPI_TOKEN: ${{ inputs.pypi_token }}
|
||||
run: |
|
||||
WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
|
||||
echo "Uploading $WHEEL to Fury"
|
||||
curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
|
||||
- name: Publish to PyPI
|
||||
if: steps.choose_repo.outputs.repo == 'pypi'
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
packages-dir: target/wheels/
|
||||
if [[ ${{ steps.choose_repo.outputs.repo }} == fury ]]; then
|
||||
WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
|
||||
echo "Uploading $WHEEL to Fury"
|
||||
curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
|
||||
else
|
||||
twine upload --repository ${{ steps.choose_repo.outputs.repo }} \
|
||||
--username __token__ \
|
||||
--password $PYPI_TOKEN \
|
||||
target/wheels/lancedb-*.whl
|
||||
fi
|
||||
|
||||
498
Cargo.lock
generated
498
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
30
Cargo.toml
30
Cargo.toml
@@ -1,5 +1,7 @@
|
||||
[workspace]
|
||||
members = ["rust/lancedb", "nodejs", "python"]
|
||||
# Python package needs to be built by maturin.
|
||||
exclude = ["python"]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
@@ -13,20 +15,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=6.0.0-beta.4", default-features = false, "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=6.0.0-beta.4", default-features = false, "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=6.0.0-beta.4", default-features = false, "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=5.1.0-beta.3", default-features = false, "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=5.1.0-beta.3", default-features = false, "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=5.1.0-beta.3", default-features = false, "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=5.1.0-beta.3", "tag" = "v5.1.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "57.2", optional = false }
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# **The Multimodal AI Lakehouse**
|
||||
|
||||
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://docs.lancedb.com) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
|
||||
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://lancedb.com/docs) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
|
||||
|
||||
**The ultimate multimodal data platform for AI/ML applications.**
|
||||
|
||||
@@ -57,7 +57,7 @@ LanceDB is a central location where developers can build, train and analyze thei
|
||||
|
||||
## **How to Install**:
|
||||
|
||||
Follow the [Quickstart](https://docs.lancedb.com/quickstart) doc to set up LanceDB locally.
|
||||
Follow the [Quickstart](https://lancedb.com/docs/quickstart/) doc to set up LanceDB locally.
|
||||
|
||||
**API & SDK:** We also support Python, Typescript and Rust SDKs
|
||||
|
||||
|
||||
172
deny.toml
172
deny.toml
@@ -1,172 +0,0 @@
|
||||
# cargo-deny configuration for LanceDB.
|
||||
#
|
||||
# Run locally with `cargo deny check`. See
|
||||
# https://embarkstudios.github.io/cargo-deny/ for the full reference.
|
||||
|
||||
# The set of target triples we care about. cargo-deny will only consider
|
||||
# dependencies that are used on at least one of these targets. Keeping this
|
||||
# explicit avoids noise from platform-specific crates (e.g. wasm, android,
|
||||
# ios) that we never actually ship.
|
||||
[graph]
|
||||
targets = [
|
||||
"x86_64-unknown-linux-gnu",
|
||||
"aarch64-unknown-linux-gnu",
|
||||
"x86_64-apple-darwin",
|
||||
"aarch64-apple-darwin",
|
||||
"x86_64-pc-windows-msvc",
|
||||
"aarch64-pc-windows-msvc",
|
||||
]
|
||||
all-features = true
|
||||
|
||||
[output]
|
||||
feature-depth = 1
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Advisories: security vulnerabilities and yanked crates.
|
||||
# ---------------------------------------------------------------------------
|
||||
[advisories]
|
||||
version = 2
|
||||
# Fail the check if any crate in the lockfile has been yanked from crates.io.
|
||||
# Yanked crates are a signal the author retracted the release (often due to
|
||||
# bugs or security issues) and should not be depended on.
|
||||
yanked = "deny"
|
||||
# Advisory IDs we have explicitly reviewed and chosen to accept. Every
|
||||
# entry must include a rationale and, where possible, an upstream issue
|
||||
# pointing to a fix. Revisit this list whenever dependencies are updated.
|
||||
ignore = [
|
||||
# rsa: Marvin Attack timing side-channel in PKCS#1 v1.5 decryption.
|
||||
# Reached only through opendal → reqsign → rsa. We do not use RSA
|
||||
# decryption in LanceDB ourselves; this is dormant in the signing path.
|
||||
# No fixed release exists upstream as of this writing.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2023-0071
|
||||
{ id = "RUSTSEC-2023-0071", reason = "rsa crate via opendal/reqsign; no fixed upstream release" },
|
||||
|
||||
# instant: unmaintained. Pulled in via backoff → instant. Upstream
|
||||
# recommends switching to `web-time`; fix has to come from backoff.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2024-0384
|
||||
{ id = "RUSTSEC-2024-0384", reason = "transitive via backoff; waiting on backoff replacement" },
|
||||
|
||||
# paste: unmaintained (author archived the repo). Used transitively by
|
||||
# datafusion and the arrow ecosystem; widespread, no drop-in replacement.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2024-0436
|
||||
{ id = "RUSTSEC-2024-0436", reason = "transitive via datafusion; awaiting ecosystem migration" },
|
||||
|
||||
# tantivy: segfault on malformed input due to missing bounds check.
|
||||
# Pulled in via lance for full-text search. We only feed tantivy
|
||||
# documents we construct ourselves, not attacker-controlled bytes.
|
||||
# Tracked for a lance dependency bump.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0003
|
||||
{ id = "RUSTSEC-2025-0003", reason = "tantivy via lance; inputs are internally produced, not user-supplied bytes" },
|
||||
|
||||
# backoff: unmaintained. Reached only via async-openai. Replacement
|
||||
# requires async-openai to migrate (or us to drop async-openai).
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0012
|
||||
{ id = "RUSTSEC-2025-0012", reason = "transitive via async-openai; waiting on upstream migration" },
|
||||
|
||||
# number_prefix: unmaintained. Transitive via indicatif → hf-hub.
|
||||
# No security impact, just maintenance status.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0119
|
||||
{ id = "RUSTSEC-2025-0119", reason = "transitive via hf-hub/indicatif; cosmetic formatting crate" },
|
||||
|
||||
# rustls-pemfile: unmaintained. Reached from two separate chains:
|
||||
# rustls-native-certs 0.6 (via hyper-rustls 0.24) and object_store 0.12.
|
||||
# Both upstream dependencies need to move before we can drop it.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0134
|
||||
{ id = "RUSTSEC-2025-0134", reason = "transitive via rustls-native-certs/object_store; waiting on upstream migration" },
|
||||
|
||||
# rustls-webpki 0.101.7 (old major line): name-constraint checks for
|
||||
# URI / wildcard names. Pulled in only via the legacy rustls 0.21 chain
|
||||
# from aws-smithy-http-client. The 0.103 line we actively use is patched.
|
||||
# Clearing the 0.101 copy requires the aws-sdk chain to migrate off
|
||||
# rustls 0.21.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0098
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0099
|
||||
{ id = "RUSTSEC-2026-0098", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
|
||||
{ id = "RUSTSEC-2026-0099", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
|
||||
|
||||
# rustls-webpki 0.101.7: reachable panic in CRL parsing. Same legacy
|
||||
# rustls 0.21 chain from aws-smithy-http-client as above. The 0.103 line
|
||||
# we actively use is upgraded to 0.103.13 which contains the fix.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0104
|
||||
{ id = "RUSTSEC-2026-0104", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
|
||||
]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Licenses: only allow licenses we've reviewed as compatible with Apache-2.0.
|
||||
# ---------------------------------------------------------------------------
|
||||
[licenses]
|
||||
version = 2
|
||||
# SPDX identifiers for licenses that are compatible with our Apache-2.0
|
||||
# distribution. Additions require legal review.
|
||||
allow = [
|
||||
"Apache-2.0",
|
||||
"Apache-2.0 WITH LLVM-exception",
|
||||
"MIT",
|
||||
"BSD-2-Clause",
|
||||
"BSD-3-Clause",
|
||||
"ISC",
|
||||
"Unicode-3.0",
|
||||
"Unicode-DFS-2016",
|
||||
"Zlib",
|
||||
"CC0-1.0",
|
||||
"MPL-2.0",
|
||||
"BSL-1.0",
|
||||
"OpenSSL",
|
||||
# 0BSD ("BSD Zero Clause") is effectively public domain — no attribution
|
||||
# required. Pulled in by `mock_instant`.
|
||||
"0BSD",
|
||||
# bzip2-1.0.6 is the permissive upstream bzip2 license (BSD-like). Pulled
|
||||
# in by `libbz2-rs-sys`, the pure-Rust bzip2 implementation.
|
||||
"bzip2-1.0.6",
|
||||
# CDLA-Permissive-2.0 is a permissive data license used by `webpki-roots`
|
||||
# for the Mozilla CA root bundle. Data-only, distribution-compatible.
|
||||
"CDLA-Permissive-2.0",
|
||||
]
|
||||
confidence-threshold = 0.8
|
||||
# Crates whose license cannot be determined from Cargo metadata but whose
|
||||
# license we've manually confirmed from upstream. Keep this list minimal.
|
||||
[[licenses.clarify]]
|
||||
# polars-arrow-format omits the `license` field in its Cargo.toml, but the
|
||||
# upstream repo (pola-rs/polars-arrow-format) is dual-licensed Apache-2.0 OR
|
||||
# MIT. See https://github.com/pola-rs/polars-arrow-format/blob/main/LICENSE
|
||||
crate = "polars-arrow-format"
|
||||
expression = "Apache-2.0 OR MIT"
|
||||
license-files = []
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bans: disallow specific crates and flag dependency hygiene issues.
|
||||
# ---------------------------------------------------------------------------
|
||||
[bans]
|
||||
# Warn (not deny) on duplicate versions of the same crate. In a large
|
||||
# workspace like this one, duplicates are common and often unavoidable
|
||||
# transitively. We surface them to discourage growth, but don't fail CI.
|
||||
multiple-versions = "warn"
|
||||
# Wildcard version requirements (`foo = "*"`) are a footgun — they let any
|
||||
# future release in without review. Ban them outright.
|
||||
wildcards = "deny"
|
||||
# Internal workspace crates reference each other via `path = "..."`, which
|
||||
# cargo-deny sees as a wildcard version. That's fine for private workspace
|
||||
# members (not published to crates.io), so allow it specifically for paths.
|
||||
allow-wildcard-paths = true
|
||||
# Features that, if enabled, should cause the check to fail.
|
||||
deny = []
|
||||
# Crates to skip when checking for duplicate versions.
|
||||
skip = []
|
||||
# Similar to `skip`, but also skips the entire transitive subtree.
|
||||
skip-tree = []
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sources: restrict where crates can come from.
|
||||
# ---------------------------------------------------------------------------
|
||||
[sources]
|
||||
# Deny any registry other than the ones explicitly listed below.
|
||||
unknown-registry = "deny"
|
||||
# Deny any git dependency whose host isn't in the allow-list below. This
|
||||
# prevents accidental pulls from arbitrary forks.
|
||||
unknown-git = "deny"
|
||||
allow-registry = ["https://github.com/rust-lang/crates.io-index"]
|
||||
# Lance is developed in a sibling repo and pulled as a git dependency until
|
||||
# releases are cut to crates.io. Allow that specific host.
|
||||
allow-git = [
|
||||
"https://github.com/lance-format/lance",
|
||||
]
|
||||
@@ -24,4 +24,4 @@ RUN python --version && \
|
||||
rustc --version && \
|
||||
protoc --version
|
||||
|
||||
RUN pip install --no-cache-dir lancedb
|
||||
RUN pip install --no-cache-dir tantivy lancedb
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# LanceDB Documentation
|
||||
|
||||
LanceDB docs are available at [docs.lancedb.com](https://docs.lancedb.com).
|
||||
LanceDB docs are available at [lancedb.com/docs](https://lancedb.com/docs).
|
||||
|
||||
The SDK docs are built and deployed automatically by [Github Actions](../.github/workflows/docs.yml)
|
||||
whenever a commit is pushed to the `main` branch. So it is possible for the docs to show
|
||||
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.28.0-beta.10</version>
|
||||
<version>0.28.0-beta.4</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](https://docs.lancedb.com/quickstart/) contains more complete examples.
|
||||
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -41,29 +41,6 @@ for testing purposes.
|
||||
|
||||
***
|
||||
|
||||
### manifestEnabled?
|
||||
|
||||
```ts
|
||||
optional manifestEnabled: boolean;
|
||||
```
|
||||
|
||||
(For LanceDB OSS only): use directory namespace manifests as the source
|
||||
of truth for table metadata. Existing directory-listed root tables are
|
||||
migrated into the manifest on access.
|
||||
|
||||
***
|
||||
|
||||
### namespaceClientProperties?
|
||||
|
||||
```ts
|
||||
optional namespaceClientProperties: Record<string, string>;
|
||||
```
|
||||
|
||||
(For LanceDB OSS only): extra properties for the backing namespace
|
||||
client used by manifest-enabled native connections.
|
||||
|
||||
***
|
||||
|
||||
### readConsistencyInterval?
|
||||
|
||||
```ts
|
||||
@@ -112,4 +89,4 @@ optional storageOptions: Record<string, string>;
|
||||
|
||||
(For LanceDB OSS only): configuration for object storage.
|
||||
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
|
||||
@@ -97,4 +97,4 @@ Configuration for object storage.
|
||||
Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
|
||||
@@ -42,4 +42,4 @@ Configuration for object storage.
|
||||
Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.28.0-beta.10</version>
|
||||
<version>0.28.0-beta.4</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.28.0-beta.10</version>
|
||||
<version>0.28.0-beta.4</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>6.0.0-beta.4</lance-core.version>
|
||||
<lance-core.version>5.1.0-beta.3</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.28.0-beta.10"
|
||||
publish = false
|
||||
version = "0.28.0-beta.4"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
@@ -32,8 +31,8 @@ lzma-sys = { version = "0.1", features = ["static"] }
|
||||
log.workspace = true
|
||||
|
||||
# Pin to resolve build failures; update periodically for security patches.
|
||||
aws-lc-sys = "=0.40.0"
|
||||
aws-lc-rs = "=1.16.3"
|
||||
aws-lc-sys = "=0.38.0"
|
||||
aws-lc-rs = "=1.16.1"
|
||||
|
||||
[build-dependencies]
|
||||
napi-build = "2.3.1"
|
||||
|
||||
@@ -30,7 +30,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](https://docs.lancedb.com/quickstart/) contains more complete examples.
|
||||
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import * as path from "node:path";
|
||||
import { RecordBatch } from "apache-arrow";
|
||||
import * as tmp from "tmp";
|
||||
import { Connection, Index, Table, connect, makeArrowTable } from "../lancedb";
|
||||
@@ -78,91 +76,4 @@ describe("rerankers", function () {
|
||||
|
||||
expect(result).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("does not keep process alive after rerank query", async function () {
|
||||
const script = `
|
||||
import * as lancedb from "./dist/index.js";
|
||||
import * as os from "node:os";
|
||||
import * as path from "node:path";
|
||||
import * as fs from "node:fs/promises";
|
||||
|
||||
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lancedb-rerank-exit-"));
|
||||
const db = await lancedb.connect(dir);
|
||||
const table = await db.createTable("test", [{ text: "hello", vector: [1, 2, 3] }], {
|
||||
mode: "overwrite",
|
||||
});
|
||||
await table.createIndex("text", { config: lancedb.Index.fts() });
|
||||
await table.waitForIndex(["text_idx"], 30);
|
||||
|
||||
const reranker = await lancedb.rerankers.RRFReranker.create();
|
||||
await table
|
||||
.query()
|
||||
.nearestTo([1, 2, 3])
|
||||
.fullTextSearch("hello")
|
||||
.rerank(reranker)
|
||||
.toArray();
|
||||
|
||||
table.close();
|
||||
db.close();
|
||||
`;
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
const child = spawn(
|
||||
process.execPath,
|
||||
["--input-type=module", "-e", script],
|
||||
{
|
||||
cwd: path.resolve(__dirname, ".."),
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
},
|
||||
);
|
||||
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
|
||||
child.stdout.on("data", (chunk) => {
|
||||
stdout += chunk.toString();
|
||||
});
|
||||
|
||||
child.stderr.on("data", (chunk) => {
|
||||
stderr += chunk.toString();
|
||||
});
|
||||
|
||||
const timeout = setTimeout(() => {
|
||||
child.kill();
|
||||
reject(
|
||||
new Error(
|
||||
`child process did not exit in time\nstdout:\n${stdout}\nstderr:\n${stderr}`,
|
||||
),
|
||||
);
|
||||
}, 20_000);
|
||||
|
||||
child.on("error", (err) => {
|
||||
clearTimeout(timeout);
|
||||
reject(err);
|
||||
});
|
||||
|
||||
child.on("exit", (code, signal) => {
|
||||
clearTimeout(timeout);
|
||||
if (signal !== null) {
|
||||
reject(
|
||||
new Error(
|
||||
`child process exited with signal ${signal}\nstdout:\n${stdout}\nstderr:\n${stderr}`,
|
||||
),
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
if (code !== 0) {
|
||||
reject(
|
||||
new Error(
|
||||
`child process exited with code ${code}\nstdout:\n${stdout}\nstderr:\n${stderr}`,
|
||||
),
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -42,7 +42,7 @@ export interface CreateTableOptions {
|
||||
* Options already set on the connection will be inherited by the table,
|
||||
* but can be overridden here.
|
||||
*
|
||||
* The available options are described at https://docs.lancedb.com/storage/
|
||||
* The available options are described at https://lancedb.com/docs/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>;
|
||||
|
||||
@@ -78,7 +78,7 @@ export interface OpenTableOptions {
|
||||
* Options already set on the connection will be inherited by the table,
|
||||
* but can be overridden here.
|
||||
*
|
||||
* The available options are described at https://docs.lancedb.com/storage/
|
||||
* The available options are described at https://lancedb.com/docs/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>;
|
||||
/**
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.28.0-beta.10",
|
||||
"version": "0.28.0-beta.4",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.28.0-beta.10",
|
||||
"version": "0.28.0-beta.4",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.28.0-beta.10",
|
||||
"version": "0.28.0-beta.4",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.28.0-beta.10",
|
||||
"version": "0.28.0-beta.4",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.28.0-beta.10",
|
||||
"version": "0.28.0-beta.4",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.28.0-beta.10",
|
||||
"version": "0.28.0-beta.4",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.28.0-beta.10",
|
||||
"version": "0.28.0-beta.4",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.28.0-beta.10",
|
||||
"version": "0.28.0-beta.3",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.28.0-beta.10",
|
||||
"version": "0.28.0-beta.3",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.28.0-beta.10",
|
||||
"version": "0.28.0-beta.4",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -67,12 +67,6 @@ impl Connection {
|
||||
builder = builder.storage_option(key, value);
|
||||
}
|
||||
}
|
||||
if let Some(manifest_enabled) = options.manifest_enabled {
|
||||
builder = builder.manifest_enabled(manifest_enabled);
|
||||
}
|
||||
if let Some(namespace_client_properties) = options.namespace_client_properties {
|
||||
builder = builder.namespace_client_properties(namespace_client_properties);
|
||||
}
|
||||
|
||||
// Create client config, optionally with header provider
|
||||
let client_config = options.client_config.unwrap_or_default();
|
||||
|
||||
@@ -35,15 +35,8 @@ pub struct ConnectionOptions {
|
||||
pub read_consistency_interval: Option<f64>,
|
||||
/// (For LanceDB OSS only): configuration for object storage.
|
||||
///
|
||||
/// The available options are described at https://docs.lancedb.com/storage/
|
||||
/// The available options are described at https://lancedb.com/docs/storage/
|
||||
pub storage_options: Option<HashMap<String, String>>,
|
||||
/// (For LanceDB OSS only): use directory namespace manifests as the source
|
||||
/// of truth for table metadata. Existing directory-listed root tables are
|
||||
/// migrated into the manifest on access.
|
||||
pub manifest_enabled: Option<bool>,
|
||||
/// (For LanceDB OSS only): extra properties for the backing namespace
|
||||
/// client used by manifest-enabled native connections.
|
||||
pub namespace_client_properties: Option<HashMap<String, String>>,
|
||||
/// (For LanceDB OSS only): the session to use for this connection. Holds
|
||||
/// shared caches and other session-specific state.
|
||||
pub session: Option<session::Session>,
|
||||
|
||||
@@ -18,7 +18,6 @@ type RerankHybridFn = ThreadsafeFunction<
|
||||
RerankHybridCallbackArgs,
|
||||
Status,
|
||||
false,
|
||||
true,
|
||||
>;
|
||||
|
||||
/// Reranker implementation that "wraps" a NodeJS Reranker implementation.
|
||||
@@ -33,10 +32,7 @@ impl Reranker {
|
||||
pub fn new(
|
||||
rerank_hybrid: Function<RerankHybridCallbackArgs, Promise<Buffer>>,
|
||||
) -> napi::Result<Self> {
|
||||
let rerank_hybrid = rerank_hybrid
|
||||
.build_threadsafe_function()
|
||||
.weak::<true>()
|
||||
.build()?;
|
||||
let rerank_hybrid = rerank_hybrid.build_threadsafe_function().build()?;
|
||||
Ok(Self { rerank_hybrid })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.31.0-beta.10"
|
||||
current_version = "0.31.0-beta.4"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.31.0-beta.10"
|
||||
publish = false
|
||||
version = "0.31.0-beta.4"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -183,6 +183,7 @@
|
||||
| stack-data | 0.6.3 | MIT License | http://github.com/alexmojaki/stack_data |
|
||||
| sympy | 1.14.0 | BSD License | https://sympy.org |
|
||||
| tabulate | 0.9.0 | MIT License | https://github.com/astanin/python-tabulate |
|
||||
| tantivy | 0.25.1 | UNKNOWN | UNKNOWN |
|
||||
| threadpoolctl | 3.6.0 | BSD License | https://github.com/joblib/threadpoolctl |
|
||||
| timm | 1.0.24 | Apache Software License | https://github.com/huggingface/pytorch-image-models |
|
||||
| tinycss2 | 1.4.0 | BSD License | https://www.courtbouillon.org/tinycss2 |
|
||||
|
||||
@@ -57,6 +57,7 @@ tests = [
|
||||
"duckdb>=0.9.0",
|
||||
"pytz>=2023.3",
|
||||
"polars>=0.19, <=1.3.0",
|
||||
"tantivy>=0.20.0",
|
||||
"pyarrow-stubs>=16.0",
|
||||
"pylance>=5.0.0b5",
|
||||
"requests>=2.31.0",
|
||||
|
||||
@@ -73,7 +73,6 @@ def connect(
|
||||
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_impl: Optional[str] = None,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
namespace_client_pushdown_operations: Optional[List[str]] = None,
|
||||
@@ -111,11 +110,7 @@ def connect(
|
||||
default configuration is used.
|
||||
storage_options: dict, optional
|
||||
Additional options for the storage backend. See available options at
|
||||
<https://docs.lancedb.com/storage/>
|
||||
manifest_enabled : bool, default False
|
||||
When true for local/native connections, use directory namespace
|
||||
manifests as the source of truth for table metadata. Existing
|
||||
directory-listed root tables are migrated into the manifest on access.
|
||||
<https://lancedb.com/docs/storage/>
|
||||
session: Session, optional
|
||||
(For LanceDB OSS only)
|
||||
A session to use for this connection. Sessions allow you to configure
|
||||
@@ -163,11 +158,11 @@ def connect(
|
||||
conn : DBConnection
|
||||
A connection to a LanceDB database.
|
||||
"""
|
||||
if namespace_client_impl is not None:
|
||||
if namespace_client_properties is None:
|
||||
if namespace_client_impl is not None or namespace_client_properties is not None:
|
||||
if namespace_client_impl is None or namespace_client_properties is None:
|
||||
raise ValueError(
|
||||
"namespace_client_properties must be provided when "
|
||||
"namespace_client_impl is set"
|
||||
"Both namespace_client_impl and "
|
||||
"namespace_client_properties must be provided"
|
||||
)
|
||||
if kwargs:
|
||||
raise ValueError(f"Unknown keyword arguments: {kwargs}")
|
||||
@@ -180,12 +175,6 @@ def connect(
|
||||
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
|
||||
)
|
||||
|
||||
if namespace_client_properties is not None and not manifest_enabled:
|
||||
raise ValueError(
|
||||
"namespace_client_impl must be provided when using "
|
||||
"namespace_client_properties unless manifest_enabled=True"
|
||||
)
|
||||
|
||||
if namespace_client_pushdown_operations is not None:
|
||||
raise ValueError(
|
||||
"namespace_client_pushdown_operations is only valid when "
|
||||
@@ -223,92 +212,9 @@ def connect(
|
||||
read_consistency_interval=read_consistency_interval,
|
||||
storage_options=storage_options,
|
||||
session=session,
|
||||
manifest_enabled=manifest_enabled,
|
||||
namespace_client_properties=namespace_client_properties,
|
||||
)
|
||||
|
||||
|
||||
WORKER_PROPERTY_PREFIX = "_lancedb_worker_"
|
||||
|
||||
|
||||
def _apply_worker_overrides(props: dict[str, str]) -> dict[str, str]:
|
||||
"""Apply worker property overrides.
|
||||
|
||||
Any key starting with ``_lancedb_worker_`` is extracted, the prefix
|
||||
is stripped, and the resulting key-value pair is put back into the
|
||||
map (overriding the existing value if present). The original
|
||||
prefixed key is removed.
|
||||
"""
|
||||
worker_keys = [k for k in props if k.startswith(WORKER_PROPERTY_PREFIX)]
|
||||
if not worker_keys:
|
||||
return props
|
||||
result = dict(props)
|
||||
for key in worker_keys:
|
||||
value = result.pop(key)
|
||||
real_key = key[len(WORKER_PROPERTY_PREFIX) :]
|
||||
result[real_key] = value
|
||||
return result
|
||||
|
||||
|
||||
def deserialize_conn(
|
||||
data: str,
|
||||
*,
|
||||
for_worker: bool = False,
|
||||
) -> DBConnection:
|
||||
"""Reconstruct a DBConnection from a serialized string.
|
||||
|
||||
The string must have been produced by
|
||||
:meth:`DBConnection.serialize`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : str
|
||||
String produced by ``serialize()``.
|
||||
for_worker : bool, default False
|
||||
When ``True``, any namespace client property whose key starts
|
||||
with ``_lancedb_worker_`` has that prefix stripped and the
|
||||
value overrides the corresponding property. For example,
|
||||
``_lancedb_worker_uri`` replaces ``uri``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DBConnection
|
||||
A new connection matching the serialized state.
|
||||
"""
|
||||
import json
|
||||
|
||||
parsed = json.loads(data)
|
||||
connection_type = parsed.get("connection_type")
|
||||
|
||||
rci_secs = parsed.get("read_consistency_interval_seconds")
|
||||
rci = timedelta(seconds=rci_secs) if rci_secs is not None else None
|
||||
storage_options = parsed.get("storage_options")
|
||||
|
||||
if connection_type == "namespace":
|
||||
props = dict(parsed.get("namespace_client_properties") or {})
|
||||
if for_worker:
|
||||
props = _apply_worker_overrides(props)
|
||||
return connect_namespace(
|
||||
namespace_client_impl=parsed["namespace_client_impl"],
|
||||
namespace_client_properties=props,
|
||||
read_consistency_interval=rci,
|
||||
storage_options=storage_options,
|
||||
namespace_client_pushdown_operations=parsed.get(
|
||||
"namespace_client_pushdown_operations"
|
||||
),
|
||||
)
|
||||
elif connection_type == "local":
|
||||
return LanceDBConnection(
|
||||
parsed["uri"],
|
||||
read_consistency_interval=rci,
|
||||
storage_options=storage_options,
|
||||
manifest_enabled=parsed.get("manifest_enabled", False),
|
||||
namespace_client_properties=parsed.get("namespace_client_properties"),
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown connection_type: {connection_type}")
|
||||
|
||||
|
||||
async def connect_async(
|
||||
uri: URI,
|
||||
*,
|
||||
@@ -319,8 +225,6 @@ async def connect_async(
|
||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
) -> AsyncConnection:
|
||||
"""Connect to a LanceDB database.
|
||||
|
||||
@@ -353,20 +257,13 @@ async def connect_async(
|
||||
default configuration is used.
|
||||
storage_options: dict, optional
|
||||
Additional options for the storage backend. See available options at
|
||||
<https://docs.lancedb.com/storage/>
|
||||
<https://lancedb.com/docs/storage/>
|
||||
session: Session, optional
|
||||
(For LanceDB OSS only)
|
||||
A session to use for this connection. Sessions allow you to configure
|
||||
cache sizes for index and metadata caches, which can significantly
|
||||
impact memory use and performance. They can also be re-used across
|
||||
multiple connections to share the same cache state.
|
||||
manifest_enabled : bool, default False
|
||||
When true for local/native connections, use directory namespace
|
||||
manifests as the source of truth for table metadata. Existing
|
||||
directory-listed root tables are migrated into the manifest on access.
|
||||
namespace_client_properties : dict, optional
|
||||
Additional directory namespace client properties to use with
|
||||
``manifest_enabled=True``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
@@ -409,8 +306,6 @@ async def connect_async(
|
||||
client_config,
|
||||
storage_options,
|
||||
session,
|
||||
manifest_enabled,
|
||||
namespace_client_properties,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -242,8 +242,6 @@ async def connect(
|
||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
|
||||
storage_options: Optional[Dict[str, str]],
|
||||
session: Optional[Session],
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
) -> Connection: ...
|
||||
|
||||
class RecordBatchStream:
|
||||
|
||||
@@ -96,7 +96,7 @@ def data_to_reader(
|
||||
f"Unknown data type {type(data)}. "
|
||||
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
|
||||
"pyarrow Table/RecordBatch, or Pydantic models. "
|
||||
"See https://docs.lancedb.com/tables/ for examples."
|
||||
"See https://lancedb.com/docs/tables/ for examples."
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -282,7 +282,7 @@ class DBConnection(EnforceOverrides):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://docs.lancedb.com/storage/>
|
||||
<https://lancedb.com/docs/storage/>
|
||||
|
||||
To enable stable row IDs (row IDs remain stable after compaction,
|
||||
update, delete, and merges), set `new_table_enable_stable_row_ids`
|
||||
@@ -433,7 +433,7 @@ class DBConnection(EnforceOverrides):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://docs.lancedb.com/storage/>
|
||||
<https://lancedb.com/docs/storage/>
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -529,19 +529,6 @@ class DBConnection(EnforceOverrides):
|
||||
"namespace_client is not supported for this connection type"
|
||||
)
|
||||
|
||||
def serialize(self) -> str:
|
||||
"""Serialize this connection for reconstruction.
|
||||
|
||||
The returned string can be passed to :func:`lancedb.deserialize_conn`
|
||||
to recreate an equivalent connection, e.g. in a remote worker.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Serialized representation of this connection.
|
||||
"""
|
||||
raise NotImplementedError("serialize is not supported for this connection type")
|
||||
|
||||
|
||||
class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
@@ -590,16 +577,10 @@ class LanceDBConnection(DBConnection):
|
||||
read_consistency_interval: Optional[timedelta] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
_inner: Optional[LanceDbConnection] = None,
|
||||
):
|
||||
self.storage_options = storage_options
|
||||
self._manifest_enabled = manifest_enabled
|
||||
self._namespace_client_properties = namespace_client_properties
|
||||
if _inner is not None:
|
||||
self._conn = _inner
|
||||
self._cached_namespace_client = None
|
||||
return
|
||||
|
||||
if not isinstance(uri, Path):
|
||||
@@ -638,8 +619,6 @@ class LanceDBConnection(DBConnection):
|
||||
None,
|
||||
storage_options,
|
||||
session,
|
||||
manifest_enabled,
|
||||
namespace_client_properties,
|
||||
)
|
||||
|
||||
# TODO: It would be nice if we didn't store self.storage_options but it is
|
||||
@@ -647,8 +626,8 @@ class LanceDBConnection(DBConnection):
|
||||
# work because some paths like LanceDBConnection.from_inner will lose the
|
||||
# storage_options. Also, this class really shouldn't be holding any state
|
||||
# beyond _conn.
|
||||
self.storage_options = storage_options
|
||||
self._conn = AsyncConnection(LOOP.run(do_connect()))
|
||||
self._cached_namespace_client: Optional[LanceNamespace] = None
|
||||
|
||||
@property
|
||||
def read_consistency_interval(self) -> Optional[timedelta]:
|
||||
@@ -673,24 +652,6 @@ class LanceDBConnection(DBConnection):
|
||||
val += ")"
|
||||
return val
|
||||
|
||||
@override
|
||||
def serialize(self) -> str:
|
||||
import json
|
||||
|
||||
rci = self.read_consistency_interval
|
||||
return json.dumps(
|
||||
{
|
||||
"connection_type": "local",
|
||||
"uri": self.uri,
|
||||
"storage_options": self.storage_options,
|
||||
"manifest_enabled": self._manifest_enabled,
|
||||
"namespace_client_properties": self._namespace_client_properties,
|
||||
"read_consistency_interval_seconds": (
|
||||
rci.total_seconds() if rci else None
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
async def _async_get_table_names(self, start_after: Optional[str], limit: int):
|
||||
conn = AsyncConnection(await lancedb_connect(self.uri))
|
||||
return await conn.table_names(start_after=start_after, limit=limit)
|
||||
@@ -726,10 +687,10 @@ class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
return self._namespace_conn().list_namespaces(
|
||||
namespace_path=namespace_path,
|
||||
page_token=page_token,
|
||||
limit=limit,
|
||||
return LOOP.run(
|
||||
self._conn.list_namespaces(
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
@@ -739,10 +700,27 @@ class LanceDBConnection(DBConnection):
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
return self._namespace_conn().create_namespace(
|
||||
namespace_path=namespace_path,
|
||||
mode=mode,
|
||||
properties=properties,
|
||||
"""Create a new namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
or "overwrite" (replace if exists). Case insensitive.
|
||||
properties: Dict[str, str], optional
|
||||
Properties to set on the namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing the properties of the created namespace.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.create_namespace(
|
||||
namespace_path=namespace_path, mode=mode, properties=properties
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
@@ -752,19 +730,46 @@ class LanceDBConnection(DBConnection):
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
return self._namespace_conn().drop_namespace(
|
||||
namespace_path=namespace_path,
|
||||
mode=mode,
|
||||
behavior=behavior,
|
||||
"""Drop a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior: str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.drop_namespace(
|
||||
namespace_path=namespace_path, mode=mode, behavior=behavior
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def describe_namespace(
|
||||
self, namespace_path: List[str]
|
||||
) -> DescribeNamespaceResponse:
|
||||
return self._namespace_conn().describe_namespace(
|
||||
namespace_path=namespace_path,
|
||||
)
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
return LOOP.run(self._conn.describe_namespace(namespace_path=namespace_path))
|
||||
|
||||
@override
|
||||
def list_tables(
|
||||
@@ -793,12 +798,6 @@ class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if namespace_path:
|
||||
return self._namespace_conn().list_tables(
|
||||
namespace_path=namespace_path,
|
||||
page_token=page_token,
|
||||
limit=limit,
|
||||
)
|
||||
return LOOP.run(
|
||||
self._conn.list_tables(
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
@@ -887,22 +886,6 @@ class LanceDBConnection(DBConnection):
|
||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||
validate_table_name(name)
|
||||
|
||||
if namespace_path:
|
||||
return self._namespace_conn().create_table(
|
||||
name,
|
||||
data=data,
|
||||
schema=schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
embedding_functions=embedding_functions,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
data_storage_version=data_storage_version,
|
||||
enable_v2_manifest_paths=enable_v2_manifest_paths,
|
||||
)
|
||||
|
||||
tbl = LanceTable.create(
|
||||
self,
|
||||
name,
|
||||
@@ -918,19 +901,6 @@ class LanceDBConnection(DBConnection):
|
||||
)
|
||||
return tbl
|
||||
|
||||
def _namespace_conn(self) -> DBConnection:
|
||||
"""Return a LanceNamespaceDBConnection backed by this connection's
|
||||
directory namespace. Used to delegate child-namespace operations."""
|
||||
from lancedb.namespace import LanceNamespaceDBConnection
|
||||
|
||||
return LanceNamespaceDBConnection(
|
||||
self.namespace_client(),
|
||||
read_consistency_interval=self.read_consistency_interval,
|
||||
storage_options=self.storage_options,
|
||||
namespace_client_impl=None,
|
||||
namespace_client_properties=None,
|
||||
)
|
||||
|
||||
@override
|
||||
def open_table(
|
||||
self,
|
||||
@@ -947,8 +917,7 @@ class LanceDBConnection(DBConnection):
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace_path: List[str], optional
|
||||
The namespace to open the table from. When non-empty, the
|
||||
table is resolved through the directory namespace client.
|
||||
The namespace to open the table from.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -967,14 +936,6 @@ class LanceDBConnection(DBConnection):
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
if namespace_path:
|
||||
return self._namespace_conn().open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
)
|
||||
|
||||
return LanceTable.open(
|
||||
self,
|
||||
name,
|
||||
@@ -1059,9 +1020,6 @@ class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if namespace_path:
|
||||
self._namespace_conn().drop_table(name, namespace_path=namespace_path)
|
||||
return
|
||||
LOOP.run(
|
||||
self._conn.drop_table(
|
||||
name, namespace_path=namespace_path, ignore_missing=ignore_missing
|
||||
@@ -1113,17 +1071,14 @@ class LanceDBConnection(DBConnection):
|
||||
"""Get the equivalent namespace client for this connection.
|
||||
|
||||
Returns a DirectoryNamespace pointing to the same root with the
|
||||
same storage options. The result is cached for the lifetime of
|
||||
this connection.
|
||||
same storage options.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceNamespace
|
||||
The namespace client for this connection.
|
||||
"""
|
||||
if self._cached_namespace_client is None:
|
||||
self._cached_namespace_client = LOOP.run(self._conn.namespace_client())
|
||||
return self._cached_namespace_client
|
||||
return LOOP.run(self._conn.namespace_client())
|
||||
|
||||
@deprecation.deprecated(
|
||||
deprecated_in="0.15.1",
|
||||
@@ -1398,7 +1353,6 @@ class AsyncConnection(object):
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
location: Optional[str] = None,
|
||||
namespace_client: Optional[Any] = None,
|
||||
) -> AsyncTable:
|
||||
"""Create an [AsyncTable][lancedb.table.AsyncTable] in the database.
|
||||
|
||||
@@ -1443,7 +1397,7 @@ class AsyncConnection(object):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://docs.lancedb.com/storage/>
|
||||
<https://lancedb.com/docs/storage/>
|
||||
|
||||
To enable stable row IDs (row IDs remain stable after compaction,
|
||||
update, delete, and merges), set `new_table_enable_stable_row_ids`
|
||||
@@ -1596,7 +1550,6 @@ class AsyncConnection(object):
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
location=location,
|
||||
namespace_client=namespace_client,
|
||||
)
|
||||
else:
|
||||
data = data_to_reader(data, schema)
|
||||
@@ -1607,7 +1560,6 @@ class AsyncConnection(object):
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
location=location,
|
||||
namespace_client=namespace_client,
|
||||
)
|
||||
|
||||
return AsyncTable(new_table)
|
||||
@@ -1636,7 +1588,7 @@ class AsyncConnection(object):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://docs.lancedb.com/storage/>
|
||||
<https://lancedb.com/docs/storage/>
|
||||
index_cache_size: int, default 256
|
||||
**Deprecated**: Use session-level cache configuration instead.
|
||||
Create a Session with custom cache sizes and pass it to lancedb.connect().
|
||||
|
||||
201
python/python/lancedb/fts.py
Normal file
201
python/python/lancedb/fts.py
Normal file
@@ -0,0 +1,201 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
"""Full text search index using tantivy-py"""
|
||||
|
||||
import os
|
||||
from typing import List, Tuple, Optional
|
||||
|
||||
import pyarrow as pa
|
||||
|
||||
try:
|
||||
import tantivy
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install tantivy-py `pip install tantivy` to use the full text search feature." # noqa: E501
|
||||
)
|
||||
|
||||
from .table import LanceTable
|
||||
|
||||
|
||||
def create_index(
|
||||
index_path: str,
|
||||
text_fields: List[str],
|
||||
ordering_fields: Optional[List[str]] = None,
|
||||
tokenizer_name: str = "default",
|
||||
) -> tantivy.Index:
|
||||
"""
|
||||
Create a new Index (not populated)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
index_path : str
|
||||
Path to the index directory
|
||||
text_fields : List[str]
|
||||
List of text fields to index
|
||||
ordering_fields: List[str]
|
||||
List of unsigned type fields to order by at search time
|
||||
tokenizer_name : str, default "default"
|
||||
The tokenizer to use
|
||||
|
||||
Returns
|
||||
-------
|
||||
index : tantivy.Index
|
||||
The index object (not yet populated)
|
||||
"""
|
||||
if ordering_fields is None:
|
||||
ordering_fields = []
|
||||
# Declaring our schema.
|
||||
schema_builder = tantivy.SchemaBuilder()
|
||||
# special field that we'll populate with row_id
|
||||
schema_builder.add_integer_field("doc_id", stored=True)
|
||||
# data fields
|
||||
for name in text_fields:
|
||||
schema_builder.add_text_field(name, stored=True, tokenizer_name=tokenizer_name)
|
||||
if ordering_fields:
|
||||
for name in ordering_fields:
|
||||
schema_builder.add_unsigned_field(name, fast=True)
|
||||
schema = schema_builder.build()
|
||||
os.makedirs(index_path, exist_ok=True)
|
||||
index = tantivy.Index(schema, path=index_path)
|
||||
return index
|
||||
|
||||
|
||||
def populate_index(
|
||||
index: tantivy.Index,
|
||||
table: LanceTable,
|
||||
fields: List[str],
|
||||
writer_heap_size: Optional[int] = None,
|
||||
ordering_fields: Optional[List[str]] = None,
|
||||
) -> int:
|
||||
"""
|
||||
Populate an index with data from a LanceTable
|
||||
|
||||
Parameters
|
||||
----------
|
||||
index : tantivy.Index
|
||||
The index object
|
||||
table : LanceTable
|
||||
The table to index
|
||||
fields : List[str]
|
||||
List of fields to index
|
||||
writer_heap_size : int
|
||||
The writer heap size in bytes, defaults to 1GB
|
||||
|
||||
Returns
|
||||
-------
|
||||
int
|
||||
The number of rows indexed
|
||||
"""
|
||||
if ordering_fields is None:
|
||||
ordering_fields = []
|
||||
writer_heap_size = writer_heap_size or 1024 * 1024 * 1024
|
||||
# first check the fields exist and are string or large string type
|
||||
nested = []
|
||||
|
||||
for name in fields:
|
||||
try:
|
||||
f = table.schema.field(name) # raises KeyError if not found
|
||||
except KeyError:
|
||||
f = resolve_path(table.schema, name)
|
||||
nested.append(name)
|
||||
|
||||
if not pa.types.is_string(f.type) and not pa.types.is_large_string(f.type):
|
||||
raise TypeError(f"Field {name} is not a string type")
|
||||
|
||||
# create a tantivy writer
|
||||
writer = index.writer(heap_size=writer_heap_size)
|
||||
# write data into index
|
||||
dataset = table.to_lance()
|
||||
row_id = 0
|
||||
|
||||
max_nested_level = 0
|
||||
if len(nested) > 0:
|
||||
max_nested_level = max([len(name.split(".")) for name in nested])
|
||||
|
||||
for b in dataset.to_batches(columns=fields + ordering_fields):
|
||||
if max_nested_level > 0:
|
||||
b = pa.Table.from_batches([b])
|
||||
for _ in range(max_nested_level - 1):
|
||||
b = b.flatten()
|
||||
for i in range(b.num_rows):
|
||||
doc = tantivy.Document()
|
||||
for name in fields:
|
||||
value = b[name][i].as_py()
|
||||
if value is not None:
|
||||
doc.add_text(name, value)
|
||||
for name in ordering_fields:
|
||||
value = b[name][i].as_py()
|
||||
if value is not None:
|
||||
doc.add_unsigned(name, value)
|
||||
if not doc.is_empty:
|
||||
doc.add_integer("doc_id", row_id)
|
||||
writer.add_document(doc)
|
||||
row_id += 1
|
||||
# commit changes
|
||||
writer.commit()
|
||||
return row_id
|
||||
|
||||
|
||||
def resolve_path(schema, field_name: str) -> pa.Field:
|
||||
"""
|
||||
Resolve a nested field path to a list of field names
|
||||
|
||||
Parameters
|
||||
----------
|
||||
field_name : str
|
||||
The field name to resolve
|
||||
|
||||
Returns
|
||||
-------
|
||||
List[str]
|
||||
The resolved path
|
||||
"""
|
||||
path = field_name.split(".")
|
||||
field = schema.field(path.pop(0))
|
||||
for segment in path:
|
||||
if pa.types.is_struct(field.type):
|
||||
field = field.type.field(segment)
|
||||
else:
|
||||
raise KeyError(f"field {field_name} not found in schema {schema}")
|
||||
return field
|
||||
|
||||
|
||||
def search_index(
|
||||
index: tantivy.Index, query: str, limit: int = 10, ordering_field=None
|
||||
) -> Tuple[Tuple[int], Tuple[float]]:
|
||||
"""
|
||||
Search an index for a query
|
||||
|
||||
Parameters
|
||||
----------
|
||||
index : tantivy.Index
|
||||
The index object
|
||||
query : str
|
||||
The query string
|
||||
limit : int
|
||||
The maximum number of results to return
|
||||
|
||||
Returns
|
||||
-------
|
||||
ids_and_score: list[tuple[int], tuple[float]]
|
||||
A tuple of two tuples, the first containing the document ids
|
||||
and the second containing the scores
|
||||
"""
|
||||
searcher = index.searcher()
|
||||
query = index.parse_query(query)
|
||||
# get top results
|
||||
if ordering_field:
|
||||
results = searcher.search(query, limit, order_by_field=ordering_field)
|
||||
else:
|
||||
results = searcher.search(query, limit)
|
||||
if results.count == 0:
|
||||
return tuple(), tuple()
|
||||
return tuple(
|
||||
zip(
|
||||
*[
|
||||
(searcher.doc(doc_address)["doc_id"][0], score)
|
||||
for score, doc_address in results.hits
|
||||
]
|
||||
)
|
||||
)
|
||||
@@ -10,6 +10,7 @@ through a namespace abstraction.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
|
||||
|
||||
@@ -24,24 +25,7 @@ if TYPE_CHECKING:
|
||||
from datetime import timedelta
|
||||
import pyarrow as pa
|
||||
|
||||
from lance_namespace_urllib3_client.models.json_arrow_data_type import JsonArrowDataType
|
||||
from lance_namespace_urllib3_client.models.json_arrow_field import JsonArrowField
|
||||
from lance_namespace_urllib3_client.models.json_arrow_schema import JsonArrowSchema
|
||||
from lance_namespace_urllib3_client.models.query_table_request import QueryTableRequest
|
||||
from lance_namespace_urllib3_client.models.query_table_request_columns import (
|
||||
QueryTableRequestColumns,
|
||||
)
|
||||
from lance_namespace_urllib3_client.models.query_table_request_full_text_query import (
|
||||
QueryTableRequestFullTextQuery,
|
||||
)
|
||||
from lance_namespace_urllib3_client.models.query_table_request_vector import (
|
||||
QueryTableRequestVector,
|
||||
)
|
||||
from lance_namespace_urllib3_client.models.string_fts_query import StringFtsQuery
|
||||
from lance_namespace.errors import TableNotFoundError
|
||||
from lancedb._lancedb import connect_namespace_client as _connect_namespace_client
|
||||
from lancedb.background_loop import LOOP
|
||||
from lancedb.db import AsyncConnection, DBConnection
|
||||
from lancedb.db import DBConnection, LanceDBConnection
|
||||
from lancedb.namespace_utils import (
|
||||
_normalize_create_namespace_mode,
|
||||
_normalize_drop_namespace_mode,
|
||||
@@ -56,11 +40,14 @@ from lance_namespace import (
|
||||
ListNamespacesResponse,
|
||||
ListTablesResponse,
|
||||
ListTablesRequest,
|
||||
DescribeTableRequest,
|
||||
DescribeNamespaceRequest,
|
||||
DropTableRequest,
|
||||
ListNamespacesRequest,
|
||||
CreateNamespaceRequest,
|
||||
DropNamespaceRequest,
|
||||
DeclareTableRequest,
|
||||
CreateTableRequest,
|
||||
)
|
||||
from lancedb.table import AsyncTable, LanceTable, Table
|
||||
from lancedb.util import validate_table_name
|
||||
@@ -69,6 +56,21 @@ from lancedb.pydantic import LanceModel
|
||||
from lancedb.embeddings import EmbeddingFunctionConfig
|
||||
from ._lancedb import Session
|
||||
|
||||
from lance_namespace_urllib3_client.models.json_arrow_schema import JsonArrowSchema
|
||||
from lance_namespace_urllib3_client.models.json_arrow_field import JsonArrowField
|
||||
from lance_namespace_urllib3_client.models.json_arrow_data_type import JsonArrowDataType
|
||||
from lance_namespace_urllib3_client.models.query_table_request import QueryTableRequest
|
||||
from lance_namespace_urllib3_client.models.query_table_request_vector import (
|
||||
QueryTableRequestVector,
|
||||
)
|
||||
from lance_namespace_urllib3_client.models.query_table_request_columns import (
|
||||
QueryTableRequestColumns,
|
||||
)
|
||||
from lance_namespace_urllib3_client.models.query_table_request_full_text_query import (
|
||||
QueryTableRequestFullTextQuery,
|
||||
)
|
||||
from lance_namespace_urllib3_client.models.string_fts_query import StringFtsQuery
|
||||
|
||||
|
||||
def _query_to_namespace_request(
|
||||
table_id: List[str],
|
||||
@@ -379,8 +381,6 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
namespace_client_pushdown_operations: Optional[List[str]] = None,
|
||||
namespace_client_impl: Optional[str] = None,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
Initialize a namespace-based LanceDB connection.
|
||||
@@ -406,60 +406,12 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
namespace.create_table() instead of using declare_table + local write.
|
||||
|
||||
Default is None (no pushdown, all operations run locally).
|
||||
namespace_client_impl : Optional[str]
|
||||
The namespace implementation name used to create this connection.
|
||||
Stored for serialization purposes.
|
||||
namespace_client_properties : Optional[Dict[str, str]]
|
||||
The namespace properties used to create this connection.
|
||||
Stored for serialization purposes.
|
||||
"""
|
||||
self._namespace_client = namespace_client
|
||||
self.read_consistency_interval = read_consistency_interval
|
||||
self.storage_options = storage_options or {}
|
||||
self.session = session
|
||||
self._namespace_client_pushdown_operations = set(
|
||||
namespace_client_pushdown_operations or []
|
||||
)
|
||||
self._namespace_client_impl = namespace_client_impl
|
||||
self._namespace_client_properties = namespace_client_properties
|
||||
self._inner = AsyncConnection(
|
||||
_connect_namespace_client(
|
||||
namespace_client,
|
||||
read_consistency_interval=(
|
||||
read_consistency_interval.total_seconds()
|
||||
if read_consistency_interval is not None
|
||||
else None
|
||||
),
|
||||
storage_options=self.storage_options or None,
|
||||
session=session,
|
||||
namespace_client_pushdown_operations=(
|
||||
list(self._namespace_client_pushdown_operations)
|
||||
),
|
||||
namespace_client_impl=namespace_client_impl,
|
||||
namespace_client_properties=namespace_client_properties,
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def serialize(self) -> str:
|
||||
import json
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"connection_type": "namespace",
|
||||
"namespace_client_impl": self._namespace_client_impl,
|
||||
"namespace_client_properties": self._namespace_client_properties,
|
||||
"namespace_client_pushdown_operations": sorted(
|
||||
self._namespace_client_pushdown_operations
|
||||
),
|
||||
"storage_options": self.storage_options or None,
|
||||
"read_consistency_interval_seconds": (
|
||||
self.read_consistency_interval.total_seconds()
|
||||
if self.read_consistency_interval
|
||||
else None
|
||||
),
|
||||
}
|
||||
)
|
||||
self._pushdown_operations = set(namespace_client_pushdown_operations or [])
|
||||
|
||||
@override
|
||||
def table_names(
|
||||
@@ -512,10 +464,13 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
if mode.lower() not in ["create", "overwrite"]:
|
||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||
validate_table_name(name)
|
||||
async_table = LOOP.run(
|
||||
self._inner.create_table(
|
||||
name,
|
||||
data,
|
||||
|
||||
table_id = namespace_path + [name]
|
||||
|
||||
if "CreateTable" in self._pushdown_operations:
|
||||
return self._create_table_server_side(
|
||||
name=name,
|
||||
data=data,
|
||||
schema=schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
@@ -525,15 +480,127 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
)
|
||||
|
||||
# Local create path: declare_table + local write
|
||||
# Step 1: Get the table location and storage options from namespace
|
||||
# In overwrite mode, if table exists, use describe_table to get
|
||||
# existing location. Otherwise, call create_empty_table to reserve
|
||||
# a new location
|
||||
location = None
|
||||
namespace_storage_options = None
|
||||
if mode.lower() == "overwrite":
|
||||
# Try to describe the table first to see if it exists
|
||||
try:
|
||||
describe_request = DescribeTableRequest(id=table_id)
|
||||
describe_response = self._namespace_client.describe_table(
|
||||
describe_request
|
||||
)
|
||||
location = describe_response.location
|
||||
namespace_storage_options = describe_response.storage_options
|
||||
except Exception:
|
||||
# Table doesn't exist, will create a new one below
|
||||
pass
|
||||
|
||||
if location is None:
|
||||
# Table doesn't exist or mode is "create", reserve a new location
|
||||
declare_request = DeclareTableRequest(
|
||||
id=table_id,
|
||||
location=None,
|
||||
properties=self.storage_options if self.storage_options else None,
|
||||
)
|
||||
declare_response = self._namespace_client.declare_table(declare_request)
|
||||
|
||||
if not declare_response.location:
|
||||
raise ValueError(
|
||||
"Table location is missing from declare_table response"
|
||||
)
|
||||
|
||||
location = declare_response.location
|
||||
namespace_storage_options = declare_response.storage_options
|
||||
|
||||
# Merge storage options: self.storage_options < user options < namespace options
|
||||
merged_storage_options = dict(self.storage_options)
|
||||
if storage_options:
|
||||
merged_storage_options.update(storage_options)
|
||||
if namespace_storage_options:
|
||||
merged_storage_options.update(namespace_storage_options)
|
||||
|
||||
# Step 2: Create table using LanceTable.create with the location
|
||||
# We need a temporary connection for the LanceTable.create method
|
||||
temp_conn = LanceDBConnection(
|
||||
location, # Use the actual location as the connection URI
|
||||
read_consistency_interval=self.read_consistency_interval,
|
||||
storage_options=merged_storage_options,
|
||||
session=self.session,
|
||||
)
|
||||
|
||||
return LanceTable(
|
||||
self,
|
||||
# Note: storage_options_provider is auto-created in Rust from namespace_client
|
||||
tbl = LanceTable.create(
|
||||
temp_conn,
|
||||
name,
|
||||
data,
|
||||
schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
embedding_functions=embedding_functions,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=merged_storage_options,
|
||||
location=location,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
)
|
||||
|
||||
return tbl
|
||||
|
||||
def _create_table_server_side(
|
||||
self,
|
||||
name: str,
|
||||
data: Optional[DATA],
|
||||
schema: Optional[Union[pa.Schema, LanceModel]],
|
||||
mode: str,
|
||||
exist_ok: bool,
|
||||
on_bad_vectors: str,
|
||||
fill_value: float,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]],
|
||||
namespace_path: Optional[List[str]],
|
||||
storage_options: Optional[Dict[str, str]],
|
||||
) -> Table:
|
||||
"""Create a table using server-side namespace.create_table()."""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
table_id = namespace_path + [name]
|
||||
|
||||
arrow_ipc_bytes = _data_to_arrow_ipc(
|
||||
data=data,
|
||||
schema=schema,
|
||||
embedding_functions=embedding_functions,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
)
|
||||
|
||||
request = CreateTableRequest(
|
||||
id=table_id,
|
||||
mode=_normalize_create_table_mode(mode),
|
||||
properties=self.storage_options if self.storage_options else None,
|
||||
)
|
||||
|
||||
try:
|
||||
self._namespace_client.create_table(request, arrow_ipc_bytes)
|
||||
except Exception as e:
|
||||
if exist_ok and "already exists" in str(e).lower():
|
||||
return self.open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
)
|
||||
raise
|
||||
|
||||
return self.open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
_async=async_table,
|
||||
storage_options=storage_options,
|
||||
)
|
||||
|
||||
@override
|
||||
@@ -547,28 +614,30 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
) -> Table:
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
try:
|
||||
async_table = LOOP.run(
|
||||
self._inner.open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
)
|
||||
)
|
||||
except RuntimeError as e:
|
||||
if "Table not found" in str(e):
|
||||
table_id = namespace_path + [name]
|
||||
raise TableNotFoundError(f"Table not found: {'$'.join(table_id)}")
|
||||
raise
|
||||
table_id = namespace_path + [name]
|
||||
request = DescribeTableRequest(id=table_id)
|
||||
response = self._namespace_client.describe_table(request)
|
||||
|
||||
return LanceTable(
|
||||
self,
|
||||
# Merge storage options: self.storage_options < user options < namespace options
|
||||
merged_storage_options = dict(self.storage_options)
|
||||
if storage_options:
|
||||
merged_storage_options.update(storage_options)
|
||||
if response.storage_options:
|
||||
merged_storage_options.update(response.storage_options)
|
||||
|
||||
# Pass managed_versioning to avoid redundant describe_table call in Rust.
|
||||
# Convert None to False since we already have the answer from describe_table.
|
||||
managed_versioning = response.managed_versioning is True
|
||||
|
||||
# Note: storage_options_provider is auto-created in Rust from namespace_client
|
||||
return self._lance_table_from_uri(
|
||||
name,
|
||||
response.location,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=merged_storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
_async=async_table,
|
||||
managed_versioning=managed_versioning,
|
||||
)
|
||||
|
||||
@override
|
||||
@@ -792,34 +861,33 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
namespace_client: Optional[Any] = None,
|
||||
managed_versioning: Optional[bool] = None,
|
||||
) -> LanceTable:
|
||||
# Open a table directly from the namespace-resolved physical location.
|
||||
#
|
||||
# Open the table through the Rust namespace-backed connection. The Rust
|
||||
# layer keeps the logical namespace path and namespace client intact.
|
||||
# Open a table directly from a URI using the location parameter
|
||||
# Note: storage_options should already be merged by the caller
|
||||
# Note: storage_options_provider is auto-created in Rust from namespace_client
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
|
||||
async_table = LOOP.run(
|
||||
self._inner.open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
location=None,
|
||||
namespace_client=namespace_client,
|
||||
managed_versioning=managed_versioning,
|
||||
)
|
||||
temp_conn = LanceDBConnection(
|
||||
table_uri, # Use the table location as the connection URI
|
||||
read_consistency_interval=self.read_consistency_interval,
|
||||
storage_options=storage_options if storage_options is not None else {},
|
||||
session=self.session,
|
||||
)
|
||||
|
||||
return LanceTable(
|
||||
self,
|
||||
# Open the table using the temporary connection with the location parameter
|
||||
# Pass namespace_client to enable managed versioning support and auto-create
|
||||
# storage options provider
|
||||
# Pass managed_versioning to avoid redundant describe_table call
|
||||
# Pass pushdown_operations if configured on this connection
|
||||
return LanceTable.open(
|
||||
temp_conn,
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
location=table_uri,
|
||||
namespace_client=namespace_client,
|
||||
managed_versioning=managed_versioning,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
_async=async_table,
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
)
|
||||
|
||||
@override
|
||||
@@ -883,26 +951,7 @@ class AsyncLanceNamespaceDBConnection:
|
||||
self.read_consistency_interval = read_consistency_interval
|
||||
self.storage_options = storage_options or {}
|
||||
self.session = session
|
||||
self._namespace_client_pushdown_operations = set(
|
||||
namespace_client_pushdown_operations or []
|
||||
)
|
||||
self._inner = AsyncConnection(
|
||||
_connect_namespace_client(
|
||||
namespace_client,
|
||||
read_consistency_interval=(
|
||||
read_consistency_interval.total_seconds()
|
||||
if read_consistency_interval is not None
|
||||
else None
|
||||
),
|
||||
storage_options=self.storage_options or None,
|
||||
session=session,
|
||||
namespace_client_pushdown_operations=(
|
||||
list(self._namespace_client_pushdown_operations)
|
||||
),
|
||||
namespace_client_impl=None,
|
||||
namespace_client_properties=None,
|
||||
)
|
||||
)
|
||||
self._pushdown_operations = set(namespace_client_pushdown_operations or [])
|
||||
|
||||
async def table_names(
|
||||
self,
|
||||
@@ -954,16 +1003,145 @@ class AsyncLanceNamespaceDBConnection:
|
||||
if mode.lower() not in ["create", "overwrite"]:
|
||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||
validate_table_name(name)
|
||||
return await self._inner.create_table(
|
||||
|
||||
table_id = namespace_path + [name]
|
||||
|
||||
if "CreateTable" in self._pushdown_operations:
|
||||
return await self._create_table_server_side(
|
||||
name=name,
|
||||
data=data,
|
||||
schema=schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
embedding_functions=embedding_functions,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
)
|
||||
|
||||
# Local create path: declare_table + local write
|
||||
# Step 1: Get the table location and storage options from namespace
|
||||
location = None
|
||||
namespace_storage_options = None
|
||||
if mode.lower() == "overwrite":
|
||||
# Try to describe the table first to see if it exists
|
||||
try:
|
||||
describe_request = DescribeTableRequest(id=table_id)
|
||||
describe_response = self._namespace_client.describe_table(
|
||||
describe_request
|
||||
)
|
||||
location = describe_response.location
|
||||
namespace_storage_options = describe_response.storage_options
|
||||
except Exception:
|
||||
# Table doesn't exist, will create a new one below
|
||||
pass
|
||||
|
||||
if location is None:
|
||||
# Table doesn't exist or mode is "create", reserve a new location
|
||||
declare_request = DeclareTableRequest(
|
||||
id=table_id,
|
||||
location=None,
|
||||
properties=self.storage_options if self.storage_options else None,
|
||||
)
|
||||
declare_response = self._namespace_client.declare_table(declare_request)
|
||||
|
||||
if not declare_response.location:
|
||||
raise ValueError(
|
||||
"Table location is missing from declare_table response"
|
||||
)
|
||||
|
||||
location = declare_response.location
|
||||
namespace_storage_options = declare_response.storage_options
|
||||
|
||||
# Merge storage options: self.storage_options < user options < namespace options
|
||||
merged_storage_options = dict(self.storage_options)
|
||||
if storage_options:
|
||||
merged_storage_options.update(storage_options)
|
||||
if namespace_storage_options:
|
||||
merged_storage_options.update(namespace_storage_options)
|
||||
|
||||
# Step 2: Create table using LanceTable.create with the location
|
||||
# Run the sync operation in a thread
|
||||
def _create_table():
|
||||
temp_conn = LanceDBConnection(
|
||||
location,
|
||||
read_consistency_interval=self.read_consistency_interval,
|
||||
storage_options=merged_storage_options,
|
||||
session=self.session,
|
||||
)
|
||||
|
||||
# storage_options_provider is auto-created in Rust from namespace_client
|
||||
return LanceTable.create(
|
||||
temp_conn,
|
||||
name,
|
||||
data,
|
||||
schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
embedding_functions=embedding_functions,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=merged_storage_options,
|
||||
location=location,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
)
|
||||
|
||||
lance_table = await asyncio.to_thread(_create_table)
|
||||
# Get the underlying async table from LanceTable
|
||||
return lance_table._table
|
||||
|
||||
async def _create_table_server_side(
|
||||
self,
|
||||
name: str,
|
||||
data: Optional[DATA],
|
||||
schema: Optional[Union[pa.Schema, LanceModel]],
|
||||
mode: str,
|
||||
exist_ok: bool,
|
||||
on_bad_vectors: str,
|
||||
fill_value: float,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]],
|
||||
namespace_path: Optional[List[str]],
|
||||
storage_options: Optional[Dict[str, str]],
|
||||
) -> AsyncTable:
|
||||
"""Create a table using server-side namespace.create_table()."""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
table_id = namespace_path + [name]
|
||||
|
||||
def _prepare_and_create():
|
||||
arrow_ipc_bytes = _data_to_arrow_ipc(
|
||||
data=data,
|
||||
schema=schema,
|
||||
embedding_functions=embedding_functions,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
)
|
||||
|
||||
request = CreateTableRequest(
|
||||
id=table_id,
|
||||
mode=_normalize_create_table_mode(mode),
|
||||
properties=self.storage_options if self.storage_options else None,
|
||||
)
|
||||
|
||||
self._namespace_client.create_table(request, arrow_ipc_bytes)
|
||||
|
||||
try:
|
||||
await asyncio.to_thread(_prepare_and_create)
|
||||
except Exception as e:
|
||||
if exist_ok and "already exists" in str(e).lower():
|
||||
return await self.open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
)
|
||||
raise
|
||||
|
||||
return await self.open_table(
|
||||
name,
|
||||
data,
|
||||
schema=schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
namespace_path=namespace_path,
|
||||
embedding_functions=embedding_functions,
|
||||
storage_options=storage_options,
|
||||
)
|
||||
|
||||
@@ -978,18 +1156,45 @@ class AsyncLanceNamespaceDBConnection:
|
||||
"""Open an existing table from the namespace."""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
try:
|
||||
return await self._inner.open_table(
|
||||
table_id = namespace_path + [name]
|
||||
request = DescribeTableRequest(id=table_id)
|
||||
response = self._namespace_client.describe_table(request)
|
||||
|
||||
# Merge storage options: self.storage_options < user options < namespace options
|
||||
merged_storage_options = dict(self.storage_options)
|
||||
if storage_options:
|
||||
merged_storage_options.update(storage_options)
|
||||
if response.storage_options:
|
||||
merged_storage_options.update(response.storage_options)
|
||||
|
||||
# Capture managed_versioning from describe response.
|
||||
# Convert None to False since we already have the answer from describe_table.
|
||||
managed_versioning = response.managed_versioning is True
|
||||
|
||||
# Open table in a thread
|
||||
# Note: storage_options_provider is auto-created in Rust from namespace_client
|
||||
def _open_table():
|
||||
temp_conn = LanceDBConnection(
|
||||
response.location,
|
||||
read_consistency_interval=self.read_consistency_interval,
|
||||
storage_options=merged_storage_options,
|
||||
session=self.session,
|
||||
)
|
||||
|
||||
return LanceTable.open(
|
||||
temp_conn,
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
storage_options=merged_storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
location=response.location,
|
||||
namespace_client=self._namespace_client,
|
||||
managed_versioning=managed_versioning,
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
)
|
||||
except RuntimeError as e:
|
||||
if "Table not found" in str(e):
|
||||
table_id = namespace_path + [name]
|
||||
raise TableNotFoundError(f"Table not found: {'$'.join(table_id)}")
|
||||
raise
|
||||
|
||||
lance_table = await asyncio.to_thread(_open_table)
|
||||
return lance_table._table
|
||||
|
||||
async def drop_table(self, name: str, namespace_path: Optional[List[str]] = None):
|
||||
"""Drop a table from the namespace."""
|
||||
@@ -1267,8 +1472,6 @@ def connect_namespace(
|
||||
storage_options=storage_options,
|
||||
session=session,
|
||||
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
|
||||
namespace_client_impl=namespace_client_impl,
|
||||
namespace_client_properties=namespace_client_properties,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -425,35 +425,6 @@ class Permutation:
|
||||
"""
|
||||
return Permutation.from_tables(table, None, None)
|
||||
|
||||
@classmethod
|
||||
def from_table(cls, table: LanceTable) -> "_PermutationFromTable":
|
||||
"""
|
||||
Create a permutation directly from a base table, with HuggingFace /
|
||||
PyTorch-style chaining for ``shuffle``, ``filter``, and ``split_*``.
|
||||
|
||||
This is a convenience wrapper that hides the two-step
|
||||
``permutation_builder(table).shuffle().execute()`` /
|
||||
``Permutation.from_tables(table, perm_tbl)`` dance. The returned object
|
||||
accumulates builder operations and only materializes the underlying
|
||||
permutation table on first read (any access of an attribute that is
|
||||
not a builder operation), so chained calls do not pay an extra
|
||||
``execute()`` for each step.
|
||||
|
||||
After the first read all ``Permutation`` methods (``select_columns``,
|
||||
``with_format``, ``map``, ``__iter__``, ``fetch``, ``num_rows``, ...)
|
||||
are forwarded transparently.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import lancedb
|
||||
>>> db = lancedb.connect("memory:///")
|
||||
>>> tbl = db.create_table("tbl", data=[{"x": x} for x in range(100)])
|
||||
>>> perm = Permutation.from_table(tbl).shuffle(seed=42)
|
||||
>>> perm.num_rows
|
||||
100
|
||||
"""
|
||||
return _PermutationFromTable(table)
|
||||
|
||||
@classmethod
|
||||
def from_tables(
|
||||
cls,
|
||||
@@ -871,85 +842,3 @@ class Permutation:
|
||||
Repeat the permutation `times` times
|
||||
"""
|
||||
raise Exception("with_repeat is not yet implemented")
|
||||
|
||||
|
||||
class _PermutationFromTable:
|
||||
"""
|
||||
Result of [Permutation.from_table](#from_table).
|
||||
|
||||
Records pending builder operations (``shuffle``, ``filter``, ``split_*``)
|
||||
and lazily executes them on first read. After materialization all
|
||||
Permutation reads / transforms (``select_columns``, ``with_format``,
|
||||
``map``, ``__iter__``, ``fetch``, ``num_rows``, ...) are forwarded to the
|
||||
underlying [Permutation].
|
||||
"""
|
||||
|
||||
__slots__ = ("_base_table", "_pending_ops", "_materialized")
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
base_table: LanceTable,
|
||||
_pending_ops: Optional[list[tuple[str, tuple, dict]]] = None,
|
||||
):
|
||||
self._base_table = base_table
|
||||
self._pending_ops: list[tuple[str, tuple, dict]] = (
|
||||
list(_pending_ops) if _pending_ops is not None else []
|
||||
)
|
||||
self._materialized: Optional[Permutation] = None
|
||||
|
||||
def _with_op(
|
||||
self, name: str, args: tuple, kwargs: dict
|
||||
) -> "_PermutationFromTable":
|
||||
return _PermutationFromTable(
|
||||
self._base_table, _pending_ops=self._pending_ops + [(name, args, kwargs)]
|
||||
)
|
||||
|
||||
def shuffle(self, *args, **kwargs) -> "_PermutationFromTable":
|
||||
return self._with_op("shuffle", args, kwargs)
|
||||
|
||||
def filter(self, *args, **kwargs) -> "_PermutationFromTable":
|
||||
return self._with_op("filter", args, kwargs)
|
||||
|
||||
def split_random(self, *args, **kwargs) -> "_PermutationFromTable":
|
||||
return self._with_op("split_random", args, kwargs)
|
||||
|
||||
def split_sequential(self, *args, **kwargs) -> "_PermutationFromTable":
|
||||
return self._with_op("split_sequential", args, kwargs)
|
||||
|
||||
def split_hash(self, *args, **kwargs) -> "_PermutationFromTable":
|
||||
return self._with_op("split_hash", args, kwargs)
|
||||
|
||||
def split_calculated(self, *args, **kwargs) -> "_PermutationFromTable":
|
||||
return self._with_op("split_calculated", args, kwargs)
|
||||
|
||||
def _materialize(self) -> Permutation:
|
||||
if self._materialized is None:
|
||||
if self._pending_ops:
|
||||
builder = permutation_builder(self._base_table)
|
||||
for name, args, kwargs in self._pending_ops:
|
||||
builder = getattr(builder, name)(*args, **kwargs)
|
||||
perm_tbl = builder.execute()
|
||||
self._materialized = Permutation.from_tables(
|
||||
self._base_table, perm_tbl
|
||||
)
|
||||
else:
|
||||
self._materialized = Permutation.identity(self._base_table)
|
||||
return self._materialized
|
||||
|
||||
def __getattr__(self, name: str) -> Any:
|
||||
# Avoid recursion on dunder/private state.
|
||||
if name.startswith("_"):
|
||||
raise AttributeError(name)
|
||||
return getattr(self._materialize(), name)
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._materialize())
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self._materialize())
|
||||
|
||||
def __getitem__(self, index):
|
||||
return self._materialize()[index]
|
||||
|
||||
def __getitems__(self, indices):
|
||||
return self._materialize().__getitems__(indices)
|
||||
|
||||
@@ -25,6 +25,7 @@ import deprecation
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
import pyarrow.compute as pc
|
||||
import pyarrow.fs as pa_fs
|
||||
import pydantic
|
||||
|
||||
from lancedb.pydantic import PYDANTIC_VERSION
|
||||
@@ -1525,7 +1526,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
return self._table._output_schema(self.to_query_object())
|
||||
|
||||
def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
|
||||
self._table._ensure_no_legacy_fts_index()
|
||||
path, fs, exist = self._table._get_fts_index_path()
|
||||
if exist:
|
||||
return self.tantivy_to_arrow()
|
||||
|
||||
query = self._query
|
||||
if self._phrase_query:
|
||||
@@ -1549,6 +1552,90 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
):
|
||||
raise NotImplementedError("to_batches on an FTS query")
|
||||
|
||||
def tantivy_to_arrow(self) -> pa.Table:
|
||||
try:
|
||||
import tantivy
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install tantivy-py `pip install tantivy` to use the full text search feature." # noqa: E501
|
||||
)
|
||||
|
||||
from .fts import search_index
|
||||
|
||||
# get the index path
|
||||
path, fs, exist = self._table._get_fts_index_path()
|
||||
|
||||
# check if the index exist
|
||||
if not exist:
|
||||
raise FileNotFoundError(
|
||||
"Fts index does not exist. "
|
||||
"Please first call table.create_fts_index(['<field_names>']) to "
|
||||
"create the fts index."
|
||||
)
|
||||
|
||||
# Check that we are on local filesystem
|
||||
if not isinstance(fs, pa_fs.LocalFileSystem):
|
||||
raise NotImplementedError(
|
||||
"Tantivy-based full text search "
|
||||
"is only supported on the local filesystem"
|
||||
)
|
||||
# open the index
|
||||
index = tantivy.Index.open(path)
|
||||
# get the scores and doc ids
|
||||
query = self._query
|
||||
if self._phrase_query:
|
||||
query = query.replace('"', "'")
|
||||
query = f'"{query}"'
|
||||
limit = self._limit if self._limit is not None else 10
|
||||
row_ids, scores = search_index(
|
||||
index, query, limit, ordering_field=self.ordering_field_name
|
||||
)
|
||||
if len(row_ids) == 0:
|
||||
empty_schema = pa.schema([pa.field("_score", pa.float32())])
|
||||
return pa.Table.from_batches([], schema=empty_schema)
|
||||
scores = pa.array(scores)
|
||||
output_tbl = self._table.to_lance().take(row_ids, columns=self._columns)
|
||||
output_tbl = output_tbl.append_column("_score", scores)
|
||||
# this needs to match vector search results which are uint64
|
||||
row_ids = pa.array(row_ids, type=pa.uint64())
|
||||
|
||||
if self._where is not None:
|
||||
tmp_name = "__lancedb__duckdb__indexer__"
|
||||
output_tbl = output_tbl.append_column(
|
||||
tmp_name, pa.array(range(len(output_tbl)))
|
||||
)
|
||||
try:
|
||||
# TODO would be great to have Substrait generate pyarrow compute
|
||||
# expressions or conversely have pyarrow support SQL expressions
|
||||
# using Substrait
|
||||
import duckdb
|
||||
|
||||
indexer = duckdb.sql(
|
||||
f"SELECT {tmp_name} FROM output_tbl WHERE {self._where}"
|
||||
).to_arrow_table()[tmp_name]
|
||||
output_tbl = output_tbl.take(indexer).drop([tmp_name])
|
||||
row_ids = row_ids.take(indexer)
|
||||
|
||||
except ImportError:
|
||||
import tempfile
|
||||
|
||||
import lance
|
||||
|
||||
# TODO Use "memory://" instead once that's supported
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
ds = lance.write_dataset(output_tbl, tmp)
|
||||
output_tbl = ds.to_table(filter=self._where)
|
||||
indexer = output_tbl[tmp_name]
|
||||
row_ids = row_ids.take(indexer)
|
||||
output_tbl = output_tbl.drop([tmp_name])
|
||||
|
||||
if self._with_row_id:
|
||||
output_tbl = output_tbl.append_column("_rowid", row_ids)
|
||||
|
||||
if self._reranker is not None:
|
||||
output_tbl = self._reranker.rerank_fts(self._query, output_tbl)
|
||||
return output_tbl
|
||||
|
||||
def rerank(self, reranker: Reranker) -> LanceFtsQueryBuilder:
|
||||
"""Rerank the results using the specified reranker.
|
||||
|
||||
|
||||
@@ -191,7 +191,7 @@ def _into_pyarrow_reader(
|
||||
f"Unknown data type {type(data)}. "
|
||||
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
|
||||
"pyarrow Table/RecordBatch, or Pydantic models. "
|
||||
"See https://docs.lancedb.com/tables/ for examples."
|
||||
"See https://lancedb.com/docs/tables/ for examples."
|
||||
)
|
||||
|
||||
|
||||
@@ -943,26 +943,29 @@ class Table(ABC):
|
||||
Parameters
|
||||
----------
|
||||
field_names: str or list of str
|
||||
The name of the field to index. Native FTS indexes can only be
|
||||
created on a single field at a time. To search over multiple text
|
||||
fields, create a separate FTS index for each field.
|
||||
The name(s) of the field to index.
|
||||
If ``use_tantivy`` is False (default), only a single field name
|
||||
(str) is supported. To index multiple fields, create a separate
|
||||
FTS index for each field.
|
||||
replace: bool, default False
|
||||
If True, replace the existing index if it exists. Note that this is
|
||||
not yet an atomic operation; the index will be temporarily
|
||||
unavailable while the new index is being created.
|
||||
writer_heap_size: int, default 1GB
|
||||
Deprecated legacy Tantivy parameter. Any value other than the
|
||||
default raises an error.
|
||||
Only available with use_tantivy=True
|
||||
ordering_field_names:
|
||||
Deprecated legacy Tantivy parameter. Setting this raises an error.
|
||||
A list of unsigned type fields to index to optionally order
|
||||
results on at search time.
|
||||
only available with use_tantivy=True
|
||||
tokenizer_name: str, default "default"
|
||||
A compatibility alias for native tokenizer configs. Can be "raw",
|
||||
"default" or the 2 letter language code followed by "_stem". So
|
||||
for english it would be "en_stem".
|
||||
The tokenizer to use for the index. Can be "raw", "default" or the 2 letter
|
||||
language code followed by "_stem". So for english it would be "en_stem".
|
||||
For available languages see: https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html
|
||||
use_tantivy: bool, default False
|
||||
Deprecated legacy Tantivy parameter. Setting this to True raises an
|
||||
error.
|
||||
If True, use the legacy full-text search implementation based on tantivy.
|
||||
If False, use the new full-text search implementation based on lance-index.
|
||||
with_position: bool, default False
|
||||
Only available with use_tantivy=False
|
||||
If False, do not store the positions of the terms in the text.
|
||||
This can reduce the size of the index and improve indexing speed.
|
||||
But it will raise an exception for phrase queries.
|
||||
@@ -1743,16 +1746,6 @@ class Table(ABC):
|
||||
index_exists = fs.get_file_info(path).type != pa_fs.FileType.NotFound
|
||||
return (path, fs, index_exists)
|
||||
|
||||
def _ensure_no_legacy_fts_index(self):
|
||||
path, _, exists = self._get_fts_index_path()
|
||||
if exists:
|
||||
raise ValueError(
|
||||
"Legacy Tantivy FTS index detected at "
|
||||
f"{path}. Tantivy-based FTS has been removed. "
|
||||
"Delete the legacy index and recreate it with "
|
||||
"table.create_fts_index(...)."
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
def uses_v2_manifest_paths(self) -> bool:
|
||||
"""
|
||||
@@ -2412,63 +2405,84 @@ class LanceTable(Table):
|
||||
prefix_only: bool = False,
|
||||
name: Optional[str] = None,
|
||||
):
|
||||
self._ensure_no_legacy_fts_index()
|
||||
if not use_tantivy:
|
||||
if not isinstance(field_names, str):
|
||||
raise ValueError(
|
||||
"Native FTS indexes can only be created on a single field "
|
||||
"at a time. To search over multiple text fields, create a "
|
||||
"separate FTS index for each field."
|
||||
)
|
||||
|
||||
if use_tantivy:
|
||||
raise ValueError(
|
||||
"Tantivy-based FTS has been removed. "
|
||||
"Remove use_tantivy and recreate the index with native FTS."
|
||||
if tokenizer_name is None:
|
||||
tokenizer_configs = {
|
||||
"base_tokenizer": base_tokenizer,
|
||||
"language": language,
|
||||
"with_position": with_position,
|
||||
"max_token_length": max_token_length,
|
||||
"lower_case": lower_case,
|
||||
"stem": stem,
|
||||
"remove_stop_words": remove_stop_words,
|
||||
"ascii_folding": ascii_folding,
|
||||
"ngram_min_length": ngram_min_length,
|
||||
"ngram_max_length": ngram_max_length,
|
||||
"prefix_only": prefix_only,
|
||||
}
|
||||
else:
|
||||
tokenizer_configs = self.infer_tokenizer_configs(tokenizer_name)
|
||||
|
||||
config = FTS(
|
||||
**tokenizer_configs,
|
||||
)
|
||||
if ordering_field_names is not None:
|
||||
raise ValueError(
|
||||
"ordering_field_names was only supported by the removed "
|
||||
"Tantivy-based FTS implementation."
|
||||
|
||||
# delete the existing legacy index if it exists
|
||||
if replace:
|
||||
path, fs, exist = self._get_fts_index_path()
|
||||
if exist:
|
||||
fs.delete_dir(path)
|
||||
|
||||
LOOP.run(
|
||||
self._table.create_index(
|
||||
field_names,
|
||||
replace=replace,
|
||||
config=config,
|
||||
name=name,
|
||||
)
|
||||
)
|
||||
if writer_heap_size != 1024 * 1024 * 1024:
|
||||
raise ValueError(
|
||||
"writer_heap_size was only supported by the removed "
|
||||
"Tantivy-based FTS implementation."
|
||||
)
|
||||
if not isinstance(field_names, str):
|
||||
raise ValueError(
|
||||
"Native FTS indexes can only be created on a single field "
|
||||
"at a time. To search over multiple text fields, create a "
|
||||
"separate FTS index for each field."
|
||||
)
|
||||
if "." in field_names:
|
||||
raise ValueError(
|
||||
"Native FTS indexes can only be created on top-level fields. "
|
||||
f"Received nested field path: {field_names!r}."
|
||||
return
|
||||
|
||||
from .fts import create_index, populate_index
|
||||
|
||||
if isinstance(field_names, str):
|
||||
field_names = [field_names]
|
||||
|
||||
if isinstance(ordering_field_names, str):
|
||||
ordering_field_names = [ordering_field_names]
|
||||
|
||||
path, fs, exist = self._get_fts_index_path()
|
||||
if exist:
|
||||
if not replace:
|
||||
raise ValueError("Index already exists. Use replace=True to overwrite.")
|
||||
fs.delete_dir(path)
|
||||
|
||||
if not isinstance(fs, pa_fs.LocalFileSystem):
|
||||
raise NotImplementedError(
|
||||
"Full-text search is only supported on the local filesystem"
|
||||
)
|
||||
|
||||
if tokenizer_name is None:
|
||||
tokenizer_configs = {
|
||||
"base_tokenizer": base_tokenizer,
|
||||
"language": language,
|
||||
"with_position": with_position,
|
||||
"max_token_length": max_token_length,
|
||||
"lower_case": lower_case,
|
||||
"stem": stem,
|
||||
"remove_stop_words": remove_stop_words,
|
||||
"ascii_folding": ascii_folding,
|
||||
"ngram_min_length": ngram_min_length,
|
||||
"ngram_max_length": ngram_max_length,
|
||||
"prefix_only": prefix_only,
|
||||
}
|
||||
else:
|
||||
tokenizer_configs = self.infer_tokenizer_configs(tokenizer_name)
|
||||
|
||||
config = FTS(
|
||||
**tokenizer_configs,
|
||||
tokenizer_name = "default"
|
||||
index = create_index(
|
||||
path,
|
||||
field_names,
|
||||
ordering_fields=ordering_field_names,
|
||||
tokenizer_name=tokenizer_name,
|
||||
)
|
||||
|
||||
LOOP.run(
|
||||
self._table.create_index(
|
||||
field_names,
|
||||
replace=replace,
|
||||
config=config,
|
||||
name=name,
|
||||
)
|
||||
populate_index(
|
||||
index,
|
||||
self,
|
||||
field_names,
|
||||
ordering_fields=ordering_field_names,
|
||||
writer_heap_size=writer_heap_size,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
@@ -2915,7 +2929,6 @@ class LanceTable(Table):
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
location=location,
|
||||
namespace_client=namespace_client,
|
||||
)
|
||||
)
|
||||
return self
|
||||
|
||||
@@ -180,7 +180,7 @@ def test_fts_fuzzy_query():
|
||||
),
|
||||
mode="overwrite",
|
||||
)
|
||||
table.create_fts_index("text", replace=True)
|
||||
table.create_fts_index("text", use_tantivy=False, replace=True)
|
||||
|
||||
results = table.search(MatchQuery("foo", "text", fuzziness=1)).to_pandas()
|
||||
assert len(results) == 4
|
||||
@@ -230,7 +230,7 @@ def test_fts_boost_query():
|
||||
),
|
||||
mode="overwrite",
|
||||
)
|
||||
table.create_fts_index("desc", replace=True)
|
||||
table.create_fts_index("desc", use_tantivy=False, replace=True)
|
||||
|
||||
results = table.search(
|
||||
BoostQuery(
|
||||
@@ -265,7 +265,7 @@ def test_fts_boolean_query(tmp_path):
|
||||
],
|
||||
mode="overwrite",
|
||||
)
|
||||
table.create_fts_index("text", replace=True)
|
||||
table.create_fts_index("text", use_tantivy=False, replace=True)
|
||||
|
||||
# SHOULD
|
||||
results = table.search(
|
||||
@@ -319,7 +319,9 @@ def test_fts_native():
|
||||
],
|
||||
)
|
||||
|
||||
table.create_fts_index("text")
|
||||
# passing `use_tantivy=False` to use lance FTS index
|
||||
# `use_tantivy=True` by default
|
||||
table.create_fts_index("text", use_tantivy=False)
|
||||
table.search("puppy").limit(10).select(["text"]).to_list()
|
||||
# [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]
|
||||
# ...
|
||||
@@ -330,6 +332,7 @@ def test_fts_native():
|
||||
# --8<-- [start:fts_config_folding]
|
||||
table.create_fts_index(
|
||||
"text",
|
||||
use_tantivy=False,
|
||||
language="French",
|
||||
stem=True,
|
||||
ascii_folding=True,
|
||||
@@ -343,7 +346,7 @@ def test_fts_native():
|
||||
table.search("puppy").limit(10).where("text='foo'", prefilter=False).to_list()
|
||||
# --8<-- [end:fts_postfiltering]
|
||||
# --8<-- [start:fts_with_position]
|
||||
table.create_fts_index("text", with_position=True, replace=True)
|
||||
table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True)
|
||||
# --8<-- [end:fts_with_position]
|
||||
# --8<-- [start:fts_incremental_index]
|
||||
table.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}])
|
||||
|
||||
@@ -15,7 +15,8 @@ import pytest
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
|
||||
|
||||
def test_basic(tmp_path):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_basic(tmp_path, use_tantivy):
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
assert db.uri == str(tmp_path)
|
||||
@@ -48,7 +49,7 @@ def test_basic(tmp_path):
|
||||
assert len(rs) == 1
|
||||
assert rs["item"].iloc[0] == "foo"
|
||||
|
||||
table.create_fts_index("item")
|
||||
table.create_fts_index("item", use_tantivy=use_tantivy)
|
||||
rs = table.search("bar", query_type="fts").to_pandas()
|
||||
assert len(rs) == 1
|
||||
assert rs["item"].iloc[0] == "bar"
|
||||
@@ -896,22 +897,42 @@ def test_bypass_vector_index_sync(tmp_db: lancedb.DBConnection):
|
||||
|
||||
|
||||
def test_local_namespace_operations(tmp_path):
|
||||
"""Test that local mode namespace operations work via directory namespace."""
|
||||
"""Test that local mode namespace operations behave as expected."""
|
||||
# Create a local database connection
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
# Root namespace starts empty
|
||||
assert db.list_namespaces().namespaces == []
|
||||
# Test list_namespaces returns empty list for root namespace
|
||||
namespaces = db.list_namespaces().namespaces
|
||||
assert namespaces == []
|
||||
|
||||
# Create and list child namespace
|
||||
db.create_namespace(["child"])
|
||||
assert "child" in db.list_namespaces().namespaces
|
||||
# Test list_namespaces with non-empty namespace raises NotImplementedError
|
||||
with pytest.raises(
|
||||
NotImplementedError,
|
||||
match="Namespace operations are not supported for listing database",
|
||||
):
|
||||
db.list_namespaces(namespace_path=["test"])
|
||||
|
||||
# List namespaces under child
|
||||
assert db.list_namespaces(namespace_path=["child"]).namespaces == []
|
||||
|
||||
# Drop namespace
|
||||
db.drop_namespace(["child"])
|
||||
assert db.list_namespaces().namespaces == []
|
||||
def test_local_create_namespace_not_supported(tmp_path):
|
||||
"""Test that create_namespace is not supported in local mode."""
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
with pytest.raises(
|
||||
NotImplementedError,
|
||||
match="Namespace operations are not supported for listing database",
|
||||
):
|
||||
db.create_namespace(["test_namespace"])
|
||||
|
||||
|
||||
def test_local_drop_namespace_not_supported(tmp_path):
|
||||
"""Test that drop_namespace is not supported in local mode."""
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
with pytest.raises(
|
||||
NotImplementedError,
|
||||
match="Namespace operations are not supported for listing database",
|
||||
):
|
||||
db.drop_namespace(["test_namespace"])
|
||||
|
||||
|
||||
def test_clone_table_latest_version(tmp_path):
|
||||
|
||||
@@ -36,6 +36,9 @@ import pytest
|
||||
import pytest_asyncio
|
||||
from utils import exception_output
|
||||
|
||||
pytest.importorskip("lancedb.fts")
|
||||
tantivy = pytest.importorskip("tantivy")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def table(tmp_path) -> ldb.table.LanceTable:
|
||||
@@ -141,53 +144,58 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
|
||||
return table
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("kwargs", "match"),
|
||||
[
|
||||
(
|
||||
{"use_tantivy": True},
|
||||
"Tantivy-based FTS has been removed",
|
||||
),
|
||||
(
|
||||
{"ordering_field_names": ["count"]},
|
||||
"ordering_field_names was only supported",
|
||||
),
|
||||
(
|
||||
{"writer_heap_size": 128},
|
||||
"writer_heap_size was only supported",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_reject_removed_tantivy_parameters(table, kwargs, match):
|
||||
with pytest.raises(ValueError, match=match):
|
||||
table.create_fts_index("text", **kwargs)
|
||||
def test_create_index(tmp_path):
|
||||
index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
|
||||
assert isinstance(index, tantivy.Index)
|
||||
assert os.path.exists(str(tmp_path / "index"))
|
||||
|
||||
|
||||
def test_reject_legacy_tantivy_index(table):
|
||||
path, _, _ = table._get_fts_index_path()
|
||||
os.makedirs(path, exist_ok=True)
|
||||
def test_create_index_with_stemming(tmp_path, table):
|
||||
index = ldb.fts.create_index(
|
||||
str(tmp_path / "index"), ["text"], tokenizer_name="en_stem"
|
||||
)
|
||||
assert isinstance(index, tantivy.Index)
|
||||
assert os.path.exists(str(tmp_path / "index"))
|
||||
|
||||
with pytest.raises(ValueError, match="Legacy Tantivy FTS index detected"):
|
||||
table.search("puppy").limit(5).to_list()
|
||||
|
||||
with pytest.raises(ValueError, match="Legacy Tantivy FTS index detected"):
|
||||
table.create_fts_index("text")
|
||||
# Check stemming by running tokenizer on non empty table
|
||||
table.create_fts_index("text", tokenizer_name="en_stem", use_tantivy=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
@pytest.mark.parametrize("with_position", [True, False])
|
||||
def test_create_inverted_index(table, with_position):
|
||||
def test_create_inverted_index(table, use_tantivy, with_position):
|
||||
if use_tantivy and not with_position:
|
||||
pytest.skip("we don't support building a tantivy index without position")
|
||||
table.create_fts_index(
|
||||
"text",
|
||||
use_tantivy=use_tantivy,
|
||||
with_position=with_position,
|
||||
name="custom_fts_index",
|
||||
)
|
||||
indices = table.list_indices()
|
||||
fts_indices = [i for i in indices if i.index_type == "FTS"]
|
||||
assert any(i.name == "custom_fts_index" for i in fts_indices)
|
||||
if not use_tantivy:
|
||||
indices = table.list_indices()
|
||||
fts_indices = [i for i in indices if i.index_type == "FTS"]
|
||||
assert any(i.name == "custom_fts_index" for i in fts_indices)
|
||||
|
||||
|
||||
def test_search_fts(table):
|
||||
table.create_fts_index("text")
|
||||
def test_populate_index(tmp_path, table):
|
||||
index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
|
||||
assert ldb.fts.populate_index(index, table, ["text"]) == len(table)
|
||||
|
||||
|
||||
def test_search_index(tmp_path, table):
|
||||
index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
|
||||
ldb.fts.populate_index(index, table, ["text"])
|
||||
index.reload()
|
||||
results = ldb.fts.search_index(index, query="puppy", limit=5)
|
||||
assert len(results) == 2
|
||||
assert len(results[0]) == 5 # row_ids
|
||||
assert len(results[1]) == 5 # _score
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_search_fts(table, use_tantivy):
|
||||
table.create_fts_index("text", use_tantivy=use_tantivy)
|
||||
results = table.search("puppy").select(["id", "text"]).limit(5).to_list()
|
||||
assert len(results) == 5
|
||||
assert len(results[0]) == 3 # id, text, _score
|
||||
@@ -196,52 +204,53 @@ def test_search_fts(table):
|
||||
results = table.search("puppy").select(["id", "text"]).to_list()
|
||||
assert len(results) == 10
|
||||
|
||||
# Test with a query
|
||||
results = (
|
||||
table.search(MatchQuery("puppy", "text"))
|
||||
.select(["id", "text"])
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(results) == 5
|
||||
|
||||
# Test boost query
|
||||
results = (
|
||||
table.search(
|
||||
BoostQuery(
|
||||
MatchQuery("puppy", "text"),
|
||||
MatchQuery("runs", "text"),
|
||||
)
|
||||
if not use_tantivy:
|
||||
# Test with a query
|
||||
results = (
|
||||
table.search(MatchQuery("puppy", "text"))
|
||||
.select(["id", "text"])
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
.select(["id", "text"])
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(results) == 5
|
||||
assert len(results) == 5
|
||||
|
||||
# Test multi match query
|
||||
table.create_fts_index("text2")
|
||||
results = (
|
||||
table.search(MultiMatchQuery("puppy", ["text", "text2"]))
|
||||
.select(["id", "text"])
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(results) == 5
|
||||
assert len(results[0]) == 3 # id, text, _score
|
||||
# Test boost query
|
||||
results = (
|
||||
table.search(
|
||||
BoostQuery(
|
||||
MatchQuery("puppy", "text"),
|
||||
MatchQuery("runs", "text"),
|
||||
)
|
||||
)
|
||||
.select(["id", "text"])
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(results) == 5
|
||||
|
||||
# Test boolean query
|
||||
results = (
|
||||
table.search(MatchQuery("puppy", "text") & MatchQuery("runs", "text"))
|
||||
.select(["id", "text"])
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(results) == 5
|
||||
assert len(results[0]) == 3 # id, text, _score
|
||||
for r in results:
|
||||
assert "puppy" in r["text"]
|
||||
assert "runs" in r["text"]
|
||||
# Test multi match query
|
||||
table.create_fts_index("text2", use_tantivy=use_tantivy)
|
||||
results = (
|
||||
table.search(MultiMatchQuery("puppy", ["text", "text2"]))
|
||||
.select(["id", "text"])
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(results) == 5
|
||||
assert len(results[0]) == 3 # id, text, _score
|
||||
|
||||
# Test boolean query
|
||||
results = (
|
||||
table.search(MatchQuery("puppy", "text") & MatchQuery("runs", "text"))
|
||||
.select(["id", "text"])
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(results) == 5
|
||||
assert len(results[0]) == 3 # id, text, _score
|
||||
for r in results:
|
||||
assert "puppy" in r["text"]
|
||||
assert "runs" in r["text"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -309,13 +318,13 @@ async def test_fts_select_async(async_table):
|
||||
|
||||
|
||||
def test_search_fts_phrase_query(table):
|
||||
table.create_fts_index("text", with_position=False)
|
||||
table.create_fts_index("text", use_tantivy=False, with_position=False)
|
||||
try:
|
||||
phrase_results = table.search('"puppy runs"').limit(100).to_list()
|
||||
assert False
|
||||
except Exception:
|
||||
pass
|
||||
table.create_fts_index("text", with_position=True, replace=True)
|
||||
table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True)
|
||||
results = table.search("puppy").limit(100).to_list()
|
||||
|
||||
# Test with quotation marks
|
||||
@@ -366,8 +375,8 @@ async def test_search_fts_phrase_query_async(async_table):
|
||||
|
||||
|
||||
def test_search_fts_specify_column(table):
|
||||
table.create_fts_index("text")
|
||||
table.create_fts_index("text2")
|
||||
table.create_fts_index("text", use_tantivy=False)
|
||||
table.create_fts_index("text2", use_tantivy=False)
|
||||
|
||||
results = table.search("puppy", fts_columns="text").limit(5).to_list()
|
||||
assert len(results) == 5
|
||||
@@ -461,8 +470,42 @@ async def test_search_fts_specify_column_async(async_table):
|
||||
pass
|
||||
|
||||
|
||||
def test_create_index_from_table(tmp_path, table):
|
||||
table.create_fts_index("text")
|
||||
def test_search_ordering_field_index_table(tmp_path, table):
|
||||
table.create_fts_index("text", ordering_field_names=["count"], use_tantivy=True)
|
||||
rows = (
|
||||
table.search("puppy", ordering_field_name="count")
|
||||
.limit(20)
|
||||
.select(["text", "count"])
|
||||
.to_list()
|
||||
)
|
||||
for r in rows:
|
||||
assert "puppy" in r["text"]
|
||||
assert sorted(rows, key=lambda x: x["count"], reverse=True) == rows
|
||||
|
||||
|
||||
def test_search_ordering_field_index(tmp_path, table):
|
||||
index = ldb.fts.create_index(
|
||||
str(tmp_path / "index"), ["text"], ordering_fields=["count"]
|
||||
)
|
||||
|
||||
ldb.fts.populate_index(index, table, ["text"], ordering_fields=["count"])
|
||||
index.reload()
|
||||
results = ldb.fts.search_index(
|
||||
index, query="puppy", limit=5, ordering_field="count"
|
||||
)
|
||||
assert len(results) == 2
|
||||
assert len(results[0]) == 5 # row_ids
|
||||
assert len(results[1]) == 5 # _distance
|
||||
rows = table.to_lance().take(results[0]).to_pylist()
|
||||
|
||||
for r in rows:
|
||||
assert "puppy" in r["text"]
|
||||
assert sorted(rows, key=lambda x: x["count"], reverse=True) == rows
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_create_index_from_table(tmp_path, table, use_tantivy):
|
||||
table.create_fts_index("text", use_tantivy=use_tantivy)
|
||||
df = table.search("puppy").limit(5).select(["text"]).to_pandas()
|
||||
assert len(df) <= 5
|
||||
assert "text" in df.columns
|
||||
@@ -482,24 +525,36 @@ def test_create_index_from_table(tmp_path, table):
|
||||
)
|
||||
|
||||
with pytest.raises(Exception, match="already exists"):
|
||||
table.create_fts_index("text")
|
||||
table.create_fts_index("text", use_tantivy=use_tantivy)
|
||||
|
||||
table.create_fts_index("text", replace=True)
|
||||
table.create_fts_index("text", replace=True, use_tantivy=use_tantivy)
|
||||
assert len(table.search("gorilla").limit(1).to_pandas()) == 1
|
||||
|
||||
|
||||
def test_create_index_multiple_columns(tmp_path, table):
|
||||
with pytest.raises(ValueError, match="Native FTS indexes can only be created"):
|
||||
table.create_fts_index(["text", "text2"])
|
||||
table.create_fts_index(["text", "text2"], use_tantivy=True)
|
||||
df = table.search("puppy").limit(5).to_pandas()
|
||||
assert len(df) == 5
|
||||
assert "text" in df.columns
|
||||
assert "text2" in df.columns
|
||||
|
||||
|
||||
def test_empty_rs(tmp_path, table, mocker):
|
||||
table.create_fts_index(["text", "text2"], use_tantivy=True)
|
||||
mocker.patch("lancedb.fts.search_index", return_value=([], []))
|
||||
df = table.search("puppy").limit(5).to_pandas()
|
||||
assert len(df) == 0
|
||||
|
||||
|
||||
def test_nested_schema(tmp_path, table):
|
||||
with pytest.raises(ValueError, match="top-level fields"):
|
||||
table.create_fts_index("nested.text")
|
||||
table.create_fts_index("nested.text", use_tantivy=True)
|
||||
rs = table.search("puppy").limit(5).to_list()
|
||||
assert len(rs) == 5
|
||||
|
||||
|
||||
def test_search_index_with_filter(table):
|
||||
table.create_fts_index("text")
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_search_index_with_filter(table, use_tantivy):
|
||||
table.create_fts_index("text", use_tantivy=use_tantivy)
|
||||
orig_import = __import__
|
||||
|
||||
def import_mock(name, *args):
|
||||
@@ -529,7 +584,8 @@ def test_search_index_with_filter(table):
|
||||
assert r["_rowid"] is not None
|
||||
|
||||
|
||||
def test_null_input(table):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_null_input(table, use_tantivy):
|
||||
table.add(
|
||||
[
|
||||
{
|
||||
@@ -542,13 +598,14 @@ def test_null_input(table):
|
||||
}
|
||||
]
|
||||
)
|
||||
table.create_fts_index("text")
|
||||
table.create_fts_index("text", use_tantivy=use_tantivy)
|
||||
|
||||
|
||||
def test_syntax(table):
|
||||
# https://github.com/lancedb/lancedb/issues/769
|
||||
table.create_fts_index("text")
|
||||
table.search("they could have been dogs OR").limit(10).to_list()
|
||||
table.create_fts_index("text", use_tantivy=True)
|
||||
with pytest.raises(ValueError, match="Syntax Error"):
|
||||
table.search("they could have been dogs OR").limit(10).to_list()
|
||||
|
||||
# these should work
|
||||
|
||||
@@ -559,7 +616,6 @@ def test_syntax(table):
|
||||
).to_list()
|
||||
|
||||
# phrase queries
|
||||
table.create_fts_index("text", with_position=True, replace=True)
|
||||
table.search("they could have been dogs OR cats").phrase_query().limit(10).to_list()
|
||||
table.search('"they could have been dogs OR cats"').limit(10).to_list()
|
||||
table.search('''"the cats OR dogs were not really 'pets' at all"''').limit(
|
||||
@@ -583,7 +639,7 @@ def test_language(mem_db: DBConnection):
|
||||
table = mem_db.create_table("test", data=data)
|
||||
|
||||
with pytest.raises(ValueError) as e:
|
||||
table.create_fts_index("text", language="klingon")
|
||||
table.create_fts_index("text", use_tantivy=False, language="klingon")
|
||||
|
||||
assert exception_output(e) == (
|
||||
"ValueError: LanceDB does not support the requested language: 'klingon'\n"
|
||||
@@ -594,6 +650,7 @@ def test_language(mem_db: DBConnection):
|
||||
|
||||
table.create_fts_index(
|
||||
"text",
|
||||
use_tantivy=False,
|
||||
language="French",
|
||||
stem=True,
|
||||
ascii_folding=True,
|
||||
@@ -633,7 +690,7 @@ def test_fts_on_list(mem_db: DBConnection):
|
||||
}
|
||||
)
|
||||
table = mem_db.create_table("test", data=data)
|
||||
table.create_fts_index("text", with_position=True)
|
||||
table.create_fts_index("text", use_tantivy=False, with_position=True)
|
||||
|
||||
res = table.search("lance").limit(5).to_list()
|
||||
assert len(res) == 3
|
||||
@@ -645,7 +702,7 @@ def test_fts_on_list(mem_db: DBConnection):
|
||||
def test_fts_ngram(mem_db: DBConnection):
|
||||
data = pa.table({"text": ["hello world", "lance database", "lance is cool"]})
|
||||
table = mem_db.create_table("test", data=data)
|
||||
table.create_fts_index("text", base_tokenizer="ngram")
|
||||
table.create_fts_index("text", use_tantivy=False, base_tokenizer="ngram")
|
||||
|
||||
results = table.search("lan", query_type="fts").limit(10).to_list()
|
||||
assert len(results) == 2
|
||||
@@ -664,6 +721,7 @@ def test_fts_ngram(mem_db: DBConnection):
|
||||
# test setting min_ngram_length and prefix_only
|
||||
table.create_fts_index(
|
||||
"text",
|
||||
use_tantivy=False,
|
||||
base_tokenizer="ngram",
|
||||
replace=True,
|
||||
ngram_min_length=2,
|
||||
@@ -828,7 +886,7 @@ def test_fts_query_to_json():
|
||||
|
||||
|
||||
def test_fts_fast_search(table):
|
||||
table.create_fts_index("text")
|
||||
table.create_fts_index("text", use_tantivy=False)
|
||||
|
||||
# Insert some unindexed data
|
||||
table.add(
|
||||
|
||||
@@ -28,7 +28,7 @@ def sync_table(tmpdir_factory) -> Table:
|
||||
}
|
||||
)
|
||||
table = db.create_table("test", data)
|
||||
table.create_fts_index("text", with_position=False)
|
||||
table.create_fts_index("text", with_position=False, use_tantivy=False)
|
||||
return table
|
||||
|
||||
|
||||
@@ -192,7 +192,7 @@ def table_with_id(tmpdir_factory) -> Table:
|
||||
}
|
||||
)
|
||||
table = db.create_table("test_with_id", data)
|
||||
table.create_fts_index("text", with_position=False)
|
||||
table.create_fts_index("text", with_position=False, use_tantivy=False)
|
||||
return table
|
||||
|
||||
|
||||
|
||||
@@ -681,7 +681,7 @@ class TestPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["QueryTable"],
|
||||
)
|
||||
assert "QueryTable" in db._namespace_client_pushdown_operations
|
||||
assert "QueryTable" in db._pushdown_operations
|
||||
|
||||
def test_create_table_pushdown_stored(self):
|
||||
"""Test that CreateTable pushdown is stored on sync connection."""
|
||||
@@ -690,7 +690,7 @@ class TestPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["CreateTable"],
|
||||
)
|
||||
assert "CreateTable" in db._namespace_client_pushdown_operations
|
||||
assert "CreateTable" in db._pushdown_operations
|
||||
|
||||
def test_both_pushdowns_stored(self):
|
||||
"""Test that both pushdown operations can be set together."""
|
||||
@@ -699,13 +699,13 @@ class TestPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["QueryTable", "CreateTable"],
|
||||
)
|
||||
assert "QueryTable" in db._namespace_client_pushdown_operations
|
||||
assert "CreateTable" in db._namespace_client_pushdown_operations
|
||||
assert "QueryTable" in db._pushdown_operations
|
||||
assert "CreateTable" in db._pushdown_operations
|
||||
|
||||
def test_pushdown_defaults_to_empty(self):
|
||||
"""Test that pushdown operations default to empty."""
|
||||
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||
assert len(db._namespace_client_pushdown_operations) == 0
|
||||
assert len(db._pushdown_operations) == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -727,7 +727,7 @@ class TestAsyncPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["QueryTable"],
|
||||
)
|
||||
assert "QueryTable" in db._namespace_client_pushdown_operations
|
||||
assert "QueryTable" in db._pushdown_operations
|
||||
|
||||
async def test_async_create_table_pushdown_stored(self):
|
||||
"""Test that CreateTable pushdown is stored on async connection."""
|
||||
@@ -736,9 +736,9 @@ class TestAsyncPushdownOperations:
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["CreateTable"],
|
||||
)
|
||||
assert "CreateTable" in db._namespace_client_pushdown_operations
|
||||
assert "CreateTable" in db._pushdown_operations
|
||||
|
||||
async def test_async_pushdown_defaults_to_empty(self):
|
||||
"""Test that pushdown operations default to empty on async connection."""
|
||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||
assert len(db._namespace_client_pushdown_operations) == 0
|
||||
assert len(db._pushdown_operations) == 0
|
||||
|
||||
@@ -18,9 +18,6 @@ Tests verify:
|
||||
"""
|
||||
|
||||
import copy
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import uuid
|
||||
from typing import Dict, Optional
|
||||
@@ -390,66 +387,6 @@ def test_namespace_open_table_with_provider(s3_bucket: str, use_custom: bool):
|
||||
assert get_describe_call_count(inner_ns_client) == describe_count_after_open
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
sys.platform == "win32",
|
||||
reason="TODO: fix schema-only namespace metrics test on Windows",
|
||||
)
|
||||
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||
def test_namespace_create_schema_only_with_provider(use_custom: bool):
|
||||
"""
|
||||
Test creating a schema-only table through namespace with storage options provider.
|
||||
|
||||
Verifies:
|
||||
- declare_table is called once to reserve the location
|
||||
- describe_table is not needed during create in create mode
|
||||
- the table can be reopened successfully afterward
|
||||
- opening the table triggers exactly one describe_table call
|
||||
"""
|
||||
temp_dir = tempfile.mkdtemp()
|
||||
try:
|
||||
ns_client, inner_ns_client = create_tracking_namespace(
|
||||
bucket_name=temp_dir,
|
||||
storage_options={},
|
||||
credential_expires_in_seconds=3600,
|
||||
use_custom=use_custom,
|
||||
)
|
||||
|
||||
db = LanceNamespaceDBConnection(ns_client)
|
||||
|
||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||
db.create_namespace([namespace_name])
|
||||
|
||||
table_name = f"test_table_{uuid.uuid4().hex}"
|
||||
namespace_path = [namespace_name]
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
pa.field("text", pa.string()),
|
||||
]
|
||||
)
|
||||
|
||||
assert get_declare_call_count(inner_ns_client) == 0
|
||||
assert get_describe_call_count(inner_ns_client) == 0
|
||||
|
||||
table = db.create_table(
|
||||
table_name, schema=schema, namespace_path=namespace_path
|
||||
)
|
||||
|
||||
assert table.name == table_name
|
||||
assert table.namespace == namespace_path
|
||||
assert get_declare_call_count(inner_ns_client) == 1
|
||||
assert get_describe_call_count(inner_ns_client) == 0
|
||||
|
||||
reopened_table = db.open_table(table_name, namespace_path=namespace_path)
|
||||
|
||||
assert reopened_table.schema == schema
|
||||
assert get_declare_call_count(inner_ns_client) == 1
|
||||
assert get_describe_call_count(inner_ns_client) == 1
|
||||
finally:
|
||||
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||
|
||||
|
||||
@pytest.mark.s3_test
|
||||
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||
def test_namespace_credential_refresh_on_read(s3_bucket: str, use_custom: bool):
|
||||
|
||||
@@ -1095,60 +1095,3 @@ def test_getitems_invalid_offset(some_permutation: Permutation):
|
||||
"""Test __getitems__ with an out-of-range offset raises an error."""
|
||||
with pytest.raises(Exception):
|
||||
some_permutation.__getitems__([999999])
|
||||
|
||||
|
||||
def test_from_table_identity(mem_db):
|
||||
"""Permutation.from_table without ops behaves like identity."""
|
||||
tbl = mem_db.create_table("tbl", pa.table({"x": range(10)}))
|
||||
perm = Permutation.from_table(tbl)
|
||||
assert perm.num_rows == 10
|
||||
assert perm.column_names == ["x"]
|
||||
|
||||
|
||||
def test_from_table_shuffle_seeded(mem_db):
|
||||
"""from_table().shuffle(seed=...) is reproducible and reorders rows."""
|
||||
tbl = mem_db.create_table("tbl", pa.table({"x": range(100)}))
|
||||
perm = Permutation.from_table(tbl).shuffle(seed=42)
|
||||
rows = [r["x"] for r in perm.__getitems__(list(range(100)))]
|
||||
assert sorted(rows) == list(range(100))
|
||||
assert rows != list(range(100))
|
||||
|
||||
# Same seed → same order
|
||||
rows2 = [
|
||||
r["x"]
|
||||
for r in Permutation.from_table(tbl)
|
||||
.shuffle(seed=42)
|
||||
.__getitems__(list(range(100)))
|
||||
]
|
||||
assert rows == rows2
|
||||
|
||||
|
||||
def test_from_table_filter(mem_db):
|
||||
"""from_table().filter(...) limits the rows."""
|
||||
tbl = mem_db.create_table("tbl", pa.table({"x": range(100)}))
|
||||
perm = Permutation.from_table(tbl).filter("x < 25")
|
||||
assert perm.num_rows == 25
|
||||
|
||||
|
||||
def test_from_table_chained_ops(mem_db):
|
||||
"""Chained shuffle + filter materializes once."""
|
||||
tbl = mem_db.create_table("tbl", pa.table({"x": range(100)}))
|
||||
perm = Permutation.from_table(tbl).filter("x >= 50").shuffle(seed=7)
|
||||
assert perm.num_rows == 50
|
||||
rows = [r["x"] for r in perm.__getitems__(list(range(50)))]
|
||||
assert sorted(rows) == list(range(50, 100))
|
||||
|
||||
|
||||
def test_from_table_forwards_read_methods(mem_db):
|
||||
"""from_table() result transparently forwards Permutation read methods."""
|
||||
tbl = mem_db.create_table("tbl", pa.table({"x": range(10), "y": range(10)}))
|
||||
perm = Permutation.from_table(tbl).select_columns(["x"])
|
||||
assert perm.column_names == ["x"]
|
||||
|
||||
|
||||
def test_from_table_split_random(mem_db):
|
||||
"""from_table().split_random(...) returns rows from the first split."""
|
||||
tbl = mem_db.create_table("tbl", pa.table({"x": range(100)}))
|
||||
perm = Permutation.from_table(tbl).split_random(ratios=[0.3, 0.7], seed=1)
|
||||
# Default split is 0 — ratio 0.3 → ~30 rows
|
||||
assert 25 <= perm.num_rows <= 35
|
||||
|
||||
@@ -1385,7 +1385,7 @@ def test_query_timeout(tmp_path):
|
||||
}
|
||||
)
|
||||
table = db.create_table("test", data)
|
||||
table.create_fts_index("text")
|
||||
table.create_fts_index("text", use_tantivy=False)
|
||||
|
||||
with pytest.raises(Exception, match="Query timeout"):
|
||||
table.search().where("text = 'a'").to_list(timeout=timedelta(0))
|
||||
|
||||
@@ -26,8 +26,11 @@ from lancedb.rerankers import (
|
||||
)
|
||||
from lancedb.table import LanceTable
|
||||
|
||||
# Tests rely on FTS index
|
||||
pytest.importorskip("lancedb.fts")
|
||||
|
||||
def get_test_table(tmp_path):
|
||||
|
||||
def get_test_table(tmp_path, use_tantivy):
|
||||
db = lancedb.connect(tmp_path)
|
||||
# Create a LanceDB table schema with a vector and a text column
|
||||
emb = EmbeddingFunctionRegistry.get_instance().get("test").create()
|
||||
@@ -95,7 +98,7 @@ def get_test_table(tmp_path):
|
||||
)
|
||||
|
||||
# Create a fts index
|
||||
table.create_fts_index("text", replace=True)
|
||||
table.create_fts_index("text", use_tantivy=use_tantivy, replace=True)
|
||||
|
||||
return table, MyTable
|
||||
|
||||
@@ -205,8 +208,8 @@ def _run_test_reranker(reranker, table, query, query_vector, schema):
|
||||
assert len(result) == 20 and result == result_arrow
|
||||
|
||||
|
||||
def _run_test_hybrid_reranker(reranker, tmp_path):
|
||||
table, schema = get_test_table(tmp_path)
|
||||
def _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy):
|
||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
||||
# The default reranker
|
||||
result1 = (
|
||||
table.search(
|
||||
@@ -282,7 +285,8 @@ def _run_test_hybrid_reranker(reranker, tmp_path):
|
||||
)
|
||||
|
||||
|
||||
def test_linear_combination(tmp_path):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_linear_combination(tmp_path, use_tantivy):
|
||||
reranker = LinearCombinationReranker()
|
||||
|
||||
vector_results = pa.Table.from_pydict(
|
||||
@@ -309,20 +313,22 @@ def test_linear_combination(tmp_path):
|
||||
assert "_score" not in combined_results.column_names
|
||||
assert "_relevance_score" in combined_results.column_names
|
||||
|
||||
_run_test_hybrid_reranker(reranker, tmp_path)
|
||||
_run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
|
||||
|
||||
|
||||
def test_rrf_reranker(tmp_path):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_rrf_reranker(tmp_path, use_tantivy):
|
||||
reranker = RRFReranker()
|
||||
_run_test_hybrid_reranker(reranker, tmp_path)
|
||||
_run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
|
||||
|
||||
|
||||
def test_mrr_reranker(tmp_path):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_mrr_reranker(tmp_path, use_tantivy):
|
||||
reranker = MRRReranker()
|
||||
_run_test_hybrid_reranker(reranker, tmp_path)
|
||||
_run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
|
||||
|
||||
# Test multi-vector part
|
||||
table, schema = get_test_table(tmp_path)
|
||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
||||
query = "single player experience"
|
||||
rs1 = table.search(query, vector_column_name="vector").limit(10).with_row_id(True)
|
||||
rs2 = (
|
||||
@@ -357,7 +363,7 @@ def test_rrf_reranker_distance():
|
||||
table = db.create_table("test", data)
|
||||
|
||||
table.create_index(num_partitions=1, num_sub_vectors=2)
|
||||
table.create_fts_index("text")
|
||||
table.create_fts_index("text", use_tantivy=False)
|
||||
|
||||
reranker = RRFReranker(return_score="all")
|
||||
|
||||
@@ -416,31 +422,35 @@ def test_rrf_reranker_distance():
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("COHERE_API_KEY") is None, reason="COHERE_API_KEY not set"
|
||||
)
|
||||
def test_cohere_reranker(tmp_path):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_cohere_reranker(tmp_path, use_tantivy):
|
||||
pytest.importorskip("cohere")
|
||||
reranker = CohereReranker()
|
||||
table, schema = get_test_table(tmp_path)
|
||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
||||
_run_test_reranker(reranker, table, "single player experience", None, schema)
|
||||
|
||||
|
||||
def test_cross_encoder_reranker(tmp_path):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_cross_encoder_reranker(tmp_path, use_tantivy):
|
||||
pytest.importorskip("sentence_transformers")
|
||||
reranker = CrossEncoderReranker()
|
||||
table, schema = get_test_table(tmp_path)
|
||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
||||
_run_test_reranker(reranker, table, "single player experience", None, schema)
|
||||
|
||||
|
||||
def test_colbert_reranker(tmp_path):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_colbert_reranker(tmp_path, use_tantivy):
|
||||
pytest.importorskip("rerankers")
|
||||
reranker = ColbertReranker()
|
||||
table, schema = get_test_table(tmp_path)
|
||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
||||
_run_test_reranker(reranker, table, "single player experience", None, schema)
|
||||
|
||||
|
||||
def test_answerdotai_reranker(tmp_path):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_answerdotai_reranker(tmp_path, use_tantivy):
|
||||
pytest.importorskip("rerankers")
|
||||
reranker = AnswerdotaiRerankers()
|
||||
table, schema = get_test_table(tmp_path)
|
||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
||||
_run_test_reranker(reranker, table, "single player experience", None, schema)
|
||||
|
||||
|
||||
@@ -449,9 +459,10 @@ def test_answerdotai_reranker(tmp_path):
|
||||
or os.environ.get("OPENAI_BASE_URL") is not None,
|
||||
reason="OPENAI_API_KEY not set",
|
||||
)
|
||||
def test_openai_reranker(tmp_path):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_openai_reranker(tmp_path, use_tantivy):
|
||||
pytest.importorskip("openai")
|
||||
table, schema = get_test_table(tmp_path)
|
||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
||||
reranker = OpenaiReranker()
|
||||
_run_test_reranker(reranker, table, "single player experience", None, schema)
|
||||
|
||||
@@ -459,9 +470,10 @@ def test_openai_reranker(tmp_path):
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("JINA_API_KEY") is None, reason="JINA_API_KEY not set"
|
||||
)
|
||||
def test_jina_reranker(tmp_path):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_jina_reranker(tmp_path, use_tantivy):
|
||||
pytest.importorskip("jina")
|
||||
table, schema = get_test_table(tmp_path)
|
||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
||||
reranker = JinaReranker()
|
||||
_run_test_reranker(reranker, table, "single player experience", None, schema)
|
||||
|
||||
@@ -469,10 +481,11 @@ def test_jina_reranker(tmp_path):
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
|
||||
)
|
||||
def test_voyageai_reranker(tmp_path):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_voyageai_reranker(tmp_path, use_tantivy):
|
||||
pytest.importorskip("voyageai")
|
||||
reranker = VoyageAIReranker(model_name="rerank-2.5")
|
||||
table, schema = get_test_table(tmp_path)
|
||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
||||
_run_test_reranker(reranker, table, "single player experience", None, schema)
|
||||
|
||||
|
||||
@@ -491,7 +504,7 @@ def test_empty_result_reranker():
|
||||
|
||||
# Create empty table with schema
|
||||
empty_table = db.create_table("empty_table", schema=schema, mode="overwrite")
|
||||
empty_table.create_fts_index("text", replace=True)
|
||||
empty_table.create_fts_index("text", use_tantivy=False, replace=True)
|
||||
for reranker in [
|
||||
CrossEncoderReranker(),
|
||||
# ColbertReranker(),
|
||||
@@ -590,10 +603,11 @@ def test_empty_hybrid_result_reranker():
|
||||
assert "_rowid" in result.column_names
|
||||
|
||||
|
||||
def test_cross_encoder_reranker_return_all(tmp_path):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_cross_encoder_reranker_return_all(tmp_path, use_tantivy):
|
||||
pytest.importorskip("sentence_transformers")
|
||||
reranker = CrossEncoderReranker(return_score="all")
|
||||
table, schema = get_test_table(tmp_path)
|
||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
||||
query = "single player experience"
|
||||
result = (
|
||||
table.search(query, query_type="hybrid", vector_column_name="vector")
|
||||
|
||||
@@ -242,8 +242,8 @@ def test_s3_dynamodb_sync(s3_bucket: str, commit_table: str, monkeypatch):
|
||||
|
||||
# FTS indices should error since they are not supported yet.
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match="Tantivy-based FTS has been removed",
|
||||
NotImplementedError,
|
||||
match="Full-text search is only supported on the local filesystem",
|
||||
):
|
||||
table.create_fts_index("x", use_tantivy=True)
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
from datetime import date, datetime, timedelta
|
||||
from time import sleep
|
||||
from typing import List
|
||||
@@ -1948,6 +1947,7 @@ def setup_hybrid_search_table(db: DBConnection, embedding_func):
|
||||
|
||||
def test_hybrid_search(tmp_db: DBConnection):
|
||||
# This test uses an FTS index
|
||||
pytest.importorskip("lancedb.fts")
|
||||
pytest.importorskip("lance")
|
||||
|
||||
table, MyTable, emb = setup_hybrid_search_table(tmp_db, "test")
|
||||
@@ -2018,6 +2018,7 @@ def test_hybrid_search(tmp_db: DBConnection):
|
||||
|
||||
def test_hybrid_search_metric_type(tmp_db: DBConnection):
|
||||
# This test uses an FTS index
|
||||
pytest.importorskip("lancedb.fts")
|
||||
pytest.importorskip("lance")
|
||||
|
||||
# Need to use nonnorm as the embedding function so l2 and dot results
|
||||
@@ -2039,13 +2040,6 @@ def test_hybrid_search_metric_type(tmp_db: DBConnection):
|
||||
@pytest.mark.parametrize(
|
||||
"consistency_interval", [None, timedelta(seconds=0), timedelta(seconds=0.1)]
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
sys.platform == "win32",
|
||||
reason=(
|
||||
"TODO: directory namespace is not supported on Windows yet; "
|
||||
"re-enable after that is fixed."
|
||||
),
|
||||
)
|
||||
def test_consistency(tmp_path, consistency_interval):
|
||||
db = lancedb.connect(tmp_path)
|
||||
table = db.create_table("my_table", data=[{"id": 0}])
|
||||
@@ -2066,6 +2060,7 @@ def test_consistency(tmp_path, consistency_interval):
|
||||
elif consistency_interval == timedelta(seconds=0):
|
||||
assert table2.version == table.version
|
||||
else:
|
||||
# (consistency_interval == timedelta(seconds=0.1)
|
||||
assert table2.version == table.version - 1
|
||||
sleep(0.1)
|
||||
assert table2.version == table.version
|
||||
|
||||
@@ -1,17 +1,11 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
sync::Arc,
|
||||
time::Duration,
|
||||
};
|
||||
use std::{collections::HashMap, sync::Arc, time::Duration};
|
||||
|
||||
use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::FromPyArrow};
|
||||
use lancedb::{
|
||||
connection::Connection as LanceConnection,
|
||||
connection::NamespaceClientPushdownOperation,
|
||||
database::namespace::LanceNamespaceDatabase,
|
||||
database::{CreateTableMode, Database, ReadConsistency},
|
||||
};
|
||||
use pyo3::{
|
||||
@@ -45,29 +39,6 @@ impl Connection {
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_namespace_client_pushdown_operations(
|
||||
operations: Option<Vec<String>>,
|
||||
) -> PyResult<HashSet<NamespaceClientPushdownOperation>> {
|
||||
let mut parsed = HashSet::new();
|
||||
for operation in operations.unwrap_or_default() {
|
||||
match operation.as_str() {
|
||||
"QueryTable" => {
|
||||
parsed.insert(NamespaceClientPushdownOperation::QueryTable);
|
||||
}
|
||||
"CreateTable" => {
|
||||
parsed.insert(NamespaceClientPushdownOperation::CreateTable);
|
||||
}
|
||||
_ => {
|
||||
return Err(PyValueError::new_err(format!(
|
||||
"Invalid pushdown operation: {}",
|
||||
operation
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(parsed)
|
||||
}
|
||||
|
||||
impl Connection {
|
||||
fn parse_create_mode_str(mode: &str) -> PyResult<CreateTableMode> {
|
||||
match mode {
|
||||
@@ -525,7 +496,7 @@ impl Connection {
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None, manifest_enabled=false, namespace_client_properties=None))]
|
||||
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn connect(
|
||||
py: Python<'_>,
|
||||
@@ -537,8 +508,6 @@ pub fn connect(
|
||||
client_config: Option<PyClientConfig>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
session: Option<crate::session::Session>,
|
||||
manifest_enabled: bool,
|
||||
namespace_client_properties: Option<HashMap<String, String>>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
future_into_py(py, async move {
|
||||
let mut builder = lancedb::connect(&uri);
|
||||
@@ -558,12 +527,6 @@ pub fn connect(
|
||||
if let Some(storage_options) = storage_options {
|
||||
builder = builder.storage_options(storage_options);
|
||||
}
|
||||
if manifest_enabled {
|
||||
builder = builder.manifest_enabled(true);
|
||||
}
|
||||
if let Some(namespace_client_properties) = namespace_client_properties {
|
||||
builder = builder.namespace_client_properties(namespace_client_properties);
|
||||
}
|
||||
#[cfg(feature = "remote")]
|
||||
if let Some(client_config) = client_config {
|
||||
builder = builder.client_config(client_config.into());
|
||||
@@ -575,52 +538,6 @@ pub fn connect(
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (
|
||||
namespace_client,
|
||||
read_consistency_interval=None,
|
||||
storage_options=None,
|
||||
session=None,
|
||||
namespace_client_pushdown_operations=None,
|
||||
namespace_client_impl=None,
|
||||
namespace_client_properties=None,
|
||||
))]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn connect_namespace_client(
|
||||
py: Python<'_>,
|
||||
namespace_client: Py<PyAny>,
|
||||
read_consistency_interval: Option<f64>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
session: Option<crate::session::Session>,
|
||||
namespace_client_pushdown_operations: Option<Vec<String>>,
|
||||
namespace_client_impl: Option<String>,
|
||||
namespace_client_properties: Option<HashMap<String, String>>,
|
||||
) -> PyResult<Connection> {
|
||||
let namespace_client = extract_namespace_arc(py, namespace_client)?;
|
||||
let read_consistency_interval = read_consistency_interval.map(Duration::from_secs_f64);
|
||||
let namespace_client_pushdown_operations =
|
||||
parse_namespace_client_pushdown_operations(namespace_client_pushdown_operations)?;
|
||||
let ns_impl = namespace_client_impl.unwrap_or_else(|| "python".to_string());
|
||||
let ns_properties = namespace_client_properties.unwrap_or_default();
|
||||
let storage_options = storage_options.unwrap_or_default();
|
||||
let session = session.map(|s| s.inner.clone());
|
||||
|
||||
let database = LanceNamespaceDatabase::from_namespace_client(
|
||||
namespace_client,
|
||||
ns_impl,
|
||||
ns_properties,
|
||||
storage_options,
|
||||
read_consistency_interval,
|
||||
session,
|
||||
namespace_client_pushdown_operations,
|
||||
);
|
||||
|
||||
Ok(Connection::new(LanceConnection::new(
|
||||
Arc::new(database),
|
||||
Arc::new(lancedb::embeddings::MemoryRegistry::new()),
|
||||
)))
|
||||
}
|
||||
|
||||
#[derive(FromPyObject)]
|
||||
pub struct PyClientConfig {
|
||||
user_agent: String,
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use arrow::RecordBatchStream;
|
||||
use connection::{Connection, connect, connect_namespace_client};
|
||||
use connection::{Connection, connect};
|
||||
use env_logger::Env;
|
||||
use expr::{PyExpr, expr_col, expr_func, expr_lit};
|
||||
use index::IndexConfig;
|
||||
@@ -58,7 +58,6 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
m.add_class::<PyPermutationReader>()?;
|
||||
m.add_class::<PyExpr>()?;
|
||||
m.add_function(wrap_pyfunction!(connect, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(connect_namespace_client, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(permutation::async_permutation_builder, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(util::validate_table_name, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(query::fts_query_to_json, m)?)?;
|
||||
|
||||
40
python/uv.lock
generated
40
python/uv.lock
generated
@@ -1996,6 +1996,7 @@ tests = [
|
||||
{ name = "pytest-mock" },
|
||||
{ name = "pytz" },
|
||||
{ name = "requests" },
|
||||
{ name = "tantivy" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
@@ -2049,6 +2050,7 @@ requires-dist = [
|
||||
{ name = "sentence-transformers", marker = "extra == 'embeddings'", specifier = ">=2.2.0" },
|
||||
{ name = "sentencepiece", marker = "extra == 'embeddings'", specifier = ">=0.1.99" },
|
||||
{ name = "sentencepiece", marker = "extra == 'siglip'" },
|
||||
{ name = "tantivy", marker = "extra == 'tests'", specifier = ">=0.20.0" },
|
||||
{ name = "torch", marker = "extra == 'clip'" },
|
||||
{ name = "torch", marker = "extra == 'embeddings'", specifier = ">=2.0.0" },
|
||||
{ name = "torch", marker = "extra == 'siglip'" },
|
||||
@@ -4777,6 +4779,44 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy"
|
||||
version = "0.25.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/1b/f9/0cd3955d155d3e3ef74b864769514dd191e5dacba9f0beb7af2d914942ce/tantivy-0.25.1.tar.gz", hash = "sha256:68a3314699a7d18fcf338b52bae8ce46a97dde1128a3e47e33fa4db7f71f265e", size = 75120, upload-time = "2025-12-02T11:57:12.997Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/80/f7/2276bed3bed983ce2970dc70e3571f372587fe4f5f2bac1d6d617df08fa3/tantivy-0.25.1-cp310-cp310-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7aa587a3dc9470584cacf5e3640fee93d12ec5f10109669c1f47c4e90820b958", size = 7638510, upload-time = "2025-12-02T11:56:08.754Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/20/8c/078dc50570e243414356b05633f52fe544b85179281ffa9f1fe05d76bbd8/tantivy-0.25.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:56d77fe667595693d9fa5f0b4545776d84da9526bab0273b3fc6c7536dc0d8a2", size = 3932659, upload-time = "2025-12-02T11:56:10.621Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bd/dc/281c48436a1e3178b58fe463af314434fe0f3a4ec0c7588a362900e0c69e/tantivy-0.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ba8c347cd48595fcaeabb28a909ebce92cf9c5e5c84ab5ba1136a280a307b5c", size = 4197430, upload-time = "2025-12-02T11:56:12.65Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7b/6c/61e6e0b0a350007d10a9b66a35703361d3345e14e7a7cc83494776b2a054/tantivy-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa7c4932e8fde1f09f2d46226060e827e197c2749abdc6129d73a752773adc38", size = 4184055, upload-time = "2025-12-02T11:56:14.647Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5f/fd/0eb059b12f0b6f91623a54a46448a83b7f716d08f3bca68c095d697b85da/tantivy-0.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:afcfc5dbb0bcd5d24531f4471737ae0896f33528426ab0b1dad3e427c19120f6", size = 3424134, upload-time = "2025-12-02T11:56:16.242Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4e/7a/8a277f377e8a151fc0e71d4ffc1114aefb6e5e1c7dd609fed0955cf34ed8/tantivy-0.25.1-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:d363d7b4207d3a5aa7f0d212420df35bed18bdb6bae26a2a8bd57428388b7c29", size = 7637033, upload-time = "2025-12-02T11:56:18.104Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/71/31/8b4acdedfc9f9a2d04b1340d07eef5213d6f151d1e18da0cb423e5f090d2/tantivy-0.25.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8f4389cf1d889a1df7c5a3195806b4b56c37cee10d8a26faaa0dea35a867b5ff", size = 3932180, upload-time = "2025-12-02T11:56:19.833Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/dc/3e8499c21b4b9795e8f2fc54c68ce5b92905aaeadadaa56ecfa9180b11b1/tantivy-0.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99864c09fc54652c3c2486cdf13f86cdc8200f4b481569cb291e095ca5d496e5", size = 4197620, upload-time = "2025-12-02T11:56:21.496Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/8e/f2ce62fffc811eb62bead92c7b23c2e218f817cbd54c4f3b802e03ba1438/tantivy-0.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05abf37ddbc5063c575548be0d62931629c086bff7a5a1b67cf5a8f5ebf4cd8c", size = 4183794, upload-time = "2025-12-02T11:56:23.215Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/de/64/24e2891b0ba3fd9853e10c296095a33b89bf3efd65e29da1ee5dae736040/tantivy-0.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:f307ee8ad21597b0be23af83008fd66cfd5f958cdfa24ec0aaa08a38e86bbef4", size = 3424235, upload-time = "2025-12-02T11:56:25.172Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/41/e7/6849c713ed0996c7628324c60512c4882006f0a62145e56c624a93407f90/tantivy-0.25.1-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:90fd919e5f611809f746560ecf36eb9be824dec62e21ae17a27243759edb9aa1", size = 7621494, upload-time = "2025-12-02T11:56:27.069Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c5/22/c3d8294600dc6e7fa350daef9ff337d3c06e132b81df727de9f7a50c692a/tantivy-0.25.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:4613c7cf6c23f3a97989819690a0f956d799354957de7a204abcc60083cebe02", size = 3925219, upload-time = "2025-12-02T11:56:29.403Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/41/fc/cbb1df71dd44c9110eff4eaaeda9d44f2d06182fe0452193be20ddfba93f/tantivy-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c477bd20b4df804d57dfc5033431bef27cde605695ae141b03abbf6ebc069129", size = 4198699, upload-time = "2025-12-02T11:56:31.359Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/47/4d/71abb78b774073c3ce12a4faa4351a9d910a71ffa3659526affba163873d/tantivy-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9b1a1ba1113c523c7ff7b10f282d6c4074006f7ef8d71e1d973d51bf7291ddb", size = 4183585, upload-time = "2025-12-02T11:56:33.317Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/be/16/3f00cd7ec458b92a0e977960af9ddfbeb762127d9acc68da9094a1fda556/tantivy-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:9de0bafd3bd7ac9f8f82d53e17562e9db11a5af308fe5185c4bd86feaddbe4a6", size = 3424622, upload-time = "2025-12-02T11:56:34.788Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/25/73cfbcf1a8ea49be6c42817431cac46b70a119fe64da903fcc2d92b5b511/tantivy-0.25.1-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:f51ff7196c6f31719202080ed8372d5e3d51e92c749c032fb8234f012e99744c", size = 7622530, upload-time = "2025-12-02T11:56:36.839Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/12/c8/c0d7591cdf4f7e7a9fc4da786d1ca8cd1aacffaa2be16ea6d401a8e4a566/tantivy-0.25.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:550e63321bfcacc003859f2fa29c1e8e56450807b3c9a501c1add27cfb9236d9", size = 3925637, upload-time = "2025-12-02T11:56:38.425Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3a/09/bedfc223bffec7641b417dd7ab071134b2ef8f8550e9b1fb6014657ef52e/tantivy-0.25.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fde31cc8d6e122faf7902aeea32bc008a429a6e8904e34d3468126a3ec01b016", size = 4197322, upload-time = "2025-12-02T11:56:40.411Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f5/f1/1fa5183500c8042200c9f2b840d34f5bbcfb434a1ee750e7132262d2a5c9/tantivy-0.25.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b11bd5a518b0be645320b47af8493f6a40c4f3234313e37adcf4534a564d27dd", size = 4183143, upload-time = "2025-12-02T11:56:42.048Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d5/74/a4c4f4eb95888ccb784da3b017aa0625ab1ac411bf5d022a9a797d9a2334/tantivy-0.25.1-cp313-cp313-win_amd64.whl", hash = "sha256:cc7fe88853e06b3251ee4fa42b7a2038727f850c8765bcc8167cfc73585dd24e", size = 3423491, upload-time = "2025-12-02T11:56:43.858Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8b/2f/581519492226f97d23bd0adc95dad991ebeaa73ea6abc8bff389a3096d9a/tantivy-0.25.1-cp313-cp313t-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:dae99e75b7eaa9bf5bd16ab106b416370f08c135aed0e117d62a3201cd1ffe36", size = 7610316, upload-time = "2025-12-02T11:56:45.927Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/91/40/5d7bc315ab9e6a22c5572656e8ada1c836cfa96dccf533377504fbc3c9d9/tantivy-0.25.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:506e9533c5ef4d3df43bad64ffecc0aa97c76e361ea610815dc3a20a9d6b30b3", size = 3919882, upload-time = "2025-12-02T11:56:48.469Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/02/b9/e0ef2f57a6a72444cb66c2ffbc310ab33ffaace275f1c4b0319d84ea3f18/tantivy-0.25.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dbd4f8f264dacbcc9dee542832da2173fd53deaaea03f082d95214f8b5ed6bc", size = 4196031, upload-time = "2025-12-02T11:56:50.151Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/02/bf3f8cacfd08642e14a73f7956a3fb95d58119132c98c121b9065a1f8615/tantivy-0.25.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:824c643ccb640dd9e35e00c5d5054ddf3323f56fe4219d57d428a9eeea13d22c", size = 4183437, upload-time = "2025-12-02T11:56:51.818Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9c/83/afa90e570198e2d1139dd567bec3c9cf44d8c54f63a649f16d711ede02f5/tantivy-0.25.1-cp313-cp313t-win_amd64.whl", hash = "sha256:09c987b840afcebac817836ac08407eff17272d8aa60ce6e291f89c81830221d", size = 3419409, upload-time = "2025-12-02T11:56:53.451Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/44/9f1d67aa5030f7eebc966c863d1316a510a971dd8bb45651df4acdfae9ed/tantivy-0.25.1-cp314-cp314-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7f5d29ae85dd0f23df8d15b3e7b341d4f9eb5a446bbb9640df48ac1f6d9e0c6c", size = 7623723, upload-time = "2025-12-02T11:56:55.066Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/db/30/6e085bd3ed9d12da3c91c185854abd70f9dfd35fb36a75ea98428d42c30b/tantivy-0.25.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:f2d2938fb69a74fc1bb36edfaf7f0d1596fa1264db0f377bda2195c58bcb6245", size = 3926243, upload-time = "2025-12-02T11:56:57.058Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/32/f5/a00d65433430f51718e5cc6938df571765d7c4e03aedec5aef4ab567aa9b/tantivy-0.25.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f5ff124c4802558e627091e780b362ca944169736caba5a372eef39a79d0ae0", size = 4207186, upload-time = "2025-12-02T11:56:58.803Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/19/63/61bdb12fc95f2a7f77bd419a5149bfa9f28caa76cb569bf2b6b06e1d033e/tantivy-0.25.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43b80ef62a340416139c93d19264e5f808da48e04f9305f1092b8ed22be0a5be", size = 4187312, upload-time = "2025-12-02T11:57:00.595Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b7/de/e39c0b01d59019bf5c38face8b81defbc4a68cebf5e0c53bcb2cd715a449/tantivy-0.25.1-cp314-cp314-win_amd64.whl", hash = "sha256:286b654f40c70c1e6b64b9bc7031ed0bf5c440f5bffeaeeee21a0ee6cc39f0e2", size = 3436535, upload-time = "2025-12-02T11:57:02.267Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "threadpoolctl"
|
||||
version = "3.6.0"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.28.0-beta.10"
|
||||
version = "0.28.0-beta.4"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
@@ -111,12 +111,7 @@ default = []
|
||||
aws = ["lance/aws", "lance-io/aws", "lance-namespace-impls/dir-aws"]
|
||||
oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"]
|
||||
gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"]
|
||||
azure = [
|
||||
"lance/azure",
|
||||
"lance-io/azure",
|
||||
"lance-namespace-impls/dir-azure",
|
||||
"lance-namespace-impls/credential-vendor-azure",
|
||||
]
|
||||
azure = ["lance/azure", "lance-io/azure", "lance-namespace-impls/dir-azure"]
|
||||
huggingface = [
|
||||
"lance/huggingface",
|
||||
"lance-io/huggingface",
|
||||
|
||||
@@ -171,7 +171,7 @@ impl OpenTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
@@ -188,7 +188,7 @@ impl OpenTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -582,23 +582,6 @@ pub struct ConnectRequest {
|
||||
/// Database specific options
|
||||
pub options: HashMap<String, String>,
|
||||
|
||||
/// Extra properties for the equivalent namespace client.
|
||||
///
|
||||
/// For a local [`ListingDatabase`], these are merged into the backing
|
||||
/// `DirectoryNamespace` properties. This is useful for namespace-specific
|
||||
/// settings such as `table_version_tracking_enabled` that are distinct from
|
||||
/// storage options.
|
||||
pub namespace_client_properties: HashMap<String, String>,
|
||||
|
||||
/// Use directory namespace manifests as the source of truth for native
|
||||
/// LanceDB table metadata.
|
||||
///
|
||||
/// When enabled for a local/native connection, LanceDB returns a
|
||||
/// namespace-backed database directly. Directory listing fallback remains
|
||||
/// enabled for migration, and directory-listing-to-manifest migration is
|
||||
/// forced on.
|
||||
pub manifest_enabled: bool,
|
||||
|
||||
/// The interval at which to check for updates from other processes.
|
||||
///
|
||||
/// If None, then consistency is not checked. For performance
|
||||
@@ -638,8 +621,6 @@ impl ConnectBuilder {
|
||||
client_config: Default::default(),
|
||||
read_consistency_interval: None,
|
||||
options: HashMap::new(),
|
||||
namespace_client_properties: HashMap::new(),
|
||||
manifest_enabled: false,
|
||||
session: None,
|
||||
},
|
||||
embedding_registry: None,
|
||||
@@ -757,7 +738,7 @@ impl ConnectBuilder {
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.request.options.insert(key.into(), value.into());
|
||||
self
|
||||
@@ -765,7 +746,7 @@ impl ConnectBuilder {
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -776,42 +757,6 @@ impl ConnectBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set an additional property for the equivalent namespace client.
|
||||
pub fn namespace_client_property(
|
||||
mut self,
|
||||
key: impl Into<String>,
|
||||
value: impl Into<String>,
|
||||
) -> Self {
|
||||
self.request
|
||||
.namespace_client_properties
|
||||
.insert(key.into(), value.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set multiple additional properties for the equivalent namespace client.
|
||||
pub fn namespace_client_properties(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
) -> Self {
|
||||
for (key, value) in pairs {
|
||||
self.request
|
||||
.namespace_client_properties
|
||||
.insert(key.into(), value.into());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable or disable manifest-backed directory namespace mode for local
|
||||
/// native connections.
|
||||
///
|
||||
/// When enabled, the connection uses the directory namespace database
|
||||
/// directly for all table operations and forces
|
||||
/// `dir_listing_to_manifest_migration_enabled=true`.
|
||||
pub fn manifest_enabled(mut self, enabled: bool) -> Self {
|
||||
self.request.manifest_enabled = enabled;
|
||||
self
|
||||
}
|
||||
|
||||
/// The interval at which to check for updates from other processes. This
|
||||
/// only affects LanceDB OSS.
|
||||
///
|
||||
@@ -907,16 +852,6 @@ impl ConnectBuilder {
|
||||
pub async fn execute(self) -> Result<Connection> {
|
||||
if self.request.uri.starts_with("db") {
|
||||
self.execute_remote()
|
||||
} else if self.request.manifest_enabled {
|
||||
let internal = Arc::new(
|
||||
ListingDatabase::connect_manifest_enabled_namespace_database(&self.request).await?,
|
||||
);
|
||||
Ok(Connection {
|
||||
internal,
|
||||
embedding_registry: self
|
||||
.embedding_registry
|
||||
.unwrap_or_else(|| Arc::new(MemoryRegistry::new())),
|
||||
})
|
||||
} else {
|
||||
let internal = Arc::new(ListingDatabase::connect_with_options(&self.request).await?);
|
||||
Ok(Connection {
|
||||
@@ -946,7 +881,7 @@ use std::collections::HashSet;
|
||||
/// These operations will be executed on the namespace server instead of locally
|
||||
/// when enabled via [`ConnectNamespaceBuilder::pushdown_operations`].
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum NamespaceClientPushdownOperation {
|
||||
pub enum PushdownOperation {
|
||||
/// Execute queries on the namespace server via `query_table()` instead of locally.
|
||||
QueryTable,
|
||||
/// Execute table creation on the namespace server via `create_table()`
|
||||
@@ -958,11 +893,10 @@ pub struct ConnectNamespaceBuilder {
|
||||
ns_impl: String,
|
||||
properties: HashMap<String, String>,
|
||||
storage_options: HashMap<String, String>,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
}
|
||||
|
||||
impl ConnectNamespaceBuilder {
|
||||
@@ -971,7 +905,6 @@ impl ConnectNamespaceBuilder {
|
||||
ns_impl: ns_impl.to_string(),
|
||||
properties,
|
||||
storage_options: HashMap::new(),
|
||||
namespace_client_properties: HashMap::new(),
|
||||
read_consistency_interval: None,
|
||||
embedding_registry: None,
|
||||
session: None,
|
||||
@@ -981,7 +914,7 @@ impl ConnectNamespaceBuilder {
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.storage_options.insert(key.into(), value.into());
|
||||
self
|
||||
@@ -989,7 +922,7 @@ impl ConnectNamespaceBuilder {
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -1000,29 +933,6 @@ impl ConnectNamespaceBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set an additional namespace client property.
|
||||
pub fn namespace_client_property(
|
||||
mut self,
|
||||
key: impl Into<String>,
|
||||
value: impl Into<String>,
|
||||
) -> Self {
|
||||
self.namespace_client_properties
|
||||
.insert(key.into(), value.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set multiple additional namespace client properties.
|
||||
pub fn namespace_client_properties(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
) -> Self {
|
||||
for (key, value) in pairs {
|
||||
self.namespace_client_properties
|
||||
.insert(key.into(), value.into());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// The interval at which to check for updates from other processes.
|
||||
///
|
||||
/// If left unset, consistency is not checked. For maximum read
|
||||
@@ -1060,11 +970,11 @@ impl ConnectNamespaceBuilder {
|
||||
/// and leveraging server-side compute resources.
|
||||
///
|
||||
/// Available operations:
|
||||
/// - [`NamespaceClientPushdownOperation::QueryTable`]: Execute queries via `namespace.query_table()`
|
||||
/// - [`NamespaceClientPushdownOperation::CreateTable`]: Execute table creation via `namespace.create_table()`
|
||||
/// - [`PushdownOperation::QueryTable`]: Execute queries via `namespace.query_table()`
|
||||
/// - [`PushdownOperation::CreateTable`]: Execute table creation via `namespace.create_table()`
|
||||
///
|
||||
/// By default, no operations are pushed down (all executed locally).
|
||||
pub fn pushdown_operation(mut self, operation: NamespaceClientPushdownOperation) -> Self {
|
||||
pub fn pushdown_operation(mut self, operation: PushdownOperation) -> Self {
|
||||
self.pushdown_operations.insert(operation);
|
||||
self
|
||||
}
|
||||
@@ -1074,7 +984,7 @@ impl ConnectNamespaceBuilder {
|
||||
/// See [`Self::pushdown_operation`] for details.
|
||||
pub fn pushdown_operations(
|
||||
mut self,
|
||||
operations: impl IntoIterator<Item = NamespaceClientPushdownOperation>,
|
||||
operations: impl IntoIterator<Item = PushdownOperation>,
|
||||
) -> Self {
|
||||
self.pushdown_operations.extend(operations);
|
||||
self
|
||||
@@ -1084,13 +994,10 @@ impl ConnectNamespaceBuilder {
|
||||
pub async fn execute(self) -> Result<Connection> {
|
||||
use crate::database::namespace::LanceNamespaceDatabase;
|
||||
|
||||
let mut properties = self.properties;
|
||||
properties.extend(self.namespace_client_properties);
|
||||
|
||||
let internal = Arc::new(
|
||||
LanceNamespaceDatabase::connect(
|
||||
&self.ns_impl,
|
||||
properties,
|
||||
self.properties,
|
||||
self.storage_options,
|
||||
self.read_consistency_interval,
|
||||
self.session,
|
||||
@@ -1163,9 +1070,6 @@ mod tests {
|
||||
use lance_testing::datagen::{BatchGenerator, IncrementingInt32};
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::database::listing::{ListingDatabaseOptions, OPT_NEW_TABLE_V2_MANIFEST_PATHS};
|
||||
use crate::database::namespace::LanceNamespaceDatabase;
|
||||
use crate::table::NativeTable;
|
||||
use crate::test_utils::connection::new_test_connection;
|
||||
|
||||
use super::*;
|
||||
@@ -1213,172 +1117,6 @@ mod tests {
|
||||
assert_eq!(db.uri(), relative_uri.to_str().unwrap().to_string());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_connect_with_namespace_client_properties() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
|
||||
let db = connect(uri)
|
||||
.namespace_client_property("table_version_tracking_enabled", "true")
|
||||
.namespace_client_property("manifest_enabled", "true")
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let (ns_impl, properties) = db.namespace_client_config().await.unwrap();
|
||||
assert_eq!(ns_impl, "dir");
|
||||
assert_eq!(properties.get("root"), Some(&uri.to_string()));
|
||||
assert_eq!(
|
||||
properties.get("table_version_tracking_enabled"),
|
||||
Some(&"true".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
properties.get("manifest_enabled"),
|
||||
Some(&"true".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_connect_with_manifest_enabled_uses_directory_namespace() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
|
||||
let db = connect(uri)
|
||||
.manifest_enabled(true)
|
||||
.storage_option("timeout", "30s")
|
||||
.namespace_client_property("manifest_enabled", "false")
|
||||
.namespace_client_property("dir_listing_to_manifest_migration_enabled", "false")
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert!(
|
||||
db.database()
|
||||
.as_any()
|
||||
.downcast_ref::<LanceNamespaceDatabase>()
|
||||
.is_some()
|
||||
);
|
||||
assert_eq!(db.uri(), uri);
|
||||
|
||||
let (ns_impl, properties) = db.namespace_client_config().await.unwrap();
|
||||
assert_eq!(ns_impl, "dir");
|
||||
assert_eq!(properties.get("root"), Some(&uri.to_string()));
|
||||
assert_eq!(
|
||||
properties.get("manifest_enabled"),
|
||||
Some(&"true".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
properties.get("dir_listing_to_manifest_migration_enabled"),
|
||||
Some(&"true".to_string())
|
||||
);
|
||||
assert_eq!(properties.get("storage.timeout"), Some(&"30s".to_string()));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_manifest_enabled_rejects_commit_engine_uri() {
|
||||
let Err(err) = connect("s3+ddb://bucket/db?ddbTableName=manifest")
|
||||
.manifest_enabled(true)
|
||||
.execute()
|
||||
.await
|
||||
else {
|
||||
panic!("expected manifest-enabled s3+ddb connection to fail");
|
||||
};
|
||||
assert!(
|
||||
matches!(err, Error::NotSupported { message } if message.contains("commit engine URI schemes"))
|
||||
);
|
||||
|
||||
let Err(err) = connect("s3://bucket/db?engine=ddb&ddbTableName=manifest")
|
||||
.manifest_enabled(true)
|
||||
.execute()
|
||||
.await
|
||||
else {
|
||||
panic!("expected manifest-enabled engine query connection to fail");
|
||||
};
|
||||
assert!(
|
||||
matches!(err, Error::NotSupported { message } if message.contains("commit engine"))
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_manifest_enabled_connection_migrates_root_listing_table() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
|
||||
|
||||
connect(uri)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
.create_empty_table("legacy", schema)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let db = connect(uri).manifest_enabled(true).execute().await.unwrap();
|
||||
let tables = db.table_names().execute().await.unwrap();
|
||||
assert_eq!(tables, vec!["legacy".to_string()]);
|
||||
db.open_table("legacy").execute().await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_manifest_enabled_preserves_new_table_options() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let options = ListingDatabaseOptions::builder()
|
||||
.enable_v2_manifest_paths(true)
|
||||
.build();
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
|
||||
|
||||
let table = connect(uri)
|
||||
.manifest_enabled(true)
|
||||
.database_options(&options)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
.create_empty_table("v1_manifest", schema)
|
||||
.storage_option(OPT_NEW_TABLE_V2_MANIFEST_PATHS, "false")
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let native_table = table
|
||||
.base_table()
|
||||
.as_any()
|
||||
.downcast_ref::<NativeTable>()
|
||||
.unwrap();
|
||||
assert!(!native_table.uses_v2_manifest_paths().await.unwrap());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_manifest_enabled_vend_input_storage_options() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
|
||||
|
||||
let table = connect(uri)
|
||||
.manifest_enabled(true)
|
||||
.storage_option("test_storage_option", "test_value")
|
||||
.namespace_client_property("vend_input_storage_options", "true")
|
||||
.namespace_client_property(
|
||||
"vend_input_storage_options_refresh_interval_millis",
|
||||
"60000",
|
||||
)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
.create_empty_table("vended", schema)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let storage_options = table.latest_storage_options().await.unwrap().unwrap();
|
||||
assert_eq!(
|
||||
storage_options.get("test_storage_option"),
|
||||
Some(&"test_value".to_string())
|
||||
);
|
||||
assert!(storage_options.contains_key("expires_at_millis"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_table_names() {
|
||||
let tc = new_test_connection().await.unwrap();
|
||||
|
||||
@@ -55,7 +55,7 @@ impl CreateTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
@@ -73,7 +73,7 @@ impl CreateTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
|
||||
@@ -20,7 +20,6 @@ use snafu::ResultExt;
|
||||
|
||||
use crate::connection::ConnectRequest;
|
||||
use crate::database::ReadConsistency;
|
||||
use crate::database::namespace::LanceNamespaceDatabase;
|
||||
use crate::error::{CreateDirSnafu, Error, Result};
|
||||
use crate::io::object_store::MirroringObjectStoreWrapper;
|
||||
use crate::table::NativeTable;
|
||||
@@ -74,7 +73,7 @@ pub struct ListingDatabaseOptions {
|
||||
/// These are used to create/list tables and they are inherited by all tables
|
||||
/// opened by this database.
|
||||
///
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub storage_options: HashMap<String, String>,
|
||||
}
|
||||
|
||||
@@ -186,7 +185,7 @@ impl ListingDatabaseOptionsBuilder {
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.options
|
||||
.storage_options
|
||||
@@ -196,7 +195,7 @@ impl ListingDatabaseOptionsBuilder {
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -256,9 +255,6 @@ pub struct ListingDatabase {
|
||||
|
||||
// Session for object stores and caching
|
||||
session: Arc<lance::session::Session>,
|
||||
|
||||
// Namespace-backed database for child namespace operations
|
||||
namespace_database: Arc<LanceNamespaceDatabase>,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ListingDatabase {
|
||||
@@ -285,175 +281,6 @@ const MIRRORED_STORE: &str = "mirroredStore";
|
||||
|
||||
/// A connection to LanceDB
|
||||
impl ListingDatabase {
|
||||
pub(crate) fn build_namespace_client_properties(
|
||||
uri: &str,
|
||||
storage_options: &HashMap<String, String>,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
) -> HashMap<String, String> {
|
||||
let mut properties = namespace_client_properties;
|
||||
properties.insert("root".to_string(), uri.to_string());
|
||||
for (key, value) in storage_options {
|
||||
properties.insert(format!("storage.{}", key), value.clone());
|
||||
}
|
||||
properties
|
||||
}
|
||||
|
||||
pub(crate) fn build_manifest_enabled_namespace_client_properties(
|
||||
uri: &str,
|
||||
storage_options: &HashMap<String, String>,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
) -> HashMap<String, String> {
|
||||
let mut properties = Self::build_namespace_client_properties(
|
||||
uri,
|
||||
storage_options,
|
||||
namespace_client_properties,
|
||||
);
|
||||
properties.insert("manifest_enabled".to_string(), "true".to_string());
|
||||
properties.insert(
|
||||
"dir_listing_to_manifest_migration_enabled".to_string(),
|
||||
"true".to_string(),
|
||||
);
|
||||
properties
|
||||
}
|
||||
|
||||
async fn connect_namespace_database(
|
||||
uri: &str,
|
||||
storage_options: HashMap<String, String>,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
session: Arc<lance::session::Session>,
|
||||
) -> Result<Arc<LanceNamespaceDatabase>> {
|
||||
let ns_properties = Self::build_namespace_client_properties(
|
||||
uri,
|
||||
&storage_options,
|
||||
namespace_client_properties,
|
||||
);
|
||||
Ok(Arc::new(
|
||||
LanceNamespaceDatabase::connect(
|
||||
"dir",
|
||||
ns_properties,
|
||||
storage_options,
|
||||
read_consistency_interval,
|
||||
Some(session),
|
||||
HashSet::new(),
|
||||
)
|
||||
.await?,
|
||||
))
|
||||
}
|
||||
|
||||
async fn prepare_namespace_root(
|
||||
uri: &str,
|
||||
storage_options: &HashMap<String, String>,
|
||||
session: Arc<lance::session::Session>,
|
||||
) -> Result<String> {
|
||||
match url::Url::parse(uri) {
|
||||
Ok(url) if url.scheme().len() == 1 && cfg!(windows) => {
|
||||
let (object_store, _) = ObjectStore::from_uri_and_params(
|
||||
session.store_registry(),
|
||||
uri,
|
||||
&ObjectStoreParams::default(),
|
||||
)
|
||||
.await?;
|
||||
if object_store.is_local() {
|
||||
Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?;
|
||||
}
|
||||
Ok(uri.to_string())
|
||||
}
|
||||
Ok(mut url) => {
|
||||
if url.scheme().contains('+') {
|
||||
return Err(Error::NotSupported {
|
||||
message: "commit engine URI schemes are not supported for manifest-enabled namespace connections".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
for (key, value) in url.query_pairs() {
|
||||
if key == ENGINE {
|
||||
return Err(Error::NotSupported {
|
||||
message: format!(
|
||||
"commit engine '{}' is not supported for manifest-enabled namespace connections",
|
||||
value
|
||||
),
|
||||
});
|
||||
} else if key == MIRRORED_STORE {
|
||||
return Err(Error::NotSupported {
|
||||
message: "mirrored store is not supported for manifest-enabled namespace connections"
|
||||
.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
url.set_query(None);
|
||||
let plain_uri = url.to_string();
|
||||
|
||||
let os_params = ObjectStoreParams {
|
||||
storage_options_accessor: if storage_options.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Arc::new(StorageOptionsAccessor::with_static_options(
|
||||
storage_options.clone(),
|
||||
)))
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
let (object_store, _) = ObjectStore::from_uri_and_params(
|
||||
session.store_registry(),
|
||||
&plain_uri,
|
||||
&os_params,
|
||||
)
|
||||
.await?;
|
||||
if object_store.is_local() {
|
||||
Self::try_create_dir(&plain_uri).context(CreateDirSnafu {
|
||||
path: plain_uri.clone(),
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(plain_uri)
|
||||
}
|
||||
Err(_) => {
|
||||
let (object_store, _) = ObjectStore::from_uri_and_params(
|
||||
session.store_registry(),
|
||||
uri,
|
||||
&ObjectStoreParams::default(),
|
||||
)
|
||||
.await?;
|
||||
if object_store.is_local() {
|
||||
Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?;
|
||||
}
|
||||
Ok(uri.to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn connect_manifest_enabled_namespace_database(
|
||||
request: &ConnectRequest,
|
||||
) -> Result<LanceNamespaceDatabase> {
|
||||
let options = ListingDatabaseOptions::parse_from_map(&request.options)?;
|
||||
let session = request
|
||||
.session
|
||||
.clone()
|
||||
.unwrap_or_else(|| Arc::new(lance::session::Session::default()));
|
||||
let namespace_root =
|
||||
Self::prepare_namespace_root(&request.uri, &options.storage_options, session.clone())
|
||||
.await?;
|
||||
let ns_properties = Self::build_manifest_enabled_namespace_client_properties(
|
||||
&namespace_root,
|
||||
&options.storage_options,
|
||||
request.namespace_client_properties.clone(),
|
||||
);
|
||||
|
||||
LanceNamespaceDatabase::connect_with_new_table_config(
|
||||
"dir",
|
||||
ns_properties,
|
||||
options.storage_options,
|
||||
request.read_consistency_interval,
|
||||
Some(session),
|
||||
HashSet::new(),
|
||||
options.new_table_config,
|
||||
)
|
||||
.await
|
||||
.map(|db| db.with_uri(request.uri.clone()))
|
||||
}
|
||||
|
||||
/// Connect to a listing database
|
||||
///
|
||||
/// The URI should be a path to a directory where the tables are stored.
|
||||
@@ -473,7 +300,6 @@ impl ListingDatabase {
|
||||
uri,
|
||||
request.read_consistency_interval,
|
||||
options.new_table_config,
|
||||
request.namespace_client_properties.clone(),
|
||||
request.session.clone(),
|
||||
)
|
||||
.await
|
||||
@@ -561,15 +387,6 @@ impl ListingDatabase {
|
||||
None => None,
|
||||
};
|
||||
|
||||
let namespace_database = Self::connect_namespace_database(
|
||||
&table_base_uri,
|
||||
options.storage_options.clone(),
|
||||
request.namespace_client_properties.clone(),
|
||||
request.read_consistency_interval,
|
||||
session.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(Self {
|
||||
uri: table_base_uri,
|
||||
query_string,
|
||||
@@ -581,7 +398,6 @@ impl ListingDatabase {
|
||||
storage_options_provider: None,
|
||||
new_table_config: options.new_table_config,
|
||||
session,
|
||||
namespace_database,
|
||||
})
|
||||
}
|
||||
Err(_) => {
|
||||
@@ -589,7 +405,6 @@ impl ListingDatabase {
|
||||
uri,
|
||||
request.read_consistency_interval,
|
||||
options.new_table_config,
|
||||
request.namespace_client_properties.clone(),
|
||||
request.session.clone(),
|
||||
)
|
||||
.await
|
||||
@@ -601,7 +416,6 @@ impl ListingDatabase {
|
||||
path: &str,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
new_table_config: NewTableConfig,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
) -> Result<Self> {
|
||||
let session = session.unwrap_or_else(|| Arc::new(lance::session::Session::default()));
|
||||
@@ -615,15 +429,6 @@ impl ListingDatabase {
|
||||
Self::try_create_dir(path).context(CreateDirSnafu { path })?;
|
||||
}
|
||||
|
||||
let namespace_database = Self::connect_namespace_database(
|
||||
path,
|
||||
HashMap::new(),
|
||||
namespace_client_properties,
|
||||
read_consistency_interval,
|
||||
session.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(Self {
|
||||
uri: path.to_string(),
|
||||
query_string: None,
|
||||
@@ -635,7 +440,6 @@ impl ListingDatabase {
|
||||
storage_options_provider: None,
|
||||
new_table_config,
|
||||
session,
|
||||
namespace_database,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -693,10 +497,6 @@ impl ListingDatabase {
|
||||
Ok(uri)
|
||||
}
|
||||
|
||||
fn namespace_database(&self) -> Arc<LanceNamespaceDatabase> {
|
||||
self.namespace_database.clone()
|
||||
}
|
||||
|
||||
async fn drop_tables(&self, names: Vec<String>) -> Result<()> {
|
||||
let object_store_params = ObjectStoreParams {
|
||||
storage_options_accessor: if self.storage_options.is_empty() {
|
||||
@@ -821,12 +621,15 @@ impl ListingDatabase {
|
||||
store_params.storage_options_accessor = Some(Arc::new(accessor));
|
||||
}
|
||||
|
||||
write_params.data_storage_version = storage_version_override
|
||||
.or(write_params.data_storage_version)
|
||||
.or(self.new_table_config.data_storage_version);
|
||||
write_params.data_storage_version = self
|
||||
.new_table_config
|
||||
.data_storage_version
|
||||
.or(storage_version_override);
|
||||
|
||||
if let Some(enable_v2_manifest_paths) =
|
||||
v2_manifest_override.or(self.new_table_config.enable_v2_manifest_paths)
|
||||
if let Some(enable_v2_manifest_paths) = self
|
||||
.new_table_config
|
||||
.enable_v2_manifest_paths
|
||||
.or(v2_manifest_override)
|
||||
{
|
||||
write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
|
||||
}
|
||||
@@ -893,7 +696,16 @@ impl Database for ListingDatabase {
|
||||
&self,
|
||||
request: ListNamespacesRequest,
|
||||
) -> Result<ListNamespacesResponse> {
|
||||
self.namespace_database().list_namespaces(request).await
|
||||
if request.id.as_ref().map(|v| !v.is_empty()).unwrap_or(false) {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(ListNamespacesResponse {
|
||||
namespaces: Vec::new(),
|
||||
page_token: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn uri(&self) -> &str {
|
||||
@@ -914,26 +726,36 @@ impl Database for ListingDatabase {
|
||||
|
||||
async fn create_namespace(
|
||||
&self,
|
||||
request: CreateNamespaceRequest,
|
||||
_request: CreateNamespaceRequest,
|
||||
) -> Result<CreateNamespaceResponse> {
|
||||
self.namespace_database().create_namespace(request).await
|
||||
Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<DropNamespaceResponse> {
|
||||
self.namespace_database().drop_namespace(request).await
|
||||
async fn drop_namespace(
|
||||
&self,
|
||||
_request: DropNamespaceRequest,
|
||||
) -> Result<DropNamespaceResponse> {
|
||||
Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn describe_namespace(
|
||||
&self,
|
||||
request: DescribeNamespaceRequest,
|
||||
_request: DescribeNamespaceRequest,
|
||||
) -> Result<DescribeNamespaceResponse> {
|
||||
self.namespace_database().describe_namespace(request).await
|
||||
Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(deprecated)]
|
||||
async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>> {
|
||||
if !request.namespace_path.is_empty() {
|
||||
return self.namespace_database().table_names(request).await;
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
|
||||
});
|
||||
}
|
||||
let mut f = self
|
||||
.object_store
|
||||
@@ -966,7 +788,9 @@ impl Database for ListingDatabase {
|
||||
|
||||
async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
|
||||
if request.id.as_ref().map(|v| !v.is_empty()).unwrap_or(false) {
|
||||
return self.namespace_database().list_tables(request).await;
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
|
||||
});
|
||||
}
|
||||
let mut f = self
|
||||
.object_store
|
||||
@@ -1014,8 +838,11 @@ impl Database for ListingDatabase {
|
||||
}
|
||||
|
||||
async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
if !request.namespace_path.is_empty() {
|
||||
return self.namespace_database().create_table(request).await;
|
||||
// When namespace is not empty, location must be provided
|
||||
if !request.namespace_path.is_empty() && request.location.is_none() {
|
||||
return Err(Error::InvalidInput {
|
||||
message: "Location must be provided when namespace is not empty".into(),
|
||||
});
|
||||
}
|
||||
// Use provided location if available, otherwise derive from table name
|
||||
let table_uri = request
|
||||
@@ -1132,8 +959,11 @@ impl Database for ListingDatabase {
|
||||
}
|
||||
|
||||
async fn open_table(&self, mut request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
if !request.namespace_path.is_empty() {
|
||||
return self.namespace_database().open_table(request).await;
|
||||
// When namespace is not empty, location must be provided
|
||||
if !request.namespace_path.is_empty() && request.location.is_none() {
|
||||
return Err(Error::InvalidInput {
|
||||
message: "Location must be provided when namespace is not empty".into(),
|
||||
});
|
||||
}
|
||||
// Use provided location if available, otherwise derive from table name
|
||||
let table_uri = request
|
||||
@@ -1229,10 +1059,9 @@ impl Database for ListingDatabase {
|
||||
|
||||
async fn drop_table(&self, name: &str, namespace_path: &[String]) -> Result<()> {
|
||||
if !namespace_path.is_empty() {
|
||||
return self
|
||||
.namespace_database()
|
||||
.drop_table(name, namespace_path)
|
||||
.await;
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace parameter is not supported for listing database.".into(),
|
||||
});
|
||||
}
|
||||
self.drop_tables(vec![name.to_string()]).await
|
||||
}
|
||||
@@ -1241,10 +1070,9 @@ impl Database for ListingDatabase {
|
||||
async fn drop_all_tables(&self, namespace_path: &[String]) -> Result<()> {
|
||||
// Check if namespace parameter is provided
|
||||
if !namespace_path.is_empty() {
|
||||
return self
|
||||
.namespace_database()
|
||||
.drop_all_tables(namespace_path)
|
||||
.await;
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace parameter is not supported for listing database.".into(),
|
||||
});
|
||||
}
|
||||
let tables = self.table_names(TableNamesRequest::default()).await?;
|
||||
self.drop_tables(tables).await
|
||||
@@ -1255,11 +1083,30 @@ impl Database for ListingDatabase {
|
||||
}
|
||||
|
||||
async fn namespace_client(&self) -> Result<Arc<dyn lance_namespace::LanceNamespace>> {
|
||||
self.namespace_database.namespace_client().await
|
||||
// Create a DirectoryNamespace pointing to the same root with the same storage options
|
||||
let mut builder = lance_namespace_impls::DirectoryNamespaceBuilder::new(&self.uri);
|
||||
|
||||
// Add storage options
|
||||
if !self.storage_options.is_empty() {
|
||||
builder = builder.storage_options(self.storage_options.clone());
|
||||
}
|
||||
|
||||
// Use the same session
|
||||
builder = builder.session(self.session.clone());
|
||||
|
||||
let namespace = builder.build().await.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to create namespace client: {}", e),
|
||||
})?;
|
||||
Ok(Arc::new(namespace) as Arc<dyn lance_namespace::LanceNamespace>)
|
||||
}
|
||||
|
||||
async fn namespace_client_config(&self) -> Result<(String, HashMap<String, String>)> {
|
||||
self.namespace_database.namespace_client_config().await
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), self.uri.clone());
|
||||
for (key, value) in &self.storage_options {
|
||||
properties.insert(format!("storage.{}", key), value.clone());
|
||||
}
|
||||
Ok(("dir".to_string(), properties))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1285,8 +1132,6 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options: Default::default(),
|
||||
namespace_client_properties: Default::default(),
|
||||
manifest_enabled: false,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -1420,8 +1265,6 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options: options.clone(),
|
||||
namespace_client_properties: Default::default(),
|
||||
manifest_enabled: false,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -1956,8 +1799,6 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
namespace_client_properties: Default::default(),
|
||||
manifest_enabled: false,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -2063,8 +1904,6 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
namespace_client_properties: Default::default(),
|
||||
manifest_enabled: false,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -2136,8 +1975,6 @@ mod tests {
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
namespace_client_properties: Default::default(),
|
||||
manifest_enabled: false,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
@@ -2271,210 +2108,4 @@ mod tests {
|
||||
assert!(tables.contains(&"table1".to_string()));
|
||||
assert!(tables.contains(&"table2".to_string()));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_listing_database_namespace_operations() {
|
||||
let (_tempdir, db) = setup_database().await;
|
||||
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["parent".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["parent".to_string(), "child".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let root_namespaces = db
|
||||
.list_namespaces(ListNamespacesRequest {
|
||||
id: Some(vec![]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(root_namespaces.namespaces.contains(&"parent".to_string()));
|
||||
|
||||
let child_namespaces = db
|
||||
.list_namespaces(ListNamespacesRequest {
|
||||
id: Some(vec!["parent".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(child_namespaces.namespaces.contains(&"child".to_string()));
|
||||
|
||||
db.describe_namespace(DescribeNamespaceRequest {
|
||||
id: Some(vec!["parent".to_string(), "child".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[cfg(not(windows))] // TODO: support Windows once directory namespace-backed listing DB tests are supported.
|
||||
async fn test_listing_database_with_namespace_client_properties() {
|
||||
let tempdir = tempdir().unwrap();
|
||||
let uri = tempdir.path().to_str().unwrap();
|
||||
|
||||
let mut namespace_client_properties = HashMap::new();
|
||||
namespace_client_properties.insert(
|
||||
"table_version_tracking_enabled".to_string(),
|
||||
"true".to_string(),
|
||||
);
|
||||
namespace_client_properties.insert("manifest_enabled".to_string(), "true".to_string());
|
||||
|
||||
let request = ConnectRequest {
|
||||
uri: uri.to_string(),
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options: Default::default(),
|
||||
namespace_client_properties,
|
||||
manifest_enabled: false,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
|
||||
let db = ListingDatabase::connect_with_options(&request)
|
||||
.await
|
||||
.unwrap();
|
||||
let namespace_path = vec!["test_ns".to_string()];
|
||||
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(namespace_path.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("name", DataType::Utf8, false),
|
||||
]));
|
||||
|
||||
db.create_table(CreateTableRequest {
|
||||
name: "managed_table".to_string(),
|
||||
namespace_path: namespace_path.clone(),
|
||||
data: Box::new(RecordBatch::new_empty(schema)) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let namespace_client = db.namespace_client().await.unwrap();
|
||||
let describe = namespace_client
|
||||
.describe_table(lance_namespace::models::DescribeTableRequest {
|
||||
id: Some(vec!["test_ns".to_string(), "managed_table".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(describe.managed_versioning, Some(true));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_listing_database_nested_namespace_table_ops() {
|
||||
let (_tempdir, db) = setup_database().await;
|
||||
let namespace_path = vec!["parent".to_string(), "child".to_string()];
|
||||
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["parent".to_string()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
db.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(namespace_path.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("name", DataType::Utf8, false),
|
||||
]));
|
||||
|
||||
db.create_table(CreateTableRequest {
|
||||
name: "nested_table".to_string(),
|
||||
namespace_path: namespace_path.clone(),
|
||||
data: Box::new(RecordBatch::new_empty(schema)) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let namespace_client = db.namespace_client().await.unwrap();
|
||||
let describe = namespace_client
|
||||
.describe_table(lance_namespace::models::DescribeTableRequest {
|
||||
id: Some(vec![
|
||||
"parent".to_string(),
|
||||
"child".to_string(),
|
||||
"nested_table".to_string(),
|
||||
]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(describe.location.is_some());
|
||||
|
||||
let table = db
|
||||
.open_table(OpenTableRequest {
|
||||
name: "nested_table".to_string(),
|
||||
namespace_path: namespace_path.clone(),
|
||||
index_cache_size: None,
|
||||
lance_read_params: None,
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
managed_versioning: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(table.name(), "nested_table");
|
||||
|
||||
#[allow(deprecated)]
|
||||
let table_names = db
|
||||
.table_names(TableNamesRequest {
|
||||
namespace_path: namespace_path.clone(),
|
||||
start_after: None,
|
||||
limit: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(table_names, vec!["nested_table".to_string()]);
|
||||
|
||||
let list_tables = db
|
||||
.list_tables(ListTablesRequest {
|
||||
id: Some(namespace_path.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(list_tables.tables, vec!["nested_table".to_string()]);
|
||||
|
||||
db.drop_table("nested_table", &namespace_path)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let post_drop = db
|
||||
.list_tables(ListTablesRequest {
|
||||
id: Some(namespace_path),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(post_drop.tables.is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,15 +22,10 @@ use lance_namespace_impls::ConnectBuilder;
|
||||
use lance_table::io::commit::CommitHandler;
|
||||
use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler;
|
||||
|
||||
use crate::connection::NamespaceClientPushdownOperation;
|
||||
use crate::connection::PushdownOperation;
|
||||
use crate::database::ReadConsistency;
|
||||
use crate::database::listing::{
|
||||
NewTableConfig, OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS, OPT_NEW_TABLE_STORAGE_VERSION,
|
||||
OPT_NEW_TABLE_V2_MANIFEST_PATHS,
|
||||
};
|
||||
use crate::error::{Error, Result};
|
||||
use crate::table::NativeTable;
|
||||
use lance::dataset::WriteMode;
|
||||
|
||||
use super::{
|
||||
BaseTable, CloneTableRequest, CreateTableMode, CreateTableRequest as DbCreateTableRequest,
|
||||
@@ -49,71 +44,21 @@ pub struct LanceNamespaceDatabase {
|
||||
// database URI
|
||||
uri: String,
|
||||
// Operations to push down to the namespace server
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
// Namespace implementation type (e.g., "dir", "rest")
|
||||
ns_impl: String,
|
||||
// Namespace properties used to construct the namespace client
|
||||
ns_properties: HashMap<String, String>,
|
||||
// Options for tables created by this connection
|
||||
new_table_config: NewTableConfig,
|
||||
}
|
||||
|
||||
impl LanceNamespaceDatabase {
|
||||
pub fn from_namespace_client(
|
||||
namespace_client: Arc<dyn LanceNamespace>,
|
||||
namespace_client_impl: String,
|
||||
namespace_client_properties: HashMap<String, String>,
|
||||
storage_options: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
namespace_client_pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
) -> Self {
|
||||
Self {
|
||||
namespace: namespace_client,
|
||||
storage_options,
|
||||
read_consistency_interval,
|
||||
session,
|
||||
uri: format!("namespace://{}", namespace_client_impl),
|
||||
pushdown_operations: namespace_client_pushdown_operations,
|
||||
ns_impl: namespace_client_impl,
|
||||
ns_properties: namespace_client_properties,
|
||||
new_table_config: NewTableConfig::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn with_uri(mut self, uri: impl Into<String>) -> Self {
|
||||
self.uri = uri.into();
|
||||
self
|
||||
}
|
||||
|
||||
pub async fn connect(
|
||||
ns_impl: &str,
|
||||
ns_properties: HashMap<String, String>,
|
||||
storage_options: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
) -> Result<Self> {
|
||||
Self::connect_with_new_table_config(
|
||||
ns_impl,
|
||||
ns_properties,
|
||||
storage_options,
|
||||
read_consistency_interval,
|
||||
session,
|
||||
pushdown_operations,
|
||||
NewTableConfig::default(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub(crate) async fn connect_with_new_table_config(
|
||||
ns_impl: &str,
|
||||
ns_properties: HashMap<String, String>,
|
||||
storage_options: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
new_table_config: NewTableConfig,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
) -> Result<Self> {
|
||||
let mut builder = ConnectBuilder::new(ns_impl);
|
||||
for (key, value) in ns_properties.clone() {
|
||||
@@ -135,79 +80,8 @@ impl LanceNamespaceDatabase {
|
||||
pushdown_operations,
|
||||
ns_impl: ns_impl.to_string(),
|
||||
ns_properties,
|
||||
new_table_config,
|
||||
})
|
||||
}
|
||||
|
||||
fn extract_storage_overrides(
|
||||
&self,
|
||||
request: &DbCreateTableRequest,
|
||||
) -> Result<(
|
||||
Option<lance_encoding::version::LanceFileVersion>,
|
||||
Option<bool>,
|
||||
Option<bool>,
|
||||
)> {
|
||||
let storage_options = request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.as_ref()
|
||||
.and_then(|p| p.store_params.as_ref())
|
||||
.and_then(|sp| sp.storage_options());
|
||||
|
||||
let storage_version_override = storage_options
|
||||
.and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION))
|
||||
.map(|s| s.parse::<lance_encoding::version::LanceFileVersion>())
|
||||
.transpose()?;
|
||||
|
||||
let v2_manifest_override = storage_options
|
||||
.and_then(|opts| opts.get(OPT_NEW_TABLE_V2_MANIFEST_PATHS))
|
||||
.map(|s| s.parse::<bool>())
|
||||
.transpose()
|
||||
.map_err(|_| Error::InvalidInput {
|
||||
message: "enable_v2_manifest_paths must be a boolean".to_string(),
|
||||
})?;
|
||||
|
||||
let stable_row_ids_override = storage_options
|
||||
.and_then(|opts| opts.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS))
|
||||
.map(|s| s.parse::<bool>())
|
||||
.transpose()
|
||||
.map_err(|_| Error::InvalidInput {
|
||||
message: "enable_stable_row_ids must be a boolean".to_string(),
|
||||
})?;
|
||||
|
||||
Ok((
|
||||
storage_version_override,
|
||||
v2_manifest_override,
|
||||
stable_row_ids_override,
|
||||
))
|
||||
}
|
||||
|
||||
fn apply_new_table_config(
|
||||
&self,
|
||||
params: &mut lance::dataset::WriteParams,
|
||||
request: &DbCreateTableRequest,
|
||||
) -> Result<()> {
|
||||
let (storage_version_override, v2_manifest_override, stable_row_ids_override) =
|
||||
self.extract_storage_overrides(request)?;
|
||||
|
||||
params.data_storage_version = storage_version_override
|
||||
.or(params.data_storage_version)
|
||||
.or(self.new_table_config.data_storage_version);
|
||||
|
||||
if let Some(enable_v2_manifest_paths) =
|
||||
v2_manifest_override.or(self.new_table_config.enable_v2_manifest_paths)
|
||||
{
|
||||
params.enable_v2_manifest_paths = enable_v2_manifest_paths;
|
||||
}
|
||||
|
||||
if let Some(enable_stable_row_ids) =
|
||||
stable_row_ids_override.or(self.new_table_config.enable_stable_row_ids)
|
||||
{
|
||||
params.enable_stable_row_ids = enable_stable_row_ids;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for LanceNamespaceDatabase {
|
||||
@@ -289,23 +163,37 @@ impl Database for LanceNamespaceDatabase {
|
||||
async fn create_table(&self, request: DbCreateTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
let mut table_id = request.namespace_path.clone();
|
||||
table_id.push(request.name.clone());
|
||||
let mut existing_table = None;
|
||||
let describe_request = DescribeTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let describe_result = self.namespace.describe_table(describe_request).await;
|
||||
|
||||
match request.mode {
|
||||
CreateTableMode::Create => {}
|
||||
CreateTableMode::Create => {
|
||||
if describe_result.is_ok() {
|
||||
return Err(Error::TableAlreadyExists {
|
||||
name: request.name.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
CreateTableMode::Overwrite => {
|
||||
let describe_request = DescribeTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
..Default::default()
|
||||
};
|
||||
existing_table = self.namespace.describe_table(describe_request).await.ok();
|
||||
if describe_result.is_ok() {
|
||||
// Drop the existing table - must succeed
|
||||
let drop_request = DropTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
..Default::default()
|
||||
};
|
||||
self.namespace
|
||||
.drop_table(drop_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to drop existing table for overwrite: {}", e),
|
||||
})?;
|
||||
}
|
||||
}
|
||||
CreateTableMode::ExistOk(_) => {
|
||||
let describe_request = DescribeTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
..Default::default()
|
||||
};
|
||||
let describe_result = self.namespace.describe_table(describe_request).await;
|
||||
if describe_result.is_ok() {
|
||||
let native_table = NativeTable::open_from_namespace(
|
||||
self.namespace.clone(),
|
||||
@@ -333,86 +221,20 @@ impl Database for LanceNamespaceDatabase {
|
||||
};
|
||||
|
||||
let (location, initial_storage_options, managed_versioning) = {
|
||||
if let Some(response) = existing_table {
|
||||
let loc = response.location.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from describe_table response".to_string(),
|
||||
})?;
|
||||
let opts = response
|
||||
.storage_options
|
||||
.or_else(|| Some(self.storage_options.clone()))
|
||||
.filter(|o| !o.is_empty());
|
||||
(loc, opts, response.managed_versioning)
|
||||
} else {
|
||||
match self.namespace.declare_table(declare_request).await {
|
||||
Ok(response) => {
|
||||
let loc = response.location.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from declare_table response"
|
||||
.to_string(),
|
||||
})?;
|
||||
let opts = response
|
||||
.storage_options
|
||||
.or_else(|| Some(self.storage_options.clone()))
|
||||
.filter(|o: &HashMap<String, String>| !o.is_empty());
|
||||
(loc, opts, response.managed_versioning)
|
||||
}
|
||||
Err(e)
|
||||
if matches!(request.mode, CreateTableMode::Create) && {
|
||||
let err_str = e.to_string();
|
||||
err_str.contains("already exists")
|
||||
|| err_str.contains("TableAlreadyExists")
|
||||
|| err_str.contains("table already exists")
|
||||
} =>
|
||||
{
|
||||
let response = self
|
||||
.namespace
|
||||
.describe_table(DescribeTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.map_err(|describe_err| Error::Runtime {
|
||||
message: format!(
|
||||
"Failed to describe existing declared table after declare conflict: {}",
|
||||
describe_err
|
||||
),
|
||||
})?;
|
||||
|
||||
if response.version.is_some() && response.schema.is_some() {
|
||||
return Err(Error::TableAlreadyExists {
|
||||
name: request.name.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
let loc = response.location.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from describe_table response"
|
||||
.to_string(),
|
||||
})?;
|
||||
let opts = response
|
||||
.storage_options
|
||||
.or_else(|| Some(self.storage_options.clone()))
|
||||
.filter(|o: &HashMap<String, String>| !o.is_empty());
|
||||
(loc, opts, response.managed_versioning)
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(Error::Runtime {
|
||||
message: format!("Failed to declare table: {}", e),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
let response = self.namespace.declare_table(declare_request).await?;
|
||||
let loc = response.location.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from declare_table response".to_string(),
|
||||
})?;
|
||||
// Use storage options from response, fall back to self.storage_options
|
||||
let opts = response
|
||||
.storage_options
|
||||
.or_else(|| Some(self.storage_options.clone()))
|
||||
.filter(|o| !o.is_empty());
|
||||
(loc, opts, response.managed_versioning)
|
||||
};
|
||||
|
||||
// Build write params with storage options and commit handler
|
||||
let mut params = request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.clone()
|
||||
.unwrap_or_default();
|
||||
self.apply_new_table_config(&mut params, &request)?;
|
||||
|
||||
if matches!(request.mode, CreateTableMode::Overwrite) {
|
||||
params.mode = WriteMode::Overwrite;
|
||||
}
|
||||
let mut params = request.write_options.lance_write_params.unwrap_or_default();
|
||||
|
||||
// Set up storage options if provided
|
||||
if let Some(storage_opts) = initial_storage_options {
|
||||
@@ -628,47 +450,6 @@ mod tests {
|
||||
));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_connection_with_namespace_client_properties() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let conn = connect_namespace("dir", properties)
|
||||
.namespace_client_property("table_version_tracking_enabled", "true")
|
||||
.namespace_client_property("manifest_enabled", "true")
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to connect to namespace");
|
||||
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.expect("Failed to create namespace");
|
||||
|
||||
let test_data = create_test_data();
|
||||
conn.create_table("test_table", test_data)
|
||||
.namespace(vec!["test_ns".into()])
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create table");
|
||||
|
||||
let namespace_client = conn.namespace_client().await.unwrap();
|
||||
let describe = namespace_client
|
||||
.describe_table(DescribeTableRequest {
|
||||
id: Some(vec!["test_ns".into(), "test_table".into()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.expect("Failed to describe table");
|
||||
|
||||
assert_eq!(describe.managed_versioning, Some(true));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_create_table_basic() {
|
||||
// Setup: Create a temporary directory for the namespace
|
||||
@@ -870,58 +651,6 @@ mod tests {
|
||||
assert_eq!(id_col.value(2), 30);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_create_table_after_declare_conflict() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let conn = connect_namespace("dir", properties)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to connect to namespace");
|
||||
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.expect("Failed to create namespace");
|
||||
|
||||
let namespace_client = conn.namespace_client().await.unwrap();
|
||||
namespace_client
|
||||
.declare_table(DeclareTableRequest {
|
||||
id: Some(vec!["test_ns".into(), "declared_test".into()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.expect("Failed to declare table");
|
||||
|
||||
let test_data = create_test_data();
|
||||
let table = conn
|
||||
.create_table("declared_test", test_data)
|
||||
.namespace(vec!["test_ns".into()])
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create table after declare conflict");
|
||||
|
||||
let results = table
|
||||
.query()
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to query table")
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.expect("Failed to collect results");
|
||||
|
||||
assert_eq!(results.len(), 1);
|
||||
assert_eq!(results[0].num_rows(), 5);
|
||||
assert_eq!(table.namespace(), &["test_ns"]);
|
||||
assert_eq!(table.id(), "test_ns$declared_test");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_create_table_exist_ok_mode() {
|
||||
// Setup: Create a temporary directory for the namespace
|
||||
|
||||
@@ -69,7 +69,7 @@
|
||||
//! It treats [`FixedSizeList<Float16/Float32>`](https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeListArray.html)
|
||||
//! columns as vector columns.
|
||||
//!
|
||||
//! For more details, please refer to the [LanceDB documentation](https://docs.lancedb.com).
|
||||
//! For more details, please refer to the [LanceDB documentation](https://lancedb.com/docs).
|
||||
//!
|
||||
//! #### Create a table
|
||||
//!
|
||||
|
||||
@@ -16,7 +16,7 @@ use crate::remote::retry::{ResolvedRetryConfig, RetryCounter};
|
||||
const REQUEST_ID_HEADER: HeaderName = HeaderName::from_static("x-request-id");
|
||||
|
||||
/// Configuration for TLS/mTLS settings.
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct TlsConfig {
|
||||
/// Path to the client certificate file (PEM format)
|
||||
pub cert_file: Option<String>,
|
||||
@@ -24,22 +24,10 @@ pub struct TlsConfig {
|
||||
pub key_file: Option<String>,
|
||||
/// Path to the CA certificate file for server verification (PEM format)
|
||||
pub ssl_ca_cert: Option<String>,
|
||||
/// Whether to verify the hostname in the server's certificate.
|
||||
/// Defaults to `true`.
|
||||
/// Whether to verify the hostname in the server's certificate
|
||||
pub assert_hostname: bool,
|
||||
}
|
||||
|
||||
impl Default for TlsConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
cert_file: None,
|
||||
key_file: None,
|
||||
ssl_ca_cert: None,
|
||||
assert_hostname: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Trait for providing custom headers for each request
|
||||
#[async_trait::async_trait]
|
||||
pub trait HeaderProvider: Send + Sync + std::fmt::Debug {
|
||||
@@ -938,7 +926,7 @@ mod tests {
|
||||
assert!(config.cert_file.is_none());
|
||||
assert!(config.key_file.is_none());
|
||||
assert!(config.ssl_ca_cert.is_none());
|
||||
assert!(config.assert_hostname);
|
||||
assert!(!config.assert_hostname);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -97,7 +97,7 @@ pub struct RemoteDatabaseOptions {
|
||||
pub host_override: Option<String>,
|
||||
/// Storage options configure the storage layer (e.g. S3, GCS, Azure, etc.)
|
||||
///
|
||||
/// See available options at <https://docs.lancedb.com/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
///
|
||||
/// These options are only used for LanceDB Enterprise and only a subset of options
|
||||
/// are supported.
|
||||
|
||||
@@ -47,7 +47,7 @@ use std::format;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::connection::NamespaceClientPushdownOperation;
|
||||
use crate::connection::PushdownOperation;
|
||||
|
||||
use crate::data::scannable::{PeekedScannable, Scannable, estimate_write_partitions};
|
||||
use crate::database::Database;
|
||||
@@ -1272,7 +1272,7 @@ pub struct NativeTable {
|
||||
pub(crate) namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
// Operations to push down to the namespace server.
|
||||
// pub(crate) so query.rs can access the field for server-side query execution.
|
||||
pub(crate) pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
pub(crate) pushdown_operations: HashSet<PushdownOperation>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for NativeTable {
|
||||
@@ -1359,7 +1359,7 @@ impl NativeTable {
|
||||
params: Option<ReadParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
managed_versioning: Option<bool>,
|
||||
) -> Result<Self> {
|
||||
let params = params.unwrap_or_default();
|
||||
@@ -1470,7 +1470,7 @@ impl NativeTable {
|
||||
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
||||
params: Option<ReadParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
) -> Result<Self> {
|
||||
let mut params = params.unwrap_or_default();
|
||||
@@ -1518,7 +1518,7 @@ impl NativeTable {
|
||||
let id = Self::build_id(&namespace, name);
|
||||
|
||||
let stored_namespace_client =
|
||||
if pushdown_operations.contains(&NamespaceClientPushdownOperation::QueryTable) {
|
||||
if pushdown_operations.contains(&PushdownOperation::QueryTable) {
|
||||
Some(namespace_client)
|
||||
} else {
|
||||
None
|
||||
@@ -1588,7 +1588,7 @@ impl NativeTable {
|
||||
params: Option<WriteParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
) -> Result<Self> {
|
||||
// Default params uses format v1.
|
||||
let params = params.unwrap_or(WriteParams {
|
||||
@@ -1635,7 +1635,7 @@ impl NativeTable {
|
||||
params: Option<WriteParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
) -> Result<Self> {
|
||||
let data: Box<dyn Scannable> = Box::new(RecordBatch::new_empty(schema));
|
||||
Self::create(
|
||||
@@ -1685,7 +1685,7 @@ impl NativeTable {
|
||||
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
||||
params: Option<WriteParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
pushdown_operations: HashSet<NamespaceClientPushdownOperation>,
|
||||
pushdown_operations: HashSet<PushdownOperation>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
) -> Result<Self> {
|
||||
// Build table_id from namespace + name for the storage options provider
|
||||
@@ -1738,7 +1738,7 @@ impl NativeTable {
|
||||
let id = Self::build_id(&namespace, name);
|
||||
|
||||
let stored_namespace_client =
|
||||
if pushdown_operations.contains(&NamespaceClientPushdownOperation::QueryTable) {
|
||||
if pushdown_operations.contains(&PushdownOperation::QueryTable) {
|
||||
Some(namespace_client)
|
||||
} else {
|
||||
None
|
||||
|
||||
@@ -285,10 +285,7 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
use crate::{
|
||||
connect, io::object_store::io_tracking::IoStatsHolder, table::WriteOptions,
|
||||
utils::background_cache::clock,
|
||||
};
|
||||
use crate::{connect, io::object_store::io_tracking::IoStatsHolder, table::WriteOptions};
|
||||
|
||||
async fn create_test_dataset(uri: &str) -> Dataset {
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
|
||||
@@ -466,10 +463,6 @@ mod tests {
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let ds = create_test_dataset(uri).await;
|
||||
|
||||
// Other tests use a thread-local mock clock. Simulate leaked state from a
|
||||
// previous test to ensure this wrapper starts from real time.
|
||||
clock::advance_by(Duration::from_secs(60));
|
||||
|
||||
let wrapper = DatasetConsistencyWrapper::new_latest(ds, Some(Duration::from_millis(200)));
|
||||
|
||||
// Populate the cache
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::NativeTable;
|
||||
use crate::connection::NamespaceClientPushdownOperation;
|
||||
use crate::connection::PushdownOperation;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::expr::expr_to_sql_string;
|
||||
use crate::query::{
|
||||
@@ -44,7 +44,7 @@ pub async fn execute_query(
|
||||
// If QueryTable pushdown is enabled and namespace client is configured, use server-side query execution
|
||||
if table
|
||||
.pushdown_operations
|
||||
.contains(&NamespaceClientPushdownOperation::QueryTable)
|
||||
.contains(&PushdownOperation::QueryTable)
|
||||
&& let Some(ref namespace_client) = table.namespace_client
|
||||
{
|
||||
return execute_namespace_query(table, namespace_client.clone(), query, options).await;
|
||||
|
||||
@@ -107,14 +107,6 @@ where
|
||||
refresh_window < ttl,
|
||||
"refresh_window ({refresh_window:?}) must be less than ttl ({ttl:?})"
|
||||
);
|
||||
#[cfg(test)]
|
||||
{
|
||||
// Tests may advance the thread-local mock clock and leave it behind for
|
||||
// the next test that happens to run on the same worker thread. Each new
|
||||
// cache should start from a clean clock state instead of inheriting
|
||||
// unrelated mock time from a previous test.
|
||||
clock::clear_mock();
|
||||
}
|
||||
Self {
|
||||
inner: Arc::new(Mutex::new(CacheInner {
|
||||
state: State::Empty,
|
||||
|
||||
Reference in New Issue
Block a user