Compare commits

..

1 Commits

Author SHA1 Message Date
Lance Release
0feb485944 Bump version: 0.27.0-beta.4 → 0.27.0-beta.5 2026-03-09 19:56:38 +00:00
142 changed files with 8254 additions and 12962 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.28.0-beta.10"
current_version = "0.27.0-beta.5"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -18,6 +18,6 @@ body:
label: Link
description: >
Provide a link to the existing documentation, if applicable.
placeholder: ex. https://docs.lancedb.com/tables/...
placeholder: ex. https://lancedb.com/docs/tables/...
validations:
required: false

View File

@@ -1,18 +0,0 @@
version: 2
# Scope: the root Cargo workspace, which produces the Rust binaries we
# ship to users (the Node.js and Python native extensions). The
# `rust/lancedb` library crate shares the same lockfile; its consumers
# pick their own dependency versions, but bumping transitive deps here
# keeps the binaries we ship current.
updates:
- package-ecosystem: cargo
directory: /
schedule:
interval: weekly
open-pull-requests-limit: 10
groups:
rust-minor-patch:
update-types:
- minor
- patch

View File

@@ -23,10 +23,8 @@ runs:
steps:
- name: CONFIRM ARM BUILD
shell: bash
env:
ARM_BUILD: ${{ inputs.arm-build }}
run: |
echo "ARM BUILD: $ARM_BUILD"
echo "ARM BUILD: ${{ inputs.arm-build }}"
- name: Build x86_64 Manylinux wheel
if: ${{ inputs.arm-build == 'false' }}
uses: PyO3/maturin-action@v1

View File

@@ -8,9 +8,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
permissions:
contents: read
jobs:
labeler:
permissions:
@@ -18,7 +15,7 @@ jobs:
name: Label PR
runs-on: ubuntu-latest
steps:
- uses: srvaroa/labeler@v1
- uses: srvaroa/labeler@master
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
commitlint:
@@ -27,7 +24,7 @@ jobs:
name: Verify PR title / description conforms to semantic-release
runs-on: ubuntu-latest
steps:
- uses: actions/setup-node@v4
- uses: actions/setup-node@v3
with:
node-version: "18"
# These rules are disabled because Github will always ensure there
@@ -50,7 +47,7 @@ jobs:
${{ github.event.pull_request.body }}
- if: failure()
uses: actions/github-script@v7
uses: actions/github-script@v6
with:
script: |
const message = `**ACTION NEEDED**

View File

@@ -53,7 +53,7 @@ jobs:
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
- name: Set up node
uses: actions/setup-node@v4
uses: actions/setup-node@v3
with:
node-version: 20
cache: 'npm'
@@ -68,7 +68,7 @@ jobs:
run: |
PYTHONPATH=. mkdocs build
- name: Setup Pages
uses: actions/configure-pages@v5
uses: actions/configure-pages@v2
- name: Upload artifact
uses: actions/upload-pages-artifact@v3
with:

View File

@@ -19,9 +19,6 @@ on:
paths:
- .github/workflows/java-publish.yml
permissions:
contents: read
jobs:
publish:
name: Build and Publish

View File

@@ -24,9 +24,6 @@ on:
- java/**
- .github/workflows/java.yml
permissions:
contents: read
jobs:
build-java:
runs-on: ubuntu-24.04

View File

@@ -10,10 +10,6 @@ on:
- nodejs/**
- java/**
- .github/workflows/license-header-check.yml
permissions:
contents: read
jobs:
check-licenses:
runs-on: ubuntu-latest

View File

@@ -7,17 +7,12 @@ on:
pull_request:
paths:
- Cargo.toml
- Cargo.lock
- rust-toolchain.toml
- nodejs/**
- rust/**
- docs/src/js/**
- .github/workflows/nodejs.yml
- docker-compose.yml
permissions:
contents: read
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
@@ -42,7 +37,7 @@ jobs:
with:
fetch-depth: 0
lfs: true
- uses: actions/setup-node@v4
- uses: actions/setup-node@v3
with:
node-version: 20
cache: 'npm'
@@ -82,7 +77,7 @@ jobs:
with:
fetch-depth: 0
lfs: true
- uses: actions/setup-node@v4
- uses: actions/setup-node@v3
name: Setup Node.js 20 for build
with:
# @napi-rs/cli v3 requires Node >= 20.12 (via @inquirer/prompts@8).
@@ -99,7 +94,7 @@ jobs:
run: |
npm ci --include=optional
npm run build:debug -- --profile ci
- uses: actions/setup-node@v4
- uses: actions/setup-node@v3
name: Setup Node.js ${{ matrix.node-version }} for test
with:
node-version: ${{ matrix.node-version }}
@@ -148,7 +143,7 @@ jobs:
with:
fetch-depth: 0
lfs: true
- uses: actions/setup-node@v4
- uses: actions/setup-node@v3
with:
node-version: 20
cache: 'npm'

View File

@@ -19,7 +19,6 @@ on:
paths:
- .github/workflows/npm-publish.yml
- Cargo.toml # Change in dependency frequently breaks builds
- Cargo.lock
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -125,12 +124,7 @@ jobs:
pre_build: |-
set -e &&
apt-get update &&
apt-get install -y protobuf-compiler pkg-config &&
# The base image (manylinux2014-cross) sets TARGET_CC to the old
# GCC 4.8 cross-compiler. aws-lc-sys checks TARGET_CC before CC,
# so it picks up GCC even though the napi-rs image sets CC=clang.
# Override to use the image's clang-18 which supports -fuse-ld=lld.
export TARGET_CC=clang TARGET_CXX=clang++
apt-get install -y protobuf-compiler pkg-config
- target: x86_64-unknown-linux-musl
# This one seems to need some extra memory
host: ubuntu-2404-8x-x64
@@ -150,10 +144,9 @@ jobs:
set -e &&
apt-get update &&
apt-get install -y protobuf-compiler pkg-config &&
export TARGET_CC=clang TARGET_CXX=clang++ &&
# The manylinux2014 sysroot has glibc 2.17 headers which lack
# AT_HWCAP2 (added in Linux 3.17). Define it for aws-lc-sys.
export CFLAGS="$CFLAGS -DAT_HWCAP2=26" &&
# https://github.com/aws/aws-lc-rs/issues/737#issuecomment-2725918627
ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc/aarch64-unknown-linux-gnu/4.8.5/crtbeginS.o /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/crtbeginS.o &&
ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/gcc &&
rustup target add aarch64-unknown-linux-gnu
- target: aarch64-unknown-linux-musl
host: ubuntu-2404-8x-x64
@@ -273,7 +266,7 @@ jobs:
- target: x86_64-unknown-linux-gnu
host: ubuntu-latest
- target: aarch64-unknown-linux-gnu
host: ubuntu-2404-8x-arm64
host: buildjet-16vcpu-ubuntu-2204-arm
node:
- '20'
runs-on: ${{ matrix.settings.host }}

View File

@@ -9,21 +9,14 @@ on:
paths:
- .github/workflows/pypi-publish.yml
- Cargo.toml # Change in dependency frequently breaks builds
- Cargo.lock
env:
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
permissions:
contents: read
jobs:
linux:
name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }}
timeout-minutes: 60
permissions:
id-token: write
contents: read
strategy:
matrix:
config:
@@ -63,12 +56,10 @@ jobs:
- uses: ./.github/workflows/upload_wheel
if: startsWith(github.ref, 'refs/tags/python-v')
with:
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
fury_token: ${{ secrets.FURY_TOKEN }}
mac:
timeout-minutes: 90
permissions:
id-token: write
contents: read
runs-on: ${{ matrix.config.runner }}
strategy:
matrix:
@@ -93,12 +84,10 @@ jobs:
- uses: ./.github/workflows/upload_wheel
if: startsWith(github.ref, 'refs/tags/python-v')
with:
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
fury_token: ${{ secrets.FURY_TOKEN }}
windows:
timeout-minutes: 60
permissions:
id-token: write
contents: read
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
@@ -117,6 +106,7 @@ jobs:
- uses: ./.github/workflows/upload_wheel
if: startsWith(github.ref, 'refs/tags/python-v')
with:
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
fury_token: ${{ secrets.FURY_TOKEN }}
gh-release:
if: startsWith(github.ref, 'refs/tags/python-v')

View File

@@ -7,8 +7,6 @@ on:
pull_request:
paths:
- Cargo.toml
- Cargo.lock
- rust-toolchain.toml
- python/**
- rust/**
- .github/workflows/python.yml
@@ -17,9 +15,6 @@ on:
- .github/workflows/build_windows_wheel/**
- .github/workflows/run_tests/**
permissions:
contents: read
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
@@ -111,6 +106,7 @@ jobs:
- name: Install
run: |
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
pip install tantivy
pip install mlx
- name: Doctest
run: pytest --doctest-modules python/lancedb
@@ -229,5 +225,6 @@ jobs:
pip install "pydantic<2"
pip install pyarrow==16
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
pip install tantivy
- name: Run tests
run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests

View File

@@ -7,17 +7,9 @@ on:
pull_request:
paths:
- Cargo.toml
- Cargo.lock
- rust-toolchain.toml
- deny.toml
- rust/**
- nodejs/Cargo.toml
- python/Cargo.toml
- .github/workflows/rust.yml
permissions:
contents: read
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
@@ -59,17 +51,6 @@ jobs:
- name: Run clippy (without remote feature)
run: cargo clippy --profile ci --workspace --tests -- -D warnings
deny:
# Supply-chain checks: advisories, licenses, banned crates, and source
# restrictions. Configuration lives in `deny.toml` at the workspace root.
timeout-minutes: 10
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
- uses: EmbarkStudios/cargo-deny-action@v2
with:
command: check advisories bans licenses sources
build-no-lock:
runs-on: ubuntu-24.04
timeout-minutes: 30
@@ -225,14 +206,14 @@ jobs:
- name: Downgrade dependencies
# These packages have newer requirements for MSRV
run: |
cargo update -p aws-sdk-bedrockruntime --precise 1.77.0
cargo update -p aws-sdk-dynamodb --precise 1.68.0
cargo update -p aws-config --precise 1.6.0
cargo update -p aws-sdk-kms --precise 1.63.0
cargo update -p aws-sdk-s3 --precise 1.79.0
cargo update -p aws-sdk-sso --precise 1.62.0
cargo update -p aws-sdk-ssooidc --precise 1.63.0
cargo update -p aws-sdk-sts --precise 1.63.0
cargo update -p aws-sdk-bedrockruntime --precise 1.64.0
cargo update -p aws-sdk-dynamodb --precise 1.55.0
cargo update -p aws-config --precise 1.5.10
cargo update -p aws-sdk-kms --precise 1.51.0
cargo update -p aws-sdk-s3 --precise 1.65.0
cargo update -p aws-sdk-sso --precise 1.50.0
cargo update -p aws-sdk-ssooidc --precise 1.51.0
cargo update -p aws-sdk-sts --precise 1.51.0
cargo update -p home --precise 0.5.9
- name: cargo +${{ matrix.msrv }} check
env:

View File

@@ -3,9 +3,6 @@ name: Update package-lock.json
on:
workflow_dispatch:
permissions:
contents: read
jobs:
publish:
runs-on: ubuntu-latest

View File

@@ -3,9 +3,6 @@ name: Update NodeJs package-lock.json
on:
workflow_dispatch:
permissions:
contents: read
jobs:
publish:
runs-on: ubuntu-latest

View File

@@ -2,6 +2,9 @@ name: upload-wheel
description: "Upload wheels to Pypi"
inputs:
pypi_token:
required: true
description: "release token for the repo"
fury_token:
required: true
description: "release token for the fury repo"
@@ -9,6 +12,12 @@ inputs:
runs:
using: "composite"
steps:
- name: Install dependencies
shell: bash
run: |
python -m pip install --upgrade pip
pip install twine
python3 -m pip install --upgrade pkginfo
- name: Choose repo
shell: bash
id: choose_repo
@@ -18,17 +27,19 @@ runs:
else
echo "repo=pypi" >> $GITHUB_OUTPUT
fi
- name: Publish to Fury
if: steps.choose_repo.outputs.repo == 'fury'
- name: Publish to PyPI
shell: bash
env:
FURY_TOKEN: ${{ inputs.fury_token }}
PYPI_TOKEN: ${{ inputs.pypi_token }}
run: |
WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
echo "Uploading $WHEEL to Fury"
curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
- name: Publish to PyPI
if: steps.choose_repo.outputs.repo == 'pypi'
uses: pypa/gh-action-pypi-publish@release/v1
with:
packages-dir: target/wheels/
if [[ ${{ steps.choose_repo.outputs.repo }} == fury ]]; then
WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
echo "Uploading $WHEEL to Fury"
curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
else
twine upload --repository ${{ steps.choose_repo.outputs.repo }} \
--username __token__ \
--password $PYPI_TOKEN \
target/wheels/lancedb-*.whl
fi

2720
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,7 @@
[workspace]
members = ["rust/lancedb", "nodejs", "python"]
# Python package needs to be built by maturin.
exclude = ["python"]
resolver = "2"
[workspace.package]
@@ -13,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.91.0"
[workspace.dependencies]
lance = { "version" = "=6.0.0-beta.4", default-features = false, "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=6.0.0-beta.4", default-features = false, "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=6.0.0-beta.4", default-features = false, "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=6.0.0-beta.4", "tag" = "v6.0.0-beta.4", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }

View File

@@ -15,7 +15,7 @@
# **The Multimodal AI Lakehouse**
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://docs.lancedb.com) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://lancedb.com/docs) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
**The ultimate multimodal data platform for AI/ML applications.**
@@ -57,7 +57,7 @@ LanceDB is a central location where developers can build, train and analyze thei
## **How to Install**:
Follow the [Quickstart](https://docs.lancedb.com/quickstart) doc to set up LanceDB locally.
Follow the [Quickstart](https://lancedb.com/docs/quickstart/) doc to set up LanceDB locally.
**API & SDK:** We also support Python, Typescript and Rust SDKs

View File

@@ -3,7 +3,6 @@
from __future__ import annotations
import argparse
import functools
import json
import os
import re
@@ -27,7 +26,6 @@ SEMVER_RE = re.compile(
)
@functools.total_ordering
@dataclass(frozen=True)
class SemVer:
major: int
@@ -158,9 +156,7 @@ def read_current_version(repo_root: Path) -> str:
def determine_latest_tag(tags: Iterable[TagInfo]) -> TagInfo:
# Stable releases (no prerelease) are always preferred over pre-releases.
# Within each group, standard semver ordering applies.
return max(tags, key=lambda tag: (not tag.semver.prerelease, tag.semver))
return max(tags, key=lambda tag: tag.semver)
def write_outputs(args: argparse.Namespace, payload: dict) -> None:

172
deny.toml
View File

@@ -1,172 +0,0 @@
# cargo-deny configuration for LanceDB.
#
# Run locally with `cargo deny check`. See
# https://embarkstudios.github.io/cargo-deny/ for the full reference.
# The set of target triples we care about. cargo-deny will only consider
# dependencies that are used on at least one of these targets. Keeping this
# explicit avoids noise from platform-specific crates (e.g. wasm, android,
# ios) that we never actually ship.
[graph]
targets = [
"x86_64-unknown-linux-gnu",
"aarch64-unknown-linux-gnu",
"x86_64-apple-darwin",
"aarch64-apple-darwin",
"x86_64-pc-windows-msvc",
"aarch64-pc-windows-msvc",
]
all-features = true
[output]
feature-depth = 1
# ---------------------------------------------------------------------------
# Advisories: security vulnerabilities and yanked crates.
# ---------------------------------------------------------------------------
[advisories]
version = 2
# Fail the check if any crate in the lockfile has been yanked from crates.io.
# Yanked crates are a signal the author retracted the release (often due to
# bugs or security issues) and should not be depended on.
yanked = "deny"
# Advisory IDs we have explicitly reviewed and chosen to accept. Every
# entry must include a rationale and, where possible, an upstream issue
# pointing to a fix. Revisit this list whenever dependencies are updated.
ignore = [
# rsa: Marvin Attack timing side-channel in PKCS#1 v1.5 decryption.
# Reached only through opendal → reqsign → rsa. We do not use RSA
# decryption in LanceDB ourselves; this is dormant in the signing path.
# No fixed release exists upstream as of this writing.
# https://rustsec.org/advisories/RUSTSEC-2023-0071
{ id = "RUSTSEC-2023-0071", reason = "rsa crate via opendal/reqsign; no fixed upstream release" },
# instant: unmaintained. Pulled in via backoff → instant. Upstream
# recommends switching to `web-time`; fix has to come from backoff.
# https://rustsec.org/advisories/RUSTSEC-2024-0384
{ id = "RUSTSEC-2024-0384", reason = "transitive via backoff; waiting on backoff replacement" },
# paste: unmaintained (author archived the repo). Used transitively by
# datafusion and the arrow ecosystem; widespread, no drop-in replacement.
# https://rustsec.org/advisories/RUSTSEC-2024-0436
{ id = "RUSTSEC-2024-0436", reason = "transitive via datafusion; awaiting ecosystem migration" },
# tantivy: segfault on malformed input due to missing bounds check.
# Pulled in via lance for full-text search. We only feed tantivy
# documents we construct ourselves, not attacker-controlled bytes.
# Tracked for a lance dependency bump.
# https://rustsec.org/advisories/RUSTSEC-2025-0003
{ id = "RUSTSEC-2025-0003", reason = "tantivy via lance; inputs are internally produced, not user-supplied bytes" },
# backoff: unmaintained. Reached only via async-openai. Replacement
# requires async-openai to migrate (or us to drop async-openai).
# https://rustsec.org/advisories/RUSTSEC-2025-0012
{ id = "RUSTSEC-2025-0012", reason = "transitive via async-openai; waiting on upstream migration" },
# number_prefix: unmaintained. Transitive via indicatif → hf-hub.
# No security impact, just maintenance status.
# https://rustsec.org/advisories/RUSTSEC-2025-0119
{ id = "RUSTSEC-2025-0119", reason = "transitive via hf-hub/indicatif; cosmetic formatting crate" },
# rustls-pemfile: unmaintained. Reached from two separate chains:
# rustls-native-certs 0.6 (via hyper-rustls 0.24) and object_store 0.12.
# Both upstream dependencies need to move before we can drop it.
# https://rustsec.org/advisories/RUSTSEC-2025-0134
{ id = "RUSTSEC-2025-0134", reason = "transitive via rustls-native-certs/object_store; waiting on upstream migration" },
# rustls-webpki 0.101.7 (old major line): name-constraint checks for
# URI / wildcard names. Pulled in only via the legacy rustls 0.21 chain
# from aws-smithy-http-client. The 0.103 line we actively use is patched.
# Clearing the 0.101 copy requires the aws-sdk chain to migrate off
# rustls 0.21.
# https://rustsec.org/advisories/RUSTSEC-2026-0098
# https://rustsec.org/advisories/RUSTSEC-2026-0099
{ id = "RUSTSEC-2026-0098", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
{ id = "RUSTSEC-2026-0099", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
# rustls-webpki 0.101.7: reachable panic in CRL parsing. Same legacy
# rustls 0.21 chain from aws-smithy-http-client as above. The 0.103 line
# we actively use is upgraded to 0.103.13 which contains the fix.
# https://rustsec.org/advisories/RUSTSEC-2026-0104
{ id = "RUSTSEC-2026-0104", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
]
# ---------------------------------------------------------------------------
# Licenses: only allow licenses we've reviewed as compatible with Apache-2.0.
# ---------------------------------------------------------------------------
[licenses]
version = 2
# SPDX identifiers for licenses that are compatible with our Apache-2.0
# distribution. Additions require legal review.
allow = [
"Apache-2.0",
"Apache-2.0 WITH LLVM-exception",
"MIT",
"BSD-2-Clause",
"BSD-3-Clause",
"ISC",
"Unicode-3.0",
"Unicode-DFS-2016",
"Zlib",
"CC0-1.0",
"MPL-2.0",
"BSL-1.0",
"OpenSSL",
# 0BSD ("BSD Zero Clause") is effectively public domain — no attribution
# required. Pulled in by `mock_instant`.
"0BSD",
# bzip2-1.0.6 is the permissive upstream bzip2 license (BSD-like). Pulled
# in by `libbz2-rs-sys`, the pure-Rust bzip2 implementation.
"bzip2-1.0.6",
# CDLA-Permissive-2.0 is a permissive data license used by `webpki-roots`
# for the Mozilla CA root bundle. Data-only, distribution-compatible.
"CDLA-Permissive-2.0",
]
confidence-threshold = 0.8
# Crates whose license cannot be determined from Cargo metadata but whose
# license we've manually confirmed from upstream. Keep this list minimal.
[[licenses.clarify]]
# polars-arrow-format omits the `license` field in its Cargo.toml, but the
# upstream repo (pola-rs/polars-arrow-format) is dual-licensed Apache-2.0 OR
# MIT. See https://github.com/pola-rs/polars-arrow-format/blob/main/LICENSE
crate = "polars-arrow-format"
expression = "Apache-2.0 OR MIT"
license-files = []
# ---------------------------------------------------------------------------
# Bans: disallow specific crates and flag dependency hygiene issues.
# ---------------------------------------------------------------------------
[bans]
# Warn (not deny) on duplicate versions of the same crate. In a large
# workspace like this one, duplicates are common and often unavoidable
# transitively. We surface them to discourage growth, but don't fail CI.
multiple-versions = "warn"
# Wildcard version requirements (`foo = "*"`) are a footgun — they let any
# future release in without review. Ban them outright.
wildcards = "deny"
# Internal workspace crates reference each other via `path = "..."`, which
# cargo-deny sees as a wildcard version. That's fine for private workspace
# members (not published to crates.io), so allow it specifically for paths.
allow-wildcard-paths = true
# Features that, if enabled, should cause the check to fail.
deny = []
# Crates to skip when checking for duplicate versions.
skip = []
# Similar to `skip`, but also skips the entire transitive subtree.
skip-tree = []
# ---------------------------------------------------------------------------
# Sources: restrict where crates can come from.
# ---------------------------------------------------------------------------
[sources]
# Deny any registry other than the ones explicitly listed below.
unknown-registry = "deny"
# Deny any git dependency whose host isn't in the allow-list below. This
# prevents accidental pulls from arbitrary forks.
unknown-git = "deny"
allow-registry = ["https://github.com/rust-lang/crates.io-index"]
# Lance is developed in a sibling repo and pulled as a git dependency until
# releases are cut to crates.io. Allow that specific host.
allow-git = [
"https://github.com/lance-format/lance",
]

View File

@@ -1,7 +1,7 @@
version: "3.9"
services:
localstack:
image: localstack/localstack:4.0
image: localstack/localstack:3.3
ports:
- 4566:4566
environment:

View File

@@ -1,27 +1,27 @@
# Simple base dockerfile that supports basic dependencies required to run lance with FTS and Hybrid Search
# Usage: docker build -t lancedb:latest -f Dockerfile .
FROM python:3.12-slim-bookworm
#Simple base dockerfile that supports basic dependencies required to run lance with FTS and Hybrid Search
#Usage docker build -t lancedb:latest -f Dockerfile .
FROM python:3.10-slim-buster
# Install build dependencies in a single layer
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
build-essential \
protobuf-compiler \
git \
ca-certificates && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install Rust (pinned installer, non-interactive)
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal
# Install Rust
RUN apt-get update && apt-get install -y curl build-essential && \
curl https://sh.rustup.rs -sSf | sh -s -- -y
# Set the environment variable for Rust
ENV PATH="/root/.cargo/bin:${PATH}"
# Install protobuf compiler
RUN apt-get install -y protobuf-compiler && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN apt-get -y update &&\
apt-get -y upgrade && \
apt-get -y install git
# Verify installations
RUN python --version && \
rustc --version && \
protoc --version
RUN pip install --no-cache-dir lancedb
RUN pip install tantivy lancedb

View File

@@ -1,6 +1,6 @@
# LanceDB Documentation
LanceDB docs are available at [docs.lancedb.com](https://docs.lancedb.com).
LanceDB docs are available at [lancedb.com/docs](https://lancedb.com/docs).
The SDK docs are built and deployed automatically by [Github Actions](../.github/workflows/docs.yml)
whenever a commit is pushed to the `main` branch. So it is possible for the docs to show

View File

@@ -1,9 +1,9 @@
mkdocs==1.6.1
mkdocs==1.5.3
mkdocs-jupyter==0.24.1
mkdocs-material==9.6.23
mkdocs-autorefs>=0.5,<=1.0
mkdocstrings[python]>=0.24,<1.0
griffe>=0.40,<1.0
mkdocs-render-swagger-plugin>=0.1.0
pydantic>=2.0,<3.0
mkdocs-redirects>=1.2.0
mkdocs-material==9.5.3
mkdocs-autorefs<=1.0
mkdocstrings[python]==0.25.2
griffe
mkdocs-render-swagger-plugin
pydantic
mkdocs-redirects

View File

@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId>
<version>0.28.0-beta.10</version>
<version>0.27.0-beta.5</version>
</dependency>
```
@@ -57,32 +57,32 @@ LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
## Metadata Operations
### Creating a Namespace Path
### Creating a Namespace
Namespace paths organize tables hierarchically. Create the desired namespace path before creating tables within it:
Namespaces organize tables hierarchically. Create a namespace before creating tables within it:
```java
import org.lance.namespace.model.CreateNamespaceRequest;
import org.lance.namespace.model.CreateNamespaceResponse;
// Create a child namespace path
// Create a child namespace
CreateNamespaceRequest request = new CreateNamespaceRequest();
request.setId(Arrays.asList("my_namespace"));
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
```
You can also create nested namespace paths:
You can also create nested namespaces:
```java
// Create a nested namespace path: parent/child
// Create a nested namespace: parent/child
CreateNamespaceRequest request = new CreateNamespaceRequest();
request.setId(Arrays.asList("parent_namespace", "child_namespace"));
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
```
### Describing a Namespace Path
### Describing a Namespace
```java
import org.lance.namespace.model.DescribeNamespaceRequest;
@@ -95,22 +95,22 @@ DescribeNamespaceResponse response = namespaceClient.describeNamespace(request);
System.out.println("Namespace properties: " + response.getProperties());
```
### Listing Namespace Paths
### Listing Namespaces
```java
import org.lance.namespace.model.ListNamespacesRequest;
import org.lance.namespace.model.ListNamespacesResponse;
// List all namespace paths at the root level
// List all namespaces at root level
ListNamespacesRequest request = new ListNamespacesRequest();
request.setId(Arrays.asList()); // Empty for root
ListNamespacesResponse response = namespaceClient.listNamespaces(request);
for (String ns : response.getNamespaces()) {
System.out.println("Namespace path: " + ns);
System.out.println("Namespace: " + ns);
}
// List child namespace paths under a parent path
// List child namespaces under a parent
ListNamespacesRequest childRequest = new ListNamespacesRequest();
childRequest.setId(Arrays.asList("parent_namespace"));
@@ -123,7 +123,7 @@ ListNamespacesResponse childResponse = namespaceClient.listNamespaces(childReque
import org.lance.namespace.model.ListTablesRequest;
import org.lance.namespace.model.ListTablesResponse;
// List tables in a namespace path
// List tables in a namespace
ListTablesRequest request = new ListTablesRequest();
request.setId(Arrays.asList("my_namespace"));
@@ -133,7 +133,7 @@ for (String table : response.getTables()) {
}
```
### Dropping a Namespace Path
### Dropping a Namespace
```java
import org.lance.namespace.model.DropNamespaceRequest;
@@ -175,7 +175,7 @@ DropTableResponse response = namespaceClient.dropTable(request);
### Creating a Table
Tables are created within a namespace path by providing data in Apache Arrow IPC format:
Tables are created within a namespace by providing data in Apache Arrow IPC format:
```java
import org.lance.namespace.LanceNamespace;
@@ -242,7 +242,7 @@ try (BufferAllocator allocator = new RootAllocator();
}
byte[] tableData = out.toByteArray();
// Create a table in a namespace path
// Create table in a namespace
CreateTableRequest request = new CreateTableRequest();
request.setId(Arrays.asList("my_namespace", "my_table"));
CreateTableResponse response = namespaceClient.createTable(request, tableData);

View File

@@ -34,7 +34,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
console.log(results);
```
The [quickstart](https://docs.lancedb.com/quickstart/) contains more complete examples.
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
## Development

View File

@@ -61,8 +61,8 @@ sharing the same data, deletion, and index files.
* **options.sourceVersion?**: `number`
The version of the source table to clone.
* **options.targetNamespacePath?**: `string`[]
The namespace path for the target table (defaults to root namespace).
* **options.targetNamespace?**: `string`[]
The namespace for the target table (defaults to root namespace).
#### Returns
@@ -116,13 +116,13 @@ Creates a new empty Table
`Promise`&lt;[`Table`](Table.md)&gt;
#### createEmptyTable(name, schema, namespacePath, options)
#### createEmptyTable(name, schema, namespace, options)
```ts
abstract createEmptyTable(
name,
schema,
namespacePath?,
namespace?,
options?): Promise<Table>
```
@@ -136,8 +136,8 @@ Creates a new empty Table
* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
The schema of the table
* **namespacePath?**: `string`[]
The namespace path to create the table in (defaults to root namespace)
* **namespace?**: `string`[]
The namespace to create the table in (defaults to root namespace)
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
Additional options
@@ -150,10 +150,10 @@ Creates a new empty Table
### createTable()
#### createTable(options, namespacePath)
#### createTable(options, namespace)
```ts
abstract createTable(options, namespacePath?): Promise<Table>
abstract createTable(options, namespace?): Promise<Table>
```
Creates a new Table and initialize it with new data.
@@ -163,8 +163,8 @@ Creates a new Table and initialize it with new data.
* **options**: `object` & `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
The options object.
* **namespacePath?**: `string`[]
The namespace path to create the table in (defaults to root namespace)
* **namespace?**: `string`[]
The namespace to create the table in (defaults to root namespace)
##### Returns
@@ -197,13 +197,13 @@ Creates a new Table and initialize it with new data.
`Promise`&lt;[`Table`](Table.md)&gt;
#### createTable(name, data, namespacePath, options)
#### createTable(name, data, namespace, options)
```ts
abstract createTable(
name,
data,
namespacePath?,
namespace?,
options?): Promise<Table>
```
@@ -218,8 +218,8 @@ Creates a new Table and initialize it with new data.
Non-empty Array of Records
to be inserted into the table
* **namespacePath?**: `string`[]
The namespace path to create the table in (defaults to root namespace)
* **namespace?**: `string`[]
The namespace to create the table in (defaults to root namespace)
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
Additional options
@@ -247,15 +247,15 @@ Return a brief description of the connection
### dropAllTables()
```ts
abstract dropAllTables(namespacePath?): Promise<void>
abstract dropAllTables(namespace?): Promise<void>
```
Drop all tables in the database.
#### Parameters
* **namespacePath?**: `string`[]
The namespace path to drop tables from (defaults to root namespace).
* **namespace?**: `string`[]
The namespace to drop tables from (defaults to root namespace).
#### Returns
@@ -266,7 +266,7 @@ Drop all tables in the database.
### dropTable()
```ts
abstract dropTable(name, namespacePath?): Promise<void>
abstract dropTable(name, namespace?): Promise<void>
```
Drop an existing table.
@@ -276,8 +276,8 @@ Drop an existing table.
* **name**: `string`
The name of the table to drop.
* **namespacePath?**: `string`[]
The namespace path of the table (defaults to root namespace).
* **namespace?**: `string`[]
The namespace of the table (defaults to root namespace).
#### Returns
@@ -304,7 +304,7 @@ Return true if the connection has not been closed
```ts
abstract openTable(
name,
namespacePath?,
namespace?,
options?): Promise<Table>
```
@@ -315,8 +315,8 @@ Open a table in the database.
* **name**: `string`
The name of the table
* **namespacePath?**: `string`[]
The namespace path of the table (defaults to root namespace)
* **namespace?**: `string`[]
The namespace of the table (defaults to root namespace)
* **options?**: `Partial`&lt;[`OpenTableOptions`](../interfaces/OpenTableOptions.md)&gt;
Additional options
@@ -349,10 +349,10 @@ Tables will be returned in lexicographical order.
`Promise`&lt;`string`[]&gt;
#### tableNames(namespacePath, options)
#### tableNames(namespace, options)
```ts
abstract tableNames(namespacePath?, options?): Promise<string[]>
abstract tableNames(namespace?, options?): Promise<string[]>
```
List all the table names in this database.
@@ -361,8 +361,8 @@ Tables will be returned in lexicographical order.
##### Parameters
* **namespacePath?**: `string`[]
The namespace path to list tables from (defaults to root namespace)
* **namespace?**: `string`[]
The namespace to list tables from (defaults to root namespace)
* **options?**: `Partial`&lt;[`TableNamesOptions`](../interfaces/TableNamesOptions.md)&gt;
options to control the

View File

@@ -71,12 +71,11 @@ Add new columns with defined values.
#### Parameters
* **newColumnTransforms**: `Field`&lt;`any`&gt; \| `Field`&lt;`any`&gt;[] \| `Schema`&lt;`any`&gt; \| [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
Either:
- An array of objects with column names and SQL expressions to calculate values
- A single Arrow Field defining one column with its data type (column will be initialized with null values)
- An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
- An Arrow Schema defining columns with their data types (columns will be initialized with null values)
* **newColumnTransforms**: [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
pairs of column names and
the SQL expression to use to calculate the value of the new column. These
expressions will be evaluated for each row in the table, and can
reference existing columns in the table.
#### Returns
@@ -485,7 +484,19 @@ Modeled after ``VACUUM`` in PostgreSQL.
- Prune: Removes old versions of the dataset
- Index: Optimizes the indices, adding new data to existing indices
The frequency an application should call optimize is based on the frequency of
Experimental API
----------------
The optimization process is undergoing active development and may change.
Our goal with these changes is to improve the performance of optimization and
reduce the complexity.
That being said, it is essential today to run optimize if you want the best
performance. It should be stable and safe to use in production, but it our
hope that the API may be simplified (or not even need to be called) in the
future.
The frequency an application shoudl call optimize is based on the frequency of
data modifications. If data is frequently added, deleted, or updated then
optimize should be run frequently. A good rule of thumb is to run optimize if
you have added or modified 100,000 or more records or run more than 20 data

View File

@@ -53,18 +53,3 @@ optional tlsConfig: TlsConfig;
```ts
optional userAgent: string;
```
***
### userId?
```ts
optional userId: string;
```
User identifier for tracking purposes.
This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
variable that contains the user ID value.

View File

@@ -41,29 +41,6 @@ for testing purposes.
***
### manifestEnabled?
```ts
optional manifestEnabled: boolean;
```
(For LanceDB OSS only): use directory namespace manifests as the source
of truth for table metadata. Existing directory-listed root tables are
migrated into the manifest on access.
***
### namespaceClientProperties?
```ts
optional namespaceClientProperties: Record<string, string>;
```
(For LanceDB OSS only): extra properties for the backing namespace
client used by manifest-enabled native connections.
***
### readConsistencyInterval?
```ts
@@ -112,4 +89,4 @@ optional storageOptions: Record<string, string>;
(For LanceDB OSS only): configuration for object storage.
The available options are described at https://docs.lancedb.com/storage/
The available options are described at https://lancedb.com/docs/storage/

View File

@@ -97,4 +97,4 @@ Configuration for object storage.
Options already set on the connection will be inherited by the table,
but can be overridden here.
The available options are described at https://docs.lancedb.com/storage/
The available options are described at https://lancedb.com/docs/storage/

View File

@@ -42,4 +42,4 @@ Configuration for object storage.
Options already set on the connection will be inherited by the table,
but can be overridden here.
The available options are described at https://docs.lancedb.com/storage/
The available options are described at https://lancedb.com/docs/storage/

View File

@@ -37,12 +37,3 @@ tbl.optimize({cleanupOlderThan: new Date()});
```ts
deleteUnverified: boolean;
```
Because they may be part of an in-progress transaction, files newer than
7 days old are not deleted by default. If you are sure that there are no
in-progress transactions, then you can set this to true to delete all
files older than `cleanupOlderThan`.
**WARNING**: This should only be set to true if you can guarantee that
no other process is currently working on this dataset. Otherwise the
dataset could be put into a corrupted state.

View File

@@ -52,7 +52,7 @@ new EmbeddingFunction<T, M>(): EmbeddingFunction<T, M>
### computeQueryEmbeddings()
```ts
computeQueryEmbeddings(data): Promise<number[] | Uint8Array | Float32Array | Float64Array>
computeQueryEmbeddings(data): Promise<number[] | Float32Array | Float64Array>
```
Compute the embeddings for a single query
@@ -63,7 +63,7 @@ Compute the embeddings for a single query
#### Returns
`Promise`&lt;`number`[] \| `Uint8Array` \| `Float32Array` \| `Float64Array`&gt;
`Promise`&lt;`number`[] \| `Float32Array` \| `Float64Array`&gt;
***

View File

@@ -37,7 +37,7 @@ new TextEmbeddingFunction<M>(): TextEmbeddingFunction<M>
### computeQueryEmbeddings()
```ts
computeQueryEmbeddings(data): Promise<number[] | Uint8Array | Float32Array | Float64Array>
computeQueryEmbeddings(data): Promise<number[] | Float32Array | Float64Array>
```
Compute the embeddings for a single query
@@ -48,7 +48,7 @@ Compute the embeddings for a single query
#### Returns
`Promise`&lt;`number`[] \| `Uint8Array` \| `Float32Array` \| `Float64Array`&gt;
`Promise`&lt;`number`[] \| `Float32Array` \| `Float64Array`&gt;
#### Overrides

View File

@@ -7,10 +7,5 @@
# Type Alias: IntoVector
```ts
type IntoVector:
| Float32Array
| Float64Array
| Uint8Array
| number[]
| Promise<Float32Array | Float64Array | Uint8Array | number[]>;
type IntoVector: Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
```

View File

@@ -36,20 +36,6 @@ is also an [asynchronous API client](#connections-asynchronous).
::: lancedb.table.Tags
## Expressions
Type-safe expression builder for filters and projections. Use these instead
of raw SQL strings with [where][lancedb.query.LanceQueryBuilder.where] and
[select][lancedb.query.LanceQueryBuilder.select].
::: lancedb.expr.Expr
::: lancedb.expr.col
::: lancedb.expr.lit
::: lancedb.expr.func
## Querying (Synchronous)
::: lancedb.query.Query

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.28.0-beta.10</version>
<version>0.27.0-beta.5</version>
<relativePath>../pom.xml</relativePath>
</parent>
@@ -56,21 +56,21 @@
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j2-impl</artifactId>
<version>2.25.3</version>
<version>2.24.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.25.3</version>
<version>2.24.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.25.3</version>
<version>2.24.3</version>
<scope>test</scope>
</dependency>
</dependencies>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.28.0-beta.10</version>
<version>0.27.0-beta.5</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>
@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>6.0.0-beta.4</lance-core.version>
<lance-core.version>3.1.0-beta.2</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
@@ -111,7 +111,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.3.1</version>
<version>2.2.1</version>
<executions>
<execution>
<id>attach-sources</id>
@@ -124,7 +124,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.11.2</version>
<version>2.9.1</version>
<executions>
<execution>
<id>attach-javadocs</id>
@@ -178,15 +178,15 @@
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.4.1</version>
<version>3.1.0</version>
</plugin>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.3.1</version>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.14.0</version>
<version>3.8.1</version>
<configuration>
<compilerArgs>
<arg>-h</arg>
@@ -205,11 +205,11 @@
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.4.2</version>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>3.1.3</version>
<version>2.5.2</version>
</plugin>
<plugin>
<groupId>com.diffplug.spotless</groupId>
@@ -327,7 +327,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>3.2.7</version>
<version>1.5</version>
<executions>
<execution>
<id>sign-artifacts</id>

View File

@@ -1,8 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.28.0-beta.10"
publish = false
version = "0.27.0-beta.5"
license.workspace = true
description.workspace = true
repository.workspace = true
@@ -16,8 +15,6 @@ crate-type = ["cdylib"]
async-trait.workspace = true
arrow-ipc.workspace = true
arrow-array.workspace = true
arrow-buffer = "57.2"
half.workspace = true
arrow-schema.workspace = true
env_logger.workspace = true
futures.workspace = true
@@ -28,12 +25,12 @@ napi = { version = "3.8.3", default-features = false, features = [
] }
napi-derive = "3.5.2"
# Prevent dynamic linking of lzma, which comes from datafusion
lzma-sys = { version = "0.1", features = ["static"] }
lzma-sys = { version = "*", features = ["static"] }
log.workspace = true
# Pin to resolve build failures; update periodically for security patches.
aws-lc-sys = "=0.40.0"
aws-lc-rs = "=1.16.3"
# Workaround for build failure until we can fix it.
aws-lc-sys = "=0.28.0"
aws-lc-rs = "=1.13.0"
[build-dependencies]
napi-build = "2.3.1"

View File

@@ -30,7 +30,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
console.log(results);
```
The [quickstart](https://docs.lancedb.com/quickstart/) contains more complete examples.
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
## Development

View File

@@ -1,8 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { spawn } from "node:child_process";
import * as path from "node:path";
import { RecordBatch } from "apache-arrow";
import * as tmp from "tmp";
import { Connection, Index, Table, connect, makeArrowTable } from "../lancedb";
@@ -78,91 +76,4 @@ describe("rerankers", function () {
expect(result).toHaveLength(2);
});
it("does not keep process alive after rerank query", async function () {
const script = `
import * as lancedb from "./dist/index.js";
import * as os from "node:os";
import * as path from "node:path";
import * as fs from "node:fs/promises";
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lancedb-rerank-exit-"));
const db = await lancedb.connect(dir);
const table = await db.createTable("test", [{ text: "hello", vector: [1, 2, 3] }], {
mode: "overwrite",
});
await table.createIndex("text", { config: lancedb.Index.fts() });
await table.waitForIndex(["text_idx"], 30);
const reranker = await lancedb.rerankers.RRFReranker.create();
await table
.query()
.nearestTo([1, 2, 3])
.fullTextSearch("hello")
.rerank(reranker)
.toArray();
table.close();
db.close();
`;
await new Promise<void>((resolve, reject) => {
const child = spawn(
process.execPath,
["--input-type=module", "-e", script],
{
cwd: path.resolve(__dirname, ".."),
stdio: ["ignore", "pipe", "pipe"],
},
);
let stdout = "";
let stderr = "";
child.stdout.on("data", (chunk) => {
stdout += chunk.toString();
});
child.stderr.on("data", (chunk) => {
stderr += chunk.toString();
});
const timeout = setTimeout(() => {
child.kill();
reject(
new Error(
`child process did not exit in time\nstdout:\n${stdout}\nstderr:\n${stderr}`,
),
);
}, 20_000);
child.on("error", (err) => {
clearTimeout(timeout);
reject(err);
});
child.on("exit", (code, signal) => {
clearTimeout(timeout);
if (signal !== null) {
reject(
new Error(
`child process exited with signal ${signal}\nstdout:\n${stdout}\nstderr:\n${stderr}`,
),
);
return;
}
if (code !== 0) {
reject(
new Error(
`child process exited with code ${code}\nstdout:\n${stdout}\nstderr:\n${stderr}`,
),
);
return;
}
resolve();
});
});
});
});

View File

@@ -103,7 +103,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
},
numIndices: 0,
numRows: 3,
totalBytes: 44,
totalBytes: 24,
});
});
@@ -1259,98 +1259,6 @@ describe("schema evolution", function () {
expect(await table.schema()).toEqual(expectedSchema);
});
it("can add columns with schema for explicit data types", async function () {
const con = await connect(tmpDir.name);
const table = await con.createTable("vectors", [
{ id: 1n, vector: [0.1, 0.2] },
]);
// Define schema for new columns with explicit data types
// Note: All columns must be nullable when using addColumns with Schema
// because they are initially populated with null values
const newColumnsSchema = new Schema([
new Field("price", new Float64(), true),
new Field("category", new Utf8(), true),
new Field("rating", new Int32(), true),
]);
const result = await table.addColumns(newColumnsSchema);
expect(result).toHaveProperty("version");
expect(result.version).toBe(2);
const expectedSchema = new Schema([
new Field("id", new Int64(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float32(), true)),
true,
),
new Field("price", new Float64(), true),
new Field("category", new Utf8(), true),
new Field("rating", new Int32(), true),
]);
expect(await table.schema()).toEqual(expectedSchema);
// Verify that new columns are populated with null values
const results = await table.query().toArray();
expect(results).toHaveLength(1);
expect(results[0].price).toBeNull();
expect(results[0].category).toBeNull();
expect(results[0].rating).toBeNull();
});
it("can add a single column using Field", async function () {
const con = await connect(tmpDir.name);
const table = await con.createTable("vectors", [
{ id: 1n, vector: [0.1, 0.2] },
]);
// Add a single field
const priceField = new Field("price", new Float64(), true);
const result = await table.addColumns(priceField);
expect(result).toHaveProperty("version");
expect(result.version).toBe(2);
const expectedSchema = new Schema([
new Field("id", new Int64(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float32(), true)),
true,
),
new Field("price", new Float64(), true),
]);
expect(await table.schema()).toEqual(expectedSchema);
});
it("can add multiple columns using array of Fields", async function () {
const con = await connect(tmpDir.name);
const table = await con.createTable("vectors", [
{ id: 1n, vector: [0.1, 0.2] },
]);
// Add multiple fields as array
const fields = [
new Field("price", new Float64(), true),
new Field("category", new Utf8(), true),
];
const result = await table.addColumns(fields);
expect(result).toHaveProperty("version");
expect(result.version).toBe(2);
const expectedSchema = new Schema([
new Field("id", new Int64(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float32(), true)),
true,
),
new Field("price", new Float64(), true),
new Field("category", new Utf8(), true),
]);
expect(await table.schema()).toEqual(expectedSchema);
});
it("can alter the columns in the schema", async function () {
const con = await connect(tmpDir.name);
const schema = new Schema([

View File

@@ -1,110 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import * as tmp from "tmp";
import { type Table, connect } from "../lancedb";
import {
Field,
FixedSizeList,
Float32,
Int64,
Schema,
makeArrowTable,
} from "../lancedb/arrow";
describe("Vector query with different typed arrays", () => {
let tmpDir: tmp.DirResult;
afterEach(() => {
tmpDir?.removeCallback();
});
async function createFloat32Table(): Promise<Table> {
tmpDir = tmp.dirSync({ unsafeCleanup: true });
const db = await connect(tmpDir.name);
const schema = new Schema([
new Field("id", new Int64(), true),
new Field(
"vec",
new FixedSizeList(2, new Field("item", new Float32())),
true,
),
]);
const data = makeArrowTable(
[
{ id: 1n, vec: [1.0, 0.0] },
{ id: 2n, vec: [0.0, 1.0] },
{ id: 3n, vec: [1.0, 1.0] },
],
{ schema },
);
return db.createTable("test_f32", data);
}
it("should search with Float32Array (baseline)", async () => {
const table = await createFloat32Table();
const results = await table
.query()
.nearestTo(new Float32Array([1.0, 0.0]))
.limit(1)
.toArray();
expect(results.length).toBe(1);
expect(Number(results[0].id)).toBe(1);
});
it("should search with number[] (backward compat)", async () => {
const table = await createFloat32Table();
const results = await table
.query()
.nearestTo([1.0, 0.0])
.limit(1)
.toArray();
expect(results.length).toBe(1);
expect(Number(results[0].id)).toBe(1);
});
it("should search with Float64Array via raw path", async () => {
const table = await createFloat32Table();
const results = await table
.query()
.nearestTo(new Float64Array([1.0, 0.0]))
.limit(1)
.toArray();
expect(results.length).toBe(1);
expect(Number(results[0].id)).toBe(1);
});
it("should add multiple query vectors with Float64Array", async () => {
const table = await createFloat32Table();
const results = await table
.query()
.nearestTo(new Float64Array([1.0, 0.0]))
.addQueryVector(new Float64Array([0.0, 1.0]))
.limit(2)
.toArray();
expect(results.length).toBeGreaterThanOrEqual(2);
});
// Float16Array is only available in Node 22+; not in TypeScript's standard lib yet
const float16ArrayCtor = (globalThis as unknown as Record<string, unknown>)
.Float16Array as (new (values: number[]) => unknown) | undefined;
const hasFloat16 = float16ArrayCtor !== undefined;
const f16it = hasFloat16 ? it : it.skip;
f16it("should search with Float16Array via raw path", async () => {
const table = await createFloat32Table();
const results = await table
.query()
.nearestTo(new float16ArrayCtor!([1.0, 0.0]) as Float32Array)
.limit(1)
.toArray();
expect(results.length).toBe(1);
expect(Number(results[0].id)).toBe(1);
});
});

View File

@@ -30,15 +30,12 @@
"x64",
"arm64"
],
"dev": true,
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin",
"linux",
"win32"
],
"peer": true,
"dependencies": {
"reflect-metadata": "^0.2.2"
},
@@ -94,15 +91,14 @@
}
},
"node_modules/@babel/code-frame": {
"version": "7.29.0",
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz",
"integrity": "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==",
"version": "7.26.2",
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz",
"integrity": "sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/helper-validator-identifier": "^7.28.5",
"@babel/helper-validator-identifier": "^7.25.9",
"js-tokens": "^4.0.0",
"picocolors": "^1.1.1"
"picocolors": "^1.0.0"
},
"engines": {
"node": ">=6.9.0"
@@ -237,21 +233,19 @@
}
},
"node_modules/@babel/helper-string-parser": {
"version": "7.27.1",
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz",
"integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==",
"version": "7.25.9",
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz",
"integrity": "sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=6.9.0"
}
},
"node_modules/@babel/helper-validator-identifier": {
"version": "7.28.5",
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz",
"integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==",
"version": "7.25.9",
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz",
"integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=6.9.0"
}
@@ -266,27 +260,25 @@
}
},
"node_modules/@babel/helpers": {
"version": "7.28.6",
"resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.28.6.tgz",
"integrity": "sha512-xOBvwq86HHdB7WUDTfKfT/Vuxh7gElQ+Sfti2Cy6yIWNW05P8iUslOVcZ4/sKbE+/jQaukQAdz/gf3724kYdqw==",
"version": "7.26.0",
"resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.26.0.tgz",
"integrity": "sha512-tbhNuIxNcVb21pInl3ZSjksLCvgdZy9KwJ8brv993QtIVKJBBkYXz4q4ZbAv31GdnC+R90np23L5FbEBlthAEw==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/template": "^7.28.6",
"@babel/types": "^7.28.6"
"@babel/template": "^7.25.9",
"@babel/types": "^7.26.0"
},
"engines": {
"node": ">=6.9.0"
}
},
"node_modules/@babel/parser": {
"version": "7.29.0",
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.0.tgz",
"integrity": "sha512-IyDgFV5GeDUVX4YdF/3CPULtVGSXXMLh1xVIgdCgxApktqnQV0r7/8Nqthg+8YLGaAtdyIlo2qIdZrbCv4+7ww==",
"version": "7.26.2",
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.26.2.tgz",
"integrity": "sha512-DWMCZH9WA4Maitz2q21SRKHo9QXZxkDsbNZoVD62gusNtNBBqDg9i7uOhASfTfIGNzW+O+r7+jAlM8dwphcJKQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/types": "^7.29.0"
"@babel/types": "^7.26.0"
},
"bin": {
"parser": "bin/babel-parser.js"
@@ -518,15 +510,14 @@
}
},
"node_modules/@babel/template": {
"version": "7.28.6",
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz",
"integrity": "sha512-YA6Ma2KsCdGb+WC6UpBVFJGXL58MDA6oyONbjyF/+5sBgxY/dwkhLogbMT2GXXyU84/IhRw/2D1Os1B/giz+BQ==",
"version": "7.25.9",
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.25.9.tgz",
"integrity": "sha512-9DGttpmPvIxBb/2uwpVo3dqJ+O6RooAFOS+lB+xDqoE2PVCE8nfoHMdZLpfCQRLwvohzXISPZcgxt80xLfsuwg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/code-frame": "^7.28.6",
"@babel/parser": "^7.28.6",
"@babel/types": "^7.28.6"
"@babel/code-frame": "^7.25.9",
"@babel/parser": "^7.25.9",
"@babel/types": "^7.25.9"
},
"engines": {
"node": ">=6.9.0"
@@ -551,14 +542,13 @@
}
},
"node_modules/@babel/types": {
"version": "7.29.0",
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.29.0.tgz",
"integrity": "sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==",
"version": "7.26.0",
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.26.0.tgz",
"integrity": "sha512-Z/yiTPj+lDVnF7lWeKCIJzaIkI0vYO87dMpZ4bg4TDrFe4XXLFWL1TbXU27gBP3QccxV9mZICCrnjnYlJjXHOA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/helper-string-parser": "^7.27.1",
"@babel/helper-validator-identifier": "^7.28.5"
"@babel/helper-string-parser": "^7.25.9",
"@babel/helper-validator-identifier": "^7.25.9"
},
"engines": {
"node": ">=6.9.0"
@@ -1161,6 +1151,95 @@
"url": "https://opencollective.com/libvips"
}
},
"node_modules/@isaacs/cliui": {
"version": "8.0.2",
"resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz",
"integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==",
"dependencies": {
"string-width": "^5.1.2",
"string-width-cjs": "npm:string-width@^4.2.0",
"strip-ansi": "^7.0.1",
"strip-ansi-cjs": "npm:strip-ansi@^6.0.1",
"wrap-ansi": "^8.1.0",
"wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0"
},
"engines": {
"node": ">=12"
}
},
"node_modules/@isaacs/cliui/node_modules/ansi-regex": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz",
"integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/chalk/ansi-regex?sponsor=1"
}
},
"node_modules/@isaacs/cliui/node_modules/ansi-styles": {
"version": "6.2.1",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
"integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
}
},
"node_modules/@isaacs/cliui/node_modules/emoji-regex": {
"version": "9.2.2",
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz",
"integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg=="
},
"node_modules/@isaacs/cliui/node_modules/string-width": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz",
"integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==",
"dependencies": {
"eastasianwidth": "^0.2.0",
"emoji-regex": "^9.2.2",
"strip-ansi": "^7.0.1"
},
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/@isaacs/cliui/node_modules/strip-ansi": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz",
"integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==",
"dependencies": {
"ansi-regex": "^6.0.1"
},
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/chalk/strip-ansi?sponsor=1"
}
},
"node_modules/@isaacs/cliui/node_modules/wrap-ansi": {
"version": "8.1.0",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
"integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==",
"dependencies": {
"ansi-styles": "^6.1.0",
"string-width": "^5.0.1",
"strip-ansi": "^7.0.1"
},
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
}
},
"node_modules/@isaacs/fs-minipass": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz",
@@ -1527,6 +1606,15 @@
"resolved": "../dist",
"link": true
},
"node_modules/@pkgjs/parseargs": {
"version": "0.11.0",
"resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
"integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==",
"optional": true,
"engines": {
"node": ">=14"
}
},
"node_modules/@protobufjs/aspromise": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
@@ -1758,7 +1846,6 @@
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
"dev": true,
"engines": {
"node": ">=8"
}
@@ -1767,7 +1854,6 @@
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
"dev": true,
"dependencies": {
"color-convert": "^2.0.1"
},
@@ -1933,15 +2019,13 @@
"node_modules/balanced-match": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
"dev": true
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
},
"node_modules/brace-expansion": {
"version": "1.1.12",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
"version": "1.1.11",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
"dev": true,
"license": "MIT",
"dependencies": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
@@ -2018,19 +2102,6 @@
"integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==",
"dev": true
},
"node_modules/call-bind-apply-helpers": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"function-bind": "^1.1.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/callsites": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
@@ -2227,11 +2298,9 @@
}
},
"node_modules/cross-spawn": {
"version": "7.0.6",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
"integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
"dev": true,
"license": "MIT",
"version": "7.0.3",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
"integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
"dependencies": {
"path-key": "^3.1.0",
"shebang-command": "^2.0.0",
@@ -2315,19 +2384,10 @@
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
}
},
"node_modules/dunder-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
"integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.1",
"es-errors": "^1.3.0",
"gopd": "^1.2.0"
},
"engines": {
"node": ">= 0.4"
}
"node_modules/eastasianwidth": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz",
"integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="
},
"node_modules/ejs": {
"version": "3.1.10",
@@ -2365,8 +2425,7 @@
"node_modules/emoji-regex": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
"dev": true
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
},
"node_modules/error-ex": {
"version": "1.3.2",
@@ -2383,51 +2442,6 @@
"integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==",
"dev": true
},
"node_modules/es-define-property": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-errors": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
"integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-object-atoms": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
"integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-set-tostringtag": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
"integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"get-intrinsic": "^1.2.6",
"has-tostringtag": "^1.0.2",
"hasown": "^2.0.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/escalade": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
@@ -2540,21 +2554,19 @@
}
},
"node_modules/filelist/node_modules/brace-expansion": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
"dev": true,
"license": "MIT",
"dependencies": {
"balanced-match": "^1.0.0"
}
},
"node_modules/filelist/node_modules/minimatch": {
"version": "5.1.9",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.9.tgz",
"integrity": "sha512-7o1wEA2RyMP7Iu7GNba9vc0RWWGACJOCZBJX2GJWip0ikV+wcOsgVuY9uE8CPiyQhkGFSlhuSkZPavN7u1c2Fw==",
"version": "5.1.6",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz",
"integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==",
"dev": true,
"license": "ISC",
"dependencies": {
"brace-expansion": "^2.0.1"
},
@@ -2592,16 +2604,39 @@
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="
},
"node_modules/foreground-child": {
"version": "3.3.0",
"resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.0.tgz",
"integrity": "sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==",
"dependencies": {
"cross-spawn": "^7.0.0",
"signal-exit": "^4.0.1"
},
"engines": {
"node": ">=14"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/foreground-child/node_modules/signal-exit": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz",
"integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==",
"engines": {
"node": ">=14"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/form-data": {
"version": "4.0.5",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz",
"integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==",
"license": "MIT",
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.1.tgz",
"integrity": "sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==",
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
"es-set-tostringtag": "^2.1.0",
"hasown": "^2.0.2",
"mime-types": "^2.1.12"
},
"engines": {
@@ -2649,6 +2684,7 @@
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
"dev": true,
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
@@ -2671,30 +2707,6 @@
"node": "6.* || 8.* || >= 10.*"
}
},
"node_modules/get-intrinsic": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
"integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.2",
"es-define-property": "^1.0.1",
"es-errors": "^1.3.0",
"es-object-atoms": "^1.1.1",
"function-bind": "^1.1.2",
"get-proto": "^1.0.1",
"gopd": "^1.2.0",
"has-symbols": "^1.1.0",
"hasown": "^2.0.2",
"math-intrinsics": "^1.1.0"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/get-package-type": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/get-package-type/-/get-package-type-0.1.0.tgz",
@@ -2704,19 +2716,6 @@
"node": ">=8.0.0"
}
},
"node_modules/get-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
"license": "MIT",
"dependencies": {
"dunder-proto": "^1.0.1",
"es-object-atoms": "^1.0.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/get-stream": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
@@ -2759,18 +2758,6 @@
"node": ">=4"
}
},
"node_modules/gopd": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/graceful-fs": {
"version": "4.2.11",
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
@@ -2791,37 +2778,11 @@
"node": ">=8"
}
},
"node_modules/has-symbols": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
"integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/has-tostringtag": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
"integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
"license": "MIT",
"dependencies": {
"has-symbols": "^1.0.3"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/hasown": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
"dev": true,
"dependencies": {
"function-bind": "^1.1.2"
},
@@ -2921,7 +2882,6 @@
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
"integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
"dev": true,
"engines": {
"node": ">=8"
}
@@ -2959,8 +2919,7 @@
"node_modules/isexe": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
"dev": true
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="
},
"node_modules/istanbul-lib-coverage": {
"version": "3.2.2",
@@ -3028,6 +2987,20 @@
"node": ">=8"
}
},
"node_modules/jackspeak": {
"version": "3.4.3",
"resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz",
"integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==",
"dependencies": {
"@isaacs/cliui": "^8.0.2"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
},
"optionalDependencies": {
"@pkgjs/parseargs": "^0.11.0"
}
},
"node_modules/jake": {
"version": "10.9.2",
"resolved": "https://registry.npmjs.org/jake/-/jake-10.9.2.tgz",
@@ -3632,11 +3605,10 @@
"dev": true
},
"node_modules/js-yaml": {
"version": "3.14.2",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.2.tgz",
"integrity": "sha512-PMSmkqxr106Xa156c2M265Z+FTrPl+oxd/rgOQy2tijQeK5TxQ43psO1ZCwhVOSdnn+RzkzlRz/eY4BgJBYVpg==",
"version": "3.14.1",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz",
"integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==",
"dev": true,
"license": "MIT",
"dependencies": {
"argparse": "^1.0.7",
"esprima": "^4.0.0"
@@ -3756,15 +3728,6 @@
"tmpl": "1.0.5"
}
},
"node_modules/math-intrinsics": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/merge-stream": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
@@ -3813,11 +3776,10 @@
}
},
"node_modules/minimatch": {
"version": "3.1.5",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
"integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==",
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
"dev": true,
"license": "ISC",
"dependencies": {
"brace-expansion": "^1.1.7"
},
@@ -3834,17 +3796,31 @@
}
},
"node_modules/minizlib": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz",
"integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==",
"license": "MIT",
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.1.tgz",
"integrity": "sha512-umcy022ILvb5/3Djuu8LWeqUa8D68JaBzlttKeMWen48SjabqS3iY5w/vzeMzMUNhLDifyhbOwKDSznB1vvrwg==",
"dependencies": {
"minipass": "^7.1.2"
"minipass": "^7.0.4",
"rimraf": "^5.0.5"
},
"engines": {
"node": ">= 18"
}
},
"node_modules/mkdirp": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
"integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
"bin": {
"mkdirp": "dist/cjs/src/bin.js"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
@@ -4034,6 +4010,11 @@
"node": ">=6"
}
},
"node_modules/package-json-from-dist": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz",
"integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw=="
},
"node_modules/parse-json": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz",
@@ -4074,7 +4055,6 @@
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
"integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==",
"dev": true,
"engines": {
"node": ">=8"
}
@@ -4085,6 +4065,26 @@
"integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==",
"dev": true
},
"node_modules/path-scurry": {
"version": "1.11.1",
"resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz",
"integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==",
"dependencies": {
"lru-cache": "^10.2.0",
"minipass": "^5.0.0 || ^6.0.2 || ^7.0.0"
},
"engines": {
"node": ">=16 || 14 >=14.18"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/path-scurry/node_modules/lru-cache": {
"version": "10.4.3",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz",
"integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="
},
"node_modules/picocolors": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
@@ -4246,6 +4246,61 @@
"node": ">=10"
}
},
"node_modules/rimraf": {
"version": "5.0.10",
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.10.tgz",
"integrity": "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==",
"dependencies": {
"glob": "^10.3.7"
},
"bin": {
"rimraf": "dist/esm/bin.mjs"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/rimraf/node_modules/brace-expansion": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
"dependencies": {
"balanced-match": "^1.0.0"
}
},
"node_modules/rimraf/node_modules/glob": {
"version": "10.4.5",
"resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz",
"integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==",
"dependencies": {
"foreground-child": "^3.1.0",
"jackspeak": "^3.1.2",
"minimatch": "^9.0.4",
"minipass": "^7.1.2",
"package-json-from-dist": "^1.0.0",
"path-scurry": "^1.11.1"
},
"bin": {
"glob": "dist/esm/bin.mjs"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/rimraf/node_modules/minimatch": {
"version": "9.0.5",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
"integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
"dependencies": {
"brace-expansion": "^2.0.1"
},
"engines": {
"node": ">=16 || 14 >=14.17"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/semver": {
"version": "7.6.3",
"resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz",
@@ -4299,7 +4354,6 @@
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
"integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
"dev": true,
"dependencies": {
"shebang-regex": "^3.0.0"
},
@@ -4311,7 +4365,6 @@
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
"integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==",
"dev": true,
"engines": {
"node": ">=8"
}
@@ -4399,7 +4452,20 @@
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
"dev": true,
"dependencies": {
"emoji-regex": "^8.0.0",
"is-fullwidth-code-point": "^3.0.0",
"strip-ansi": "^6.0.1"
},
"engines": {
"node": ">=8"
}
},
"node_modules/string-width-cjs": {
"name": "string-width",
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
"dependencies": {
"emoji-regex": "^8.0.0",
"is-fullwidth-code-point": "^3.0.0",
@@ -4413,7 +4479,18 @@
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
"dev": true,
"dependencies": {
"ansi-regex": "^5.0.1"
},
"engines": {
"node": ">=8"
}
},
"node_modules/strip-ansi-cjs": {
"name": "strip-ansi",
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
"integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
"dependencies": {
"ansi-regex": "^5.0.1"
},
@@ -4464,15 +4541,15 @@
}
},
"node_modules/tar": {
"version": "7.5.10",
"resolved": "https://registry.npmjs.org/tar/-/tar-7.5.10.tgz",
"integrity": "sha512-8mOPs1//5q/rlkNSPcCegA6hiHJYDmSLEI8aMH/CdSQJNWztHC9WHNam5zdQlfpTwB9Xp7IBEsHfV5LKMJGVAw==",
"license": "BlueOak-1.0.0",
"version": "7.4.3",
"resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz",
"integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==",
"dependencies": {
"@isaacs/fs-minipass": "^4.0.0",
"chownr": "^3.0.0",
"minipass": "^7.1.2",
"minizlib": "^3.1.0",
"minizlib": "^3.0.1",
"mkdirp": "^3.0.1",
"yallist": "^5.0.0"
},
"engines": {
@@ -4705,7 +4782,6 @@
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
"integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
"dev": true,
"dependencies": {
"isexe": "^2.0.0"
},
@@ -4733,6 +4809,23 @@
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
}
},
"node_modules/wrap-ansi-cjs": {
"name": "wrap-ansi",
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
"integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
"dependencies": {
"ansi-styles": "^4.0.0",
"string-width": "^4.1.0",
"strip-ansi": "^6.0.0"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
}
},
"node_modules/wrappy": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",

View File

@@ -117,9 +117,8 @@ export type TableLike =
export type IntoVector =
| Float32Array
| Float64Array
| Uint8Array
| number[]
| Promise<Float32Array | Float64Array | Uint8Array | number[]>;
| Promise<Float32Array | Float64Array | number[]>;
export type MultiVector = IntoVector[];
@@ -127,48 +126,14 @@ export function isMultiVector(value: unknown): value is MultiVector {
return Array.isArray(value) && isIntoVector(value[0]);
}
// Float16Array is not in TypeScript's standard lib yet; access dynamically
type Float16ArrayCtor = new (
...args: unknown[]
) => { buffer: ArrayBuffer; byteOffset: number; byteLength: number };
const float16ArrayCtor = (globalThis as unknown as Record<string, unknown>)
.Float16Array as Float16ArrayCtor | undefined;
export function isIntoVector(value: unknown): value is IntoVector {
return (
value instanceof Float32Array ||
value instanceof Float64Array ||
value instanceof Uint8Array ||
(float16ArrayCtor !== undefined && value instanceof float16ArrayCtor) ||
(Array.isArray(value) && !Array.isArray(value[0]))
);
}
/**
* Extract the underlying byte buffer and data type from a typed array
* for passing to the Rust NAPI layer without precision loss.
*/
export function extractVectorBuffer(
vector: Float32Array | Float64Array | Uint8Array,
): { data: Uint8Array; dtype: string } | null {
if (float16ArrayCtor !== undefined && vector instanceof float16ArrayCtor) {
return {
data: new Uint8Array(vector.buffer, vector.byteOffset, vector.byteLength),
dtype: "float16",
};
}
if (vector instanceof Float64Array) {
return {
data: new Uint8Array(vector.buffer, vector.byteOffset, vector.byteLength),
dtype: "float64",
};
}
if (vector instanceof Uint8Array && !(vector instanceof Float32Array)) {
return { data: vector, dtype: "uint8" };
}
return null;
}
export function isArrowTable(value: object): value is TableLike {
if (value instanceof ArrowTable) return true;
return "schema" in value && "batches" in value;

View File

@@ -42,7 +42,7 @@ export interface CreateTableOptions {
* Options already set on the connection will be inherited by the table,
* but can be overridden here.
*
* The available options are described at https://docs.lancedb.com/storage/
* The available options are described at https://lancedb.com/docs/storage/
*/
storageOptions?: Record<string, string>;
@@ -78,7 +78,7 @@ export interface OpenTableOptions {
* Options already set on the connection will be inherited by the table,
* but can be overridden here.
*
* The available options are described at https://docs.lancedb.com/storage/
* The available options are described at https://lancedb.com/docs/storage/
*/
storageOptions?: Record<string, string>;
/**
@@ -166,25 +166,25 @@ export abstract class Connection {
* List all the table names in this database.
*
* Tables will be returned in lexicographical order.
* @param {string[]} namespacePath - The namespace path to list tables from (defaults to root namespace)
* @param {string[]} namespace - The namespace to list tables from (defaults to root namespace)
* @param {Partial<TableNamesOptions>} options - options to control the
* paging / start point
*
*/
abstract tableNames(
namespacePath?: string[],
namespace?: string[],
options?: Partial<TableNamesOptions>,
): Promise<string[]>;
/**
* Open a table in the database.
* @param {string} name - The name of the table
* @param {string[]} namespacePath - The namespace path of the table (defaults to root namespace)
* @param {string[]} namespace - The namespace of the table (defaults to root namespace)
* @param {Partial<OpenTableOptions>} options - Additional options
*/
abstract openTable(
name: string,
namespacePath?: string[],
namespace?: string[],
options?: Partial<OpenTableOptions>,
): Promise<Table>;
@@ -193,7 +193,7 @@ export abstract class Connection {
* @param {object} options - The options object.
* @param {string} options.name - The name of the table.
* @param {Data} options.data - Non-empty Array of Records to be inserted into the table
* @param {string[]} namespacePath - The namespace path to create the table in (defaults to root namespace)
* @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
*
*/
abstract createTable(
@@ -201,7 +201,7 @@ export abstract class Connection {
name: string;
data: Data;
} & Partial<CreateTableOptions>,
namespacePath?: string[],
namespace?: string[],
): Promise<Table>;
/**
* Creates a new Table and initialize it with new data.
@@ -220,13 +220,13 @@ export abstract class Connection {
* @param {string} name - The name of the table.
* @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
* to be inserted into the table
* @param {string[]} namespacePath - The namespace path to create the table in (defaults to root namespace)
* @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
* @param {Partial<CreateTableOptions>} options - Additional options
*/
abstract createTable(
name: string,
data: Record<string, unknown>[] | TableLike,
namespacePath?: string[],
namespace?: string[],
options?: Partial<CreateTableOptions>,
): Promise<Table>;
@@ -245,28 +245,28 @@ export abstract class Connection {
* Creates a new empty Table
* @param {string} name - The name of the table.
* @param {Schema} schema - The schema of the table
* @param {string[]} namespacePath - The namespace path to create the table in (defaults to root namespace)
* @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
* @param {Partial<CreateTableOptions>} options - Additional options
*/
abstract createEmptyTable(
name: string,
schema: import("./arrow").SchemaLike,
namespacePath?: string[],
namespace?: string[],
options?: Partial<CreateTableOptions>,
): Promise<Table>;
/**
* Drop an existing table.
* @param {string} name The name of the table to drop.
* @param {string[]} namespacePath The namespace path of the table (defaults to root namespace).
* @param {string[]} namespace The namespace of the table (defaults to root namespace).
*/
abstract dropTable(name: string, namespacePath?: string[]): Promise<void>;
abstract dropTable(name: string, namespace?: string[]): Promise<void>;
/**
* Drop all tables in the database.
* @param {string[]} namespacePath The namespace path to drop tables from (defaults to root namespace).
* @param {string[]} namespace The namespace to drop tables from (defaults to root namespace).
*/
abstract dropAllTables(namespacePath?: string[]): Promise<void>;
abstract dropAllTables(namespace?: string[]): Promise<void>;
/**
* Clone a table from a source table.
@@ -279,7 +279,7 @@ export abstract class Connection {
* @param {string} targetTableName - The name of the target table to create.
* @param {string} sourceUri - The URI of the source table to clone from.
* @param {object} options - Clone options.
* @param {string[]} options.targetNamespacePath - The namespace path for the target table (defaults to root namespace).
* @param {string[]} options.targetNamespace - The namespace for the target table (defaults to root namespace).
* @param {number} options.sourceVersion - The version of the source table to clone.
* @param {string} options.sourceTag - The tag of the source table to clone.
* @param {boolean} options.isShallow - Whether to perform a shallow clone (defaults to true).
@@ -288,7 +288,7 @@ export abstract class Connection {
targetTableName: string,
sourceUri: string,
options?: {
targetNamespacePath?: string[];
targetNamespace?: string[];
sourceVersion?: number;
sourceTag?: string;
isShallow?: boolean;
@@ -319,25 +319,25 @@ export class LocalConnection extends Connection {
}
async tableNames(
namespacePathOrOptions?: string[] | Partial<TableNamesOptions>,
namespaceOrOptions?: string[] | Partial<TableNamesOptions>,
options?: Partial<TableNamesOptions>,
): Promise<string[]> {
// Detect if first argument is namespacePath array or options object
let namespacePath: string[] | undefined;
// Detect if first argument is namespace array or options object
let namespace: string[] | undefined;
let tableNamesOptions: Partial<TableNamesOptions> | undefined;
if (Array.isArray(namespacePathOrOptions)) {
// First argument is namespacePath array
namespacePath = namespacePathOrOptions;
if (Array.isArray(namespaceOrOptions)) {
// First argument is namespace array
namespace = namespaceOrOptions;
tableNamesOptions = options;
} else {
// First argument is options object (backwards compatibility)
namespacePath = undefined;
tableNamesOptions = namespacePathOrOptions;
namespace = undefined;
tableNamesOptions = namespaceOrOptions;
}
return this.inner.tableNames(
namespacePath ?? [],
namespace ?? [],
tableNamesOptions?.startAfter,
tableNamesOptions?.limit,
);
@@ -345,12 +345,12 @@ export class LocalConnection extends Connection {
async openTable(
name: string,
namespacePath?: string[],
namespace?: string[],
options?: Partial<OpenTableOptions>,
): Promise<Table> {
const innerTable = await this.inner.openTable(
name,
namespacePath ?? [],
namespace ?? [],
cleanseStorageOptions(options?.storageOptions),
options?.indexCacheSize,
);
@@ -362,7 +362,7 @@ export class LocalConnection extends Connection {
targetTableName: string,
sourceUri: string,
options?: {
targetNamespacePath?: string[];
targetNamespace?: string[];
sourceVersion?: number;
sourceTag?: string;
isShallow?: boolean;
@@ -371,7 +371,7 @@ export class LocalConnection extends Connection {
const innerTable = await this.inner.cloneTable(
targetTableName,
sourceUri,
options?.targetNamespacePath ?? [],
options?.targetNamespace ?? [],
options?.sourceVersion ?? null,
options?.sourceTag ?? null,
options?.isShallow ?? true,
@@ -406,42 +406,42 @@ export class LocalConnection extends Connection {
nameOrOptions:
| string
| ({ name: string; data: Data } & Partial<CreateTableOptions>),
dataOrNamespacePath?: Record<string, unknown>[] | TableLike | string[],
namespacePathOrOptions?: string[] | Partial<CreateTableOptions>,
dataOrNamespace?: Record<string, unknown>[] | TableLike | string[],
namespaceOrOptions?: string[] | Partial<CreateTableOptions>,
options?: Partial<CreateTableOptions>,
): Promise<Table> {
if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
// First overload: createTable(options, namespacePath?)
// First overload: createTable(options, namespace?)
const { name, data, ...createOptions } = nameOrOptions;
const namespacePath = dataOrNamespacePath as string[] | undefined;
return this._createTableImpl(name, data, namespacePath, createOptions);
const namespace = dataOrNamespace as string[] | undefined;
return this._createTableImpl(name, data, namespace, createOptions);
}
// Second overload: createTable(name, data, namespacePath?, options?)
// Second overload: createTable(name, data, namespace?, options?)
const name = nameOrOptions;
const data = dataOrNamespacePath as Record<string, unknown>[] | TableLike;
const data = dataOrNamespace as Record<string, unknown>[] | TableLike;
// Detect if third argument is namespacePath array or options object
let namespacePath: string[] | undefined;
// Detect if third argument is namespace array or options object
let namespace: string[] | undefined;
let createOptions: Partial<CreateTableOptions> | undefined;
if (Array.isArray(namespacePathOrOptions)) {
// Third argument is namespacePath array
namespacePath = namespacePathOrOptions;
if (Array.isArray(namespaceOrOptions)) {
// Third argument is namespace array
namespace = namespaceOrOptions;
createOptions = options;
} else {
// Third argument is options object (backwards compatibility)
namespacePath = undefined;
createOptions = namespacePathOrOptions;
namespace = undefined;
createOptions = namespaceOrOptions;
}
return this._createTableImpl(name, data, namespacePath, createOptions);
return this._createTableImpl(name, data, namespace, createOptions);
}
private async _createTableImpl(
name: string,
data: Data,
namespacePath?: string[],
namespace?: string[],
options?: Partial<CreateTableOptions>,
): Promise<Table> {
if (data === undefined) {
@@ -455,7 +455,7 @@ export class LocalConnection extends Connection {
name,
buf,
mode,
namespacePath ?? [],
namespace ?? [],
storageOptions,
);
@@ -465,21 +465,21 @@ export class LocalConnection extends Connection {
async createEmptyTable(
name: string,
schema: import("./arrow").SchemaLike,
namespacePathOrOptions?: string[] | Partial<CreateTableOptions>,
namespaceOrOptions?: string[] | Partial<CreateTableOptions>,
options?: Partial<CreateTableOptions>,
): Promise<Table> {
// Detect if third argument is namespacePath array or options object
let namespacePath: string[] | undefined;
// Detect if third argument is namespace array or options object
let namespace: string[] | undefined;
let createOptions: Partial<CreateTableOptions> | undefined;
if (Array.isArray(namespacePathOrOptions)) {
// Third argument is namespacePath array
namespacePath = namespacePathOrOptions;
if (Array.isArray(namespaceOrOptions)) {
// Third argument is namespace array
namespace = namespaceOrOptions;
createOptions = options;
} else {
// Third argument is options object (backwards compatibility)
namespacePath = undefined;
createOptions = namespacePathOrOptions;
namespace = undefined;
createOptions = namespaceOrOptions;
}
let mode: string = createOptions?.mode ?? "create";
@@ -502,18 +502,18 @@ export class LocalConnection extends Connection {
name,
buf,
mode,
namespacePath ?? [],
namespace ?? [],
storageOptions,
);
return new LocalTable(innerTable);
}
async dropTable(name: string, namespacePath?: string[]): Promise<void> {
return this.inner.dropTable(name, namespacePath ?? []);
async dropTable(name: string, namespace?: string[]): Promise<void> {
return this.inner.dropTable(name, namespace ?? []);
}
async dropAllTables(namespacePath?: string[]): Promise<void> {
return this.inner.dropAllTables(namespacePath ?? []);
async dropAllTables(namespace?: string[]): Promise<void> {
return this.inner.dropAllTables(namespace ?? []);
}
}

View File

@@ -5,7 +5,6 @@ import {
Table as ArrowTable,
type IntoVector,
RecordBatch,
extractVectorBuffer,
fromBufferToRecordBatch,
fromRecordBatchToBuffer,
tableFromIPC,
@@ -662,8 +661,10 @@ export class VectorQuery extends StandardQueryBase<NativeVectorQuery> {
const res = (async () => {
try {
const v = await vector;
const arr = Float32Array.from(v);
//
// biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
const value: any = this.addQueryVector(v);
const value: any = this.addQueryVector(arr);
const inner = value.inner as
| NativeVectorQuery
| Promise<NativeVectorQuery>;
@@ -675,12 +676,7 @@ export class VectorQuery extends StandardQueryBase<NativeVectorQuery> {
return new VectorQuery(res);
} else {
super.doCall((inner) => {
const raw = Array.isArray(vector) ? null : extractVectorBuffer(vector);
if (raw) {
inner.addQueryVectorRaw(raw.data, raw.dtype);
} else {
inner.addQueryVector(Float32Array.from(vector as number[]));
}
inner.addQueryVector(Float32Array.from(vector));
});
return this;
}
@@ -769,23 +765,14 @@ export class Query extends StandardQueryBase<NativeQuery> {
* a default `limit` of 10 will be used. @see {@link Query#limit}
*/
nearestTo(vector: IntoVector): VectorQuery {
const callNearestTo = (
inner: NativeQuery,
resolved: Float32Array | Float64Array | Uint8Array | number[],
): NativeVectorQuery => {
const raw = Array.isArray(resolved)
? null
: extractVectorBuffer(resolved);
if (raw) {
return inner.nearestToRaw(raw.data, raw.dtype);
}
return inner.nearestTo(Float32Array.from(resolved as number[]));
};
if (this.inner instanceof Promise) {
const nativeQuery = this.inner.then(async (inner) => {
const resolved = vector instanceof Promise ? await vector : vector;
return callNearestTo(inner, resolved);
if (vector instanceof Promise) {
const arr = await vector.then((v) => Float32Array.from(v));
return inner.nearestTo(arr);
} else {
return inner.nearestTo(Float32Array.from(vector));
}
});
return new VectorQuery(nativeQuery);
}
@@ -793,8 +780,10 @@ export class Query extends StandardQueryBase<NativeQuery> {
const res = (async () => {
try {
const v = await vector;
const arr = Float32Array.from(v);
//
// biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
const value: any = this.nearestTo(v);
const value: any = this.nearestTo(arr);
const inner = value.inner as
| NativeVectorQuery
| Promise<NativeVectorQuery>;
@@ -805,7 +794,7 @@ export class Query extends StandardQueryBase<NativeQuery> {
})();
return new VectorQuery(res);
} else {
const vectorQuery = callNearestTo(this.inner, vector);
const vectorQuery = this.inner.nearestTo(Float32Array.from(vector));
return new VectorQuery(vectorQuery);
}
}

View File

@@ -5,15 +5,12 @@ import {
Table as ArrowTable,
Data,
DataType,
Field,
IntoVector,
MultiVector,
Schema,
dataTypeToJson,
fromDataToBuffer,
fromTableToBuffer,
isMultiVector,
makeEmptyTable,
tableFromIPC,
} from "./arrow";
@@ -87,16 +84,6 @@ export interface OptimizeOptions {
* tbl.optimize({cleanupOlderThan: new Date()});
*/
cleanupOlderThan: Date;
/**
* Because they may be part of an in-progress transaction, files newer than
* 7 days old are not deleted by default. If you are sure that there are no
* in-progress transactions, then you can set this to true to delete all
* files older than `cleanupOlderThan`.
*
* **WARNING**: This should only be set to true if you can guarantee that
* no other process is currently working on this dataset. Otherwise the
* dataset could be put into a corrupted state.
*/
deleteUnverified: boolean;
}
@@ -394,16 +381,15 @@ export abstract class Table {
abstract vectorSearch(vector: IntoVector | MultiVector): VectorQuery;
/**
* Add new columns with defined values.
* @param {AddColumnsSql[] | Field | Field[] | Schema} newColumnTransforms Either:
* - An array of objects with column names and SQL expressions to calculate values
* - A single Arrow Field defining one column with its data type (column will be initialized with null values)
* - An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
* - An Arrow Schema defining columns with their data types (columns will be initialized with null values)
* @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
* the SQL expression to use to calculate the value of the new column. These
* expressions will be evaluated for each row in the table, and can
* reference existing columns in the table.
* @returns {Promise<AddColumnsResult>} A promise that resolves to an object
* containing the new version number of the table after adding the columns.
*/
abstract addColumns(
newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
newColumnTransforms: AddColumnsSql[],
): Promise<AddColumnsResult>;
/**
@@ -515,7 +501,19 @@ export abstract class Table {
* - Index: Optimizes the indices, adding new data to existing indices
*
*
* The frequency an application should call optimize is based on the frequency of
* Experimental API
* ----------------
*
* The optimization process is undergoing active development and may change.
* Our goal with these changes is to improve the performance of optimization and
* reduce the complexity.
*
* That being said, it is essential today to run optimize if you want the best
* performance. It should be stable and safe to use in production, but it our
* hope that the API may be simplified (or not even need to be called) in the
* future.
*
* The frequency an application shoudl call optimize is based on the frequency of
* data modifications. If data is frequently added, deleted, or updated then
* optimize should be run frequently. A good rule of thumb is to run optimize if
* you have added or modified 100,000 or more records or run more than 20 data
@@ -808,40 +806,9 @@ export class LocalTable extends Table {
// TODO: Support BatchUDF
async addColumns(
newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
newColumnTransforms: AddColumnsSql[],
): Promise<AddColumnsResult> {
// Handle single Field -> convert to array of Fields
if (newColumnTransforms instanceof Field) {
newColumnTransforms = [newColumnTransforms];
}
// Handle array of Fields -> convert to Schema
if (
Array.isArray(newColumnTransforms) &&
newColumnTransforms.length > 0 &&
newColumnTransforms[0] instanceof Field
) {
const fields = newColumnTransforms as Field[];
newColumnTransforms = new Schema(fields);
}
// Handle Schema -> use schema-based approach
if (newColumnTransforms instanceof Schema) {
const schema = newColumnTransforms;
// Convert schema to buffer using Arrow IPC format
const emptyTable = makeEmptyTable(schema);
const schemaBuf = await fromTableToBuffer(emptyTable);
return await this.inner.addColumnsWithSchema(schemaBuf);
}
// Handle SQL expressions (existing functionality)
if (Array.isArray(newColumnTransforms)) {
return await this.inner.addColumns(
newColumnTransforms as AddColumnsSql[],
);
}
throw new Error("Invalid input type for addColumns");
return await this.inner.addColumns(newColumnTransforms);
}
async alterColumns(

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.28.0-beta.10",
"version": "0.27.0-beta.5",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.28.0-beta.10",
"version": "0.27.0-beta.5",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.28.0-beta.10",
"version": "0.27.0-beta.5",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.28.0-beta.10",
"version": "0.27.0-beta.5",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.28.0-beta.10",
"version": "0.27.0-beta.5",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.28.0-beta.10",
"version": "0.27.0-beta.5",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.28.0-beta.10",
"version": "0.27.0-beta.5",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

4599
nodejs/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.28.0-beta.10",
"version": "0.27.0-beta.5",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -67,12 +67,6 @@ impl Connection {
builder = builder.storage_option(key, value);
}
}
if let Some(manifest_enabled) = options.manifest_enabled {
builder = builder.manifest_enabled(manifest_enabled);
}
if let Some(namespace_client_properties) = options.namespace_client_properties {
builder = builder.namespace_client_properties(namespace_client_properties);
}
// Create client config, optionally with header provider
let client_config = options.client_config.unwrap_or_default();
@@ -125,12 +119,12 @@ impl Connection {
#[napi(catch_unwind)]
pub async fn table_names(
&self,
namespace_path: Option<Vec<String>>,
namespace: Vec<String>,
start_after: Option<String>,
limit: Option<u32>,
) -> napi::Result<Vec<String>> {
let mut op = self.get_inner()?.table_names();
op = op.namespace(namespace_path.unwrap_or_default());
op = op.namespace(namespace);
if let Some(start_after) = start_after {
op = op.start_after(start_after);
}
@@ -152,7 +146,7 @@ impl Connection {
name: String,
buf: Buffer,
mode: String,
namespace_path: Option<Vec<String>>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
) -> napi::Result<Table> {
let batches = ipc_file_to_batches(buf.to_vec())
@@ -160,7 +154,7 @@ impl Connection {
let mode = Self::parse_create_mode_str(&mode)?;
let mut builder = self.get_inner()?.create_table(&name, batches).mode(mode);
builder = builder.namespace(namespace_path.unwrap_or_default());
builder = builder.namespace(namespace);
if let Some(storage_options) = storage_options {
for (key, value) in storage_options {
@@ -177,7 +171,7 @@ impl Connection {
name: String,
schema_buf: Buffer,
mode: String,
namespace_path: Option<Vec<String>>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
) -> napi::Result<Table> {
let schema = ipc_file_to_schema(schema_buf.to_vec()).map_err(|e| {
@@ -189,7 +183,7 @@ impl Connection {
.create_empty_table(&name, schema)
.mode(mode);
builder = builder.namespace(namespace_path.unwrap_or_default());
builder = builder.namespace(namespace);
if let Some(storage_options) = storage_options {
for (key, value) in storage_options {
@@ -204,13 +198,13 @@ impl Connection {
pub async fn open_table(
&self,
name: String,
namespace_path: Option<Vec<String>>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
index_cache_size: Option<u32>,
) -> napi::Result<Table> {
let mut builder = self.get_inner()?.open_table(&name);
builder = builder.namespace(namespace_path.unwrap_or_default());
builder = builder.namespace(namespace);
if let Some(storage_options) = storage_options {
for (key, value) in storage_options {
@@ -229,7 +223,7 @@ impl Connection {
&self,
target_table_name: String,
source_uri: String,
target_namespace_path: Option<Vec<String>>,
target_namespace: Vec<String>,
source_version: Option<i64>,
source_tag: Option<String>,
is_shallow: bool,
@@ -238,7 +232,7 @@ impl Connection {
.get_inner()?
.clone_table(&target_table_name, &source_uri);
builder = builder.target_namespace(target_namespace_path.unwrap_or_default());
builder = builder.target_namespace(target_namespace);
if let Some(version) = source_version {
builder = builder.source_version(version as u64);
@@ -256,21 +250,18 @@ impl Connection {
/// Drop table with the name. Or raise an error if the table does not exist.
#[napi(catch_unwind)]
pub async fn drop_table(
&self,
name: String,
namespace_path: Option<Vec<String>>,
) -> napi::Result<()> {
let ns = namespace_path.unwrap_or_default();
pub async fn drop_table(&self, name: String, namespace: Vec<String>) -> napi::Result<()> {
self.get_inner()?
.drop_table(&name, &ns)
.drop_table(&name, &namespace)
.await
.default_error()
}
#[napi(catch_unwind)]
pub async fn drop_all_tables(&self, namespace_path: Option<Vec<String>>) -> napi::Result<()> {
let ns = namespace_path.unwrap_or_default();
self.get_inner()?.drop_all_tables(&ns).await.default_error()
pub async fn drop_all_tables(&self, namespace: Vec<String>) -> napi::Result<()> {
self.get_inner()?
.drop_all_tables(&namespace)
.await
.default_error()
}
}

View File

@@ -35,15 +35,8 @@ pub struct ConnectionOptions {
pub read_consistency_interval: Option<f64>,
/// (For LanceDB OSS only): configuration for object storage.
///
/// The available options are described at https://docs.lancedb.com/storage/
/// The available options are described at https://lancedb.com/docs/storage/
pub storage_options: Option<HashMap<String, String>>,
/// (For LanceDB OSS only): use directory namespace manifests as the source
/// of truth for table metadata. Existing directory-listed root tables are
/// migrated into the manifest on access.
pub manifest_enabled: Option<bool>,
/// (For LanceDB OSS only): extra properties for the backing namespace
/// client used by manifest-enabled native connections.
pub namespace_client_properties: Option<HashMap<String, String>>,
/// (For LanceDB OSS only): the session to use for this connection. Holds
/// shared caches and other session-specific state.
pub session: Option<session::Session>,

View File

@@ -3,12 +3,6 @@
use std::sync::Arc;
use arrow_array::{
Array, Float16Array as ArrowFloat16Array, Float32Array as ArrowFloat32Array,
Float64Array as ArrowFloat64Array, UInt8Array as ArrowUInt8Array,
};
use arrow_buffer::ScalarBuffer;
use half::f16;
use lancedb::index::scalar::{
BooleanQuery, BoostQuery, FtsQuery, FullTextSearchQuery, MatchQuery, MultiMatchQuery, Occur,
Operator, PhraseQuery,
@@ -30,33 +24,6 @@ use crate::rerankers::RerankHybridCallbackArgs;
use crate::rerankers::Reranker;
use crate::util::{parse_distance_type, schema_to_buffer};
fn bytes_to_arrow_array(data: Uint8Array, dtype: String) -> napi::Result<Arc<dyn Array>> {
let buf = arrow_buffer::Buffer::from(data.to_vec());
let num_bytes = buf.len();
match dtype.as_str() {
"float16" => {
let scalar_buf = ScalarBuffer::<f16>::new(buf, 0, num_bytes / 2);
Ok(Arc::new(ArrowFloat16Array::new(scalar_buf, None)))
}
"float32" => {
let scalar_buf = ScalarBuffer::<f32>::new(buf, 0, num_bytes / 4);
Ok(Arc::new(ArrowFloat32Array::new(scalar_buf, None)))
}
"float64" => {
let scalar_buf = ScalarBuffer::<f64>::new(buf, 0, num_bytes / 8);
Ok(Arc::new(ArrowFloat64Array::new(scalar_buf, None)))
}
"uint8" => {
let scalar_buf = ScalarBuffer::<u8>::new(buf, 0, num_bytes);
Ok(Arc::new(ArrowUInt8Array::new(scalar_buf, None)))
}
_ => Err(napi::Error::from_reason(format!(
"Unsupported vector dtype: {}. Expected one of: float16, float32, float64, uint8",
dtype
))),
}
}
#[napi]
pub struct Query {
inner: LanceDbQuery,
@@ -111,13 +78,6 @@ impl Query {
Ok(VectorQuery { inner })
}
#[napi]
pub fn nearest_to_raw(&mut self, data: Uint8Array, dtype: String) -> Result<VectorQuery> {
let array = bytes_to_arrow_array(data, dtype)?;
let inner = self.inner.clone().nearest_to(array).default_error()?;
Ok(VectorQuery { inner })
}
#[napi]
pub fn fast_search(&mut self) {
self.inner = self.inner.clone().fast_search();
@@ -203,13 +163,6 @@ impl VectorQuery {
Ok(())
}
#[napi]
pub fn add_query_vector_raw(&mut self, data: Uint8Array, dtype: String) -> Result<()> {
let array = bytes_to_arrow_array(data, dtype)?;
self.inner = self.inner.clone().add_query_vector(array).default_error()?;
Ok(())
}
#[napi]
pub fn distance_type(&mut self, distance_type: String) -> napi::Result<()> {
let distance_type = parse_distance_type(distance_type)?;

View File

@@ -92,13 +92,6 @@ pub struct ClientConfig {
pub extra_headers: Option<HashMap<String, String>>,
pub id_delimiter: Option<String>,
pub tls_config: Option<TlsConfig>,
/// User identifier for tracking purposes.
///
/// This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
/// It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
/// Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
/// variable that contains the user ID value.
pub user_id: Option<String>,
}
impl From<TimeoutConfig> for lancedb::remote::TimeoutConfig {
@@ -152,7 +145,6 @@ impl From<ClientConfig> for lancedb::remote::ClientConfig {
id_delimiter: config.id_delimiter,
tls_config: config.tls_config.map(Into::into),
header_provider: None, // the header provider is set separately later
user_id: config.user_id,
}
}
}

View File

@@ -18,7 +18,6 @@ type RerankHybridFn = ThreadsafeFunction<
RerankHybridCallbackArgs,
Status,
false,
true,
>;
/// Reranker implementation that "wraps" a NodeJS Reranker implementation.
@@ -33,10 +32,7 @@ impl Reranker {
pub fn new(
rerank_hybrid: Function<RerankHybridCallbackArgs, Promise<Buffer>>,
) -> napi::Result<Self> {
let rerank_hybrid = rerank_hybrid
.build_threadsafe_function()
.weak::<true>()
.build()?;
let rerank_hybrid = rerank_hybrid.build_threadsafe_function().build()?;
Ok(Self { rerank_hybrid })
}
}

View File

@@ -3,7 +3,7 @@
use std::collections::HashMap;
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
use lancedb::ipc::ipc_file_to_batches;
use lancedb::table::{
AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
OptimizeAction, OptimizeOptions, Table as LanceDbTable,
@@ -279,23 +279,6 @@ impl Table {
Ok(res.into())
}
#[napi(catch_unwind)]
pub async fn add_columns_with_schema(
&self,
schema_buf: Buffer,
) -> napi::Result<AddColumnsResult> {
let schema = ipc_file_to_schema(schema_buf.to_vec())
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC schema: {}", e)))?;
let transforms = NewColumnTransform::AllNulls(schema);
let res = self
.inner_ref()?
.add_columns(transforms, None)
.await
.default_error()?;
Ok(res.into())
}
#[napi(catch_unwind)]
pub async fn alter_columns(
&self,

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.31.0-beta.10"
current_version = "0.30.0-beta.5"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

2
python/.gitignore vendored
View File

@@ -1,5 +1,3 @@
# Test data created by some example tests
data/
_lancedb.pyd
# macOS debug symbols bundle generated during build
*.dSYM/

View File

@@ -1,7 +1,6 @@
[package]
name = "lancedb-python"
version = "0.31.0-beta.10"
publish = false
version = "0.30.0-beta.5"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true
@@ -24,7 +23,6 @@ lance-namespace.workspace = true
lance-namespace-impls.workspace = true
lance-io.workspace = true
env_logger.workspace = true
log.workspace = true
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.26", features = [
"attributes",

View File

@@ -183,6 +183,7 @@
| stack-data | 0.6.3 | MIT License | http://github.com/alexmojaki/stack_data |
| sympy | 1.14.0 | BSD License | https://sympy.org |
| tabulate | 0.9.0 | MIT License | https://github.com/astanin/python-tabulate |
| tantivy | 0.25.1 | UNKNOWN | UNKNOWN |
| threadpoolctl | 3.6.0 | BSD License | https://github.com/joblib/threadpoolctl |
| timm | 1.0.24 | Apache Software License | https://github.com/huggingface/pytorch-image-models |
| tinycss2 | 1.4.0 | BSD License | https://www.courtbouillon.org/tinycss2 |

View File

@@ -3,10 +3,10 @@ name = "lancedb"
# version in Cargo.toml
dynamic = ["version"]
dependencies = [
"deprecation>=2.1.0",
"numpy>=1.24.0",
"deprecation",
"numpy",
"overrides>=0.7; python_version<'3.12'",
"packaging>=23.0",
"packaging",
"pyarrow>=16",
"pydantic>=1.10",
"tqdm>=4.27.0",
@@ -45,50 +45,51 @@ repository = "https://github.com/lancedb/lancedb"
[project.optional-dependencies]
pylance = [
"pylance>=5.0.0b5",
"pylance>=4.0.0b7",
]
tests = [
"aiohttp>=3.9.0",
"boto3>=1.28.57",
"aiohttp",
"boto3",
"pandas>=1.4",
"pytest>=7.0",
"pytest-mock>=3.10",
"pytest-asyncio>=0.21",
"duckdb>=0.9.0",
"pytz>=2023.3",
"pytest",
"pytest-mock",
"pytest-asyncio",
"duckdb",
"pytz",
"polars>=0.19, <=1.3.0",
"pyarrow-stubs>=16.0",
"pylance>=5.0.0b5",
"requests>=2.31.0",
"tantivy",
"pyarrow-stubs",
"pylance>=4.0.0b7",
"requests",
"datafusion>=52,<53",
]
dev = [
"ruff>=0.3.0",
"pre-commit>=3.5.0",
"pyright>=1.1.350",
"ruff",
"pre-commit",
"pyright",
'typing-extensions>=4.0.0; python_version < "3.11"',
]
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings-python"]
clip = ["torch", "pillow>=12.1.1", "open-clip-torch"]
siglip = ["torch", "pillow>=12.1.1", "transformers>=4.41.0","sentencepiece"]
clip = ["torch", "pillow", "open-clip-torch"]
siglip = ["torch", "pillow", "transformers>=4.41.0","sentencepiece"]
embeddings = [
"requests>=2.31.0",
"openai>=1.6.1",
"sentence-transformers>=2.2.0",
"torch>=2.0.0",
"pillow>=12.1.1",
"open-clip-torch>=2.20.0",
"cohere>=4.0",
"sentence-transformers",
"torch",
"pillow",
"open-clip-torch",
"cohere",
"colpali-engine>=0.3.10",
"huggingface_hub>=0.19.0",
"InstructorEmbedding>=1.0.1",
"google-genai>=1.0.0",
"huggingface_hub",
"InstructorEmbedding",
"google.generativeai",
"boto3>=1.28.57",
"awscli>=1.44.38",
"awscli>=1.29.57",
"botocore>=1.31.57",
'ibm-watsonx-ai>=1.1.2; python_version >= "3.10"',
"ollama>=0.3.0",
"sentencepiece>=0.1.99"
"sentencepiece"
]
azure = ["adlfs>=2024.2.0"]

View File

@@ -6,7 +6,7 @@ import importlib.metadata
import os
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from typing import Dict, Optional, Union, Any, List
from typing import Dict, Optional, Union, Any
import warnings
__version__ = importlib.metadata.version("lancedb")
@@ -15,9 +15,9 @@ from ._lancedb import connect as lancedb_connect
from .common import URI, sanitize_uri
from urllib.parse import urlparse
from .db import AsyncConnection, DBConnection, LanceDBConnection
from .io import StorageOptionsProvider
from .remote import ClientConfig
from .remote.db import RemoteDBConnection
from .expr import Expr, col, lit, func
from .schema import vector
from .table import AsyncTable, Table
from ._lancedb import Session
@@ -63,7 +63,7 @@ def _check_s3_bucket_with_dots(
def connect(
uri: Optional[URI] = None,
uri: URI,
*,
api_key: Optional[str] = None,
region: str = "us-east-1",
@@ -73,19 +73,14 @@ def connect(
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
manifest_enabled: bool = False,
namespace_client_impl: Optional[str] = None,
namespace_client_properties: Optional[Dict[str, str]] = None,
namespace_client_pushdown_operations: Optional[List[str]] = None,
**kwargs: Any,
) -> DBConnection:
"""Connect to a LanceDB database.
Parameters
----------
uri: str or Path, optional
The uri of the database. When ``namespace_client_impl`` is provided you may
omit ``uri`` and connect through a namespace client instead.
uri: str or Path
The uri of the database.
api_key: str, optional
If presented, connect to LanceDB cloud.
Otherwise, connect to a database on file system or cloud storage.
@@ -111,29 +106,13 @@ def connect(
default configuration is used.
storage_options: dict, optional
Additional options for the storage backend. See available options at
<https://docs.lancedb.com/storage/>
manifest_enabled : bool, default False
When true for local/native connections, use directory namespace
manifests as the source of truth for table metadata. Existing
directory-listed root tables are migrated into the manifest on access.
<https://lancedb.com/docs/storage/>
session: Session, optional
(For LanceDB OSS only)
A session to use for this connection. Sessions allow you to configure
cache sizes for index and metadata caches, which can significantly
impact memory use and performance. They can also be re-used across
multiple connections to share the same cache state.
namespace_client_impl : str, optional
When provided along with ``namespace_client_properties``, ``connect``
returns a namespace-backed connection by delegating to
:func:`connect_namespace`. The value identifies which namespace
implementation to load (e.g., ``"dir"`` or ``"rest"``).
namespace_client_properties : dict, optional
Configuration to pass to the namespace client implementation. Required
when ``namespace_client_impl`` is set.
namespace_client_pushdown_operations : list[str], optional
Only used when ``namespace_client_properties`` is provided. Forwards to
:func:`connect_namespace` to control which operations are executed on the
namespace service (e.g., ``["QueryTable", "CreateTable"]``).
Examples
--------
@@ -153,48 +132,11 @@ def connect(
>>> db = lancedb.connect("db://my_database", api_key="ldb_...",
... client_config={"retry_config": {"retries": 5}})
Connect to a namespace-backed database:
>>> db = lancedb.connect(namespace_client_impl="dir",
... namespace_client_properties={"root": "/tmp/ns"})
Returns
-------
conn : DBConnection
A connection to a LanceDB database.
"""
if namespace_client_impl is not None:
if namespace_client_properties is None:
raise ValueError(
"namespace_client_properties must be provided when "
"namespace_client_impl is set"
)
if kwargs:
raise ValueError(f"Unknown keyword arguments: {kwargs}")
return connect_namespace(
namespace_client_impl,
namespace_client_properties,
read_consistency_interval=read_consistency_interval,
storage_options=storage_options,
session=session,
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
)
if namespace_client_properties is not None and not manifest_enabled:
raise ValueError(
"namespace_client_impl must be provided when using "
"namespace_client_properties unless manifest_enabled=True"
)
if namespace_client_pushdown_operations is not None:
raise ValueError(
"namespace_client_pushdown_operations is only valid when "
"connecting through a namespace"
)
if uri is None:
raise ValueError(
"uri is required when not connecting through a namespace client"
)
if isinstance(uri, str) and uri.startswith("db://"):
if api_key is None:
api_key = os.environ.get("LANCEDB_API_KEY")
@@ -223,92 +165,9 @@ def connect(
read_consistency_interval=read_consistency_interval,
storage_options=storage_options,
session=session,
manifest_enabled=manifest_enabled,
namespace_client_properties=namespace_client_properties,
)
WORKER_PROPERTY_PREFIX = "_lancedb_worker_"
def _apply_worker_overrides(props: dict[str, str]) -> dict[str, str]:
"""Apply worker property overrides.
Any key starting with ``_lancedb_worker_`` is extracted, the prefix
is stripped, and the resulting key-value pair is put back into the
map (overriding the existing value if present). The original
prefixed key is removed.
"""
worker_keys = [k for k in props if k.startswith(WORKER_PROPERTY_PREFIX)]
if not worker_keys:
return props
result = dict(props)
for key in worker_keys:
value = result.pop(key)
real_key = key[len(WORKER_PROPERTY_PREFIX) :]
result[real_key] = value
return result
def deserialize_conn(
data: str,
*,
for_worker: bool = False,
) -> DBConnection:
"""Reconstruct a DBConnection from a serialized string.
The string must have been produced by
:meth:`DBConnection.serialize`.
Parameters
----------
data : str
String produced by ``serialize()``.
for_worker : bool, default False
When ``True``, any namespace client property whose key starts
with ``_lancedb_worker_`` has that prefix stripped and the
value overrides the corresponding property. For example,
``_lancedb_worker_uri`` replaces ``uri``.
Returns
-------
DBConnection
A new connection matching the serialized state.
"""
import json
parsed = json.loads(data)
connection_type = parsed.get("connection_type")
rci_secs = parsed.get("read_consistency_interval_seconds")
rci = timedelta(seconds=rci_secs) if rci_secs is not None else None
storage_options = parsed.get("storage_options")
if connection_type == "namespace":
props = dict(parsed.get("namespace_client_properties") or {})
if for_worker:
props = _apply_worker_overrides(props)
return connect_namespace(
namespace_client_impl=parsed["namespace_client_impl"],
namespace_client_properties=props,
read_consistency_interval=rci,
storage_options=storage_options,
namespace_client_pushdown_operations=parsed.get(
"namespace_client_pushdown_operations"
),
)
elif connection_type == "local":
return LanceDBConnection(
parsed["uri"],
read_consistency_interval=rci,
storage_options=storage_options,
manifest_enabled=parsed.get("manifest_enabled", False),
namespace_client_properties=parsed.get("namespace_client_properties"),
)
else:
raise ValueError(f"Unknown connection_type: {connection_type}")
async def connect_async(
uri: URI,
*,
@@ -319,8 +178,6 @@ async def connect_async(
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
manifest_enabled: bool = False,
namespace_client_properties: Optional[Dict[str, str]] = None,
) -> AsyncConnection:
"""Connect to a LanceDB database.
@@ -353,20 +210,13 @@ async def connect_async(
default configuration is used.
storage_options: dict, optional
Additional options for the storage backend. See available options at
<https://docs.lancedb.com/storage/>
<https://lancedb.com/docs/storage/>
session: Session, optional
(For LanceDB OSS only)
A session to use for this connection. Sessions allow you to configure
cache sizes for index and metadata caches, which can significantly
impact memory use and performance. They can also be re-used across
multiple connections to share the same cache state.
manifest_enabled : bool, default False
When true for local/native connections, use directory namespace
manifests as the source of truth for table metadata. Existing
directory-listed root tables are migrated into the manifest on access.
namespace_client_properties : dict, optional
Additional directory namespace client properties to use with
``manifest_enabled=True``.
Examples
--------
@@ -409,8 +259,6 @@ async def connect_async(
client_config,
storage_options,
session,
manifest_enabled,
namespace_client_properties,
)
)
@@ -423,10 +271,6 @@ __all__ = [
"AsyncConnection",
"AsyncLanceNamespaceDBConnection",
"AsyncTable",
"col",
"Expr",
"func",
"lit",
"URI",
"sanitize_uri",
"vector",
@@ -435,6 +279,7 @@ __all__ = [
"LanceNamespaceDBConnection",
"RemoteDBConnection",
"Session",
"StorageOptionsProvider",
"Table",
"__version__",
]

View File

@@ -14,6 +14,7 @@ from .index import (
HnswSq,
FTS,
)
from .io import StorageOptionsProvider
from lance_namespace import (
ListNamespacesResponse,
CreateNamespaceResponse,
@@ -26,32 +27,6 @@ from .remote import ClientConfig
IvfHnswPq: type[HnswPq] = HnswPq
IvfHnswSq: type[HnswSq] = HnswSq
class PyExpr:
"""A type-safe DataFusion expression node (Rust-side handle)."""
def eq(self, other: "PyExpr") -> "PyExpr": ...
def ne(self, other: "PyExpr") -> "PyExpr": ...
def lt(self, other: "PyExpr") -> "PyExpr": ...
def lte(self, other: "PyExpr") -> "PyExpr": ...
def gt(self, other: "PyExpr") -> "PyExpr": ...
def gte(self, other: "PyExpr") -> "PyExpr": ...
def and_(self, other: "PyExpr") -> "PyExpr": ...
def or_(self, other: "PyExpr") -> "PyExpr": ...
def not_(self) -> "PyExpr": ...
def add(self, other: "PyExpr") -> "PyExpr": ...
def sub(self, other: "PyExpr") -> "PyExpr": ...
def mul(self, other: "PyExpr") -> "PyExpr": ...
def div(self, other: "PyExpr") -> "PyExpr": ...
def lower(self) -> "PyExpr": ...
def upper(self) -> "PyExpr": ...
def contains(self, substr: "PyExpr") -> "PyExpr": ...
def cast(self, data_type: pa.DataType) -> "PyExpr": ...
def to_sql(self) -> str: ...
def expr_col(name: str) -> PyExpr: ...
def expr_lit(value: Union[bool, int, float, str]) -> PyExpr: ...
def expr_func(name: str, args: List[PyExpr]) -> PyExpr: ...
class Session:
def __init__(
self,
@@ -71,35 +46,35 @@ class Connection(object):
async def close(self): ...
async def list_namespaces(
self,
namespace_path: Optional[List[str]] = None,
namespace: Optional[List[str]] = None,
page_token: Optional[str] = None,
limit: Optional[int] = None,
) -> ListNamespacesResponse: ...
async def create_namespace(
self,
namespace_path: List[str],
namespace: List[str],
mode: Optional[str] = None,
properties: Optional[Dict[str, str]] = None,
) -> CreateNamespaceResponse: ...
async def drop_namespace(
self,
namespace_path: List[str],
namespace: List[str],
mode: Optional[str] = None,
behavior: Optional[str] = None,
) -> DropNamespaceResponse: ...
async def describe_namespace(
self,
namespace_path: List[str],
namespace: List[str],
) -> DescribeNamespaceResponse: ...
async def list_tables(
self,
namespace_path: Optional[List[str]] = None,
namespace: Optional[List[str]] = None,
page_token: Optional[str] = None,
limit: Optional[int] = None,
) -> ListTablesResponse: ...
async def table_names(
self,
namespace_path: Optional[List[str]],
namespace: Optional[List[str]],
start_after: Optional[str],
limit: Optional[int],
) -> list[str]: ... # Deprecated: Use list_tables instead
@@ -108,8 +83,9 @@ class Connection(object):
name: str,
mode: str,
data: pa.RecordBatchReader,
namespace_path: Optional[List[str]] = None,
namespace: Optional[List[str]] = None,
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
location: Optional[str] = None,
) -> Table: ...
async def create_empty_table(
@@ -117,15 +93,17 @@ class Connection(object):
name: str,
mode: str,
schema: pa.Schema,
namespace_path: Optional[List[str]] = None,
namespace: Optional[List[str]] = None,
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
location: Optional[str] = None,
) -> Table: ...
async def open_table(
self,
name: str,
namespace_path: Optional[List[str]] = None,
namespace: Optional[List[str]] = None,
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
index_cache_size: Optional[int] = None,
location: Optional[str] = None,
) -> Table: ...
@@ -133,7 +111,7 @@ class Connection(object):
self,
target_table_name: str,
source_uri: str,
target_namespace_path: Optional[List[str]] = None,
target_namespace: Optional[List[str]] = None,
source_version: Optional[int] = None,
source_tag: Optional[str] = None,
is_shallow: bool = True,
@@ -142,18 +120,13 @@ class Connection(object):
self,
cur_name: str,
new_name: str,
cur_namespace_path: Optional[List[str]] = None,
new_namespace_path: Optional[List[str]] = None,
cur_namespace: Optional[List[str]] = None,
new_namespace: Optional[List[str]] = None,
) -> None: ...
async def drop_table(
self, name: str, namespace_path: Optional[List[str]] = None
self, name: str, namespace: Optional[List[str]] = None
) -> None: ...
async def drop_all_tables(
self, namespace_path: Optional[List[str]] = None
) -> None: ...
async def namespace_client_config(
self,
) -> Dict[str, Any]: ...
async def drop_all_tables(self, namespace: Optional[List[str]] = None) -> None: ...
class Table:
def name(self) -> str: ...
@@ -162,10 +135,7 @@ class Table:
def close(self) -> None: ...
async def schema(self) -> pa.Schema: ...
async def add(
self,
data: pa.RecordBatchReader,
mode: Literal["append", "overwrite"],
progress: Optional[Any] = None,
self, data: pa.RecordBatchReader, mode: Literal["append", "overwrite"]
) -> AddResult: ...
async def update(
self, updates: Dict[str, str], where: Optional[str]
@@ -196,8 +166,6 @@ class Table:
async def checkout(self, version: Union[int, str]): ...
async def checkout_latest(self): ...
async def restore(self, version: Optional[Union[int, str]] = None): ...
async def prewarm_index(self, index_name: str) -> None: ...
async def prewarm_data(self, columns: Optional[List[str]] = None) -> None: ...
async def list_indices(self) -> list[IndexConfig]: ...
async def delete(self, filter: str) -> DeleteResult: ...
async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...
@@ -242,8 +210,6 @@ async def connect(
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
storage_options: Optional[Dict[str, str]],
session: Optional[Session],
manifest_enabled: bool = False,
namespace_client_properties: Optional[Dict[str, str]] = None,
) -> Connection: ...
class RecordBatchStream:
@@ -254,9 +220,7 @@ class RecordBatchStream:
class Query:
def where(self, filter: str): ...
def where_expr(self, expr: PyExpr): ...
def select(self, columns: List[Tuple[str, str]]): ...
def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
def select(self, columns: Tuple[str, str]): ...
def select_columns(self, columns: List[str]): ...
def limit(self, limit: int): ...
def offset(self, offset: int): ...
@@ -282,9 +246,7 @@ class TakeQuery:
class FTSQuery:
def where(self, filter: str): ...
def where_expr(self, expr: PyExpr): ...
def select(self, columns: List[Tuple[str, str]]): ...
def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
def select(self, columns: List[str]): ...
def limit(self, limit: int): ...
def offset(self, offset: int): ...
def fast_search(self): ...
@@ -303,9 +265,7 @@ class VectorQuery:
async def output_schema(self) -> pa.Schema: ...
async def execute(self) -> RecordBatchStream: ...
def where(self, filter: str): ...
def where_expr(self, expr: PyExpr): ...
def select(self, columns: List[Tuple[str, str]]): ...
def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
def select(self, columns: List[str]): ...
def select_with_projection(self, columns: Tuple[str, str]): ...
def limit(self, limit: int): ...
def offset(self, offset: int): ...
@@ -322,9 +282,7 @@ class VectorQuery:
class HybridQuery:
def where(self, filter: str): ...
def where_expr(self, expr: PyExpr): ...
def select(self, columns: List[Tuple[str, str]]): ...
def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
def select(self, columns: List[str]): ...
def limit(self, limit: int): ...
def offset(self, offset: int): ...
def fast_search(self): ...

View File

@@ -96,7 +96,7 @@ def data_to_reader(
f"Unknown data type {type(data)}. "
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
"pyarrow Table/RecordBatch, or Pydantic models. "
"See https://docs.lancedb.com/tables/ for examples."
"See https://lancedb.com/docs/tables/ for examples."
)

File diff suppressed because it is too large Load Diff

View File

@@ -19,10 +19,10 @@ from .utils import TEXT, api_key_not_found_help
@register("gemini-text")
class GeminiText(TextEmbeddingFunction):
"""
An embedding function that uses Google's Gemini API. Requires GOOGLE_API_KEY to
An embedding function that uses the Google's Gemini API. Requires GOOGLE_API_KEY to
be set.
https://ai.google.dev/gemini-api/docs/embeddings
https://ai.google.dev/docs/embeddings_guide
Supports various tasks types:
| Task Type | Description |
@@ -46,12 +46,9 @@ class GeminiText(TextEmbeddingFunction):
Parameters
----------
name: str, default "gemini-embedding-001"
The name of the model to use. Supported models include:
- "gemini-embedding-001" (768 dimensions)
Note: The legacy "models/embedding-001" format is also supported but
"gemini-embedding-001" is recommended.
name: str, default "models/embedding-001"
The name of the model to use. See the Gemini documentation for a list of
available models.
query_task_type: str, default "retrieval_query"
Sets the task type for the queries.
@@ -80,7 +77,7 @@ class GeminiText(TextEmbeddingFunction):
"""
name: str = "gemini-embedding-001"
name: str = "models/embedding-001"
query_task_type: str = "retrieval_query"
source_task_type: str = "retrieval_document"
@@ -117,48 +114,23 @@ class GeminiText(TextEmbeddingFunction):
texts: list[str] or np.ndarray (of str)
The texts to embed
"""
from google.genai import types
if (
kwargs.get("task_type") == "retrieval_document"
): # Provide a title to use existing API design
title = "Embedding of a document"
kwargs["title"] = title
task_type = kwargs.get("task_type")
# Build content objects for embed_content
contents = []
for text in texts:
if task_type == "retrieval_document":
# Provide a title for retrieval_document task
contents.append(
{"parts": [{"text": "Embedding of a document"}, {"text": text}]}
)
else:
contents.append({"parts": [{"text": text}]})
# Build config
config_kwargs = {}
if task_type:
config_kwargs["task_type"] = task_type.upper() # API expects uppercase
# Call embed_content for each content
embeddings = []
for content in contents:
config = (
types.EmbedContentConfig(**config_kwargs) if config_kwargs else None
)
response = self.client.models.embed_content(
model=self.name,
contents=content,
config=config,
)
embeddings.append(response.embeddings[0].values)
return embeddings
return [
self.client.embed_content(model=self.name, content=text, **kwargs)[
"embedding"
]
for text in texts
]
@cached_property
def client(self):
attempt_import_or_raise("google.genai", "google-genai")
genai = attempt_import_or_raise("google.generativeai", "google.generativeai")
if not os.environ.get("GOOGLE_API_KEY"):
api_key_not_found_help("google")
from google import genai as genai_module
return genai_module.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
return genai

View File

@@ -10,7 +10,6 @@ import sys
import threading
import time
import urllib.error
import urllib.request
import weakref
import logging
from functools import wraps

View File

@@ -1,298 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
"""Type-safe expression builder for filters and projections.
Instead of writing raw SQL strings you can build expressions with Python
operators::
from lancedb.expr import col, lit
# filter: age > 18 AND status = 'active'
filt = (col("age") > lit(18)) & (col("status") == lit("active"))
# projection: compute a derived column
proj = {"score": col("raw_score") * lit(1.5)}
table.search().where(filt).select(proj).to_list()
"""
from __future__ import annotations
from typing import Union
import pyarrow as pa
from lancedb._lancedb import PyExpr, expr_col, expr_lit, expr_func
__all__ = ["Expr", "col", "lit", "func"]
_STR_TO_PA_TYPE: dict = {
"bool": pa.bool_(),
"boolean": pa.bool_(),
"int8": pa.int8(),
"int16": pa.int16(),
"int32": pa.int32(),
"int64": pa.int64(),
"uint8": pa.uint8(),
"uint16": pa.uint16(),
"uint32": pa.uint32(),
"uint64": pa.uint64(),
"float16": pa.float16(),
"float32": pa.float32(),
"float": pa.float32(),
"float64": pa.float64(),
"double": pa.float64(),
"string": pa.string(),
"utf8": pa.string(),
"str": pa.string(),
"large_string": pa.large_utf8(),
"large_utf8": pa.large_utf8(),
"date32": pa.date32(),
"date": pa.date32(),
"date64": pa.date64(),
}
def _coerce(value: "ExprLike") -> "Expr":
"""Return *value* as an :class:`Expr`, wrapping plain Python values via
:func:`lit` if needed."""
if isinstance(value, Expr):
return value
return lit(value)
# Type alias used in annotations.
ExprLike = Union["Expr", bool, int, float, str]
class Expr:
"""A type-safe expression node.
Construct instances with :func:`col` and :func:`lit`, then combine them
using Python operators or the named methods below.
Examples
--------
>>> from lancedb.expr import col, lit
>>> filt = (col("age") > lit(18)) & (col("name").lower() == lit("alice"))
>>> proj = {"double": col("x") * lit(2)}
"""
# Make Expr unhashable so that == returns an Expr rather than being used
# for dict keys / set membership.
__hash__ = None # type: ignore[assignment]
def __init__(self, inner: PyExpr) -> None:
self._inner = inner
# ── comparisons ──────────────────────────────────────────────────────────
def __eq__(self, other: ExprLike) -> "Expr": # type: ignore[override]
"""Equal to (``col("x") == 1``)."""
return Expr(self._inner.eq(_coerce(other)._inner))
def __ne__(self, other: ExprLike) -> "Expr": # type: ignore[override]
"""Not equal to (``col("x") != 1``)."""
return Expr(self._inner.ne(_coerce(other)._inner))
def __lt__(self, other: ExprLike) -> "Expr":
"""Less than (``col("x") < 1``)."""
return Expr(self._inner.lt(_coerce(other)._inner))
def __le__(self, other: ExprLike) -> "Expr":
"""Less than or equal to (``col("x") <= 1``)."""
return Expr(self._inner.lte(_coerce(other)._inner))
def __gt__(self, other: ExprLike) -> "Expr":
"""Greater than (``col("x") > 1``)."""
return Expr(self._inner.gt(_coerce(other)._inner))
def __ge__(self, other: ExprLike) -> "Expr":
"""Greater than or equal to (``col("x") >= 1``)."""
return Expr(self._inner.gte(_coerce(other)._inner))
# ── logical ──────────────────────────────────────────────────────────────
def __and__(self, other: "Expr") -> "Expr":
"""Logical AND (``expr_a & expr_b``)."""
return Expr(self._inner.and_(_coerce(other)._inner))
def __or__(self, other: "Expr") -> "Expr":
"""Logical OR (``expr_a | expr_b``)."""
return Expr(self._inner.or_(_coerce(other)._inner))
def __invert__(self) -> "Expr":
"""Logical NOT (``~expr``)."""
return Expr(self._inner.not_())
# ── arithmetic ───────────────────────────────────────────────────────────
def __add__(self, other: ExprLike) -> "Expr":
"""Add (``col("x") + 1``)."""
return Expr(self._inner.add(_coerce(other)._inner))
def __radd__(self, other: ExprLike) -> "Expr":
"""Right-hand add (``1 + col("x")``)."""
return Expr(_coerce(other)._inner.add(self._inner))
def __sub__(self, other: ExprLike) -> "Expr":
"""Subtract (``col("x") - 1``)."""
return Expr(self._inner.sub(_coerce(other)._inner))
def __rsub__(self, other: ExprLike) -> "Expr":
"""Right-hand subtract (``1 - col("x")``)."""
return Expr(_coerce(other)._inner.sub(self._inner))
def __mul__(self, other: ExprLike) -> "Expr":
"""Multiply (``col("x") * 2``)."""
return Expr(self._inner.mul(_coerce(other)._inner))
def __rmul__(self, other: ExprLike) -> "Expr":
"""Right-hand multiply (``2 * col("x")``)."""
return Expr(_coerce(other)._inner.mul(self._inner))
def __truediv__(self, other: ExprLike) -> "Expr":
"""Divide (``col("x") / 2``)."""
return Expr(self._inner.div(_coerce(other)._inner))
def __rtruediv__(self, other: ExprLike) -> "Expr":
"""Right-hand divide (``1 / col("x")``)."""
return Expr(_coerce(other)._inner.div(self._inner))
# ── string methods ───────────────────────────────────────────────────────
def lower(self) -> "Expr":
"""Convert string column values to lowercase."""
return Expr(self._inner.lower())
def upper(self) -> "Expr":
"""Convert string column values to uppercase."""
return Expr(self._inner.upper())
def contains(self, substr: "ExprLike") -> "Expr":
"""Return True where the string contains *substr*."""
return Expr(self._inner.contains(_coerce(substr)._inner))
# ── type cast ────────────────────────────────────────────────────────────
def cast(self, data_type: Union[str, "pa.DataType"]) -> "Expr":
"""Cast values to *data_type*.
Parameters
----------
data_type:
A PyArrow ``DataType`` (e.g. ``pa.int32()``) or one of the type
name strings: ``"bool"``, ``"int8"``, ``"int16"``, ``"int32"``,
``"int64"``, ``"uint8"````"uint64"``, ``"float32"``,
``"float64"``, ``"string"``, ``"date32"``, ``"date64"``.
"""
if isinstance(data_type, str):
try:
data_type = _STR_TO_PA_TYPE[data_type]
except KeyError:
raise ValueError(
f"unsupported data type: '{data_type}'. Supported: "
f"{', '.join(_STR_TO_PA_TYPE)}"
)
return Expr(self._inner.cast(data_type))
# ── named comparison helpers (alternative to operators) ──────────────────
def eq(self, other: ExprLike) -> "Expr":
"""Equal to."""
return self.__eq__(other)
def ne(self, other: ExprLike) -> "Expr":
"""Not equal to."""
return self.__ne__(other)
def lt(self, other: ExprLike) -> "Expr":
"""Less than."""
return self.__lt__(other)
def lte(self, other: ExprLike) -> "Expr":
"""Less than or equal to."""
return self.__le__(other)
def gt(self, other: ExprLike) -> "Expr":
"""Greater than."""
return self.__gt__(other)
def gte(self, other: ExprLike) -> "Expr":
"""Greater than or equal to."""
return self.__ge__(other)
def and_(self, other: "Expr") -> "Expr":
"""Logical AND."""
return self.__and__(other)
def or_(self, other: "Expr") -> "Expr":
"""Logical OR."""
return self.__or__(other)
# ── utilities ────────────────────────────────────────────────────────────
def to_sql(self) -> str:
"""Render the expression as a SQL string (useful for debugging)."""
return self._inner.to_sql()
def __repr__(self) -> str:
return f"Expr({self._inner.to_sql()})"
# ── free functions ────────────────────────────────────────────────────────────
def col(name: str) -> Expr:
"""Reference a table column by name.
Parameters
----------
name:
The column name.
Examples
--------
>>> from lancedb.expr import col, lit
>>> col("age") > lit(18)
Expr((age > 18))
"""
return Expr(expr_col(name))
def lit(value: Union[bool, int, float, str]) -> Expr:
"""Create a literal (constant) value expression.
Parameters
----------
value:
A Python ``bool``, ``int``, ``float``, or ``str``.
Examples
--------
>>> from lancedb.expr import col, lit
>>> col("price") * lit(1.1)
Expr((price * 1.1))
"""
return Expr(expr_lit(value))
def func(name: str, *args: ExprLike) -> Expr:
"""Call an arbitrary SQL function by name.
Parameters
----------
name:
The SQL function name (e.g. ``"lower"``, ``"upper"``).
*args:
The function arguments as :class:`Expr` or plain Python literals.
Examples
--------
>>> from lancedb.expr import col, func
>>> func("lower", col("name"))
Expr(lower(name))
"""
inner_args = [_coerce(a)._inner for a in args]
return Expr(expr_func(name, inner_args))

View File

@@ -0,0 +1,201 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
"""Full text search index using tantivy-py"""
import os
from typing import List, Tuple, Optional
import pyarrow as pa
try:
import tantivy
except ImportError:
raise ImportError(
"Please install tantivy-py `pip install tantivy` to use the full text search feature." # noqa: E501
)
from .table import LanceTable
def create_index(
index_path: str,
text_fields: List[str],
ordering_fields: Optional[List[str]] = None,
tokenizer_name: str = "default",
) -> tantivy.Index:
"""
Create a new Index (not populated)
Parameters
----------
index_path : str
Path to the index directory
text_fields : List[str]
List of text fields to index
ordering_fields: List[str]
List of unsigned type fields to order by at search time
tokenizer_name : str, default "default"
The tokenizer to use
Returns
-------
index : tantivy.Index
The index object (not yet populated)
"""
if ordering_fields is None:
ordering_fields = []
# Declaring our schema.
schema_builder = tantivy.SchemaBuilder()
# special field that we'll populate with row_id
schema_builder.add_integer_field("doc_id", stored=True)
# data fields
for name in text_fields:
schema_builder.add_text_field(name, stored=True, tokenizer_name=tokenizer_name)
if ordering_fields:
for name in ordering_fields:
schema_builder.add_unsigned_field(name, fast=True)
schema = schema_builder.build()
os.makedirs(index_path, exist_ok=True)
index = tantivy.Index(schema, path=index_path)
return index
def populate_index(
index: tantivy.Index,
table: LanceTable,
fields: List[str],
writer_heap_size: Optional[int] = None,
ordering_fields: Optional[List[str]] = None,
) -> int:
"""
Populate an index with data from a LanceTable
Parameters
----------
index : tantivy.Index
The index object
table : LanceTable
The table to index
fields : List[str]
List of fields to index
writer_heap_size : int
The writer heap size in bytes, defaults to 1GB
Returns
-------
int
The number of rows indexed
"""
if ordering_fields is None:
ordering_fields = []
writer_heap_size = writer_heap_size or 1024 * 1024 * 1024
# first check the fields exist and are string or large string type
nested = []
for name in fields:
try:
f = table.schema.field(name) # raises KeyError if not found
except KeyError:
f = resolve_path(table.schema, name)
nested.append(name)
if not pa.types.is_string(f.type) and not pa.types.is_large_string(f.type):
raise TypeError(f"Field {name} is not a string type")
# create a tantivy writer
writer = index.writer(heap_size=writer_heap_size)
# write data into index
dataset = table.to_lance()
row_id = 0
max_nested_level = 0
if len(nested) > 0:
max_nested_level = max([len(name.split(".")) for name in nested])
for b in dataset.to_batches(columns=fields + ordering_fields):
if max_nested_level > 0:
b = pa.Table.from_batches([b])
for _ in range(max_nested_level - 1):
b = b.flatten()
for i in range(b.num_rows):
doc = tantivy.Document()
for name in fields:
value = b[name][i].as_py()
if value is not None:
doc.add_text(name, value)
for name in ordering_fields:
value = b[name][i].as_py()
if value is not None:
doc.add_unsigned(name, value)
if not doc.is_empty:
doc.add_integer("doc_id", row_id)
writer.add_document(doc)
row_id += 1
# commit changes
writer.commit()
return row_id
def resolve_path(schema, field_name: str) -> pa.Field:
"""
Resolve a nested field path to a list of field names
Parameters
----------
field_name : str
The field name to resolve
Returns
-------
List[str]
The resolved path
"""
path = field_name.split(".")
field = schema.field(path.pop(0))
for segment in path:
if pa.types.is_struct(field.type):
field = field.type.field(segment)
else:
raise KeyError(f"field {field_name} not found in schema {schema}")
return field
def search_index(
index: tantivy.Index, query: str, limit: int = 10, ordering_field=None
) -> Tuple[Tuple[int], Tuple[float]]:
"""
Search an index for a query
Parameters
----------
index : tantivy.Index
The index object
query : str
The query string
limit : int
The maximum number of results to return
Returns
-------
ids_and_score: list[tuple[int], tuple[float]]
A tuple of two tuples, the first containing the document ids
and the second containing the scores
"""
searcher = index.searcher()
query = index.parse_query(query)
# get top results
if ordering_field:
results = searcher.search(query, limit, order_by_field=ordering_field)
else:
results = searcher.search(query, limit)
if results.count == 0:
return tuple(), tuple()
return tuple(
zip(
*[
(searcher.doc(doc_address)["doc_id"][0], score)
for score, doc_address in results.hits
]
)
)

View File

@@ -2,3 +2,70 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
"""I/O utilities and interfaces for LanceDB."""
from abc import ABC, abstractmethod
from typing import Dict
class StorageOptionsProvider(ABC):
"""Abstract base class for providing storage options to LanceDB tables.
Storage options providers enable automatic credential refresh for cloud
storage backends (e.g., AWS S3, Azure Blob Storage, GCS). When credentials
have an expiration time, the provider's fetch_storage_options() method will
be called periodically to get fresh credentials before they expire.
Example
-------
>>> class MyProvider(StorageOptionsProvider):
... def fetch_storage_options(self) -> Dict[str, str]:
... # Fetch fresh credentials from your credential manager
... return {
... "aws_access_key_id": "...",
... "aws_secret_access_key": "...",
... "expires_at_millis": "1234567890000" # Optional
... }
"""
@abstractmethod
def fetch_storage_options(self) -> Dict[str, str]:
"""Fetch fresh storage credentials.
This method is called by LanceDB when credentials need to be refreshed.
If the returned dictionary contains an "expires_at_millis" key with a
Unix timestamp in milliseconds, LanceDB will automatically refresh the
credentials before that time. If the key is not present, credentials
are assumed to not expire.
Returns
-------
Dict[str, str]
Dictionary containing cloud storage credentials and optionally an
expiration time:
- "expires_at_millis" (optional): Unix timestamp in milliseconds when
credentials expire
- Provider-specific credential keys (e.g., aws_access_key_id,
aws_secret_access_key, etc.)
Raises
------
RuntimeError
If credentials cannot be fetched or are invalid
"""
pass
def provider_id(self) -> str:
"""Return a human-readable unique identifier for this provider instance.
This identifier is used for caching and equality comparison. Two providers
with the same ID will share the same cached object store connection.
The default implementation uses the class name and string representation.
Override this method if you need custom identification logic.
Returns
-------
str
A unique identifier for this provider instance
"""
return f"{self.__class__.__name__} {{ repr: {str(self)!r} }}"

File diff suppressed because it is too large Load Diff

View File

@@ -284,8 +284,9 @@ class Permutations:
self.permutation_table = permutation_table
if permutation_table.schema.metadata is not None:
raw = permutation_table.schema.metadata.get(b"split_names")
split_names = raw.decode("utf-8") if raw is not None else None
split_names = permutation_table.schema.metadata.get(
b"split_names", None
).decode("utf-8")
if split_names is not None:
self.split_names = json.loads(split_names)
self.split_dict = {
@@ -425,35 +426,6 @@ class Permutation:
"""
return Permutation.from_tables(table, None, None)
@classmethod
def from_table(cls, table: LanceTable) -> "_PermutationFromTable":
"""
Create a permutation directly from a base table, with HuggingFace /
PyTorch-style chaining for ``shuffle``, ``filter``, and ``split_*``.
This is a convenience wrapper that hides the two-step
``permutation_builder(table).shuffle().execute()`` /
``Permutation.from_tables(table, perm_tbl)`` dance. The returned object
accumulates builder operations and only materializes the underlying
permutation table on first read (any access of an attribute that is
not a builder operation), so chained calls do not pay an extra
``execute()`` for each step.
After the first read all ``Permutation`` methods (``select_columns``,
``with_format``, ``map``, ``__iter__``, ``fetch``, ``num_rows``, ...)
are forwarded transparently.
Examples
--------
>>> import lancedb
>>> db = lancedb.connect("memory:///")
>>> tbl = db.create_table("tbl", data=[{"x": x} for x in range(100)])
>>> perm = Permutation.from_table(tbl).shuffle(seed=42)
>>> perm.num_rows
100
"""
return _PermutationFromTable(table)
@classmethod
def from_tables(
cls,
@@ -488,8 +460,9 @@ class Permutation:
f"Cannot create a permutation on split `{split}`"
" because no split names are defined in the permutation table"
)
raw = permutation_table.schema.metadata.get(b"split_names")
split_names = raw.decode("utf-8") if raw is not None else None
split_names = permutation_table.schema.metadata.get(
b"split_names", None
).decode("utf-8")
if split_names is None:
raise ValueError(
f"Cannot create a permutation on split `{split}`"
@@ -871,85 +844,3 @@ class Permutation:
Repeat the permutation `times` times
"""
raise Exception("with_repeat is not yet implemented")
class _PermutationFromTable:
"""
Result of [Permutation.from_table](#from_table).
Records pending builder operations (``shuffle``, ``filter``, ``split_*``)
and lazily executes them on first read. After materialization all
Permutation reads / transforms (``select_columns``, ``with_format``,
``map``, ``__iter__``, ``fetch``, ``num_rows``, ...) are forwarded to the
underlying [Permutation].
"""
__slots__ = ("_base_table", "_pending_ops", "_materialized")
def __init__(
self,
base_table: LanceTable,
_pending_ops: Optional[list[tuple[str, tuple, dict]]] = None,
):
self._base_table = base_table
self._pending_ops: list[tuple[str, tuple, dict]] = (
list(_pending_ops) if _pending_ops is not None else []
)
self._materialized: Optional[Permutation] = None
def _with_op(
self, name: str, args: tuple, kwargs: dict
) -> "_PermutationFromTable":
return _PermutationFromTable(
self._base_table, _pending_ops=self._pending_ops + [(name, args, kwargs)]
)
def shuffle(self, *args, **kwargs) -> "_PermutationFromTable":
return self._with_op("shuffle", args, kwargs)
def filter(self, *args, **kwargs) -> "_PermutationFromTable":
return self._with_op("filter", args, kwargs)
def split_random(self, *args, **kwargs) -> "_PermutationFromTable":
return self._with_op("split_random", args, kwargs)
def split_sequential(self, *args, **kwargs) -> "_PermutationFromTable":
return self._with_op("split_sequential", args, kwargs)
def split_hash(self, *args, **kwargs) -> "_PermutationFromTable":
return self._with_op("split_hash", args, kwargs)
def split_calculated(self, *args, **kwargs) -> "_PermutationFromTable":
return self._with_op("split_calculated", args, kwargs)
def _materialize(self) -> Permutation:
if self._materialized is None:
if self._pending_ops:
builder = permutation_builder(self._base_table)
for name, args, kwargs in self._pending_ops:
builder = getattr(builder, name)(*args, **kwargs)
perm_tbl = builder.execute()
self._materialized = Permutation.from_tables(
self._base_table, perm_tbl
)
else:
self._materialized = Permutation.identity(self._base_table)
return self._materialized
def __getattr__(self, name: str) -> Any:
# Avoid recursion on dunder/private state.
if name.startswith("_"):
raise AttributeError(name)
return getattr(self._materialize(), name)
def __iter__(self):
return iter(self._materialize())
def __len__(self) -> int:
return len(self._materialize())
def __getitem__(self, index):
return self._materialize()[index]
def __getitems__(self, indices):
return self._materialize().__getitems__(indices)

View File

@@ -10,7 +10,6 @@ import sys
import types
from abc import ABC, abstractmethod
from datetime import date, datetime
from enum import Enum
from typing import (
TYPE_CHECKING,
Any,
@@ -315,19 +314,6 @@ def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
return pa.list_(pa.list_(tp.value_arrow_type(), tp.dim()))
# For regular Vector
return pa.list_(tp.value_arrow_type(), tp.dim())
if _safe_issubclass(tp, Enum):
# Map Enum to the Arrow type of its value.
# For string-valued enums, use dictionary encoding for efficiency.
# For integer enums, use the native type.
# Fall back to utf8 for mixed-type or empty enums.
value_types = {type(m.value) for m in tp}
if len(value_types) == 1:
value_type = value_types.pop()
if value_type is str:
# Use dictionary encoding for string enums
return pa.dictionary(pa.int32(), pa.utf8())
return _py_type_to_arrow_type(value_type, field)
return pa.utf8()
return _py_type_to_arrow_type(tp, field)

View File

@@ -25,6 +25,7 @@ import deprecation
import numpy as np
import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.fs as pa_fs
import pydantic
from lancedb.pydantic import PYDANTIC_VERSION
@@ -37,7 +38,6 @@ from .rerankers.base import Reranker
from .rerankers.rrf import RRFReranker
from .rerankers.util import check_reranker_result
from .util import flatten_columns
from .expr import Expr
from lancedb._lancedb import fts_query_to_json
from typing_extensions import Annotated
@@ -70,7 +70,7 @@ def ensure_vector_query(
) -> Union[List[float], List[List[float]], pa.Array, List[pa.Array]]:
if isinstance(val, list):
if len(val) == 0:
raise ValueError("Vector query must be a non-empty list")
return ValueError("Vector query must be a non-empty list")
sample = val[0]
else:
if isinstance(val, float):
@@ -83,7 +83,7 @@ def ensure_vector_query(
return val
if isinstance(sample, list):
if len(sample) == 0:
raise ValueError("Vector query must be a non-empty list")
return ValueError("Vector query must be a non-empty list")
if isinstance(sample[0], float):
# val is list of list of floats
return val
@@ -449,8 +449,8 @@ class Query(pydantic.BaseModel):
ensure_vector_query,
] = None
# sql filter or type-safe Expr to refine the query with
filter: Optional[Union[str, Expr]] = None
# sql filter to refine the query with
filter: Optional[str] = None
# if True then apply the filter after vector search
postfilter: Optional[bool] = None
@@ -464,8 +464,8 @@ class Query(pydantic.BaseModel):
# distance type to use for vector search
distance_type: Optional[str] = None
# which columns to return in the results (dict values may be str or Expr)
columns: Optional[Union[List[str], Dict[str, Union[str, Expr]]]] = None
# which columns to return in the results
columns: Optional[Union[List[str], Dict[str, str]]] = None
# minimum number of IVF partitions to search
#
@@ -856,15 +856,14 @@ class LanceQueryBuilder(ABC):
self._offset = offset
return self
def select(self, columns: Union[list[str], dict[str, Union[str, Expr]]]) -> Self:
def select(self, columns: Union[list[str], dict[str, str]]) -> Self:
"""Set the columns to return.
Parameters
----------
columns: list of str, or dict of str to str or Expr
columns: list of str, or dict of str to str default None
List of column names to be fetched.
Or a dictionary of column names to SQL expressions or
:class:`~lancedb.expr.Expr` objects.
Or a dictionary of column names to SQL expressions.
All columns are fetched if None or unspecified.
Returns
@@ -878,15 +877,15 @@ class LanceQueryBuilder(ABC):
raise ValueError("columns must be a list or a dictionary")
return self
def where(self, where: Union[str, Expr], prefilter: bool = True) -> Self:
def where(self, where: str, prefilter: bool = True) -> Self:
"""Set the where clause.
Parameters
----------
where: str or :class:`~lancedb.expr.Expr`
The filter condition. Can be a SQL string or a type-safe
:class:`~lancedb.expr.Expr` built with :func:`~lancedb.expr.col`
and :func:`~lancedb.expr.lit`.
where: str
The where clause which is a valid SQL where clause. See
`Lance filter pushdown <https://lance.org/guide/read_and_write#filter-push-down>`_
for valid SQL expressions.
prefilter: bool, default True
If True, apply the filter before vector search, otherwise the
filter is applied on the result of vector search.
@@ -1356,17 +1355,15 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
return result_set
def where(
self, where: Union[str, Expr], prefilter: bool = None
) -> LanceVectorQueryBuilder:
def where(self, where: str, prefilter: bool = None) -> LanceVectorQueryBuilder:
"""Set the where clause.
Parameters
----------
where: str or :class:`~lancedb.expr.Expr`
The filter condition. Can be a SQL string or a type-safe
:class:`~lancedb.expr.Expr` built with :func:`~lancedb.expr.col`
and :func:`~lancedb.expr.lit`.
where: str
The where clause which is a valid SQL where clause. See
`Lance filter pushdown <https://lance.org/guide/read_and_write#filter-push-down>`_
for valid SQL expressions.
prefilter: bool, default True
If True, apply the filter before vector search, otherwise the
filter is applied on the result of vector search.
@@ -1525,7 +1522,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
return self._table._output_schema(self.to_query_object())
def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
self._table._ensure_no_legacy_fts_index()
path, fs, exist = self._table._get_fts_index_path()
if exist:
return self.tantivy_to_arrow()
query = self._query
if self._phrase_query:
@@ -1549,6 +1548,90 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
):
raise NotImplementedError("to_batches on an FTS query")
def tantivy_to_arrow(self) -> pa.Table:
try:
import tantivy
except ImportError:
raise ImportError(
"Please install tantivy-py `pip install tantivy` to use the full text search feature." # noqa: E501
)
from .fts import search_index
# get the index path
path, fs, exist = self._table._get_fts_index_path()
# check if the index exist
if not exist:
raise FileNotFoundError(
"Fts index does not exist. "
"Please first call table.create_fts_index(['<field_names>']) to "
"create the fts index."
)
# Check that we are on local filesystem
if not isinstance(fs, pa_fs.LocalFileSystem):
raise NotImplementedError(
"Tantivy-based full text search "
"is only supported on the local filesystem"
)
# open the index
index = tantivy.Index.open(path)
# get the scores and doc ids
query = self._query
if self._phrase_query:
query = query.replace('"', "'")
query = f'"{query}"'
limit = self._limit if self._limit is not None else 10
row_ids, scores = search_index(
index, query, limit, ordering_field=self.ordering_field_name
)
if len(row_ids) == 0:
empty_schema = pa.schema([pa.field("_score", pa.float32())])
return pa.Table.from_batches([], schema=empty_schema)
scores = pa.array(scores)
output_tbl = self._table.to_lance().take(row_ids, columns=self._columns)
output_tbl = output_tbl.append_column("_score", scores)
# this needs to match vector search results which are uint64
row_ids = pa.array(row_ids, type=pa.uint64())
if self._where is not None:
tmp_name = "__lancedb__duckdb__indexer__"
output_tbl = output_tbl.append_column(
tmp_name, pa.array(range(len(output_tbl)))
)
try:
# TODO would be great to have Substrait generate pyarrow compute
# expressions or conversely have pyarrow support SQL expressions
# using Substrait
import duckdb
indexer = duckdb.sql(
f"SELECT {tmp_name} FROM output_tbl WHERE {self._where}"
).to_arrow_table()[tmp_name]
output_tbl = output_tbl.take(indexer).drop([tmp_name])
row_ids = row_ids.take(indexer)
except ImportError:
import tempfile
import lance
# TODO Use "memory://" instead once that's supported
with tempfile.TemporaryDirectory() as tmp:
ds = lance.write_dataset(output_tbl, tmp)
output_tbl = ds.to_table(filter=self._where)
indexer = output_tbl[tmp_name]
row_ids = row_ids.take(indexer)
output_tbl = output_tbl.drop([tmp_name])
if self._with_row_id:
output_tbl = output_tbl.append_column("_rowid", row_ids)
if self._reranker is not None:
output_tbl = self._reranker.rerank_fts(self._query, output_tbl)
return output_tbl
def rerank(self, reranker: Reranker) -> LanceFtsQueryBuilder:
"""Rerank the results using the specified reranker.
@@ -2122,8 +2205,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
self._vector_query.select(self._columns)
self._fts_query.select(self._columns)
if self._where:
self._vector_query.where(self._where, not self._postfilter)
self._fts_query.where(self._where, not self._postfilter)
self._vector_query.where(self._where, self._postfilter)
self._fts_query.where(self._where, self._postfilter)
if self._with_row_id:
self._vector_query.with_row_id(True)
self._fts_query.with_row_id(True)
@@ -2203,20 +2286,10 @@ class AsyncQueryBase(object):
"""
if isinstance(columns, list) and all(isinstance(c, str) for c in columns):
self._inner.select_columns(columns)
elif isinstance(columns, dict) and all(isinstance(k, str) for k in columns):
if any(isinstance(v, Expr) for v in columns.values()):
# At least one value is an Expr — use the type-safe path.
from .expr import _coerce
pairs = [(k, _coerce(v)._inner) for k, v in columns.items()]
self._inner.select_expr(pairs)
elif all(isinstance(v, str) for v in columns.values()):
self._inner.select(list(columns.items()))
else:
raise TypeError(
"dict values must be str or Expr, got "
+ str({k: type(v) for k, v in columns.items()})
)
elif isinstance(columns, dict) and all(
isinstance(k, str) and isinstance(v, str) for k, v in columns.items()
):
self._inner.select(list(columns.items()))
else:
raise TypeError("columns must be a list of column names or a dict")
return self
@@ -2456,13 +2529,11 @@ class AsyncStandardQuery(AsyncQueryBase):
"""
super().__init__(inner)
def where(self, predicate: Union[str, Expr]) -> Self:
def where(self, predicate: str) -> Self:
"""
Only return rows matching the given predicate
The predicate can be a SQL string or a type-safe
:class:`~lancedb.expr.Expr` built with :func:`~lancedb.expr.col`
and :func:`~lancedb.expr.lit`.
The predicate should be supplied as an SQL query string.
Examples
--------
@@ -2474,10 +2545,7 @@ class AsyncStandardQuery(AsyncQueryBase):
Filtering performance can often be improved by creating a scalar index
on the filter column(s).
"""
if isinstance(predicate, Expr):
self._inner.where_expr(predicate._inner)
else:
self._inner.where(predicate)
self._inner.where(predicate)
return self
def limit(self, limit: int) -> Self:

View File

@@ -145,33 +145,6 @@ class TlsConfig:
@dataclass
class ClientConfig:
"""Configuration for the LanceDB Cloud HTTP client.
Attributes
----------
user_agent: str
User agent string sent with requests.
retry_config: RetryConfig
Configuration for retrying failed requests.
timeout_config: Optional[TimeoutConfig]
Configuration for request timeouts.
extra_headers: Optional[dict]
Additional headers to include in requests.
id_delimiter: Optional[str]
The delimiter to use when constructing object identifiers.
tls_config: Optional[TlsConfig]
TLS/mTLS configuration for secure connections.
header_provider: Optional[HeaderProvider]
Provider for dynamic headers to be added to each request.
user_id: Optional[str]
User identifier for tracking purposes. This is sent as the
`x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
This can also be set via the `LANCEDB_USER_ID` environment variable.
Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another
environment variable that contains the user ID value.
"""
user_agent: str = f"LanceDB-Python-Client/{__version__}"
retry_config: RetryConfig = field(default_factory=RetryConfig)
timeout_config: Optional[TimeoutConfig] = field(default_factory=TimeoutConfig)
@@ -179,7 +152,6 @@ class ClientConfig:
id_delimiter: Optional[str] = None
tls_config: Optional[TlsConfig] = None
header_provider: Optional["HeaderProvider"] = None
user_id: Optional[str] = None
def __post_init__(self):
if isinstance(self.retry_config, dict):

View File

@@ -24,7 +24,6 @@ from ..common import DATA
from ..db import DBConnection, LOOP
from ..embeddings import EmbeddingFunctionConfig
from lance_namespace import (
LanceNamespace,
CreateNamespaceResponse,
DescribeNamespaceResponse,
DropNamespaceResponse,
@@ -112,7 +111,7 @@ class RemoteDBConnection(DBConnection):
@override
def list_namespaces(
self,
namespace_path: Optional[List[str]] = None,
namespace: Optional[List[str]] = None,
page_token: Optional[str] = None,
limit: Optional[int] = None,
) -> ListNamespacesResponse:
@@ -120,7 +119,7 @@ class RemoteDBConnection(DBConnection):
Parameters
----------
namespace_path: List[str], optional
namespace: List[str], optional
The parent namespace to list namespaces in.
None or empty list represents root namespace.
page_token: str, optional
@@ -134,18 +133,18 @@ class RemoteDBConnection(DBConnection):
ListNamespacesResponse
Response containing namespace names and optional page_token for pagination.
"""
if namespace_path is None:
namespace_path = []
if namespace is None:
namespace = []
return LOOP.run(
self._conn.list_namespaces(
namespace_path=namespace_path, page_token=page_token, limit=limit
namespace=namespace, page_token=page_token, limit=limit
)
)
@override
def create_namespace(
self,
namespace_path: List[str],
namespace: List[str],
mode: Optional[str] = None,
properties: Optional[Dict[str, str]] = None,
) -> CreateNamespaceResponse:
@@ -153,7 +152,7 @@ class RemoteDBConnection(DBConnection):
Parameters
----------
namespace_path: List[str]
namespace: List[str]
The namespace identifier to create.
mode: str, optional
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
@@ -168,14 +167,14 @@ class RemoteDBConnection(DBConnection):
"""
return LOOP.run(
self._conn.create_namespace(
namespace_path=namespace_path, mode=mode, properties=properties
namespace=namespace, mode=mode, properties=properties
)
)
@override
def drop_namespace(
self,
namespace_path: List[str],
namespace: List[str],
mode: Optional[str] = None,
behavior: Optional[str] = None,
) -> DropNamespaceResponse:
@@ -183,7 +182,7 @@ class RemoteDBConnection(DBConnection):
Parameters
----------
namespace_path: List[str]
namespace: List[str]
The namespace identifier to drop.
mode: str, optional
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
@@ -197,20 +196,16 @@ class RemoteDBConnection(DBConnection):
Response containing properties and transaction_id if applicable.
"""
return LOOP.run(
self._conn.drop_namespace(
namespace_path=namespace_path, mode=mode, behavior=behavior
)
self._conn.drop_namespace(namespace=namespace, mode=mode, behavior=behavior)
)
@override
def describe_namespace(
self, namespace_path: List[str]
) -> DescribeNamespaceResponse:
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
"""Describe a namespace.
Parameters
----------
namespace_path: List[str]
namespace: List[str]
The namespace identifier to describe.
Returns
@@ -218,12 +213,12 @@ class RemoteDBConnection(DBConnection):
DescribeNamespaceResponse
Response containing the namespace properties.
"""
return LOOP.run(self._conn.describe_namespace(namespace_path=namespace_path))
return LOOP.run(self._conn.describe_namespace(namespace=namespace))
@override
def list_tables(
self,
namespace_path: Optional[List[str]] = None,
namespace: Optional[List[str]] = None,
page_token: Optional[str] = None,
limit: Optional[int] = None,
) -> ListTablesResponse:
@@ -231,7 +226,7 @@ class RemoteDBConnection(DBConnection):
Parameters
----------
namespace_path: List[str], optional
namespace: List[str], optional
The namespace to list tables in.
None or empty list represents root namespace.
page_token: str, optional
@@ -245,11 +240,11 @@ class RemoteDBConnection(DBConnection):
ListTablesResponse
Response containing table names and optional page_token for pagination.
"""
if namespace_path is None:
namespace_path = []
if namespace is None:
namespace = []
return LOOP.run(
self._conn.list_tables(
namespace_path=namespace_path, page_token=page_token, limit=limit
namespace=namespace, page_token=page_token, limit=limit
)
)
@@ -259,7 +254,7 @@ class RemoteDBConnection(DBConnection):
page_token: Optional[str] = None,
limit: int = 10,
*,
namespace_path: Optional[List[str]] = None,
namespace: Optional[List[str]] = None,
) -> Iterable[str]:
"""List the names of all tables in the database.
@@ -268,7 +263,7 @@ class RemoteDBConnection(DBConnection):
Parameters
----------
namespace_path: List[str], default []
namespace: List[str], default []
The namespace to list tables in.
Empty list represents root namespace.
page_token: str
@@ -287,11 +282,11 @@ class RemoteDBConnection(DBConnection):
DeprecationWarning,
stacklevel=2,
)
if namespace_path is None:
namespace_path = []
if namespace is None:
namespace = []
return LOOP.run(
self._conn.table_names(
namespace_path=namespace_path, start_after=page_token, limit=limit
namespace=namespace, start_after=page_token, limit=limit
)
)
@@ -300,7 +295,7 @@ class RemoteDBConnection(DBConnection):
self,
name: str,
*,
namespace_path: Optional[List[str]] = None,
namespace: Optional[List[str]] = None,
storage_options: Optional[Dict[str, str]] = None,
index_cache_size: Optional[int] = None,
) -> Table:
@@ -310,7 +305,7 @@ class RemoteDBConnection(DBConnection):
----------
name: str
The name of the table.
namespace_path: List[str], optional
namespace: List[str], optional
The namespace to open the table from.
None or empty list represents root namespace.
@@ -320,15 +315,15 @@ class RemoteDBConnection(DBConnection):
"""
from .table import RemoteTable
if namespace_path is None:
namespace_path = []
if namespace is None:
namespace = []
if index_cache_size is not None:
logging.info(
"index_cache_size is ignored in LanceDb Cloud"
" (there is no local cache to configure)"
)
table = LOOP.run(self._conn.open_table(name, namespace_path=namespace_path))
table = LOOP.run(self._conn.open_table(name, namespace=namespace))
return RemoteTable(table, self.db_name)
def clone_table(
@@ -336,7 +331,7 @@ class RemoteDBConnection(DBConnection):
target_table_name: str,
source_uri: str,
*,
target_namespace_path: Optional[List[str]] = None,
target_namespace: Optional[List[str]] = None,
source_version: Optional[int] = None,
source_tag: Optional[str] = None,
is_shallow: bool = True,
@@ -349,7 +344,7 @@ class RemoteDBConnection(DBConnection):
The name of the target table to create.
source_uri: str
The URI of the source table to clone from.
target_namespace_path: List[str], optional
target_namespace: List[str], optional
The namespace for the target table.
None or empty list represents root namespace.
source_version: int, optional
@@ -366,13 +361,13 @@ class RemoteDBConnection(DBConnection):
"""
from .table import RemoteTable
if target_namespace_path is None:
target_namespace_path = []
if target_namespace is None:
target_namespace = []
table = LOOP.run(
self._conn.clone_table(
target_table_name,
source_uri,
target_namespace_path=target_namespace_path,
target_namespace=target_namespace,
source_version=source_version,
source_tag=source_tag,
is_shallow=is_shallow,
@@ -392,7 +387,7 @@ class RemoteDBConnection(DBConnection):
exist_ok: bool = False,
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
*,
namespace_path: Optional[List[str]] = None,
namespace: Optional[List[str]] = None,
) -> Table:
"""Create a [Table][lancedb.table.Table] in the database.
@@ -400,7 +395,7 @@ class RemoteDBConnection(DBConnection):
----------
name: str
The name of the table.
namespace_path: List[str], optional
namespace: List[str], optional
The namespace to create the table in.
None or empty list represents root namespace.
data: The data to initialize the table, *optional*
@@ -500,8 +495,8 @@ class RemoteDBConnection(DBConnection):
mode = "exist_ok"
elif not mode:
mode = "exist_ok"
if namespace_path is None:
namespace_path = []
if namespace is None:
namespace = []
validate_table_name(name)
if embedding_functions is not None:
logging.warning(
@@ -516,7 +511,7 @@ class RemoteDBConnection(DBConnection):
self._conn.create_table(
name,
data,
namespace_path=namespace_path,
namespace=namespace,
mode=mode,
schema=schema,
on_bad_vectors=on_bad_vectors,
@@ -526,28 +521,28 @@ class RemoteDBConnection(DBConnection):
return RemoteTable(table, self.db_name)
@override
def drop_table(self, name: str, namespace_path: Optional[List[str]] = None):
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
"""Drop a table from the database.
Parameters
----------
name: str
The name of the table.
namespace_path: List[str], optional
namespace: List[str], optional
The namespace to drop the table from.
None or empty list represents root namespace.
"""
if namespace_path is None:
namespace_path = []
LOOP.run(self._conn.drop_table(name, namespace_path=namespace_path))
if namespace is None:
namespace = []
LOOP.run(self._conn.drop_table(name, namespace=namespace))
@override
def rename_table(
self,
cur_name: str,
new_name: str,
cur_namespace_path: Optional[List[str]] = None,
new_namespace_path: Optional[List[str]] = None,
cur_namespace: Optional[List[str]] = None,
new_namespace: Optional[List[str]] = None,
):
"""Rename a table in the database.
@@ -558,32 +553,19 @@ class RemoteDBConnection(DBConnection):
new_name: str
The new name of the table.
"""
if cur_namespace_path is None:
cur_namespace_path = []
if new_namespace_path is None:
new_namespace_path = []
if cur_namespace is None:
cur_namespace = []
if new_namespace is None:
new_namespace = []
LOOP.run(
self._conn.rename_table(
cur_name,
new_name,
cur_namespace_path=cur_namespace_path,
new_namespace_path=new_namespace_path,
cur_namespace=cur_namespace,
new_namespace=new_namespace,
)
)
@override
def namespace_client(self) -> LanceNamespace:
"""Get the equivalent namespace client for this connection.
Returns a RestNamespace with the same URI and authentication headers.
Returns
-------
LanceNamespace
The namespace client for this connection.
"""
return LOOP.run(self._conn.namespace_client())
async def close(self):
"""Close the connection to the database."""
self._conn.close()
self._client.close()

View File

@@ -4,7 +4,7 @@
from datetime import timedelta
import logging
from functools import cached_property
from typing import Any, Callable, Dict, Iterable, List, Optional, Union, Literal
from typing import Dict, Iterable, List, Optional, Union, Literal
import warnings
from lancedb._lancedb import (
@@ -35,7 +35,6 @@ import pyarrow as pa
from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME
from lancedb.merge import LanceMergeInsertBuilder
from lancedb.embeddings import EmbeddingFunctionRegistry
from lancedb.table import _normalize_progress
from ..query import LanceVectorQueryBuilder, LanceQueryBuilder, LanceTakeQueryBuilder
from ..table import AsyncTable, IndexStatistics, Query, Table, Tags
@@ -309,7 +308,6 @@ class RemoteTable(Table):
mode: str = "append",
on_bad_vectors: str = "error",
fill_value: float = 0.0,
progress: Optional[Union[bool, Callable, Any]] = None,
) -> AddResult:
"""Add more data to the [Table](Table). It has the same API signature as
the OSS version.
@@ -332,29 +330,17 @@ class RemoteTable(Table):
One of "error", "drop", "fill".
fill_value: float, default 0.
The value to use when filling vectors. Only used if on_bad_vectors="fill".
progress: bool, callable, or tqdm-like, optional
A callback or tqdm-compatible progress bar. See
:meth:`Table.add` for details.
Returns
-------
AddResult
An object containing the new version number of the table after adding data.
"""
progress, owns = _normalize_progress(progress)
try:
return LOOP.run(
self._table.add(
data,
mode=mode,
on_bad_vectors=on_bad_vectors,
fill_value=fill_value,
progress=progress,
)
return LOOP.run(
self._table.add(
data, mode=mode, on_bad_vectors=on_bad_vectors, fill_value=fill_value
)
finally:
if owns:
progress.close()
)
def search(
self,
@@ -654,45 +640,6 @@ class RemoteTable(Table):
def drop_index(self, index_name: str):
return LOOP.run(self._table.drop_index(index_name))
def prewarm_index(self, name: str) -> None:
"""Prewarm an index in the table.
This is a hint to the database that the index will be accessed in the
future and should be loaded into memory if possible. This can reduce
cold-start latency for subsequent queries.
This call initiates prewarming and returns once the request is accepted.
It is idempotent and safe to call from multiple clients concurrently.
Parameters
----------
name: str
The name of the index to prewarm
"""
return LOOP.run(self._table.prewarm_index(name))
def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
"""Prewarm data for the table.
This is a hint to the database that the given columns will be accessed
in the future and the database should prefetch the data if possible.
Currently only supported on remote tables.
This call initiates prewarming and returns once the request is accepted.
It is idempotent and safe to call from multiple clients concurrently.
This operation has a large upfront cost but can speed up future queries
that need to fetch the given columns. Large columns such as embeddings
or binary data may not be practical to prewarm. This feature is intended
for workloads that issue many queries against the same columns.
Parameters
----------
columns: list of str, optional
The columns to prewarm. If None, all columns are prewarmed.
"""
return LOOP.run(self._table.prewarm_data(columns))
def wait_for_index(
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
):

File diff suppressed because it is too large Load Diff

View File

@@ -180,7 +180,7 @@ def test_fts_fuzzy_query():
),
mode="overwrite",
)
table.create_fts_index("text", replace=True)
table.create_fts_index("text", use_tantivy=False, replace=True)
results = table.search(MatchQuery("foo", "text", fuzziness=1)).to_pandas()
assert len(results) == 4
@@ -230,7 +230,7 @@ def test_fts_boost_query():
),
mode="overwrite",
)
table.create_fts_index("desc", replace=True)
table.create_fts_index("desc", use_tantivy=False, replace=True)
results = table.search(
BoostQuery(
@@ -265,7 +265,7 @@ def test_fts_boolean_query(tmp_path):
],
mode="overwrite",
)
table.create_fts_index("text", replace=True)
table.create_fts_index("text", use_tantivy=False, replace=True)
# SHOULD
results = table.search(
@@ -319,7 +319,9 @@ def test_fts_native():
],
)
table.create_fts_index("text")
# passing `use_tantivy=False` to use lance FTS index
# `use_tantivy=True` by default
table.create_fts_index("text", use_tantivy=False)
table.search("puppy").limit(10).select(["text"]).to_list()
# [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]
# ...
@@ -330,6 +332,7 @@ def test_fts_native():
# --8<-- [start:fts_config_folding]
table.create_fts_index(
"text",
use_tantivy=False,
language="French",
stem=True,
ascii_folding=True,
@@ -343,7 +346,7 @@ def test_fts_native():
table.search("puppy").limit(10).where("text='foo'", prefilter=False).to_list()
# --8<-- [end:fts_postfiltering]
# --8<-- [start:fts_with_position]
table.create_fts_index("text", with_position=True, replace=True)
table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True)
# --8<-- [end:fts_with_position]
# --8<-- [start:fts_incremental_index]
table.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}])

View File

@@ -3,7 +3,6 @@
import re
import sys
from datetime import timedelta
import os
@@ -15,7 +14,8 @@ import pytest
from lancedb.pydantic import LanceModel, Vector
def test_basic(tmp_path):
@pytest.mark.parametrize("use_tantivy", [True, False])
def test_basic(tmp_path, use_tantivy):
db = lancedb.connect(tmp_path)
assert db.uri == str(tmp_path)
@@ -48,7 +48,7 @@ def test_basic(tmp_path):
assert len(rs) == 1
assert rs["item"].iloc[0] == "foo"
table.create_fts_index("item")
table.create_fts_index("item", use_tantivy=use_tantivy)
rs = table.search("bar", query_type="fts").to_pandas()
assert len(rs) == 1
assert rs["item"].iloc[0] == "bar"
@@ -183,8 +183,8 @@ def test_table_names(tmp_db: lancedb.DBConnection):
result = list(tmp_db.table_names("test2", limit=2))
assert result == ["test3"], f"Expected ['test3'], got {result}"
# Test that namespace_path parameter can be passed as keyword
result = list(tmp_db.table_names(namespace_path=[]))
# Test that namespace parameter can be passed as keyword
result = list(tmp_db.table_names(namespace=[]))
assert len(result) == 3
@@ -896,22 +896,42 @@ def test_bypass_vector_index_sync(tmp_db: lancedb.DBConnection):
def test_local_namespace_operations(tmp_path):
"""Test that local mode namespace operations work via directory namespace."""
"""Test that local mode namespace operations behave as expected."""
# Create a local database connection
db = lancedb.connect(tmp_path)
# Root namespace starts empty
assert db.list_namespaces().namespaces == []
# Test list_namespaces returns empty list for root namespace
namespaces = db.list_namespaces().namespaces
assert namespaces == []
# Create and list child namespace
db.create_namespace(["child"])
assert "child" in db.list_namespaces().namespaces
# Test list_namespaces with non-empty namespace raises NotImplementedError
with pytest.raises(
NotImplementedError,
match="Namespace operations are not supported for listing database",
):
db.list_namespaces(namespace=["test"])
# List namespaces under child
assert db.list_namespaces(namespace_path=["child"]).namespaces == []
# Drop namespace
db.drop_namespace(["child"])
assert db.list_namespaces().namespaces == []
def test_local_create_namespace_not_supported(tmp_path):
"""Test that create_namespace is not supported in local mode."""
db = lancedb.connect(tmp_path)
with pytest.raises(
NotImplementedError,
match="Namespace operations are not supported for listing database",
):
db.create_namespace(["test_namespace"])
def test_local_drop_namespace_not_supported(tmp_path):
"""Test that drop_namespace is not supported in local mode."""
db = lancedb.connect(tmp_path)
with pytest.raises(
NotImplementedError,
match="Namespace operations are not supported for listing database",
):
db.drop_namespace(["test_namespace"])
def test_clone_table_latest_version(tmp_path):
@@ -1028,59 +1048,3 @@ def test_clone_table_deep_clone_fails(tmp_path):
source_uri = os.path.join(tmp_path, "source.lance")
with pytest.raises(Exception, match="Deep clone is not yet implemented"):
db.clone_table("cloned", source_uri, is_shallow=False)
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
def test_namespace_client_native_storage(tmp_path):
"""Test namespace_client() returns DirectoryNamespace for native storage."""
from lance.namespace import DirectoryNamespace
db = lancedb.connect(tmp_path)
ns_client = db.namespace_client()
assert isinstance(ns_client, DirectoryNamespace)
assert str(tmp_path) in ns_client.namespace_id()
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
def test_namespace_client_with_storage_options(tmp_path):
"""Test namespace_client() preserves storage options."""
from lance.namespace import DirectoryNamespace
storage_options = {"timeout": "10s"}
db = lancedb.connect(tmp_path, storage_options=storage_options)
ns_client = db.namespace_client()
assert isinstance(ns_client, DirectoryNamespace)
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
def test_namespace_client_operations(tmp_path):
"""Test that namespace_client() returns a functional namespace client."""
db = lancedb.connect(tmp_path)
ns_client = db.namespace_client()
# Create a table through the main db connection
data = [{"id": 1, "text": "hello", "vector": [1.0, 2.0]}]
db.create_table("test_table", data=data)
# Verify the namespace client can see the table
from lance_namespace import ListTablesRequest
# id=[] means root namespace
response = ns_client.list_tables(ListTablesRequest(id=[]))
# Tables can be strings or objects with name attribute
table_names = [t.name if hasattr(t, "name") else t for t in response.tables]
assert "test_table" in table_names
@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
def test_namespace_client_namespace_connection(tmp_path):
"""Test namespace_client() returns the backing client for namespace connections."""
from lance.namespace import DirectoryNamespace
db = lancedb.connect_namespace("dir", {"root": str(tmp_path)})
ns_client = db.namespace_client()
assert isinstance(ns_client, DirectoryNamespace)
assert str(tmp_path) in ns_client.namespace_id()

View File

@@ -546,24 +546,3 @@ def test_openai_no_retry_on_401(mock_sleep):
assert mock_func.call_count == 1
# Verify that sleep was never called (no retries)
assert mock_sleep.call_count == 0
def test_url_retrieve_downloads_image():
"""
Embedding functions like open-clip, siglip, and jinaai use url_retrieve()
to download images from HTTP URLs. For example, open_clip._to_pil() calls:
PIL_Image.open(io.BytesIO(url_retrieve(image)))
Verify that url_retrieve() can download an image and open it as PIL Image,
matching the real usage pattern in embedding functions.
"""
import io
Image = pytest.importorskip("PIL.Image")
from lancedb.embeddings.utils import url_retrieve
image_url = "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg"
image_bytes = url_retrieve(image_url)
img = Image.open(io.BytesIO(image_bytes))
assert img.size[0] > 0 and img.size[1] > 0

View File

@@ -36,6 +36,9 @@ import pytest
import pytest_asyncio
from utils import exception_output
pytest.importorskip("lancedb.fts")
tantivy = pytest.importorskip("tantivy")
@pytest.fixture
def table(tmp_path) -> ldb.table.LanceTable:
@@ -141,53 +144,58 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
return table
@pytest.mark.parametrize(
("kwargs", "match"),
[
(
{"use_tantivy": True},
"Tantivy-based FTS has been removed",
),
(
{"ordering_field_names": ["count"]},
"ordering_field_names was only supported",
),
(
{"writer_heap_size": 128},
"writer_heap_size was only supported",
),
],
)
def test_reject_removed_tantivy_parameters(table, kwargs, match):
with pytest.raises(ValueError, match=match):
table.create_fts_index("text", **kwargs)
def test_create_index(tmp_path):
index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
assert isinstance(index, tantivy.Index)
assert os.path.exists(str(tmp_path / "index"))
def test_reject_legacy_tantivy_index(table):
path, _, _ = table._get_fts_index_path()
os.makedirs(path, exist_ok=True)
def test_create_index_with_stemming(tmp_path, table):
index = ldb.fts.create_index(
str(tmp_path / "index"), ["text"], tokenizer_name="en_stem"
)
assert isinstance(index, tantivy.Index)
assert os.path.exists(str(tmp_path / "index"))
with pytest.raises(ValueError, match="Legacy Tantivy FTS index detected"):
table.search("puppy").limit(5).to_list()
with pytest.raises(ValueError, match="Legacy Tantivy FTS index detected"):
table.create_fts_index("text")
# Check stemming by running tokenizer on non empty table
table.create_fts_index("text", tokenizer_name="en_stem", use_tantivy=True)
@pytest.mark.parametrize("use_tantivy", [True, False])
@pytest.mark.parametrize("with_position", [True, False])
def test_create_inverted_index(table, with_position):
def test_create_inverted_index(table, use_tantivy, with_position):
if use_tantivy and not with_position:
pytest.skip("we don't support building a tantivy index without position")
table.create_fts_index(
"text",
use_tantivy=use_tantivy,
with_position=with_position,
name="custom_fts_index",
)
indices = table.list_indices()
fts_indices = [i for i in indices if i.index_type == "FTS"]
assert any(i.name == "custom_fts_index" for i in fts_indices)
if not use_tantivy:
indices = table.list_indices()
fts_indices = [i for i in indices if i.index_type == "FTS"]
assert any(i.name == "custom_fts_index" for i in fts_indices)
def test_search_fts(table):
table.create_fts_index("text")
def test_populate_index(tmp_path, table):
index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
assert ldb.fts.populate_index(index, table, ["text"]) == len(table)
def test_search_index(tmp_path, table):
index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
ldb.fts.populate_index(index, table, ["text"])
index.reload()
results = ldb.fts.search_index(index, query="puppy", limit=5)
assert len(results) == 2
assert len(results[0]) == 5 # row_ids
assert len(results[1]) == 5 # _score
@pytest.mark.parametrize("use_tantivy", [True, False])
def test_search_fts(table, use_tantivy):
table.create_fts_index("text", use_tantivy=use_tantivy)
results = table.search("puppy").select(["id", "text"]).limit(5).to_list()
assert len(results) == 5
assert len(results[0]) == 3 # id, text, _score
@@ -196,52 +204,53 @@ def test_search_fts(table):
results = table.search("puppy").select(["id", "text"]).to_list()
assert len(results) == 10
# Test with a query
results = (
table.search(MatchQuery("puppy", "text"))
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
# Test boost query
results = (
table.search(
BoostQuery(
MatchQuery("puppy", "text"),
MatchQuery("runs", "text"),
)
if not use_tantivy:
# Test with a query
results = (
table.search(MatchQuery("puppy", "text"))
.select(["id", "text"])
.limit(5)
.to_list()
)
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
assert len(results) == 5
# Test multi match query
table.create_fts_index("text2")
results = (
table.search(MultiMatchQuery("puppy", ["text", "text2"]))
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
assert len(results[0]) == 3 # id, text, _score
# Test boost query
results = (
table.search(
BoostQuery(
MatchQuery("puppy", "text"),
MatchQuery("runs", "text"),
)
)
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
# Test boolean query
results = (
table.search(MatchQuery("puppy", "text") & MatchQuery("runs", "text"))
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
assert len(results[0]) == 3 # id, text, _score
for r in results:
assert "puppy" in r["text"]
assert "runs" in r["text"]
# Test multi match query
table.create_fts_index("text2", use_tantivy=use_tantivy)
results = (
table.search(MultiMatchQuery("puppy", ["text", "text2"]))
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
assert len(results[0]) == 3 # id, text, _score
# Test boolean query
results = (
table.search(MatchQuery("puppy", "text") & MatchQuery("runs", "text"))
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
assert len(results[0]) == 3 # id, text, _score
for r in results:
assert "puppy" in r["text"]
assert "runs" in r["text"]
@pytest.mark.asyncio
@@ -309,13 +318,13 @@ async def test_fts_select_async(async_table):
def test_search_fts_phrase_query(table):
table.create_fts_index("text", with_position=False)
table.create_fts_index("text", use_tantivy=False, with_position=False)
try:
phrase_results = table.search('"puppy runs"').limit(100).to_list()
assert False
except Exception:
pass
table.create_fts_index("text", with_position=True, replace=True)
table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True)
results = table.search("puppy").limit(100).to_list()
# Test with quotation marks
@@ -366,8 +375,8 @@ async def test_search_fts_phrase_query_async(async_table):
def test_search_fts_specify_column(table):
table.create_fts_index("text")
table.create_fts_index("text2")
table.create_fts_index("text", use_tantivy=False)
table.create_fts_index("text2", use_tantivy=False)
results = table.search("puppy", fts_columns="text").limit(5).to_list()
assert len(results) == 5
@@ -461,8 +470,42 @@ async def test_search_fts_specify_column_async(async_table):
pass
def test_create_index_from_table(tmp_path, table):
table.create_fts_index("text")
def test_search_ordering_field_index_table(tmp_path, table):
table.create_fts_index("text", ordering_field_names=["count"], use_tantivy=True)
rows = (
table.search("puppy", ordering_field_name="count")
.limit(20)
.select(["text", "count"])
.to_list()
)
for r in rows:
assert "puppy" in r["text"]
assert sorted(rows, key=lambda x: x["count"], reverse=True) == rows
def test_search_ordering_field_index(tmp_path, table):
index = ldb.fts.create_index(
str(tmp_path / "index"), ["text"], ordering_fields=["count"]
)
ldb.fts.populate_index(index, table, ["text"], ordering_fields=["count"])
index.reload()
results = ldb.fts.search_index(
index, query="puppy", limit=5, ordering_field="count"
)
assert len(results) == 2
assert len(results[0]) == 5 # row_ids
assert len(results[1]) == 5 # _distance
rows = table.to_lance().take(results[0]).to_pylist()
for r in rows:
assert "puppy" in r["text"]
assert sorted(rows, key=lambda x: x["count"], reverse=True) == rows
@pytest.mark.parametrize("use_tantivy", [True, False])
def test_create_index_from_table(tmp_path, table, use_tantivy):
table.create_fts_index("text", use_tantivy=use_tantivy)
df = table.search("puppy").limit(5).select(["text"]).to_pandas()
assert len(df) <= 5
assert "text" in df.columns
@@ -482,24 +525,36 @@ def test_create_index_from_table(tmp_path, table):
)
with pytest.raises(Exception, match="already exists"):
table.create_fts_index("text")
table.create_fts_index("text", use_tantivy=use_tantivy)
table.create_fts_index("text", replace=True)
table.create_fts_index("text", replace=True, use_tantivy=use_tantivy)
assert len(table.search("gorilla").limit(1).to_pandas()) == 1
def test_create_index_multiple_columns(tmp_path, table):
with pytest.raises(ValueError, match="Native FTS indexes can only be created"):
table.create_fts_index(["text", "text2"])
table.create_fts_index(["text", "text2"], use_tantivy=True)
df = table.search("puppy").limit(5).to_pandas()
assert len(df) == 5
assert "text" in df.columns
assert "text2" in df.columns
def test_empty_rs(tmp_path, table, mocker):
table.create_fts_index(["text", "text2"], use_tantivy=True)
mocker.patch("lancedb.fts.search_index", return_value=([], []))
df = table.search("puppy").limit(5).to_pandas()
assert len(df) == 0
def test_nested_schema(tmp_path, table):
with pytest.raises(ValueError, match="top-level fields"):
table.create_fts_index("nested.text")
table.create_fts_index("nested.text", use_tantivy=True)
rs = table.search("puppy").limit(5).to_list()
assert len(rs) == 5
def test_search_index_with_filter(table):
table.create_fts_index("text")
@pytest.mark.parametrize("use_tantivy", [True, False])
def test_search_index_with_filter(table, use_tantivy):
table.create_fts_index("text", use_tantivy=use_tantivy)
orig_import = __import__
def import_mock(name, *args):
@@ -529,7 +584,8 @@ def test_search_index_with_filter(table):
assert r["_rowid"] is not None
def test_null_input(table):
@pytest.mark.parametrize("use_tantivy", [True, False])
def test_null_input(table, use_tantivy):
table.add(
[
{
@@ -542,13 +598,14 @@ def test_null_input(table):
}
]
)
table.create_fts_index("text")
table.create_fts_index("text", use_tantivy=use_tantivy)
def test_syntax(table):
# https://github.com/lancedb/lancedb/issues/769
table.create_fts_index("text")
table.search("they could have been dogs OR").limit(10).to_list()
table.create_fts_index("text", use_tantivy=True)
with pytest.raises(ValueError, match="Syntax Error"):
table.search("they could have been dogs OR").limit(10).to_list()
# these should work
@@ -559,7 +616,6 @@ def test_syntax(table):
).to_list()
# phrase queries
table.create_fts_index("text", with_position=True, replace=True)
table.search("they could have been dogs OR cats").phrase_query().limit(10).to_list()
table.search('"they could have been dogs OR cats"').limit(10).to_list()
table.search('''"the cats OR dogs were not really 'pets' at all"''').limit(
@@ -583,7 +639,7 @@ def test_language(mem_db: DBConnection):
table = mem_db.create_table("test", data=data)
with pytest.raises(ValueError) as e:
table.create_fts_index("text", language="klingon")
table.create_fts_index("text", use_tantivy=False, language="klingon")
assert exception_output(e) == (
"ValueError: LanceDB does not support the requested language: 'klingon'\n"
@@ -594,6 +650,7 @@ def test_language(mem_db: DBConnection):
table.create_fts_index(
"text",
use_tantivy=False,
language="French",
stem=True,
ascii_folding=True,
@@ -633,7 +690,7 @@ def test_fts_on_list(mem_db: DBConnection):
}
)
table = mem_db.create_table("test", data=data)
table.create_fts_index("text", with_position=True)
table.create_fts_index("text", use_tantivy=False, with_position=True)
res = table.search("lance").limit(5).to_list()
assert len(res) == 3
@@ -645,7 +702,7 @@ def test_fts_on_list(mem_db: DBConnection):
def test_fts_ngram(mem_db: DBConnection):
data = pa.table({"text": ["hello world", "lance database", "lance is cool"]})
table = mem_db.create_table("test", data=data)
table.create_fts_index("text", base_tokenizer="ngram")
table.create_fts_index("text", use_tantivy=False, base_tokenizer="ngram")
results = table.search("lan", query_type="fts").limit(10).to_list()
assert len(results) == 2
@@ -664,6 +721,7 @@ def test_fts_ngram(mem_db: DBConnection):
# test setting min_ngram_length and prefix_only
table.create_fts_index(
"text",
use_tantivy=False,
base_tokenizer="ngram",
replace=True,
ngram_min_length=2,
@@ -828,7 +886,7 @@ def test_fts_query_to_json():
def test_fts_fast_search(table):
table.create_fts_index("text")
table.create_fts_index("text", use_tantivy=False)
# Insert some unindexed data
table.add(

View File

@@ -28,7 +28,7 @@ def sync_table(tmpdir_factory) -> Table:
}
)
table = db.create_table("test", data)
table.create_fts_index("text", with_position=False)
table.create_fts_index("text", with_position=False, use_tantivy=False)
return table
@@ -177,60 +177,6 @@ async def test_analyze_plan(table: AsyncTable):
assert "metrics=" in res
@pytest.fixture
def table_with_id(tmpdir_factory) -> Table:
tmp_path = str(tmpdir_factory.mktemp("data"))
db = lancedb.connect(tmp_path)
data = pa.table(
{
"id": pa.array([1, 2, 3, 4], type=pa.int64()),
"text": pa.array(["a", "b", "cat", "dog"]),
"vector": pa.array(
[[0.1, 0.1], [2, 2], [-0.1, -0.1], [0.5, -0.5]],
type=pa.list_(pa.float32(), list_size=2),
),
}
)
table = db.create_table("test_with_id", data)
table.create_fts_index("text", with_position=False)
return table
def test_hybrid_prefilter_explain_plan(table_with_id: Table):
"""
Verify that the prefilter logic is not inverted in LanceHybridQueryBuilder.
"""
plan_prefilter = (
table_with_id.search(query_type="hybrid")
.vector([0.0, 0.0])
.text("dog")
.where("id = 1", prefilter=True)
.limit(2)
.explain_plan(verbose=True)
)
plan_postfilter = (
table_with_id.search(query_type="hybrid")
.vector([0.0, 0.0])
.text("dog")
.where("id = 1", prefilter=False)
.limit(2)
.explain_plan(verbose=True)
)
# prefilter=True: filter is pushed into the LanceRead scan.
# The FTS sub-plan exposes this as "full_filter=id = Int64(1)" inside LanceRead.
assert "full_filter=id = Int64(1)" in plan_prefilter, (
f"Should push the filter into the scan.\nPlan:\n{plan_prefilter}"
)
# prefilter=False: filter is applied as a separate FilterExec after the search.
# The filter must NOT be embedded in the scan.
assert "full_filter=id = Int64(1)" not in plan_postfilter, (
f"Should NOT push the filter into the scan.\nPlan:\n{plan_postfilter}"
)
def test_normalize_scores():
cases = [
(pa.array([0.1, 0.4]), pa.array([0.0, 1.0])),

View File

@@ -3,7 +3,6 @@
from datetime import timedelta
import random
from typing import get_args, get_type_hints
import pyarrow as pa
import pytest
@@ -23,7 +22,6 @@ from lancedb.index import (
HnswSq,
FTS,
)
from lancedb.table import IndexStatistics
@pytest_asyncio.fixture
@@ -285,23 +283,3 @@ async def test_create_index_with_binary_vectors(binary_table: AsyncTable):
for v in range(256):
res = await binary_table.query().nearest_to([v] * 128).to_arrow()
assert res["id"][0].as_py() == v
def test_index_statistics_index_type_lists_all_supported_values():
expected_index_types = {
"IVF_FLAT",
"IVF_SQ",
"IVF_PQ",
"IVF_RQ",
"IVF_HNSW_SQ",
"IVF_HNSW_PQ",
"FTS",
"BTREE",
"BITMAP",
"LABEL_LIST",
}
assert (
set(get_args(get_type_hints(IndexStatistics)["index_type"]))
== expected_index_types
)

View File

@@ -8,7 +8,6 @@ import shutil
import pytest
import pyarrow as pa
import lancedb
from lance_namespace.errors import NamespaceNotEmptyError, TableNotFoundError
class TestNamespaceConnection:
@@ -33,16 +32,6 @@ class TestNamespaceConnection:
# Initially no tables in root
assert len(list(db.table_names())) == 0
def test_connect_via_connect_helper(self):
"""Connecting via lancedb.connect should delegate to namespace connection."""
db = lancedb.connect(
namespace_client_impl="dir",
namespace_client_properties={"root": self.temp_dir},
)
assert isinstance(db, lancedb.LanceNamespaceDBConnection)
assert len(list(db.table_names())) == 0
def test_create_table_through_namespace(self):
"""Test creating a table through namespace."""
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
@@ -60,14 +49,14 @@ class TestNamespaceConnection:
)
# Create empty table in child namespace
table = db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
assert table is not None
assert table.name == "test_table"
assert table.namespace == ["test_ns"]
assert table.id == "test_ns$test_table"
# Table should appear in child namespace
table_names = list(db.table_names(namespace_path=["test_ns"]))
table_names = list(db.table_names(namespace=["test_ns"]))
assert "test_table" in table_names
assert len(table_names) == 1
@@ -90,10 +79,10 @@ class TestNamespaceConnection:
pa.field("vector", pa.list_(pa.float32(), 2)),
]
)
db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
db.create_table("test_table", schema=schema, namespace=["test_ns"])
# Open the table
table = db.open_table("test_table", namespace_path=["test_ns"])
table = db.open_table("test_table", namespace=["test_ns"])
assert table is not None
assert table.name == "test_table"
assert table.namespace == ["test_ns"]
@@ -118,31 +107,31 @@ class TestNamespaceConnection:
pa.field("vector", pa.list_(pa.float32(), 2)),
]
)
db.create_table("table1", schema=schema, namespace_path=["test_ns"])
db.create_table("table2", schema=schema, namespace_path=["test_ns"])
db.create_table("table1", schema=schema, namespace=["test_ns"])
db.create_table("table2", schema=schema, namespace=["test_ns"])
# Verify both tables exist in child namespace
table_names = list(db.table_names(namespace_path=["test_ns"]))
table_names = list(db.table_names(namespace=["test_ns"]))
assert "table1" in table_names
assert "table2" in table_names
assert len(table_names) == 2
# Drop one table
db.drop_table("table1", namespace_path=["test_ns"])
db.drop_table("table1", namespace=["test_ns"])
# Verify only table2 remains
table_names = list(db.table_names(namespace_path=["test_ns"]))
table_names = list(db.table_names(namespace=["test_ns"]))
assert "table1" not in table_names
assert "table2" in table_names
assert len(table_names) == 1
# Drop the second table
db.drop_table("table2", namespace_path=["test_ns"])
assert len(list(db.table_names(namespace_path=["test_ns"]))) == 0
db.drop_table("table2", namespace=["test_ns"])
assert len(list(db.table_names(namespace=["test_ns"]))) == 0
# Should not be able to open dropped table
with pytest.raises(TableNotFoundError):
db.open_table("table1", namespace_path=["test_ns"])
with pytest.raises(RuntimeError):
db.open_table("table1", namespace=["test_ns"])
def test_create_table_with_schema(self):
"""Test creating a table with explicit schema through namespace."""
@@ -161,7 +150,7 @@ class TestNamespaceConnection:
)
# Create table with schema in child namespace
table = db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
assert table is not None
assert table.namespace == ["test_ns"]
@@ -185,7 +174,7 @@ class TestNamespaceConnection:
pa.field("vector", pa.list_(pa.float32(), 2)),
]
)
db.create_table("old_name", schema=schema, namespace_path=["test_ns"])
db.create_table("old_name", schema=schema, namespace=["test_ns"])
# Rename should raise NotImplementedError
with pytest.raises(NotImplementedError, match="rename_table is not supported"):
@@ -206,20 +195,20 @@ class TestNamespaceConnection:
]
)
for i in range(3):
db.create_table(f"table{i}", schema=schema, namespace_path=["test_ns"])
db.create_table(f"table{i}", schema=schema, namespace=["test_ns"])
# Verify tables exist in child namespace
assert len(list(db.table_names(namespace_path=["test_ns"]))) == 3
assert len(list(db.table_names(namespace=["test_ns"]))) == 3
# Drop all tables in child namespace
db.drop_all_tables(namespace_path=["test_ns"])
db.drop_all_tables(namespace=["test_ns"])
# Verify all tables are gone from child namespace
assert len(list(db.table_names(namespace_path=["test_ns"]))) == 0
assert len(list(db.table_names(namespace=["test_ns"]))) == 0
# Test that table_names works with keyword-only namespace parameter
db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
result = list(db.table_names(namespace_path=["test_ns"]))
db.create_table("test_table", schema=schema, namespace=["test_ns"])
result = list(db.table_names(namespace=["test_ns"]))
assert "test_table" in result
def test_table_operations(self):
@@ -237,7 +226,7 @@ class TestNamespaceConnection:
pa.field("text", pa.string()),
]
)
table = db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
# Verify empty table was created
result = table.to_pandas()
@@ -308,25 +297,25 @@ class TestNamespaceConnection:
]
)
table = db.create_table(
"test_table", schema=schema, namespace_path=["test_namespace"]
"test_table", schema=schema, namespace=["test_namespace"]
)
assert table is not None
# Verify table exists in namespace
tables_in_namespace = list(db.table_names(namespace_path=["test_namespace"]))
tables_in_namespace = list(db.table_names(namespace=["test_namespace"]))
assert "test_table" in tables_in_namespace
assert len(tables_in_namespace) == 1
# Open table from namespace
table = db.open_table("test_table", namespace_path=["test_namespace"])
table = db.open_table("test_table", namespace=["test_namespace"])
assert table is not None
assert table.name == "test_table"
# Drop table from namespace
db.drop_table("test_table", namespace_path=["test_namespace"])
db.drop_table("test_table", namespace=["test_namespace"])
# Verify table no longer exists in namespace
tables_in_namespace = list(db.table_names(namespace_path=["test_namespace"]))
tables_in_namespace = list(db.table_names(namespace=["test_namespace"]))
assert len(tables_in_namespace) == 0
# Drop namespace
@@ -348,14 +337,14 @@ class TestNamespaceConnection:
pa.field("vector", pa.list_(pa.float32(), 2)),
]
)
db.create_table("test_table", schema=schema, namespace_path=["test_namespace"])
db.create_table("test_table", schema=schema, namespace=["test_namespace"])
# Try to drop namespace with tables - should fail
with pytest.raises(NamespaceNotEmptyError):
with pytest.raises(RuntimeError, match="is not empty"):
db.drop_namespace(["test_namespace"])
# Drop table first
db.drop_table("test_table", namespace_path=["test_namespace"])
db.drop_table("test_table", namespace=["test_namespace"])
# Now dropping namespace should work
db.drop_namespace(["test_namespace"])
@@ -378,10 +367,10 @@ class TestNamespaceConnection:
# Create table with same name in both namespaces
table_a = db.create_table(
"same_name_table", schema=schema, namespace_path=["namespace_a"]
"same_name_table", schema=schema, namespace=["namespace_a"]
)
table_b = db.create_table(
"same_name_table", schema=schema, namespace_path=["namespace_b"]
"same_name_table", schema=schema, namespace=["namespace_b"]
)
# Add different data to each table
@@ -399,9 +388,7 @@ class TestNamespaceConnection:
table_b.add(data_b)
# Verify data in namespace_a table
opened_table_a = db.open_table(
"same_name_table", namespace_path=["namespace_a"]
)
opened_table_a = db.open_table("same_name_table", namespace=["namespace_a"])
result_a = opened_table_a.to_pandas().sort_values("id").reset_index(drop=True)
assert len(result_a) == 2
assert result_a["id"].tolist() == [1, 2]
@@ -412,9 +399,7 @@ class TestNamespaceConnection:
assert [v.tolist() for v in result_a["vector"]] == [[1.0, 2.0], [3.0, 4.0]]
# Verify data in namespace_b table
opened_table_b = db.open_table(
"same_name_table", namespace_path=["namespace_b"]
)
opened_table_b = db.open_table("same_name_table", namespace=["namespace_b"])
result_b = opened_table_b.to_pandas().sort_values("id").reset_index(drop=True)
assert len(result_b) == 3
assert result_b["id"].tolist() == [10, 20, 30]
@@ -434,8 +419,8 @@ class TestNamespaceConnection:
assert "same_name_table" not in root_tables
# Clean up
db.drop_table("same_name_table", namespace_path=["namespace_a"])
db.drop_table("same_name_table", namespace_path=["namespace_b"])
db.drop_table("same_name_table", namespace=["namespace_a"])
db.drop_table("same_name_table", namespace=["namespace_b"])
db.drop_namespace(["namespace_a"])
db.drop_namespace(["namespace_b"])
@@ -463,8 +448,6 @@ class TestAsyncNamespaceConnection:
table_names = await db.table_names()
assert len(list(table_names)) == 0
# Async connect via namespace helper is not enabled yet.
async def test_create_table_async(self):
"""Test creating a table asynchronously through namespace."""
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
@@ -483,13 +466,13 @@ class TestAsyncNamespaceConnection:
# Create empty table in child namespace
table = await db.create_table(
"test_table", schema=schema, namespace_path=["test_ns"]
"test_table", schema=schema, namespace=["test_ns"]
)
assert table is not None
assert isinstance(table, lancedb.AsyncTable)
# Table should appear in child namespace
table_names = await db.table_names(namespace_path=["test_ns"])
table_names = await db.table_names(namespace=["test_ns"])
assert "test_table" in list(table_names)
async def test_open_table_async(self):
@@ -506,10 +489,10 @@ class TestAsyncNamespaceConnection:
pa.field("vector", pa.list_(pa.float32(), 2)),
]
)
await db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
await db.create_table("test_table", schema=schema, namespace=["test_ns"])
# Open the table
table = await db.open_table("test_table", namespace_path=["test_ns"])
table = await db.open_table("test_table", namespace=["test_ns"])
assert table is not None
assert isinstance(table, lancedb.AsyncTable)
@@ -563,20 +546,20 @@ class TestAsyncNamespaceConnection:
pa.field("vector", pa.list_(pa.float32(), 2)),
]
)
await db.create_table("table1", schema=schema, namespace_path=["test_ns"])
await db.create_table("table2", schema=schema, namespace_path=["test_ns"])
await db.create_table("table1", schema=schema, namespace=["test_ns"])
await db.create_table("table2", schema=schema, namespace=["test_ns"])
# Verify both tables exist in child namespace
table_names = list(await db.table_names(namespace_path=["test_ns"]))
table_names = list(await db.table_names(namespace=["test_ns"]))
assert "table1" in table_names
assert "table2" in table_names
assert len(table_names) == 2
# Drop one table
await db.drop_table("table1", namespace_path=["test_ns"])
await db.drop_table("table1", namespace=["test_ns"])
# Verify only table2 remains
table_names = list(await db.table_names(namespace_path=["test_ns"]))
table_names = list(await db.table_names(namespace=["test_ns"]))
assert "table1" not in table_names
assert "table2" in table_names
assert len(table_names) == 1
@@ -605,24 +588,20 @@ class TestAsyncNamespaceConnection:
]
)
table = await db.create_table(
"test_table", schema=schema, namespace_path=["test_namespace"]
"test_table", schema=schema, namespace=["test_namespace"]
)
assert table is not None
# Verify table exists in namespace
tables_in_namespace = list(
await db.table_names(namespace_path=["test_namespace"])
)
tables_in_namespace = list(await db.table_names(namespace=["test_namespace"]))
assert "test_table" in tables_in_namespace
assert len(tables_in_namespace) == 1
# Drop table from namespace
await db.drop_table("test_table", namespace_path=["test_namespace"])
await db.drop_table("test_table", namespace=["test_namespace"])
# Verify table no longer exists in namespace
tables_in_namespace = list(
await db.table_names(namespace_path=["test_namespace"])
)
tables_in_namespace = list(await db.table_names(namespace=["test_namespace"]))
assert len(tables_in_namespace) == 0
# Drop namespace
@@ -647,98 +626,15 @@ class TestAsyncNamespaceConnection:
]
)
for i in range(3):
await db.create_table(
f"table{i}", schema=schema, namespace_path=["test_ns"]
)
await db.create_table(f"table{i}", schema=schema, namespace=["test_ns"])
# Verify tables exist in child namespace
table_names = await db.table_names(namespace_path=["test_ns"])
table_names = await db.table_names(namespace=["test_ns"])
assert len(list(table_names)) == 3
# Drop all tables in child namespace
await db.drop_all_tables(namespace_path=["test_ns"])
await db.drop_all_tables(namespace=["test_ns"])
# Verify all tables are gone from child namespace
table_names = await db.table_names(namespace_path=["test_ns"])
table_names = await db.table_names(namespace=["test_ns"])
assert len(list(table_names)) == 0
class TestPushdownOperations:
"""Test pushdown operations on namespace connections."""
def setup_method(self):
"""Set up test fixtures."""
self.temp_dir = tempfile.mkdtemp()
def teardown_method(self):
"""Clean up test fixtures."""
shutil.rmtree(self.temp_dir, ignore_errors=True)
def test_query_table_pushdown_stored(self):
"""Test that QueryTable pushdown is stored on sync connection."""
db = lancedb.connect_namespace(
"dir",
{"root": self.temp_dir},
namespace_client_pushdown_operations=["QueryTable"],
)
assert "QueryTable" in db._namespace_client_pushdown_operations
def test_create_table_pushdown_stored(self):
"""Test that CreateTable pushdown is stored on sync connection."""
db = lancedb.connect_namespace(
"dir",
{"root": self.temp_dir},
namespace_client_pushdown_operations=["CreateTable"],
)
assert "CreateTable" in db._namespace_client_pushdown_operations
def test_both_pushdowns_stored(self):
"""Test that both pushdown operations can be set together."""
db = lancedb.connect_namespace(
"dir",
{"root": self.temp_dir},
namespace_client_pushdown_operations=["QueryTable", "CreateTable"],
)
assert "QueryTable" in db._namespace_client_pushdown_operations
assert "CreateTable" in db._namespace_client_pushdown_operations
def test_pushdown_defaults_to_empty(self):
"""Test that pushdown operations default to empty."""
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
assert len(db._namespace_client_pushdown_operations) == 0
@pytest.mark.asyncio
class TestAsyncPushdownOperations:
"""Test pushdown operations on async namespace connections."""
def setup_method(self):
"""Set up test fixtures."""
self.temp_dir = tempfile.mkdtemp()
def teardown_method(self):
"""Clean up test fixtures."""
shutil.rmtree(self.temp_dir, ignore_errors=True)
async def test_async_query_table_pushdown_stored(self):
"""Test that QueryTable pushdown is stored on async connection."""
db = lancedb.connect_namespace_async(
"dir",
{"root": self.temp_dir},
namespace_client_pushdown_operations=["QueryTable"],
)
assert "QueryTable" in db._namespace_client_pushdown_operations
async def test_async_create_table_pushdown_stored(self):
"""Test that CreateTable pushdown is stored on async connection."""
db = lancedb.connect_namespace_async(
"dir",
{"root": self.temp_dir},
namespace_client_pushdown_operations=["CreateTable"],
)
assert "CreateTable" in db._namespace_client_pushdown_operations
async def test_async_pushdown_defaults_to_empty(self):
"""Test that pushdown operations default to empty on async connection."""
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
assert len(db._namespace_client_pushdown_operations) == 0

View File

@@ -4,11 +4,9 @@
"""
Integration tests for LanceDB Namespace with S3 and credential refresh.
This test uses DirectoryNamespace with native ops_metrics and vend_input_storage_options
features to track API calls and test credential refresh mechanisms.
Tests are parameterized to run with both DirectoryNamespace and a CustomNamespace
wrapper to verify Python-Rust binding works correctly for custom implementations.
This test simulates a namespace server that returns incrementing credentials
and verifies that the credential refresh mechanism works correctly for both
create_table and open_table operations.
Tests verify:
- Storage options provider is auto-created and used
@@ -18,141 +16,24 @@ Tests verify:
"""
import copy
import shutil
import sys
import tempfile
import time
import uuid
from typing import Dict, Optional
from threading import Lock
from typing import Dict
import pyarrow as pa
import pytest
from lance.namespace import (
from lance_namespace import (
CreateEmptyTableRequest,
CreateEmptyTableResponse,
DeclareTableRequest,
DeclareTableResponse,
DescribeTableRequest,
DescribeTableResponse,
DirectoryNamespace,
LanceNamespace,
)
from lance_namespace import (
CreateNamespaceRequest,
CreateNamespaceResponse,
CreateTableRequest,
CreateTableResponse,
CreateTableVersionRequest,
CreateTableVersionResponse,
DeregisterTableRequest,
DeregisterTableResponse,
DescribeNamespaceRequest,
DescribeNamespaceResponse,
DescribeTableVersionRequest,
DescribeTableVersionResponse,
DropNamespaceRequest,
DropNamespaceResponse,
DropTableRequest,
DropTableResponse,
ListNamespacesRequest,
ListNamespacesResponse,
ListTablesRequest,
ListTablesResponse,
ListTableVersionsRequest,
ListTableVersionsResponse,
NamespaceExistsRequest,
RegisterTableRequest,
RegisterTableResponse,
TableExistsRequest,
)
from lancedb.namespace import LanceNamespaceDBConnection
class CustomNamespace(LanceNamespace):
"""A custom namespace wrapper that delegates to DirectoryNamespace.
This class verifies that the Python-Rust binding works correctly for
custom namespace implementations that wrap the native DirectoryNamespace.
All methods simply delegate to the underlying DirectoryNamespace instance.
"""
def __init__(self, inner: DirectoryNamespace):
self._inner = inner
def namespace_id(self) -> str:
return f"CustomNamespace[{self._inner.namespace_id()}]"
def create_namespace(
self, request: CreateNamespaceRequest
) -> CreateNamespaceResponse:
return self._inner.create_namespace(request)
def describe_namespace(
self, request: DescribeNamespaceRequest
) -> DescribeNamespaceResponse:
return self._inner.describe_namespace(request)
def namespace_exists(self, request: NamespaceExistsRequest) -> None:
return self._inner.namespace_exists(request)
def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse:
return self._inner.drop_namespace(request)
def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse:
return self._inner.list_namespaces(request)
def create_table(
self, request: CreateTableRequest, data: bytes
) -> CreateTableResponse:
return self._inner.create_table(request, data)
def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
return self._inner.declare_table(request)
def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
return self._inner.describe_table(request)
def table_exists(self, request: TableExistsRequest) -> None:
return self._inner.table_exists(request)
def drop_table(self, request: DropTableRequest) -> DropTableResponse:
return self._inner.drop_table(request)
def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
return self._inner.list_tables(request)
def register_table(self, request: RegisterTableRequest) -> RegisterTableResponse:
return self._inner.register_table(request)
def deregister_table(
self, request: DeregisterTableRequest
) -> DeregisterTableResponse:
return self._inner.deregister_table(request)
def list_table_versions(
self, request: ListTableVersionsRequest
) -> ListTableVersionsResponse:
return self._inner.list_table_versions(request)
def describe_table_version(
self, request: DescribeTableVersionRequest
) -> DescribeTableVersionResponse:
return self._inner.describe_table_version(request)
def create_table_version(
self, request: CreateTableVersionRequest
) -> CreateTableVersionResponse:
return self._inner.create_table_version(request)
def retrieve_ops_metrics(self) -> Optional[Dict[str, int]]:
return self._inner.retrieve_ops_metrics()
def _wrap_if_custom(ns_client: DirectoryNamespace, use_custom: bool):
"""Wrap namespace client in CustomNamespace if use_custom is True."""
if use_custom:
return CustomNamespace(ns_client)
return ns_client
# LocalStack S3 configuration
CONFIG = {
"allow_http": "true",
@@ -208,88 +89,157 @@ def delete_bucket(s3, bucket_name):
pass
def create_tracking_namespace(
bucket_name: str,
storage_options: dict,
credential_expires_in_seconds: int = 60,
use_custom: bool = False,
):
"""Create a DirectoryNamespace with ops metrics and credential vending enabled.
Uses native DirectoryNamespace features:
- ops_metrics_enabled=true: Tracks API call counts via retrieve_ops_metrics()
- vend_input_storage_options=true: Returns input storage options in responses
- vend_input_storage_options_refresh_interval_millis: Adds expires_at_millis
Args:
bucket_name: S3 bucket name or local path
storage_options: Storage options to pass through (credentials, endpoint, etc.)
credential_expires_in_seconds: Interval in seconds for credential expiration
use_custom: If True, wrap in CustomNamespace for testing custom implementations
Returns:
Tuple of (namespace_client, inner_namespace_client) where inner is always
the DirectoryNamespace (used for metrics retrieval)
class TrackingNamespace(LanceNamespace):
"""
# Add refresh_offset_millis to storage options so that credentials are not
# considered expired immediately. Set to 1 second (1000ms) so that refresh
# checks work correctly with short-lived credentials in tests.
storage_options_with_refresh = dict(storage_options)
storage_options_with_refresh["refresh_offset_millis"] = "1000"
Mock namespace that wraps DirectoryNamespace and tracks API calls.
dir_props = {f"storage.{k}": v for k, v in storage_options_with_refresh.items()}
This namespace returns incrementing credentials with each API call to simulate
credential rotation. It also tracks the number of times each API is called
to verify caching behavior.
"""
if bucket_name.startswith("/") or bucket_name.startswith("file://"):
dir_props["root"] = f"{bucket_name}/namespace_root"
else:
dir_props["root"] = f"s3://{bucket_name}/namespace_root"
def __init__(
self,
bucket_name: str,
storage_options: Dict[str, str],
credential_expires_in_seconds: int = 60,
):
from lance.namespace import DirectoryNamespace
# Enable ops metrics tracking
dir_props["ops_metrics_enabled"] = "true"
# Enable storage options vending
dir_props["vend_input_storage_options"] = "true"
# Set refresh interval in milliseconds
dir_props["vend_input_storage_options_refresh_interval_millis"] = str(
credential_expires_in_seconds * 1000
)
self.bucket_name = bucket_name
self.base_storage_options = storage_options
self.credential_expires_in_seconds = credential_expires_in_seconds
self.describe_call_count = 0
self.create_call_count = 0
self.lock = Lock()
inner_ns_client = DirectoryNamespace(**dir_props)
ns_client = _wrap_if_custom(inner_ns_client, use_custom)
return ns_client, inner_ns_client
# Create underlying DirectoryNamespace with storage options
dir_props = {f"storage.{k}": v for k, v in storage_options.items()}
# Use S3 path for bucket name, local path for file paths
if bucket_name.startswith("/") or bucket_name.startswith("file://"):
dir_props["root"] = f"{bucket_name}/namespace_root"
else:
dir_props["root"] = f"s3://{bucket_name}/namespace_root"
def get_describe_call_count(namespace_client) -> int:
"""Get the number of describe_table calls made to the namespace client."""
return namespace_client.retrieve_ops_metrics().get("describe_table", 0)
self.inner = DirectoryNamespace(**dir_props)
def get_describe_call_count(self) -> int:
"""Thread-safe getter for describe call count."""
with self.lock:
return self.describe_call_count
def get_declare_call_count(namespace_client) -> int:
"""Get the number of declare_table calls made to the namespace client."""
return namespace_client.retrieve_ops_metrics().get("declare_table", 0)
def get_create_call_count(self) -> int:
"""Thread-safe getter for create call count."""
with self.lock:
return self.create_call_count
def namespace_id(self) -> str:
"""Return namespace identifier."""
return f"TrackingNamespace {{ inner: {self.inner.namespace_id()} }}"
def _modify_storage_options(
self, storage_options: Dict[str, str], count: int
) -> Dict[str, str]:
"""
Add incrementing credentials with expiration timestamp.
This simulates a credential rotation system where each call returns
new credentials that expire after credential_expires_in_seconds.
"""
modified = copy.deepcopy(storage_options) if storage_options else {}
# Increment credentials to simulate rotation
modified["aws_access_key_id"] = f"AKID_{count}"
modified["aws_secret_access_key"] = f"SECRET_{count}"
modified["aws_session_token"] = f"TOKEN_{count}"
# Set expiration time
expires_at_millis = int(
(time.time() + self.credential_expires_in_seconds) * 1000
)
modified["expires_at_millis"] = str(expires_at_millis)
return modified
def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
"""Track declare_table calls and inject rotating credentials."""
with self.lock:
self.create_call_count += 1
count = self.create_call_count
response = self.inner.declare_table(request)
response.storage_options = self._modify_storage_options(
response.storage_options, count
)
return response
def create_empty_table(
self, request: CreateEmptyTableRequest
) -> CreateEmptyTableResponse:
"""Track create_empty_table calls and inject rotating credentials."""
with self.lock:
self.create_call_count += 1
count = self.create_call_count
response = self.inner.create_empty_table(request)
response.storage_options = self._modify_storage_options(
response.storage_options, count
)
return response
def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
"""Track describe_table calls and inject rotating credentials."""
with self.lock:
self.describe_call_count += 1
count = self.describe_call_count
response = self.inner.describe_table(request)
response.storage_options = self._modify_storage_options(
response.storage_options, count
)
return response
# Pass through other methods to inner namespace
def list_tables(self, request):
return self.inner.list_tables(request)
def drop_table(self, request):
return self.inner.drop_table(request)
def list_namespaces(self, request):
return self.inner.list_namespaces(request)
def create_namespace(self, request):
return self.inner.create_namespace(request)
def drop_namespace(self, request):
return self.inner.drop_namespace(request)
@pytest.mark.s3_test
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
def test_namespace_create_table_with_provider(s3_bucket: str, use_custom: bool):
def test_namespace_create_table_with_provider(s3_bucket: str):
"""
Test creating a table through namespace with storage options provider.
Verifies:
- declare_table is called once to reserve location
- create_empty_table is called once to reserve location
- Storage options provider is auto-created
- Table can be written successfully
- Credentials are cached during write operations
"""
storage_options = copy.deepcopy(CONFIG)
ns_client, inner_ns_client = create_tracking_namespace(
namespace = TrackingNamespace(
bucket_name=s3_bucket,
storage_options=storage_options,
credential_expires_in_seconds=3600, # 1 hour
use_custom=use_custom,
)
db = LanceNamespaceDBConnection(ns_client)
db = LanceNamespaceDBConnection(namespace)
# Create unique namespace for this test
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -299,8 +249,8 @@ def test_namespace_create_table_with_provider(s3_bucket: str, use_custom: bool):
namespace_path = [namespace_name]
# Verify initial state
assert get_declare_call_count(inner_ns_client) == 0
assert get_describe_call_count(inner_ns_client) == 0
assert namespace.get_create_call_count() == 0
assert namespace.get_describe_call_count() == 0
# Create table with data
data = pa.table(
@@ -311,12 +261,12 @@ def test_namespace_create_table_with_provider(s3_bucket: str, use_custom: bool):
}
)
table = db.create_table(table_name, data, namespace_path=namespace_path)
table = db.create_table(table_name, data, namespace=namespace_path)
# Verify declare_table was called exactly once
assert get_declare_call_count(inner_ns_client) == 1
# Verify create_empty_table was called exactly once
assert namespace.get_create_call_count() == 1
# describe_table should NOT be called during create in create mode
assert get_describe_call_count(inner_ns_client) == 0
assert namespace.get_describe_call_count() == 0
# Verify table was created successfully
assert table.name == table_name
@@ -326,8 +276,7 @@ def test_namespace_create_table_with_provider(s3_bucket: str, use_custom: bool):
@pytest.mark.s3_test
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
def test_namespace_open_table_with_provider(s3_bucket: str, use_custom: bool):
def test_namespace_open_table_with_provider(s3_bucket: str):
"""
Test opening a table through namespace with storage options provider.
@@ -339,14 +288,13 @@ def test_namespace_open_table_with_provider(s3_bucket: str, use_custom: bool):
"""
storage_options = copy.deepcopy(CONFIG)
ns_client, inner_ns_client = create_tracking_namespace(
namespace = TrackingNamespace(
bucket_name=s3_bucket,
storage_options=storage_options,
credential_expires_in_seconds=3600,
use_custom=use_custom,
)
db = LanceNamespaceDBConnection(ns_client)
db = LanceNamespaceDBConnection(namespace)
# Create unique namespace for this test
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -364,21 +312,21 @@ def test_namespace_open_table_with_provider(s3_bucket: str, use_custom: bool):
}
)
db.create_table(table_name, data, namespace_path=namespace_path)
db.create_table(table_name, data, namespace=namespace_path)
initial_declare_count = get_declare_call_count(inner_ns_client)
assert initial_declare_count == 1
initial_create_count = namespace.get_create_call_count()
assert initial_create_count == 1
# Open the table
opened_table = db.open_table(table_name, namespace_path=namespace_path)
opened_table = db.open_table(table_name, namespace=namespace_path)
# Verify describe_table was called exactly once
assert get_describe_call_count(inner_ns_client) == 1
# declare_table should not be called again
assert get_declare_call_count(inner_ns_client) == initial_declare_count
assert namespace.get_describe_call_count() == 1
# create_empty_table should not be called again
assert namespace.get_create_call_count() == initial_create_count
# Perform multiple read operations
describe_count_after_open = get_describe_call_count(inner_ns_client)
describe_count_after_open = namespace.get_describe_call_count()
for _ in range(3):
result = opened_table.to_pandas()
@@ -387,72 +335,11 @@ def test_namespace_open_table_with_provider(s3_bucket: str, use_custom: bool):
assert count == 5
# Verify credentials were cached (no additional describe_table calls)
assert get_describe_call_count(inner_ns_client) == describe_count_after_open
@pytest.mark.skipif(
sys.platform == "win32",
reason="TODO: fix schema-only namespace metrics test on Windows",
)
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
def test_namespace_create_schema_only_with_provider(use_custom: bool):
"""
Test creating a schema-only table through namespace with storage options provider.
Verifies:
- declare_table is called once to reserve the location
- describe_table is not needed during create in create mode
- the table can be reopened successfully afterward
- opening the table triggers exactly one describe_table call
"""
temp_dir = tempfile.mkdtemp()
try:
ns_client, inner_ns_client = create_tracking_namespace(
bucket_name=temp_dir,
storage_options={},
credential_expires_in_seconds=3600,
use_custom=use_custom,
)
db = LanceNamespaceDBConnection(ns_client)
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
db.create_namespace([namespace_name])
table_name = f"test_table_{uuid.uuid4().hex}"
namespace_path = [namespace_name]
schema = pa.schema(
[
pa.field("id", pa.int64()),
pa.field("vector", pa.list_(pa.float32(), 2)),
pa.field("text", pa.string()),
]
)
assert get_declare_call_count(inner_ns_client) == 0
assert get_describe_call_count(inner_ns_client) == 0
table = db.create_table(
table_name, schema=schema, namespace_path=namespace_path
)
assert table.name == table_name
assert table.namespace == namespace_path
assert get_declare_call_count(inner_ns_client) == 1
assert get_describe_call_count(inner_ns_client) == 0
reopened_table = db.open_table(table_name, namespace_path=namespace_path)
assert reopened_table.schema == schema
assert get_declare_call_count(inner_ns_client) == 1
assert get_describe_call_count(inner_ns_client) == 1
finally:
shutil.rmtree(temp_dir, ignore_errors=True)
assert namespace.get_describe_call_count() == describe_count_after_open
@pytest.mark.s3_test
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
def test_namespace_credential_refresh_on_read(s3_bucket: str, use_custom: bool):
def test_namespace_credential_refresh_on_read(s3_bucket: str):
"""
Test credential refresh when credentials expire during read operations.
@@ -463,14 +350,13 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str, use_custom: bool):
"""
storage_options = copy.deepcopy(CONFIG)
ns_client, inner_ns_client = create_tracking_namespace(
namespace = TrackingNamespace(
bucket_name=s3_bucket,
storage_options=storage_options,
credential_expires_in_seconds=3, # Short expiration for testing
use_custom=use_custom,
)
db = LanceNamespaceDBConnection(ns_client)
db = LanceNamespaceDBConnection(namespace)
# Create unique namespace for this test
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -487,16 +373,16 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str, use_custom: bool):
}
)
db.create_table(table_name, data, namespace_path=namespace_path)
db.create_table(table_name, data, namespace=namespace_path)
# Open table (triggers describe_table)
opened_table = db.open_table(table_name, namespace_path=namespace_path)
opened_table = db.open_table(table_name, namespace=namespace_path)
# Perform an immediate read (should use credentials from open)
result = opened_table.to_pandas()
assert len(result) == 3
describe_count_after_first_read = get_describe_call_count(inner_ns_client)
describe_count_after_first_read = namespace.get_describe_call_count()
# Wait for credentials to expire (3 seconds + buffer)
time.sleep(5)
@@ -505,7 +391,7 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str, use_custom: bool):
result = opened_table.to_pandas()
assert len(result) == 3
describe_count_after_refresh = get_describe_call_count(inner_ns_client)
describe_count_after_refresh = namespace.get_describe_call_count()
# Verify describe_table was called again (credential refresh)
refresh_delta = describe_count_after_refresh - describe_count_after_first_read
@@ -518,8 +404,7 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str, use_custom: bool):
@pytest.mark.s3_test
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
def test_namespace_credential_refresh_on_write(s3_bucket: str, use_custom: bool):
def test_namespace_credential_refresh_on_write(s3_bucket: str):
"""
Test credential refresh when credentials expire during write operations.
@@ -530,14 +415,13 @@ def test_namespace_credential_refresh_on_write(s3_bucket: str, use_custom: bool)
"""
storage_options = copy.deepcopy(CONFIG)
ns_client, inner_ns_client = create_tracking_namespace(
namespace = TrackingNamespace(
bucket_name=s3_bucket,
storage_options=storage_options,
credential_expires_in_seconds=3, # Short expiration
use_custom=use_custom,
)
db = LanceNamespaceDBConnection(ns_client)
db = LanceNamespaceDBConnection(namespace)
# Create unique namespace for this test
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -554,7 +438,7 @@ def test_namespace_credential_refresh_on_write(s3_bucket: str, use_custom: bool)
}
)
table = db.create_table(table_name, initial_data, namespace_path=namespace_path)
table = db.create_table(table_name, initial_data, namespace=namespace_path)
# Add more data (should use cached credentials)
new_data = pa.table(
@@ -582,26 +466,24 @@ def test_namespace_credential_refresh_on_write(s3_bucket: str, use_custom: bool)
@pytest.mark.s3_test
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
def test_namespace_overwrite_mode(s3_bucket: str, use_custom: bool):
def test_namespace_overwrite_mode(s3_bucket: str):
"""
Test creating table in overwrite mode with credential tracking.
Verifies:
- First create calls declare_table exactly once
- First create calls create_empty_table exactly once
- Overwrite mode calls describe_table exactly once to check existence
- Storage options provider works in overwrite mode
"""
storage_options = copy.deepcopy(CONFIG)
ns_client, inner_ns_client = create_tracking_namespace(
namespace = TrackingNamespace(
bucket_name=s3_bucket,
storage_options=storage_options,
credential_expires_in_seconds=3600,
use_custom=use_custom,
)
db = LanceNamespaceDBConnection(ns_client)
db = LanceNamespaceDBConnection(namespace)
# Create unique namespace for this test
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -618,11 +500,11 @@ def test_namespace_overwrite_mode(s3_bucket: str, use_custom: bool):
}
)
table = db.create_table(table_name, data1, namespace_path=namespace_path)
# Exactly one declare_table call for initial create
assert get_declare_call_count(inner_ns_client) == 1
table = db.create_table(table_name, data1, namespace=namespace_path)
# Exactly one create_empty_table call for initial create
assert namespace.get_create_call_count() == 1
# No describe_table calls in create mode
assert get_describe_call_count(inner_ns_client) == 0
assert namespace.get_describe_call_count() == 0
assert table.count_rows() == 2
# Overwrite the table
@@ -634,14 +516,14 @@ def test_namespace_overwrite_mode(s3_bucket: str, use_custom: bool):
)
table2 = db.create_table(
table_name, data2, namespace_path=namespace_path, mode="overwrite"
table_name, data2, namespace=namespace_path, mode="overwrite"
)
# Should still have only 1 declare_table call
# Should still have only 1 create_empty_table call
# (overwrite reuses location from describe_table)
assert get_declare_call_count(inner_ns_client) == 1
assert namespace.get_create_call_count() == 1
# Should have called describe_table exactly once to get existing table location
assert get_describe_call_count(inner_ns_client) == 1
assert namespace.get_describe_call_count() == 1
# Verify new data
assert table2.count_rows() == 3
@@ -650,8 +532,7 @@ def test_namespace_overwrite_mode(s3_bucket: str, use_custom: bool):
@pytest.mark.s3_test
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
def test_namespace_multiple_tables(s3_bucket: str, use_custom: bool):
def test_namespace_multiple_tables(s3_bucket: str):
"""
Test creating and opening multiple tables in the same namespace.
@@ -662,14 +543,13 @@ def test_namespace_multiple_tables(s3_bucket: str, use_custom: bool):
"""
storage_options = copy.deepcopy(CONFIG)
ns_client, inner_ns_client = create_tracking_namespace(
namespace = TrackingNamespace(
bucket_name=s3_bucket,
storage_options=storage_options,
credential_expires_in_seconds=3600,
use_custom=use_custom,
)
db = LanceNamespaceDBConnection(ns_client)
db = LanceNamespaceDBConnection(namespace)
# Create unique namespace for this test
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -679,22 +559,22 @@ def test_namespace_multiple_tables(s3_bucket: str, use_custom: bool):
# Create first table
table1_name = f"table1_{uuid.uuid4().hex}"
data1 = pa.table({"id": [1, 2], "value": [10, 20]})
db.create_table(table1_name, data1, namespace_path=namespace_path)
db.create_table(table1_name, data1, namespace=namespace_path)
# Create second table
table2_name = f"table2_{uuid.uuid4().hex}"
data2 = pa.table({"id": [3, 4], "value": [30, 40]})
db.create_table(table2_name, data2, namespace_path=namespace_path)
db.create_table(table2_name, data2, namespace=namespace_path)
# Should have 2 declare calls (one per table)
assert get_declare_call_count(inner_ns_client) == 2
# Should have 2 create calls (one per table)
assert namespace.get_create_call_count() == 2
# Open both tables
opened1 = db.open_table(table1_name, namespace_path=namespace_path)
opened2 = db.open_table(table2_name, namespace_path=namespace_path)
opened1 = db.open_table(table1_name, namespace=namespace_path)
opened2 = db.open_table(table2_name, namespace=namespace_path)
# Should have 2 describe calls (one per open)
assert get_describe_call_count(inner_ns_client) == 2
assert namespace.get_describe_call_count() == 2
# Verify both tables work independently
assert opened1.count_rows() == 2
@@ -708,8 +588,7 @@ def test_namespace_multiple_tables(s3_bucket: str, use_custom: bool):
@pytest.mark.s3_test
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
def test_namespace_with_schema_only(s3_bucket: str, use_custom: bool):
def test_namespace_with_schema_only(s3_bucket: str):
"""
Test creating empty table with schema only (no data).
@@ -720,14 +599,13 @@ def test_namespace_with_schema_only(s3_bucket: str, use_custom: bool):
"""
storage_options = copy.deepcopy(CONFIG)
ns_client, inner_ns_client = create_tracking_namespace(
namespace = TrackingNamespace(
bucket_name=s3_bucket,
storage_options=storage_options,
credential_expires_in_seconds=3600,
use_custom=use_custom,
)
db = LanceNamespaceDBConnection(ns_client)
db = LanceNamespaceDBConnection(namespace)
# Create unique namespace for this test
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -745,12 +623,12 @@ def test_namespace_with_schema_only(s3_bucket: str, use_custom: bool):
]
)
table = db.create_table(table_name, schema=schema, namespace_path=namespace_path)
table = db.create_table(table_name, schema=schema, namespace=namespace_path)
# Should have called declare_table once
assert get_declare_call_count(inner_ns_client) == 1
# Should have called create_empty_table once
assert namespace.get_create_call_count() == 1
# Should NOT have called describe_table in create mode
assert get_describe_call_count(inner_ns_client) == 0
assert namespace.get_describe_call_count() == 0
# Verify empty table
assert table.count_rows() == 0

View File

@@ -522,50 +522,6 @@ def test_no_split_names(some_table: Table):
assert permutations[1].num_rows == 500
def test_permutations_metadata_without_split_names_key(mem_db: DBConnection):
"""Regression: schema metadata present but missing split_names key must not crash.
Previously, `.get(b"split_names", None).decode()` was called unconditionally,
so any permutation table whose metadata dict had other keys but no split_names
raised AttributeError: 'NoneType' has no attribute 'decode'.
"""
base = mem_db.create_table("base_nosplit", pa.table({"x": range(10)}))
# Build a permutation-like table that carries some metadata but NOT split_names.
raw = pa.table(
{
"row_id": pa.array(range(10), type=pa.uint64()),
"split_id": pa.array([0] * 10, type=pa.uint32()),
}
).replace_schema_metadata({b"other_key": b"other_value"})
perm_tbl = mem_db.create_table("perm_nosplit", raw)
permutations = Permutations(base, perm_tbl)
assert permutations.split_names == []
assert permutations.split_dict == {}
def test_from_tables_string_split_missing_names_key(mem_db: DBConnection):
"""Regression: from_tables() with a string split must raise ValueError, not
AttributeError.
Previously, `.get(b"split_names", None).decode()` crashed with AttributeError
when the metadata dict existed but had no split_names key.
"""
base = mem_db.create_table("base_strsplit", pa.table({"x": range(10)}))
raw = pa.table(
{
"row_id": pa.array(range(10), type=pa.uint64()),
"split_id": pa.array([0] * 10, type=pa.uint32()),
}
).replace_schema_metadata({b"other_key": b"other_value"})
perm_tbl = mem_db.create_table("perm_strsplit", raw)
with pytest.raises(ValueError, match="no split names are defined"):
Permutation.from_tables(base, perm_tbl, split="train")
@pytest.fixture
def some_perm_table(some_table: Table) -> Table:
return (
@@ -1095,60 +1051,3 @@ def test_getitems_invalid_offset(some_permutation: Permutation):
"""Test __getitems__ with an out-of-range offset raises an error."""
with pytest.raises(Exception):
some_permutation.__getitems__([999999])
def test_from_table_identity(mem_db):
"""Permutation.from_table without ops behaves like identity."""
tbl = mem_db.create_table("tbl", pa.table({"x": range(10)}))
perm = Permutation.from_table(tbl)
assert perm.num_rows == 10
assert perm.column_names == ["x"]
def test_from_table_shuffle_seeded(mem_db):
"""from_table().shuffle(seed=...) is reproducible and reorders rows."""
tbl = mem_db.create_table("tbl", pa.table({"x": range(100)}))
perm = Permutation.from_table(tbl).shuffle(seed=42)
rows = [r["x"] for r in perm.__getitems__(list(range(100)))]
assert sorted(rows) == list(range(100))
assert rows != list(range(100))
# Same seed → same order
rows2 = [
r["x"]
for r in Permutation.from_table(tbl)
.shuffle(seed=42)
.__getitems__(list(range(100)))
]
assert rows == rows2
def test_from_table_filter(mem_db):
"""from_table().filter(...) limits the rows."""
tbl = mem_db.create_table("tbl", pa.table({"x": range(100)}))
perm = Permutation.from_table(tbl).filter("x < 25")
assert perm.num_rows == 25
def test_from_table_chained_ops(mem_db):
"""Chained shuffle + filter materializes once."""
tbl = mem_db.create_table("tbl", pa.table({"x": range(100)}))
perm = Permutation.from_table(tbl).filter("x >= 50").shuffle(seed=7)
assert perm.num_rows == 50
rows = [r["x"] for r in perm.__getitems__(list(range(50)))]
assert sorted(rows) == list(range(50, 100))
def test_from_table_forwards_read_methods(mem_db):
"""from_table() result transparently forwards Permutation read methods."""
tbl = mem_db.create_table("tbl", pa.table({"x": range(10), "y": range(10)}))
perm = Permutation.from_table(tbl).select_columns(["x"])
assert perm.column_names == ["x"]
def test_from_table_split_random(mem_db):
"""from_table().split_random(...) returns rows from the first split."""
tbl = mem_db.create_table("tbl", pa.table({"x": range(100)}))
perm = Permutation.from_table(tbl).split_random(ratios=[0.3, 0.7], seed=1)
# Default split is 0 — ratio 0.3 → ~30 rows
assert 25 <= perm.num_rows <= 35

View File

@@ -3,7 +3,6 @@
import json
from datetime import date, datetime
from enum import Enum
from typing import List, Optional, Tuple
import pyarrow as pa
@@ -674,29 +673,3 @@ async def test_aliases_in_lance_model_async(mem_db_async):
assert hasattr(model, "name")
assert hasattr(model, "distance")
assert model.distance < 0.01
def test_enum_types():
"""Enum fields should map to the Arrow type of their value (issue #1846)."""
class StrStatus(str, Enum):
PENDING = "pending"
RUNNING = "running"
DONE = "done"
class IntPriority(int, Enum):
LOW = 1
MEDIUM = 2
HIGH = 3
class TestModel(pydantic.BaseModel):
status: StrStatus
priority: IntPriority
opt_status: Optional[StrStatus] = None
schema = pydantic_to_schema(TestModel)
assert schema.field("status").type == pa.dictionary(pa.int32(), pa.utf8())
assert schema.field("priority").type == pa.int64()
assert schema.field("opt_status").type == pa.dictionary(pa.int32(), pa.utf8())
assert schema.field("opt_status").nullable

View File

@@ -30,7 +30,6 @@ from lancedb.query import (
PhraseQuery,
Query,
FullTextSearchQuery,
ensure_vector_query,
)
from lancedb.rerankers.cross_encoder import CrossEncoderReranker
from lancedb.table import AsyncTable, LanceTable
@@ -1385,7 +1384,7 @@ def test_query_timeout(tmp_path):
}
)
table = db.create_table("test", data)
table.create_fts_index("text")
table.create_fts_index("text", use_tantivy=False)
with pytest.raises(Exception, match="Query timeout"):
table.search().where("text = 'a'").to_list(timeout=timedelta(0))
@@ -1502,18 +1501,6 @@ def test_search_empty_table(mem_db):
assert results == []
def test_ensure_vector_query_empty_list():
"""Regression: ensure_vector_query used to return instead of raise ValueError."""
with pytest.raises(ValueError, match="non-empty"):
ensure_vector_query([])
def test_ensure_vector_query_nested_empty_list():
"""Regression: ensure_vector_query used to return instead of raise ValueError."""
with pytest.raises(ValueError, match="non-empty"):
ensure_vector_query([[]])
def test_fast_search(tmp_path):
db = lancedb.connect(tmp_path)

View File

@@ -1201,18 +1201,6 @@ async def test_header_provider_overrides_static_headers():
await db.table_names()
def test_close():
"""Test that close() works without AttributeError."""
import asyncio
def handler(req):
req.send_response(200)
req.end_headers()
with mock_lancedb_connection(handler) as db:
asyncio.run(db.close())
@pytest.mark.parametrize("exception", [KeyboardInterrupt, SystemExit, GeneratorExit])
def test_background_loop_cancellation(exception):
"""Test that BackgroundEventLoop.run() cancels the future on interrupt."""

Some files were not shown because too many files have changed in this diff Show More