Compare commits

..

2 Commits

Author SHA1 Message Date
lancedb automation
cb956d5464 chore: update lance dependency to v1.0.0-beta.1 2025-11-15 08:54:39 +00:00
Xuanwo
3f6b0e4588 ci: migrate codex to API key based
Removed Codex authentication steps and added OPENAI_API_KEY environment variable for Codex execution.
2025-11-15 14:27:04 +08:00
51 changed files with 726 additions and 2039 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.22.4-beta.0"
current_version = "0.22.3"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -31,7 +31,6 @@ runs:
with:
command: build
working-directory: python
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
target: x86_64-unknown-linux-gnu
manylinux: ${{ inputs.manylinux }}
args: ${{ inputs.args }}

View File

@@ -24,7 +24,6 @@ env:
# according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
# CI builds are faster with incremental disabled.
CARGO_INCREMENTAL: "0"
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
jobs:
# Single deploy job since we're just deploying

View File

@@ -1,62 +0,0 @@
name: Lance Release Timer
on:
schedule:
- cron: "*/10 * * * *"
workflow_dispatch:
permissions:
contents: read
actions: write
concurrency:
group: lance-release-timer
cancel-in-progress: false
jobs:
trigger-update:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Check for new Lance tag
id: check
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: |
python3 ci/check_lance_release.py --github-output "$GITHUB_OUTPUT"
- name: Look for existing PR
if: steps.check.outputs.needs_update == 'true'
id: pr
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: |
set -euo pipefail
TITLE="chore: update lance dependency to v${{ steps.check.outputs.latest_version }}"
COUNT=$(gh pr list --search "\"$TITLE\" in:title" --state open --limit 1 --json number --jq 'length')
if [ "$COUNT" -gt 0 ]; then
echo "Open PR already exists for $TITLE"
echo "pr_exists=true" >> "$GITHUB_OUTPUT"
else
echo "No existing PR for $TITLE"
echo "pr_exists=false" >> "$GITHUB_OUTPUT"
fi
- name: Trigger codex update workflow
if: steps.check.outputs.needs_update == 'true' && steps.pr.outputs.pr_exists != 'true'
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: |
set -euo pipefail
TAG=${{ steps.check.outputs.latest_tag }}
gh workflow run codex-update-lance-dependency.yml -f tag=refs/tags/$TAG
- name: Show latest codex workflow run
if: steps.check.outputs.needs_update == 'true' && steps.pr.outputs.pr_exists != 'true'
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: |
set -euo pipefail
gh run list --workflow codex-update-lance-dependency.yml --limit 1 --json databaseId,htmlUrl,displayTitle

View File

@@ -16,6 +16,9 @@ concurrency:
cancel-in-progress: true
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
RUST_BACKTRACE: "1"
jobs:
@@ -40,20 +43,18 @@ jobs:
node-version: 20
cache: 'npm'
cache-dependency-path: nodejs/package-lock.json
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- uses: Swatinem/rust-cache@v2
- name: Format Rust
run: cargo fmt --all -- --check
- name: Lint Rust
run: cargo clippy --profile ci --all --all-features -- -D warnings
- name: Lint Typescript
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt, clippy
- name: Lint
run: |
cargo fmt --all -- --check
cargo clippy --all --all-features -- -D warnings
npm ci
npm run lint-ci
- name: Lint examples
@@ -89,8 +90,7 @@ jobs:
- name: Build
run: |
npm ci
npm run build:debug -- --profile ci
npm run tsc
npm run build
- name: Setup localstack
working-directory: .
run: docker compose up --detach --wait
@@ -147,8 +147,7 @@ jobs:
- name: Build
run: |
npm ci
npm run build:debug -- --profile ci
npm run tsc
npm run build
- name: Test
run: |
npm run test

View File

@@ -10,9 +10,6 @@ on:
- .github/workflows/pypi-publish.yml
- Cargo.toml # Change in dependency frequently breaks builds
env:
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
jobs:
linux:
name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }}

View File

@@ -18,8 +18,6 @@ env:
# Color output for pytest is off by default.
PYTEST_ADDOPTS: "--color=yes"
FORCE_COLOR: "1"
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
RUST_BACKTRACE: "1"
jobs:
lint:
@@ -98,6 +96,9 @@ jobs:
run: |
sudo apt update
sudo apt install -y protobuf-compiler
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
- name: Install
run: |
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
@@ -129,9 +130,10 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: 3.${{ matrix.python-minor-version }}
- uses: ./.github/workflows/build_linux_wheel
- uses: Swatinem/rust-cache@v2
with:
args: --profile ci
workspaces: python
- uses: ./.github/workflows/build_linux_wheel
- uses: ./.github/workflows/run_tests
with:
integration: true
@@ -166,9 +168,10 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: "3.12"
- uses: ./.github/workflows/build_mac_wheel
- uses: Swatinem/rust-cache@v2
with:
args: --profile ci
workspaces: python
- uses: ./.github/workflows/build_mac_wheel
- uses: ./.github/workflows/run_tests
# Make sure wheels are not included in the Rust cache
- name: Delete wheels
@@ -195,9 +198,10 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: "3.12"
- uses: ./.github/workflows/build_windows_wheel
- uses: Swatinem/rust-cache@v2
with:
args: --profile ci
workspaces: python
- uses: ./.github/workflows/build_windows_wheel
- uses: ./.github/workflows/run_tests
# Make sure wheels are not included in the Rust cache
- name: Delete wheels

View File

@@ -18,7 +18,11 @@ env:
# This env var is used by Swatinem/rust-cache@v2 for the cache
# key, so we set it to make sure it is always consistent.
CARGO_TERM_COLOR: always
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
RUST_BACKTRACE: "1"
CARGO_INCREMENTAL: 0
jobs:
lint:
@@ -40,6 +44,8 @@ jobs:
with:
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install dependencies
run: |
sudo apt update
@@ -47,7 +53,7 @@ jobs:
- name: Run format
run: cargo fmt --all -- --check
- name: Run clippy
run: cargo clippy --profile ci --workspace --tests --all-features -- -D warnings
run: cargo clippy --workspace --tests --all-features -- -D warnings
build-no-lock:
runs-on: ubuntu-24.04
@@ -74,7 +80,7 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev
- name: Build all
run: |
cargo build --profile ci --benches --all-features --tests
cargo build --benches --all-features --tests
linux:
timeout-minutes: 30
@@ -97,8 +103,14 @@ jobs:
fetch-depth: 0
lfs: true
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install dependencies
run: sudo apt install -y protobuf-compiler libssl-dev
run: |
# This shaves 2 minutes off this step in CI. This doesn't seem to be
# necessary in standard runners, but it is in the 4x runners.
sudo rm /var/lib/man-db/auto-update
sudo apt install -y protobuf-compiler libssl-dev
- uses: rui314/setup-mold@v1
- name: Make Swap
run: |
@@ -107,16 +119,16 @@ jobs:
sudo mkswap /swapfile
sudo swapon /swapfile
- name: Build
run: cargo build --profile ci --all-features --tests --locked --examples
run: cargo build --all-features --tests --locked --examples
- name: Run feature tests
run: CARGO_ARGS="--profile ci" make -C ./lancedb feature-tests
run: make -C ./lancedb feature-tests
- name: Run examples
run: cargo run --profile ci --example simple --locked
run: cargo run --example simple --locked
- name: Run remote tests
# Running this requires access to secrets, so skip if this is
# a PR from a fork.
if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
run: CARGO_ARGS="--profile ci" make -C ./lancedb remote-tests
run: make -C ./lancedb remote-tests
macos:
timeout-minutes: 30
@@ -136,6 +148,8 @@ jobs:
- name: CPU features
run: sysctl -a | grep cpu
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install dependencies
run: brew install protobuf
- name: Run tests
@@ -145,7 +159,7 @@ jobs:
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps \
| jq -r '.packages[] | .features | keys | .[]' \
| grep -v s3-test | sort | uniq | paste -s -d "," -`
cargo test --profile ci --features $ALL_FEATURES --locked
cargo test --features $ALL_FEATURES --locked
windows:
runs-on: windows-2022
@@ -159,21 +173,22 @@ jobs:
working-directory: rust/lancedb
steps:
- uses: actions/checkout@v4
- name: Set target
run: rustup target add ${{ matrix.target }}
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install Protoc v21.12
run: choco install --no-progress protoc
- name: Build
run: |
rustup target add ${{ matrix.target }}
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
cargo build --profile ci --features remote --tests --locked --target ${{ matrix.target }}
cargo build --features remote --tests --locked --target ${{ matrix.target }}
- name: Run tests
# Can only run tests when target matches host
if: ${{ matrix.target == 'x86_64-pc-windows-msvc' }}
run: |
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
cargo test --profile ci --features remote --locked
cargo test --features remote --locked
msrv:
# Check the minimum supported Rust version
@@ -198,7 +213,6 @@ jobs:
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ matrix.msrv }}
- uses: Swatinem/rust-cache@v2
- name: Downgrade dependencies
# These packages have newer requirements for MSRV
run: |
@@ -212,4 +226,4 @@ jobs:
cargo update -p aws-sdk-sts --precise 1.51.0
cargo update -p home --precise 0.5.9
- name: cargo +${{ matrix.msrv }} check
run: cargo check --profile ci --workspace --tests --benches --all-features
run: cargo check --workspace --tests --benches --all-features

74
Cargo.lock generated
View File

@@ -3032,8 +3032,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4217,8 +4217,8 @@ dependencies = [
[[package]]
name = "lance"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow",
"arrow-arith",
@@ -4282,8 +4282,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4301,8 +4301,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrayref",
"paste",
@@ -4311,8 +4311,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4348,8 +4348,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow",
"arrow-array",
@@ -4378,8 +4378,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow",
"arrow-array",
@@ -4396,8 +4396,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4434,8 +4434,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4467,8 +4467,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow",
"arrow-arith",
@@ -4529,8 +4529,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow",
"arrow-arith",
@@ -4570,8 +4570,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4587,8 +4587,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow",
"async-trait",
@@ -4600,8 +4600,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4611,7 +4611,6 @@ dependencies = [
"futures",
"lance",
"lance-core",
"lance-index",
"lance-io",
"lance-namespace",
"log",
@@ -4639,8 +4638,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow",
"arrow-array",
@@ -4679,8 +4678,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
version = "1.0.0-beta.1"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.1#8c6732e4b809f69f56c0fb555401fa76fba19aab"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4691,7 +4690,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.22.4-beta.0"
version = "0.22.3"
dependencies = [
"ahash",
"anyhow",
@@ -4786,7 +4785,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.22.4-beta.0"
version = "0.22.3"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4806,20 +4805,17 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.25.4-beta.0"
version = "0.25.3"
dependencies = [
"arrow",
"async-trait",
"env_logger",
"futures",
"lance-core",
"lance-io",
"lancedb",
"pin-project",
"pyo3",
"pyo3-async-runtimes",
"pyo3-build-config",
"snafu",
"tokio",
]

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=1.0.0-beta.3", default-features = false, "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=1.0.0-beta.3", default-features = false, "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=1.0.0-beta.3", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"], "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=1.0.0-beta.1", default-features = false, "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-core = { "version" = "=1.0.0-beta.1", "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-datagen = { "version" = "=1.0.0-beta.1", "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-file = { "version" = "=1.0.0-beta.1", "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=1.0.0-beta.1", default-features = false, "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=1.0.0-beta.1", "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=1.0.0-beta.1", "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-namespace = { "version" = "=1.0.0-beta.1", "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-namespace-impls = { "version" = "=1.0.0-beta.1", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"], "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=1.0.0-beta.1", "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=1.0.0-beta.1", "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=1.0.0-beta.1", "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=1.0.0-beta.1", "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-arrow = { "version" = "=1.0.0-beta.1", "tag" = "v1.0.0-beta.1", "git" = "https://github.com/lancedb/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "56.2", optional = false }
@@ -63,17 +63,3 @@ regex = "1.10"
lazy_static = "1"
semver = "1.0.25"
chrono = "0.4"
[profile.ci]
debug = "line-tables-only"
inherits = "dev"
incremental = false
# This rule applies to every package except workspace members (dependencies
# such as `arrow` and `tokio`). It disables debug info and related features on
# dependencies so their binaries stay smaller, improving cache reuse.
[profile.ci.package."*"]
debug = false
debug-assertions = false
strip = "debuginfo"
incremental = false

View File

@@ -1,208 +0,0 @@
#!/usr/bin/env python3
"""Determine whether a newer Lance tag exists and expose results for CI."""
from __future__ import annotations
import argparse
import json
import os
import re
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, List, Sequence, Tuple, Union
try: # Python >=3.11
import tomllib # type: ignore
except ModuleNotFoundError: # pragma: no cover - fallback for older Python
import tomli as tomllib # type: ignore
LANCE_REPO = "lance-format/lance"
SEMVER_RE = re.compile(
r"^\s*(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)"
r"(?:-(?P<prerelease>[0-9A-Za-z.-]+))?"
r"(?:\+[0-9A-Za-z.-]+)?\s*$"
)
@dataclass(frozen=True)
class SemVer:
major: int
minor: int
patch: int
prerelease: Tuple[Union[int, str], ...]
def __lt__(self, other: "SemVer") -> bool: # pragma: no cover - simple comparison
if (self.major, self.minor, self.patch) != (other.major, other.minor, other.patch):
return (self.major, self.minor, self.patch) < (other.major, other.minor, other.patch)
if self.prerelease == other.prerelease:
return False
if not self.prerelease:
return False # release > anything else
if not other.prerelease:
return True
for left, right in zip(self.prerelease, other.prerelease):
if left == right:
continue
if isinstance(left, int) and isinstance(right, int):
return left < right
if isinstance(left, int):
return True
if isinstance(right, int):
return False
return str(left) < str(right)
return len(self.prerelease) < len(other.prerelease)
def __eq__(self, other: object) -> bool: # pragma: no cover - trivial
if not isinstance(other, SemVer):
return NotImplemented
return (
self.major == other.major
and self.minor == other.minor
and self.patch == other.patch
and self.prerelease == other.prerelease
)
def parse_semver(raw: str) -> SemVer:
match = SEMVER_RE.match(raw)
if not match:
raise ValueError(f"Unsupported version format: {raw}")
prerelease = match.group("prerelease")
parts: Tuple[Union[int, str], ...] = ()
if prerelease:
parsed: List[Union[int, str]] = []
for piece in prerelease.split("."):
if piece.isdigit():
parsed.append(int(piece))
else:
parsed.append(piece)
parts = tuple(parsed)
return SemVer(
major=int(match.group("major")),
minor=int(match.group("minor")),
patch=int(match.group("patch")),
prerelease=parts,
)
@dataclass
class TagInfo:
tag: str # e.g. v1.0.0-beta.2
version: str # e.g. 1.0.0-beta.2
semver: SemVer
def run_command(cmd: Sequence[str]) -> str:
result = subprocess.run(cmd, capture_output=True, text=True, check=False)
if result.returncode != 0:
raise RuntimeError(
f"Command {' '.join(cmd)} failed with {result.returncode}: {result.stderr.strip()}"
)
return result.stdout.strip()
def fetch_remote_tags() -> List[TagInfo]:
output = run_command(
[
"gh",
"api",
"-X",
"GET",
f"repos/{LANCE_REPO}/git/refs/tags",
"--paginate",
"--jq",
".[].ref",
]
)
tags: List[TagInfo] = []
for line in output.splitlines():
ref = line.strip()
if not ref.startswith("refs/tags/v"):
continue
tag = ref.split("refs/tags/")[-1]
version = tag.lstrip("v")
try:
tags.append(TagInfo(tag=tag, version=version, semver=parse_semver(version)))
except ValueError:
continue
if not tags:
raise RuntimeError("No Lance tags could be parsed from GitHub API output")
return tags
def read_current_version(repo_root: Path) -> str:
cargo_path = repo_root / "Cargo.toml"
with cargo_path.open("rb") as fh:
data = tomllib.load(fh)
try:
deps = data["workspace"]["dependencies"]
entry = deps["lance"]
except KeyError as exc: # pragma: no cover - configuration guard
raise RuntimeError("Failed to locate workspace.dependencies.lance in Cargo.toml") from exc
if isinstance(entry, str):
raw_version = entry
elif isinstance(entry, dict):
raw_version = entry.get("version", "")
else: # pragma: no cover - defensive
raise RuntimeError("Unexpected lance dependency format")
raw_version = raw_version.strip()
if not raw_version:
raise RuntimeError("lance dependency does not declare a version")
return raw_version.lstrip("=")
def determine_latest_tag(tags: Iterable[TagInfo]) -> TagInfo:
return max(tags, key=lambda tag: tag.semver)
def write_outputs(args: argparse.Namespace, payload: dict) -> None:
target = getattr(args, "github_output", None)
if not target:
return
with open(target, "a", encoding="utf-8") as handle:
for key, value in payload.items():
handle.write(f"{key}={value}\n")
def main(argv: Sequence[str] | None = None) -> int:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--repo-root",
default=Path(__file__).resolve().parents[1],
type=Path,
help="Path to the lancedb repository root",
)
parser.add_argument(
"--github-output",
default=os.environ.get("GITHUB_OUTPUT"),
help="Optional file path for writing GitHub Action outputs",
)
args = parser.parse_args(argv)
repo_root = Path(args.repo_root)
current_version = read_current_version(repo_root)
current_semver = parse_semver(current_version)
tags = fetch_remote_tags()
latest = determine_latest_tag(tags)
needs_update = latest.semver > current_semver
payload = {
"current_version": current_version,
"current_tag": f"v{current_version}",
"latest_version": latest.version,
"latest_tag": latest.tag,
"needs_update": "true" if needs_update else "false",
}
print(json.dumps(payload))
write_outputs(args, payload)
return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -3,8 +3,6 @@ import re
import sys
import json
LANCE_GIT_URL = "https://github.com/lance-format/lance.git"
def run_command(command: str) -> str:
"""
@@ -31,7 +29,7 @@ def get_latest_stable_version() -> str:
def get_latest_preview_version() -> str:
lance_tags = run_command(
f"git ls-remote --tags {LANCE_GIT_URL} | grep 'refs/tags/v[0-9beta.-]\\+$'"
"git ls-remote --tags https://github.com/lancedb/lance.git | grep 'refs/tags/v[0-9beta.-]\\+$'"
).splitlines()
lance_tags = (
tag.split("refs/tags/")[1]
@@ -178,8 +176,8 @@ def set_stable_version(version: str):
def set_preview_version(version: str):
"""
Sets lines to
lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.29.0-beta.2", "git" = LANCE_GIT_URL }
lance-io = { "version" = "=0.29.0", default-features = false, "tag" = "v0.29.0-beta.2", "git" = LANCE_GIT_URL }
lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.29.0", default-features = false, "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
...
"""
@@ -196,7 +194,7 @@ def set_preview_version(version: str):
config["features"] = features
config["tag"] = f"v{version}"
config["git"] = LANCE_GIT_URL
config["git"] = "https://github.com/lancedb/lance.git"
return dict_to_toml_line(package_name, config)

View File

@@ -1,7 +1,7 @@
# Contributing to LanceDB Typescript
This document outlines the process for contributing to LanceDB Typescript.
For general contribution guidelines, see [CONTRIBUTING.md](../CONTRIBUTING.md).
For general contribution guidelines, see [CONTRIBUTING.md](../../../../CONTRIBUTING.md).
## Project layout

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.4-beta.0</version>
<version>0.22.3-final.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.4-beta.0</version>
<version>0.22.3-final.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.4-beta.0</version>
<version>0.22.3-final.0</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.22.4-beta.0"
version = "0.22.3"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.22.4-beta.0",
"version": "0.22.3",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.22.4-beta.0",
"version": "0.22.3",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.22.4-beta.0",
"version": "0.22.3",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.22.4-beta.0",
"version": "0.22.3",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.22.4-beta.0",
"version": "0.22.3",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.22.4-beta.0",
"version": "0.22.3",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.22.4-beta.0",
"version": "0.22.3",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.22.4-beta.0",
"version": "0.22.3",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.22.4-beta.0",
"version": "0.22.3",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.22.4-beta.0",
"version": "0.22.3",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.22.4-beta.0",
"version": "0.22.3",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",
@@ -73,10 +73,8 @@
"scripts": {
"artifacts": "napi artifacts",
"build:debug": "napi build --platform --no-const-enum --dts ../lancedb/native.d.ts --js ../lancedb/native.js lancedb",
"postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
"build:release": "napi build --platform --no-const-enum --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
"postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
"build": "npm run build:debug && npm run tsc",
"build": "npm run build:debug && npm run tsc && shx cp lancedb/*.node dist/",
"build-release": "npm run build:release && npm run tsc",
"tsc": "tsc -b",
"posttsc": "shx cp lancedb/native.d.ts dist/native.d.ts",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.25.4-beta.1"
current_version = "0.25.3"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.25.4-beta.1"
version = "0.25.3"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true
@@ -17,8 +17,6 @@ crate-type = ["cdylib"]
arrow = { version = "56.2", features = ["pyarrow"] }
async-trait = "0.1"
lancedb = { path = "../rust/lancedb", default-features = false }
lance-core.workspace = true
lance-io.workspace = true
env_logger.workspace = true
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.25", features = [
@@ -27,7 +25,6 @@ pyo3-async-runtimes = { version = "0.25", features = [
] }
pin-project = "1.1.5"
futures.workspace = true
snafu.workspace = true
tokio = { version = "1.40", features = ["sync"] }
[build-dependencies]

View File

@@ -10,7 +10,7 @@ dependencies = [
"pyarrow>=16",
"pydantic>=1.10",
"tqdm>=4.27.0",
"lance-namespace>=0.0.21"
"lance-namespace>=0.0.16"
]
description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
@@ -59,7 +59,7 @@ tests = [
"polars>=0.19, <=1.3.0",
"tantivy",
"pyarrow-stubs",
"pylance>=1.0.0b2",
"pylance>=0.25",
"requests",
"datafusion",
]

View File

@@ -14,7 +14,6 @@ __version__ = importlib.metadata.version("lancedb")
from ._lancedb import connect as lancedb_connect
from .common import URI, sanitize_uri
from .db import AsyncConnection, DBConnection, LanceDBConnection
from .io import StorageOptionsProvider
from .remote import ClientConfig
from .remote.db import RemoteDBConnection
from .schema import vector
@@ -234,7 +233,6 @@ __all__ = [
"LanceNamespaceDBConnection",
"RemoteDBConnection",
"Session",
"StorageOptionsProvider",
"Table",
"__version__",
]

View File

@@ -4,7 +4,6 @@ from typing import Dict, List, Optional, Tuple, Any, TypedDict, Union, Literal
import pyarrow as pa
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
from .io import StorageOptionsProvider
from .remote import ClientConfig
class Session:
@@ -45,8 +44,6 @@ class Connection(object):
data: pa.RecordBatchReader,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
location: Optional[str] = None,
) -> Table: ...
async def create_empty_table(
self,
@@ -55,17 +52,13 @@ class Connection(object):
schema: pa.Schema,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
location: Optional[str] = None,
) -> Table: ...
async def open_table(
self,
name: str,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
index_cache_size: Optional[int] = None,
location: Optional[str] = None,
) -> Table: ...
async def clone_table(
self,

View File

@@ -45,7 +45,6 @@ if TYPE_CHECKING:
from ._lancedb import Connection as LanceDbConnection
from .common import DATA, URI
from .embeddings import EmbeddingFunctionConfig
from .io import StorageOptionsProvider
from ._lancedb import Session
@@ -144,7 +143,6 @@ class DBConnection(EnforceOverrides):
*,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
data_storage_version: Optional[str] = None,
enable_v2_manifest_paths: Optional[bool] = None,
) -> Table:
@@ -310,7 +308,6 @@ class DBConnection(EnforceOverrides):
*,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
index_cache_size: Optional[int] = None,
) -> Table:
"""Open a Lance Table in the database.
@@ -466,18 +463,6 @@ class LanceDBConnection(DBConnection):
is_local = isinstance(uri, Path) or scheme == "file"
if is_local:
if isinstance(uri, str):
# Strip file:// or file:/ scheme if present
# file:///path becomes file:/path after URL normalization
if uri.startswith("file://"):
uri = uri[7:] # Remove "file://"
elif uri.startswith("file:/"):
uri = uri[5:] # Remove "file:"
if sys.platform == "win32":
# On Windows, a path like /C:/path should become C:/path
if len(uri) >= 3 and uri[0] == "/" and uri[2] == ":":
uri = uri[1:]
uri = Path(uri)
uri = uri.expanduser().absolute()
Path(uri).mkdir(parents=True, exist_ok=True)
@@ -640,7 +625,6 @@ class LanceDBConnection(DBConnection):
*,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
data_storage_version: Optional[str] = None,
enable_v2_manifest_paths: Optional[bool] = None,
) -> LanceTable:
@@ -671,7 +655,6 @@ class LanceDBConnection(DBConnection):
embedding_functions=embedding_functions,
namespace=namespace,
storage_options=storage_options,
storage_options_provider=storage_options_provider,
)
return tbl
@@ -682,7 +665,6 @@ class LanceDBConnection(DBConnection):
*,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
index_cache_size: Optional[int] = None,
) -> LanceTable:
"""Open a table in the database.
@@ -714,7 +696,6 @@ class LanceDBConnection(DBConnection):
name,
namespace=namespace,
storage_options=storage_options,
storage_options_provider=storage_options_provider,
index_cache_size=index_cache_size,
)
@@ -996,11 +977,9 @@ class AsyncConnection(object):
on_bad_vectors: Optional[str] = None,
fill_value: Optional[float] = None,
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
*,
namespace: List[str] = [],
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
location: Optional[str] = None,
) -> AsyncTable:
"""Create an [AsyncTable][lancedb.table.AsyncTable] in the database.
@@ -1191,8 +1170,6 @@ class AsyncConnection(object):
schema,
namespace=namespace,
storage_options=storage_options,
storage_options_provider=storage_options_provider,
location=location,
)
else:
data = data_to_reader(data, schema)
@@ -1202,8 +1179,6 @@ class AsyncConnection(object):
data,
namespace=namespace,
storage_options=storage_options,
storage_options_provider=storage_options_provider,
location=location,
)
return AsyncTable(new_table)
@@ -1214,9 +1189,7 @@ class AsyncConnection(object):
*,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
index_cache_size: Optional[int] = None,
location: Optional[str] = None,
) -> AsyncTable:
"""Open a Lance Table in the database.
@@ -1245,10 +1218,6 @@ class AsyncConnection(object):
This cache applies to the entire opened table, across all indices.
Setting this value higher will increase performance on larger datasets
at the expense of more RAM
location: str, optional
The explicit location (URI) of the table. If provided, the table will be
opened from this location instead of deriving it from the database URI
and table name.
Returns
-------
@@ -1258,9 +1227,7 @@ class AsyncConnection(object):
name,
namespace=namespace,
storage_options=storage_options,
storage_options_provider=storage_options_provider,
index_cache_size=index_cache_size,
location=location,
)
return AsyncTable(table)

View File

@@ -1,71 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
"""I/O utilities and interfaces for LanceDB."""
from abc import ABC, abstractmethod
from typing import Dict
class StorageOptionsProvider(ABC):
"""Abstract base class for providing storage options to LanceDB tables.
Storage options providers enable automatic credential refresh for cloud
storage backends (e.g., AWS S3, Azure Blob Storage, GCS). When credentials
have an expiration time, the provider's fetch_storage_options() method will
be called periodically to get fresh credentials before they expire.
Example
-------
>>> class MyProvider(StorageOptionsProvider):
... def fetch_storage_options(self) -> Dict[str, str]:
... # Fetch fresh credentials from your credential manager
... return {
... "aws_access_key_id": "...",
... "aws_secret_access_key": "...",
... "expires_at_millis": "1234567890000" # Optional
... }
"""
@abstractmethod
def fetch_storage_options(self) -> Dict[str, str]:
"""Fetch fresh storage credentials.
This method is called by LanceDB when credentials need to be refreshed.
If the returned dictionary contains an "expires_at_millis" key with a
Unix timestamp in milliseconds, LanceDB will automatically refresh the
credentials before that time. If the key is not present, credentials
are assumed to not expire.
Returns
-------
Dict[str, str]
Dictionary containing cloud storage credentials and optionally an
expiration time:
- "expires_at_millis" (optional): Unix timestamp in milliseconds when
credentials expire
- Provider-specific credential keys (e.g., aws_access_key_id,
aws_secret_access_key, etc.)
Raises
------
RuntimeError
If credentials cannot be fetched or are invalid
"""
pass
def provider_id(self) -> str:
"""Return a human-readable unique identifier for this provider instance.
This identifier is used for caching and equality comparison. Two providers
with the same ID will share the same cached object store connection.
The default implementation uses the class name and string representation.
Override this method if you need custom identification logic.
Returns
-------
str
A unique identifier for this provider instance
"""
return f"{self.__class__.__name__} {{ repr: {str(self)!r} }}"

View File

@@ -10,40 +10,42 @@ through a namespace abstraction.
from __future__ import annotations
import sys
from typing import Dict, Iterable, List, Optional, Union
import os
import sys
if sys.version_info >= (3, 12):
from typing import override
else:
from overrides import override
from datetime import timedelta
import pyarrow as pa
from lancedb.db import DBConnection, LanceDBConnection
from lancedb.io import StorageOptionsProvider
from lancedb.db import DBConnection
from lancedb.table import LanceTable, Table
from lancedb.util import validate_table_name
from lancedb.common import DATA
from lancedb.pydantic import LanceModel
from lancedb.embeddings import EmbeddingFunctionConfig
from ._lancedb import Session
from lancedb.common import validate_schema
from lancedb.table import sanitize_create_table
from lance_namespace import LanceNamespace, connect as namespace_connect
from lance_namespace_urllib3_client.models import (
ListTablesRequest,
DescribeTableRequest,
CreateTableRequest,
DropTableRequest,
ListNamespacesRequest,
CreateNamespaceRequest,
DropNamespaceRequest,
CreateEmptyTableRequest,
JsonArrowSchema,
JsonArrowField,
JsonArrowDataType,
)
import pyarrow as pa
from datetime import timedelta
from lancedb.pydantic import LanceModel
from lancedb.common import DATA
from lancedb.embeddings import EmbeddingFunctionConfig
from ._lancedb import Session
def _convert_pyarrow_type_to_json(arrow_type: pa.DataType) -> JsonArrowDataType:
"""Convert PyArrow DataType to JsonArrowDataType."""
@@ -99,97 +101,7 @@ def _convert_pyarrow_schema_to_json(schema: pa.Schema) -> JsonArrowSchema:
)
fields.append(json_field)
# decode binary metadata to strings for JSON
meta = None
if schema.metadata:
meta = {
k.decode("utf-8"): v.decode("utf-8") for k, v in schema.metadata.items()
}
return JsonArrowSchema(fields=fields, metadata=meta)
class LanceNamespaceStorageOptionsProvider(StorageOptionsProvider):
"""Storage options provider that fetches storage options from a LanceNamespace.
This provider automatically fetches fresh storage options by calling the
namespace's describe_table() method, which returns both the table location
and time-limited storage options. This enables automatic credential refresh
for tables accessed through namespace connections.
Parameters
----------
namespace : LanceNamespace
The namespace instance with a describe_table() method
table_id : List[str]
The table identifier (namespace path + table name)
Examples
--------
>>> from lance_namespace import connect as namespace_connect
>>> namespace = namespace_connect("rest", {"url": "https://..."})
>>> provider = LanceNamespaceStorageOptionsProvider(
... namespace=namespace,
... table_id=["my_namespace", "my_table"]
... )
>>> options = provider.fetch_storage_options()
"""
def __init__(self, namespace: LanceNamespace, table_id: List[str]):
"""Initialize with namespace and table ID.
Parameters
----------
namespace : LanceNamespace
The namespace instance with a describe_table() method
table_id : List[str]
The table identifier
"""
self._namespace = namespace
self._table_id = table_id
def fetch_storage_options(self) -> Dict[str, str]:
"""Fetch storage options from the namespace.
This calls namespace.describe_table() to get the latest storage options
and their expiration time.
Returns
-------
Dict[str, str]
Flat dictionary of string key-value pairs containing storage options.
May include "expires_at_millis" key for automatic refresh.
Raises
------
RuntimeError
If namespace does not return storage_options
"""
request = DescribeTableRequest(id=self._table_id, version=None)
response = self._namespace.describe_table(request)
storage_options = response.storage_options
if storage_options is None:
raise RuntimeError(
"Namespace did not return storage_options. "
"Ensure the namespace supports storage options providing."
)
# Return the storage_options directly - it's already a flat Map<String, String>
return storage_options
def provider_id(self) -> str:
"""Return a human-readable unique identifier for this provider instance."""
# Try to call namespace_id() if available (lance-namespace >= 0.0.20)
if hasattr(self._namespace, "namespace_id"):
namespace_id = self._namespace.namespace_id()
else:
# Fallback for older namespace versions
namespace_id = str(self._namespace)
return (
f"LanceNamespaceStorageOptionsProvider {{ "
f"namespace: {namespace_id}, table_id: {self._table_id!r} }}"
)
return JsonArrowSchema(fields=fields, metadata=schema.metadata)
class LanceNamespaceDBConnection(DBConnection):
@@ -254,7 +166,6 @@ class LanceNamespaceDBConnection(DBConnection):
*,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
data_storage_version: Optional[str] = None,
enable_v2_manifest_paths: Optional[bool] = None,
) -> Table:
@@ -262,85 +173,49 @@ class LanceNamespaceDBConnection(DBConnection):
raise ValueError("mode must be either 'create' or 'overwrite'")
validate_table_name(name)
# Get location from namespace
# TODO: support passing data
if data is not None:
raise ValueError(
"create_table currently only supports creating empty tables (data=None)"
)
# Prepare schema
metadata = None
if embedding_functions is not None:
from lancedb.embeddings.registry import EmbeddingFunctionRegistry
registry = EmbeddingFunctionRegistry.get_instance()
metadata = registry.get_table_metadata(embedding_functions)
data, schema = sanitize_create_table(
data, schema, metadata, on_bad_vectors, fill_value
)
validate_schema(schema)
# Convert PyArrow schema to JsonArrowSchema
json_schema = _convert_pyarrow_schema_to_json(schema)
# Create table request with namespace
table_id = namespace + [name]
request = CreateTableRequest(id=table_id, var_schema=json_schema)
# Step 1: Get the table location and storage options from namespace
# In overwrite mode, if table exists, use describe_table to get
# existing location. Otherwise, call create_empty_table to reserve
# a new location
location = None
namespace_storage_options = None
if mode.lower() == "overwrite":
# Try to describe the table first to see if it exists
try:
describe_request = DescribeTableRequest(id=table_id)
describe_response = self._ns.describe_table(describe_request)
location = describe_response.location
namespace_storage_options = describe_response.storage_options
except Exception:
# Table doesn't exist, will create a new one below
pass
# Create empty Arrow IPC stream bytes
import pyarrow.ipc as ipc
import io
if location is None:
# Table doesn't exist or mode is "create", reserve a new location
create_empty_request = CreateEmptyTableRequest(
id=table_id,
location=None,
properties=self.storage_options if self.storage_options else None,
)
create_empty_response = self._ns.create_empty_table(create_empty_request)
if not create_empty_response.location:
raise ValueError(
"Table location is missing from create_empty_table response"
)
location = create_empty_response.location
namespace_storage_options = create_empty_response.storage_options
# Merge storage options: self.storage_options < user options < namespace options
merged_storage_options = dict(self.storage_options)
if storage_options:
merged_storage_options.update(storage_options)
if namespace_storage_options:
merged_storage_options.update(namespace_storage_options)
# Step 2: Create table using LanceTable.create with the location
# We need a temporary connection for the LanceTable.create method
temp_conn = LanceDBConnection(
location, # Use the actual location as the connection URI
read_consistency_interval=self.read_consistency_interval,
storage_options=merged_storage_options,
session=self.session,
empty_table = pa.Table.from_arrays(
[pa.array([], type=field.type) for field in schema], schema=schema
)
buffer = io.BytesIO()
with ipc.new_stream(buffer, schema) as writer:
writer.write_table(empty_table)
request_data = buffer.getvalue()
# Create a storage options provider if not provided by user
# Only create if namespace returned storage_options (not None)
if storage_options_provider is None and namespace_storage_options is not None:
storage_options_provider = LanceNamespaceStorageOptionsProvider(
namespace=self._ns,
table_id=table_id,
)
tbl = LanceTable.create(
temp_conn,
name,
data,
schema,
mode=mode,
exist_ok=exist_ok,
on_bad_vectors=on_bad_vectors,
fill_value=fill_value,
embedding_functions=embedding_functions,
namespace=namespace,
storage_options=merged_storage_options,
storage_options_provider=storage_options_provider,
location=location,
self._ns.create_table(request, request_data)
return self.open_table(
name, namespace=namespace, storage_options=storage_options
)
return tbl
@override
def open_table(
self,
@@ -348,34 +223,21 @@ class LanceNamespaceDBConnection(DBConnection):
*,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
index_cache_size: Optional[int] = None,
) -> Table:
table_id = namespace + [name]
request = DescribeTableRequest(id=table_id)
response = self._ns.describe_table(request)
# Merge storage options: self.storage_options < user options < namespace options
merged_storage_options = dict(self.storage_options)
merged_storage_options = dict()
if storage_options:
merged_storage_options.update(storage_options)
if response.storage_options:
merged_storage_options.update(response.storage_options)
# Create a storage options provider if not provided by user
# Only create if namespace returned storage_options (not None)
if storage_options_provider is None and response.storage_options is not None:
storage_options_provider = LanceNamespaceStorageOptionsProvider(
namespace=self._ns,
table_id=table_id,
)
return self._lance_table_from_uri(
name,
response.location,
namespace=namespace,
storage_options=merged_storage_options,
storage_options_provider=storage_options_provider,
index_cache_size=index_cache_size,
)
@@ -468,32 +330,33 @@ class LanceNamespaceDBConnection(DBConnection):
def _lance_table_from_uri(
self,
name: str,
table_uri: str,
*,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
index_cache_size: Optional[int] = None,
) -> LanceTable:
# Open a table directly from a URI using the location parameter
# Note: storage_options should already be merged by the caller
# Extract the base path and table name from the URI
if table_uri.endswith(".lance"):
base_path = os.path.dirname(table_uri)
table_name = os.path.basename(table_uri)[:-6] # Remove .lance
else:
raise ValueError(f"Invalid table URI: {table_uri}")
from lancedb.db import LanceDBConnection
temp_conn = LanceDBConnection(
table_uri, # Use the table location as the connection URI
base_path,
read_consistency_interval=self.read_consistency_interval,
storage_options=storage_options if storage_options is not None else {},
storage_options={**self.storage_options, **(storage_options or {})},
session=self.session,
)
# Open the table using the temporary connection with the location parameter
# Open the table using the temporary connection
return LanceTable.open(
temp_conn,
name,
namespace=namespace,
table_name,
storage_options=storage_options,
storage_options_provider=storage_options_provider,
index_cache_size=index_cache_size,
location=table_uri,
)

View File

@@ -75,7 +75,6 @@ from .index import lang_mapping
if TYPE_CHECKING:
from .db import LanceDBConnection
from .io import StorageOptionsProvider
from ._lancedb import (
Table as LanceDBTable,
OptimizeStats,
@@ -1710,9 +1709,7 @@ class LanceTable(Table):
*,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
index_cache_size: Optional[int] = None,
location: Optional[str] = None,
_async: AsyncTable = None,
):
self._conn = connection
@@ -1725,9 +1722,7 @@ class LanceTable(Table):
name,
namespace=namespace,
storage_options=storage_options,
storage_options_provider=storage_options_provider,
index_cache_size=index_cache_size,
location=location,
)
)
@@ -1735,18 +1730,6 @@ class LanceTable(Table):
def name(self) -> str:
return self._table.name
@property
def namespace(self) -> List[str]:
"""Return the namespace path of the table."""
return self._namespace
@property
def id(self) -> str:
"""Return the full identifier of the table (namespace$name)."""
if self._namespace:
return "$".join(self._namespace + [self.name])
return self.name
@classmethod
def from_inner(cls, tbl: LanceDBTable):
from .db import LanceDBConnection
@@ -1760,26 +1743,8 @@ class LanceTable(Table):
)
@classmethod
def open(
cls,
db,
name,
*,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
index_cache_size: Optional[int] = None,
location: Optional[str] = None,
):
tbl = cls(
db,
name,
namespace=namespace,
storage_options=storage_options,
storage_options_provider=storage_options_provider,
index_cache_size=index_cache_size,
location=location,
)
def open(cls, db, name, *, namespace: List[str] = [], **kwargs):
tbl = cls(db, name, namespace=namespace, **kwargs)
# check the dataset exists
try:
@@ -2620,10 +2585,8 @@ class LanceTable(Table):
*,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str | bool]] = None,
storage_options_provider: Optional["StorageOptionsProvider"] = None,
data_storage_version: Optional[str] = None,
enable_v2_manifest_paths: Optional[bool] = None,
location: Optional[str] = None,
):
"""
Create a new table.
@@ -2715,8 +2678,6 @@ class LanceTable(Table):
embedding_functions=embedding_functions,
namespace=namespace,
storage_options=storage_options,
storage_options_provider=storage_options_provider,
location=location,
)
)
return self

View File

@@ -781,6 +781,58 @@ def test_local_drop_namespace_not_supported(tmp_path):
db.drop_namespace(["test_namespace"])
def test_local_table_operations_with_namespace_raise_error(tmp_path):
"""
Test that table operations with namespace parameter
raise ValueError in local mode.
"""
db = lancedb.connect(tmp_path)
# Create some test data
data = [{"vector": [1.0, 2.0], "item": "test"}]
schema = pa.schema(
[pa.field("vector", pa.list_(pa.float32(), 2)), pa.field("item", pa.string())]
)
# Test create_table with namespace - should raise ValueError
with pytest.raises(
NotImplementedError,
match="Namespace parameter is not supported for listing database",
):
db.create_table(
"test_table_with_ns", data=data, schema=schema, namespace=["test_ns"]
)
# Create table normally for other tests
db.create_table("test_table", data=data, schema=schema)
assert "test_table" in db.table_names()
# Test open_table with namespace - should raise ValueError
with pytest.raises(
NotImplementedError,
match="Namespace parameter is not supported for listing database",
):
db.open_table("test_table", namespace=["test_ns"])
# Test table_names with namespace - should raise ValueError
with pytest.raises(
NotImplementedError,
match="Namespace parameter is not supported for listing database",
):
list(db.table_names(namespace=["test_ns"]))
# Test drop_table with namespace - should raise ValueError
with pytest.raises(
NotImplementedError,
match="Namespace parameter is not supported for listing database",
):
db.drop_table("test_table", namespace=["test_ns"])
# Test table_names without namespace - should work normally
tables_root = list(db.table_names())
assert "test_table" in tables_root
def test_clone_table_latest_version(tmp_path):
"""Test cloning a table with the latest version (default behavior)"""
import os

View File

@@ -32,7 +32,6 @@ import numpy as np
import pyarrow as pa
import pandas as pd
import pytest
import pytest_asyncio
from utils import exception_output
pytest.importorskip("lancedb.fts")
@@ -91,7 +90,7 @@ def table(tmp_path) -> ldb.table.LanceTable:
return table
@pytest_asyncio.fixture
@pytest.fixture
async def async_table(tmp_path) -> ldb.table.AsyncTable:
# Use local random state to avoid affecting other tests
rng = np.random.RandomState(42)
@@ -254,7 +253,7 @@ def test_search_fts(table, use_tantivy):
@pytest.mark.asyncio
async def test_fts_select_async(async_table):
tbl = async_table
tbl = await async_table
await tbl.create_index("text", config=FTS())
await tbl.create_index("text2", config=FTS())
results = (
@@ -339,6 +338,7 @@ def test_search_fts_phrase_query(table):
@pytest.mark.asyncio
async def test_search_fts_phrase_query_async(async_table):
async_table = await async_table
await async_table.create_index("text", config=FTS(with_position=False))
try:
phrase_results = (
@@ -393,6 +393,7 @@ def test_search_fts_specify_column(table):
@pytest.mark.asyncio
async def test_search_fts_async(async_table):
async_table = await async_table
await async_table.create_index("text", config=FTS())
results = await async_table.query().nearest_to_text("puppy").limit(5).to_list()
assert len(results) == 5
@@ -423,6 +424,7 @@ async def test_search_fts_async(async_table):
@pytest.mark.asyncio
async def test_search_fts_specify_column_async(async_table):
async_table = await async_table
await async_table.create_index("text", config=FTS())
await async_table.create_index("text2", config=FTS())

View File

@@ -5,39 +5,352 @@
import tempfile
import shutil
from typing import Dict, Optional
import pytest
import pyarrow as pa
import lancedb
from lance_namespace.namespace import NATIVE_IMPLS, LanceNamespace
from lance_namespace_urllib3_client.models import (
ListTablesRequest,
ListTablesResponse,
DescribeTableRequest,
DescribeTableResponse,
RegisterTableRequest,
RegisterTableResponse,
DeregisterTableRequest,
DeregisterTableResponse,
CreateTableRequest,
CreateTableResponse,
DropTableRequest,
DropTableResponse,
ListNamespacesRequest,
ListNamespacesResponse,
CreateNamespaceRequest,
CreateNamespaceResponse,
DropNamespaceRequest,
DropNamespaceResponse,
)
class TempNamespace(LanceNamespace):
"""A simple dictionary-backed namespace for testing."""
# Class-level storage to persist table registry across instances
_global_registry: Dict[str, Dict[str, str]] = {}
# Class-level storage for namespaces (supporting 1-level namespace)
_global_namespaces: Dict[str, set] = {}
def __init__(self, **properties):
"""Initialize the test namespace.
Args:
root: The root directory for tables (optional)
**properties: Additional configuration properties
"""
self.config = TempNamespaceConfig(properties)
# Use the root as a key to maintain separate registries per root
root = self.config.root
if root not in self._global_registry:
self._global_registry[root] = {}
if root not in self._global_namespaces:
self._global_namespaces[root] = set()
self.tables = self._global_registry[root] # Reference to shared registry
self.namespaces = self._global_namespaces[
root
] # Reference to shared namespaces
def namespace_id(self) -> str:
"""Return a human-readable unique identifier for this namespace instance.
Returns:
A unique identifier string based on the root directory
"""
return f"TempNamespace {{ root: '{self.config.root}' }}"
def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
"""List all tables in the namespace."""
if not request.id:
# List all tables in root namespace
tables = [name for name in self.tables.keys() if "." not in name]
else:
# List tables in specific namespace (1-level only)
if len(request.id) == 1:
namespace_name = request.id[0]
prefix = f"{namespace_name}."
tables = [
name[len(prefix) :]
for name in self.tables.keys()
if name.startswith(prefix)
]
else:
# Multi-level namespaces not supported
raise ValueError("Only 1-level namespaces are supported")
return ListTablesResponse(tables=tables)
def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
"""Describe a table by returning its location."""
if not request.id:
raise ValueError("Invalid table ID")
if len(request.id) == 1:
# Root namespace table
table_name = request.id[0]
elif len(request.id) == 2:
# Namespaced table (1-level namespace)
namespace_name, table_name = request.id
table_name = f"{namespace_name}.{table_name}"
else:
raise ValueError("Only 1-level namespaces are supported")
if table_name not in self.tables:
raise RuntimeError(f"Table does not exist: {table_name}")
table_uri = self.tables[table_name]
return DescribeTableResponse(location=table_uri)
def create_table(
self, request: CreateTableRequest, request_data: bytes
) -> CreateTableResponse:
"""Create a table in the namespace."""
if not request.id:
raise ValueError("Invalid table ID")
if len(request.id) == 1:
# Root namespace table
table_name = request.id[0]
table_uri = f"{self.config.root}/{table_name}.lance"
elif len(request.id) == 2:
# Namespaced table (1-level namespace)
namespace_name, base_table_name = request.id
# Add namespace to our namespace set
self.namespaces.add(namespace_name)
table_name = f"{namespace_name}.{base_table_name}"
table_uri = f"{self.config.root}/{namespace_name}/{base_table_name}.lance"
else:
raise ValueError("Only 1-level namespaces are supported")
# Check if table already exists
if table_name in self.tables:
if request.mode == "overwrite":
# Drop existing table for overwrite mode
del self.tables[table_name]
else:
raise RuntimeError(f"Table already exists: {table_name}")
# Parse the Arrow IPC stream to get the schema and create the actual table
import pyarrow.ipc as ipc
import io
import lance
import os
# Create directory if needed for namespaced tables
os.makedirs(os.path.dirname(table_uri), exist_ok=True)
# Read the IPC stream
reader = ipc.open_stream(io.BytesIO(request_data))
table = reader.read_all()
# Create the actual Lance table
lance.write_dataset(table, table_uri)
# Store the table mapping
self.tables[table_name] = table_uri
return CreateTableResponse(location=table_uri)
def drop_table(self, request: DropTableRequest) -> DropTableResponse:
    """Drop a table and best-effort delete its backing dataset files."""
    import os
    import shutil

    ident = request.id
    if not ident:
        raise ValueError("Invalid table ID")
    if len(ident) == 1:
        # Root-level table
        table_name = ident[0]
    elif len(ident) == 2:
        # 1-level namespaced table, keyed as "<namespace>.<table>"
        table_name = ".".join(ident)
    else:
        raise ValueError("Only 1-level namespaces are supported")

    if table_name not in self.tables:
        raise RuntimeError(f"Table does not exist: {table_name}")

    # Drop the registry entry and clean up the on-disk dataset.
    table_uri = self.tables.pop(table_name)
    if os.path.exists(table_uri):
        shutil.rmtree(table_uri, ignore_errors=True)
    return DropTableResponse()
def register_table(self, request: RegisterTableRequest) -> RegisterTableResponse:
    """Register an existing table location under a root-level name."""
    ident = request.id
    if not ident or len(ident) != 1:
        raise ValueError("Invalid table ID")
    if not request.location:
        raise ValueError("Table location is required")
    # Only records the mapping; does not touch or validate the files.
    self.tables[ident[0]] = request.location
    return RegisterTableResponse()
def deregister_table(
    self, request: DeregisterTableRequest
) -> DeregisterTableResponse:
    """Remove a root-level table from the registry without deleting its files."""
    ident = request.id
    if not ident or len(ident) != 1:
        raise ValueError("Invalid table ID")
    name = ident[0]
    if name not in self.tables:
        raise RuntimeError(f"Table does not exist: {name}")
    self.tables.pop(name)
    return DeregisterTableResponse()
def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse:
    """List child namespaces of the requested parent."""
    depth = len(request.id) if request.id else 0
    if depth == 0:
        # Root level: every registered 1-level namespace is a child.
        children = list(self.namespaces)
    elif depth == 1:
        # A 1-level namespace cannot have children of its own.
        children = []
    else:
        raise ValueError("Only 1-level namespaces are supported")
    return ListNamespacesResponse(namespaces=children)
def create_namespace(
    self, request: CreateNamespaceRequest
) -> CreateNamespaceResponse:
    """Create a 1-level namespace and its backing directory."""
    import os

    if not request.id:
        raise ValueError("Invalid namespace ID")
    if len(request.id) != 1:
        raise ValueError("Only 1-level namespaces are supported")

    name = request.id[0]
    self.namespaces.add(name)
    # exist_ok makes re-creating an already-present namespace a no-op.
    os.makedirs(f"{self.config.root}/{name}", exist_ok=True)
    return CreateNamespaceResponse()
def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse:
    """Drop an empty 1-level namespace and remove its directory."""
    import os
    import shutil

    if not request.id:
        raise ValueError("Invalid namespace ID")
    if len(request.id) != 1:
        raise ValueError("Only 1-level namespaces are supported")

    namespace_name = request.id[0]
    if namespace_name not in self.namespaces:
        raise RuntimeError(f"Namespace does not exist: {namespace_name}")

    # Refuse to drop a namespace that still contains tables
    # (registry keys are "<namespace>.<table>").
    prefix = f"{namespace_name}."
    if any(name.startswith(prefix) for name in self.tables):
        raise RuntimeError(
            f"Cannot drop namespace '{namespace_name}': contains tables"
        )

    self.namespaces.remove(namespace_name)

    # Best-effort removal of the namespace directory.
    namespace_dir = f"{self.config.root}/{namespace_name}"
    if os.path.exists(namespace_dir):
        shutil.rmtree(namespace_dir, ignore_errors=True)
    return DropNamespaceResponse()
class TempNamespaceConfig:
    """Configuration for TestNamespace."""

    # Property key that selects the namespace root directory.
    ROOT = "root"

    def __init__(self, properties: Optional[Dict[str, str]] = None):
        """Initialize configuration from properties.

        Args:
            properties: Dictionary of configuration properties
        """
        props = properties or {}
        # Default root is /tmp when the property is absent.
        self._root = props.get(self.ROOT, "/tmp")

    @property
    def root(self) -> str:
        """Get the namespace root directory."""
        return self._root
# Register TempNamespace in the native-implementation registry under the
# "temp" scheme, keyed to its fully-qualified module path so it can be
# resolved by name (e.g. connect_namespace("temp", ...) in the tests below).
NATIVE_IMPLS["temp"] = f"{TempNamespace.__module__}.TempNamespace"
class TestNamespaceConnection:
"""Test namespace-based LanceDB connection using DirectoryNamespace."""
"""Test namespace-based LanceDB connection."""
def setup_method(self):
    """Set up test fixtures: a fresh temp dir and a clean TempNamespace state."""
    self.temp_dir = tempfile.mkdtemp()
    # Wipe any state a previous test left registered under this root.
    registry = TempNamespace._global_registry
    if self.temp_dir in registry:
        registry[self.temp_dir].clear()
    namespaces = TempNamespace._global_namespaces
    if self.temp_dir in namespaces:
        namespaces[self.temp_dir].clear()
def teardown_method(self):
    """Clean up test fixtures: global registry entries and the temp directory."""
    # pop(..., None) removes the entry if present, exactly like the guarded del.
    TempNamespace._global_registry.pop(self.temp_dir, None)
    TempNamespace._global_namespaces.pop(self.temp_dir, None)
    shutil.rmtree(self.temp_dir, ignore_errors=True)
def test_connect_namespace_test(self):
"""Test connecting to LanceDB through DirectoryNamespace."""
# Connect using DirectoryNamespace
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
"""Test connecting to LanceDB through TestNamespace."""
# Connect using TestNamespace
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
# Should be a LanceNamespaceDBConnection
assert isinstance(db, lancedb.LanceNamespaceDBConnection)
# Initially no tables in root
# Initially no tables
assert len(list(db.table_names())) == 0
def test_create_table_through_namespace(self):
"""Test creating a table through namespace."""
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
# Create a child namespace first
db.create_namespace(["test_ns"])
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
# Define schema for empty table
schema = pa.schema(
@@ -48,15 +361,13 @@ class TestNamespaceConnection:
]
)
# Create empty table in child namespace
table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
# Create empty table
table = db.create_table("test_table", schema=schema)
assert table is not None
assert table.name == "test_table"
assert table.namespace == ["test_ns"]
assert table.id == "test_ns$test_table"
# Table should appear in child namespace
table_names = list(db.table_names(namespace=["test_ns"]))
# Table should appear in namespace
table_names = list(db.table_names())
assert "test_table" in table_names
assert len(table_names) == 1
@@ -67,26 +378,21 @@ class TestNamespaceConnection:
def test_open_table_through_namespace(self):
"""Test opening an existing table through namespace."""
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
# Create a child namespace first
db.create_namespace(["test_ns"])
# Create a table with schema in child namespace
# Create a table with schema
schema = pa.schema(
[
pa.field("id", pa.int64()),
pa.field("vector", pa.list_(pa.float32(), 2)),
]
)
db.create_table("test_table", schema=schema, namespace=["test_ns"])
db.create_table("test_table", schema=schema)
# Open the table
table = db.open_table("test_table", namespace=["test_ns"])
table = db.open_table("test_table")
assert table is not None
assert table.name == "test_table"
assert table.namespace == ["test_ns"]
assert table.id == "test_ns$test_table"
# Verify empty table with correct schema
result = table.to_pandas()
@@ -95,50 +401,44 @@ class TestNamespaceConnection:
def test_drop_table_through_namespace(self):
"""Test dropping a table through namespace."""
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
# Create a child namespace first
db.create_namespace(["test_ns"])
# Create tables in child namespace
# Create tables
schema = pa.schema(
[
pa.field("id", pa.int64()),
pa.field("vector", pa.list_(pa.float32(), 2)),
]
)
db.create_table("table1", schema=schema, namespace=["test_ns"])
db.create_table("table2", schema=schema, namespace=["test_ns"])
db.create_table("table1", schema=schema)
db.create_table("table2", schema=schema)
# Verify both tables exist in child namespace
table_names = list(db.table_names(namespace=["test_ns"]))
# Verify both tables exist
table_names = list(db.table_names())
assert "table1" in table_names
assert "table2" in table_names
assert len(table_names) == 2
# Drop one table
db.drop_table("table1", namespace=["test_ns"])
db.drop_table("table1")
# Verify only table2 remains
table_names = list(db.table_names(namespace=["test_ns"]))
table_names = list(db.table_names())
assert "table1" not in table_names
assert "table2" in table_names
assert len(table_names) == 1
# Drop the second table
db.drop_table("table2", namespace=["test_ns"])
assert len(list(db.table_names(namespace=["test_ns"]))) == 0
# Test that drop_table works without explicit namespace parameter
db.drop_table("table2")
assert len(list(db.table_names())) == 0
# Should not be able to open dropped table
with pytest.raises(RuntimeError):
db.open_table("table1", namespace=["test_ns"])
db.open_table("table1")
def test_create_table_with_schema(self):
"""Test creating a table with explicit schema through namespace."""
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
# Create a child namespace first
db.create_namespace(["test_ns"])
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
# Define schema
schema = pa.schema(
@@ -149,10 +449,9 @@ class TestNamespaceConnection:
]
)
# Create table with schema in child namespace
table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
# Create table with schema
table = db.create_table("test_table", schema=schema)
assert table is not None
assert table.namespace == ["test_ns"]
# Verify schema
table_schema = table.schema
@@ -162,19 +461,16 @@ class TestNamespaceConnection:
def test_rename_table_not_supported(self):
"""Test that rename_table raises NotImplementedError."""
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
# Create a child namespace first
db.create_namespace(["test_ns"])
# Create a table in child namespace
# Create a table
schema = pa.schema(
[
pa.field("id", pa.int64()),
pa.field("vector", pa.list_(pa.float32(), 2)),
]
)
db.create_table("old_name", schema=schema, namespace=["test_ns"])
db.create_table("old_name", schema=schema)
# Rename should raise NotImplementedError
with pytest.raises(NotImplementedError, match="rename_table is not supported"):
@@ -182,12 +478,9 @@ class TestNamespaceConnection:
def test_drop_all_tables(self):
"""Test dropping all tables through namespace."""
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
# Create a child namespace first
db.create_namespace(["test_ns"])
# Create multiple tables in child namespace
# Create multiple tables
schema = pa.schema(
[
pa.field("id", pa.int64()),
@@ -195,30 +488,27 @@ class TestNamespaceConnection:
]
)
for i in range(3):
db.create_table(f"table{i}", schema=schema, namespace=["test_ns"])
db.create_table(f"table{i}", schema=schema)
# Verify tables exist in child namespace
assert len(list(db.table_names(namespace=["test_ns"]))) == 3
# Verify tables exist
assert len(list(db.table_names())) == 3
# Drop all tables in child namespace
db.drop_all_tables(namespace=["test_ns"])
# Drop all tables
db.drop_all_tables()
# Verify all tables are gone from child namespace
assert len(list(db.table_names(namespace=["test_ns"]))) == 0
# Verify all tables are gone
assert len(list(db.table_names())) == 0
# Test that table_names works with keyword-only namespace parameter
db.create_table("test_table", schema=schema, namespace=["test_ns"])
result = list(db.table_names(namespace=["test_ns"]))
db.create_table("test_table", schema=schema)
result = list(db.table_names(namespace=[]))
assert "test_table" in result
def test_table_operations(self):
"""Test various table operations through namespace."""
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
# Create a child namespace first
db.create_namespace(["test_ns"])
# Create a table with schema in child namespace
# Create a table with schema
schema = pa.schema(
[
pa.field("id", pa.int64()),
@@ -226,7 +516,7 @@ class TestNamespaceConnection:
pa.field("text", pa.string()),
]
)
table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
table = db.create_table("test_table", schema=schema)
# Verify empty table was created
result = table.to_pandas()
@@ -258,7 +548,7 @@ class TestNamespaceConnection:
# Connect with storage options
storage_opts = {"test_option": "test_value"}
db = lancedb.connect_namespace(
"dir", {"root": self.temp_dir}, storage_options=storage_opts
"temp", {"root": self.temp_dir}, storage_options=storage_opts
)
# Storage options should be preserved
@@ -276,7 +566,7 @@ class TestNamespaceConnection:
def test_namespace_operations(self):
"""Test namespace management operations."""
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
# Initially no namespaces
assert len(list(db.list_namespaces())) == 0
@@ -327,7 +617,7 @@ class TestNamespaceConnection:
def test_namespace_with_tables_cannot_be_dropped(self):
"""Test that namespaces containing tables cannot be dropped."""
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
# Create namespace and table
db.create_namespace(["test_namespace"])
@@ -340,7 +630,7 @@ class TestNamespaceConnection:
db.create_table("test_table", schema=schema, namespace=["test_namespace"])
# Try to drop namespace with tables - should fail
with pytest.raises(RuntimeError, match="is not empty"):
with pytest.raises(RuntimeError, match="contains tables"):
db.drop_namespace(["test_namespace"])
# Drop table first
@@ -350,7 +640,7 @@ class TestNamespaceConnection:
db.drop_namespace(["test_namespace"])
def test_same_table_name_different_namespaces(self):
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
# Create two namespaces
db.create_namespace(["namespace_a"])

View File

@@ -1,632 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
"""
Integration tests for LanceDB Namespace with S3 and credential refresh.
This test simulates a namespace server that returns incrementing credentials
and verifies that the credential refresh mechanism works correctly for both
create_table and open_table operations.
Tests verify:
- Storage options provider is auto-created and used
- Credentials are properly cached during reads
- Credentials refresh when they expire
- Both create and open operations work with credential rotation
"""
import copy
import time
import uuid
from threading import Lock
from typing import Dict
import pyarrow as pa
import pytest
from lance_namespace import (
CreateEmptyTableRequest,
CreateEmptyTableResponse,
DescribeTableRequest,
DescribeTableResponse,
LanceNamespace,
)
from lancedb.namespace import LanceNamespaceDBConnection
# LocalStack S3 configuration
# Dummy credentials/endpoint for a local LocalStack instance. Each test
# deep-copies this dict before handing it to TrackingNamespace, so the
# shared template is never mutated across tests.
CONFIG = {
    "allow_http": "true",
    "aws_access_key_id": "ACCESSKEY",
    "aws_secret_access_key": "SECRETKEY",
    "aws_endpoint": "http://localhost:4566",
    "aws_region": "us-east-1",
}
def get_boto3_client(*args, **kwargs):
    """Build a boto3 client preconfigured with the LocalStack CONFIG credentials.

    Positional args (e.g. the service name) and extra keyword args such as
    ``endpoint_url`` are forwarded to ``boto3.client`` unchanged.
    """
    import boto3

    return boto3.client(
        *args,
        region_name=CONFIG["aws_region"],
        aws_access_key_id=CONFIG["aws_access_key_id"],
        aws_secret_access_key=CONFIG["aws_secret_access_key"],
        **kwargs,
    )
@pytest.fixture(scope="module")
def s3_bucket():
    """Create and cleanup S3 bucket for integration tests."""
    bucket_name = "lancedb-namespace-integtest"
    s3 = get_boto3_client("s3", endpoint_url=CONFIG["aws_endpoint"])

    # Start from a clean slate: a previous run may have left the bucket behind.
    try:
        delete_bucket(s3, bucket_name)
    except s3.exceptions.NoSuchBucket:
        pass
    s3.create_bucket(Bucket=bucket_name)

    yield bucket_name

    # Cleanup after tests
    delete_bucket(s3, bucket_name)
def delete_bucket(s3, bucket_name):
    """Delete an S3 bucket and all of its contents (best-effort; errors swallowed)."""
    try:
        # S3 requires the bucket to be empty before it can be deleted,
        # so remove every object first, page by page.
        pages = s3.get_paginator("list_objects_v2").paginate(Bucket=bucket_name)
        for page in pages:
            for obj in page.get("Contents", []):
                s3.delete_object(Bucket=bucket_name, Key=obj["Key"])
        s3.delete_bucket(Bucket=bucket_name)
    except Exception:
        # Best-effort cleanup: a missing bucket or transient error is fine here.
        pass
class TrackingNamespace(LanceNamespace):
    """
    Mock namespace that wraps DirectoryNamespace and tracks API calls.

    This namespace returns incrementing credentials with each API call to simulate
    credential rotation. It also tracks the number of times each API is called
    to verify caching behavior.
    """

    def __init__(
        self,
        bucket_name: str,
        storage_options: Dict[str, str],
        credential_expires_in_seconds: int = 60,
    ):
        from lance.namespace import DirectoryNamespace

        self.bucket_name = bucket_name
        self.base_storage_options = storage_options
        self.credential_expires_in_seconds = credential_expires_in_seconds
        # Call counters; reads and writes go through self.lock so concurrent
        # table operations cannot lose an increment.
        self.describe_call_count = 0
        self.create_call_count = 0
        self.lock = Lock()

        # Create underlying DirectoryNamespace with storage options
        # (DirectoryNamespace takes storage options prefixed with "storage." —
        # presumably its constructor convention; confirm against lance docs).
        dir_props = {f"storage.{k}": v for k, v in storage_options.items()}
        # Use S3 path for bucket name, local path for file paths
        if bucket_name.startswith("/") or bucket_name.startswith("file://"):
            dir_props["root"] = f"{bucket_name}/namespace_root"
        else:
            dir_props["root"] = f"s3://{bucket_name}/namespace_root"
        self.inner = DirectoryNamespace(**dir_props)

    def get_describe_call_count(self) -> int:
        """Thread-safe getter for describe call count."""
        with self.lock:
            return self.describe_call_count

    def get_create_call_count(self) -> int:
        """Thread-safe getter for create call count."""
        with self.lock:
            return self.create_call_count

    def namespace_id(self) -> str:
        """Return namespace identifier."""
        return f"TrackingNamespace {{ inner: {self.inner.namespace_id()} }}"

    def _modify_storage_options(
        self, storage_options: Dict[str, str], count: int
    ) -> Dict[str, str]:
        """
        Add incrementing credentials with expiration timestamp.

        This simulates a credential rotation system where each call returns
        new credentials that expire after credential_expires_in_seconds.
        """
        # Deep-copy so the caller's dict (and any cached response) is untouched.
        modified = copy.deepcopy(storage_options) if storage_options else {}

        # Increment credentials to simulate rotation
        modified["aws_access_key_id"] = f"AKID_{count}"
        modified["aws_secret_access_key"] = f"SECRET_{count}"
        modified["aws_session_token"] = f"TOKEN_{count}"

        # Set expiration time (milliseconds since the epoch, as a string)
        expires_at_millis = int(
            (time.time() + self.credential_expires_in_seconds) * 1000
        )
        modified["expires_at_millis"] = str(expires_at_millis)

        return modified

    def create_empty_table(
        self, request: CreateEmptyTableRequest
    ) -> CreateEmptyTableResponse:
        """Track create_empty_table calls and inject rotating credentials."""
        # Increment under the lock; the inner (possibly slow) call runs outside it.
        with self.lock:
            self.create_call_count += 1
            count = self.create_call_count

        response = self.inner.create_empty_table(request)
        response.storage_options = self._modify_storage_options(
            response.storage_options, count
        )
        return response

    def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
        """Track describe_table calls and inject rotating credentials."""
        # Same pattern as create_empty_table: count under lock, call outside.
        with self.lock:
            self.describe_call_count += 1
            count = self.describe_call_count

        response = self.inner.describe_table(request)
        response.storage_options = self._modify_storage_options(
            response.storage_options, count
        )
        return response

    # Pass through other methods to inner namespace (no tracking, no
    # credential injection — only create/describe are instrumented).
    def list_tables(self, request):
        return self.inner.list_tables(request)

    def drop_table(self, request):
        return self.inner.drop_table(request)

    def list_namespaces(self, request):
        return self.inner.list_namespaces(request)

    def create_namespace(self, request):
        return self.inner.create_namespace(request)

    def drop_namespace(self, request):
        return self.inner.drop_namespace(request)
@pytest.mark.s3_test
def test_namespace_create_table_with_provider(s3_bucket: str):
    """
    Test creating a table through namespace with storage options provider.

    Verifies:
    - create_empty_table is called once to reserve location
    - Storage options provider is auto-created
    - Table can be written successfully
    - Credentials are cached during write operations
    """
    storage_options = copy.deepcopy(CONFIG)
    namespace = TrackingNamespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3600,  # 1 hour
    )
    db = LanceNamespaceDBConnection(namespace)

    # Create unique namespace for this test
    # (uuid suffix keeps repeated runs from colliding in the shared bucket)
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
    db.create_namespace([namespace_name])

    table_name = f"test_table_{uuid.uuid4().hex}"
    namespace_path = [namespace_name]

    # Verify initial state
    assert namespace.get_create_call_count() == 0
    assert namespace.get_describe_call_count() == 0

    # Create table with data
    data = pa.table(
        {
            "id": [1, 2, 3],
            "vector": [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
            "text": ["hello", "world", "test"],
        }
    )
    table = db.create_table(table_name, data, namespace=namespace_path)

    # Verify create_empty_table was called exactly once
    assert namespace.get_create_call_count() == 1
    # describe_table should NOT be called during create in create mode
    assert namespace.get_describe_call_count() == 0

    # Verify table was created successfully
    assert table.name == table_name
    result = table.to_pandas()
    assert len(result) == 3
    assert list(result["id"]) == [1, 2, 3]
@pytest.mark.s3_test
def test_namespace_open_table_with_provider(s3_bucket: str):
    """
    Test opening a table through namespace with storage options provider.

    Verifies:
    - describe_table is called once when opening
    - Storage options provider is auto-created
    - Table can be read successfully
    - Credentials are cached during read operations
    """
    storage_options = copy.deepcopy(CONFIG)
    namespace = TrackingNamespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3600,
    )
    db = LanceNamespaceDBConnection(namespace)

    # Create unique namespace for this test
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
    db.create_namespace([namespace_name])

    table_name = f"test_table_{uuid.uuid4().hex}"
    namespace_path = [namespace_name]

    # Create table first
    data = pa.table(
        {
            "id": [1, 2, 3, 4, 5],
            "vector": [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0]],
            "value": [10, 20, 30, 40, 50],
        }
    )
    db.create_table(table_name, data, namespace=namespace_path)
    initial_create_count = namespace.get_create_call_count()
    assert initial_create_count == 1

    # Open the table
    opened_table = db.open_table(table_name, namespace=namespace_path)

    # Verify describe_table was called exactly once
    assert namespace.get_describe_call_count() == 1
    # create_empty_table should not be called again
    assert namespace.get_create_call_count() == initial_create_count

    # Perform multiple read operations
    describe_count_after_open = namespace.get_describe_call_count()
    for _ in range(3):
        result = opened_table.to_pandas()
        assert len(result) == 5
        count = opened_table.count_rows()
        assert count == 5

    # Verify credentials were cached (no additional describe_table calls).
    # With the 1-hour expiry above, repeated reads must reuse cached credentials.
    assert namespace.get_describe_call_count() == describe_count_after_open
@pytest.mark.s3_test
def test_namespace_credential_refresh_on_read(s3_bucket: str):
    """
    Test credential refresh when credentials expire during read operations.

    Verifies:
    - Credentials are cached initially (no additional describe_table calls)
    - After expiration, credentials are refreshed (describe_table called again)
    - Read operations continue to work with refreshed credentials
    """
    storage_options = copy.deepcopy(CONFIG)
    namespace = TrackingNamespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3,  # Short expiration for testing
    )
    db = LanceNamespaceDBConnection(namespace)

    # Create unique namespace for this test
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
    db.create_namespace([namespace_name])

    table_name = f"test_table_{uuid.uuid4().hex}"
    namespace_path = [namespace_name]

    # Create table
    data = pa.table(
        {
            "id": [1, 2, 3],
            "vector": [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
        }
    )
    db.create_table(table_name, data, namespace=namespace_path)

    # Open table (triggers describe_table)
    opened_table = db.open_table(table_name, namespace=namespace_path)

    # Perform an immediate read (should use credentials from open)
    result = opened_table.to_pandas()
    assert len(result) == 3

    describe_count_after_first_read = namespace.get_describe_call_count()

    # Wait for credentials to expire (3 seconds + buffer)
    # NOTE(review): sleep-based expiry makes this test wall-clock sensitive.
    time.sleep(5)

    # Perform read after expiration (should trigger credential refresh)
    result = opened_table.to_pandas()
    assert len(result) == 3

    describe_count_after_refresh = namespace.get_describe_call_count()

    # Verify describe_table was called again (credential refresh)
    refresh_delta = describe_count_after_refresh - describe_count_after_first_read
    # Verify the exact count: credential refresh should call describe_table exactly
    # once
    assert refresh_delta == 1, (
        f"Credential refresh should call describe_table exactly once "
        f"(got {refresh_delta})"
    )
@pytest.mark.s3_test
def test_namespace_credential_refresh_on_write(s3_bucket: str):
    """
    Test credential refresh when credentials expire during write operations.

    Verifies:
    - Credentials are cached during initial writes
    - After expiration, new credentials are fetched before writes
    - Write operations continue to work with refreshed credentials
    """
    storage_options = copy.deepcopy(CONFIG)
    namespace = TrackingNamespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3,  # Short expiration
    )
    db = LanceNamespaceDBConnection(namespace)

    # Create unique namespace for this test
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
    db.create_namespace([namespace_name])

    table_name = f"test_table_{uuid.uuid4().hex}"
    namespace_path = [namespace_name]

    # Create table
    initial_data = pa.table(
        {
            "id": [1, 2],
            "vector": [[1.0, 2.0], [3.0, 4.0]],
        }
    )
    table = db.create_table(table_name, initial_data, namespace=namespace_path)

    # Add more data (should use cached credentials)
    new_data = pa.table(
        {
            "id": [3, 4],
            "vector": [[5.0, 6.0], [7.0, 8.0]],
        }
    )
    table.add(new_data)

    # Wait for credentials to expire
    time.sleep(5)

    # Add more data (should trigger credential refresh)
    # NOTE(review): unlike the read test, no refresh call-count is asserted
    # here — the add() succeeding and the final row count are the only checks.
    more_data = pa.table(
        {
            "id": [5, 6],
            "vector": [[9.0, 10.0], [11.0, 12.0]],
        }
    )
    table.add(more_data)

    # Verify final row count
    assert table.count_rows() == 6
@pytest.mark.s3_test
def test_namespace_overwrite_mode(s3_bucket: str):
    """
    Test creating table in overwrite mode with credential tracking.

    Verifies:
    - First create calls create_empty_table exactly once
    - Overwrite mode calls describe_table exactly once to check existence
    - Storage options provider works in overwrite mode
    """
    storage_options = copy.deepcopy(CONFIG)
    namespace = TrackingNamespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3600,
    )
    db = LanceNamespaceDBConnection(namespace)

    # Create unique namespace for this test
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
    db.create_namespace([namespace_name])

    table_name = f"test_table_{uuid.uuid4().hex}"
    namespace_path = [namespace_name]

    # Create initial table
    data1 = pa.table(
        {
            "id": [1, 2],
            "vector": [[1.0, 2.0], [3.0, 4.0]],
        }
    )
    table = db.create_table(table_name, data1, namespace=namespace_path)

    # Exactly one create_empty_table call for initial create
    assert namespace.get_create_call_count() == 1
    # No describe_table calls in create mode
    assert namespace.get_describe_call_count() == 0
    assert table.count_rows() == 2

    # Overwrite the table (same name, different data and row count)
    data2 = pa.table(
        {
            "id": [10, 20, 30],
            "vector": [[10.0, 20.0], [30.0, 40.0], [50.0, 60.0]],
        }
    )
    table2 = db.create_table(
        table_name, data2, namespace=namespace_path, mode="overwrite"
    )

    # Should still have only 1 create_empty_table call
    # (overwrite reuses location from describe_table)
    assert namespace.get_create_call_count() == 1
    # Should have called describe_table exactly once to get existing table location
    assert namespace.get_describe_call_count() == 1

    # Verify new data fully replaced the old rows
    assert table2.count_rows() == 3
    result = table2.to_pandas()
    assert list(result["id"]) == [10, 20, 30]
@pytest.mark.s3_test
def test_namespace_multiple_tables(s3_bucket: str):
    """
    Test creating and opening multiple tables in the same namespace.

    Verifies:
    - Each table gets its own storage options provider
    - Credentials are tracked independently per table
    - Multiple tables can coexist in the same namespace
    """
    storage_options = copy.deepcopy(CONFIG)
    namespace = TrackingNamespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3600,
    )
    db = LanceNamespaceDBConnection(namespace)

    # Create unique namespace for this test
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
    db.create_namespace([namespace_name])
    namespace_path = [namespace_name]

    # Create first table
    table1_name = f"table1_{uuid.uuid4().hex}"
    data1 = pa.table({"id": [1, 2], "value": [10, 20]})
    db.create_table(table1_name, data1, namespace=namespace_path)

    # Create second table
    table2_name = f"table2_{uuid.uuid4().hex}"
    data2 = pa.table({"id": [3, 4], "value": [30, 40]})
    db.create_table(table2_name, data2, namespace=namespace_path)

    # Should have 2 create calls (one per table)
    # (TrackingNamespace counters are global across tables, not per table)
    assert namespace.get_create_call_count() == 2

    # Open both tables
    opened1 = db.open_table(table1_name, namespace=namespace_path)
    opened2 = db.open_table(table2_name, namespace=namespace_path)

    # Should have 2 describe calls (one per open)
    assert namespace.get_describe_call_count() == 2

    # Verify both tables work independently
    assert opened1.count_rows() == 2
    assert opened2.count_rows() == 2

    result1 = opened1.to_pandas()
    result2 = opened2.to_pandas()
    assert list(result1["id"]) == [1, 2]
    assert list(result2["id"]) == [3, 4]
@pytest.mark.s3_test
def test_namespace_with_schema_only(s3_bucket: str):
    """
    Test creating empty table with schema only (no data).

    Verifies:
    - Empty table creation works with storage options provider
    - describe_table is NOT called during create
    - Data can be added later
    """
    storage_options = copy.deepcopy(CONFIG)
    namespace = TrackingNamespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3600,
    )
    db = LanceNamespaceDBConnection(namespace)

    # Create unique namespace for this test
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
    db.create_namespace([namespace_name])

    table_name = f"test_table_{uuid.uuid4().hex}"
    namespace_path = [namespace_name]

    # Create empty table with schema (no request_data — schema only)
    schema = pa.schema(
        [
            pa.field("id", pa.int64()),
            pa.field("vector", pa.list_(pa.float32(), 2)),
            pa.field("text", pa.utf8()),
        ]
    )
    table = db.create_table(table_name, schema=schema, namespace=namespace_path)

    # Should have called create_empty_table once
    assert namespace.get_create_call_count() == 1
    # Should NOT have called describe_table in create mode
    assert namespace.get_describe_call_count() == 0

    # Verify empty table
    assert table.count_rows() == 0

    # Add data matching the declared schema
    data = pa.table(
        {
            "id": [1, 2],
            "vector": [[1.0, 2.0], [3.0, 4.0]],
            "text": ["hello", "world"],
        }
    )
    table.add(data)

    # Verify data was added
    assert table.count_rows() == 2

View File

@@ -10,14 +10,11 @@ use lancedb::{
};
use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyclass, pyfunction, pymethods, Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult,
Python,
pyclass, pyfunction, pymethods, Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
use crate::{
error::PythonErrorExt, storage_options::py_object_to_storage_options_provider, table::Table,
};
use crate::{error::PythonErrorExt, table::Table};
#[pyclass]
pub struct Connection {
@@ -104,8 +101,7 @@ impl Connection {
future_into_py(self_.py(), async move { op.execute().await.infer_error() })
}
#[allow(clippy::too_many_arguments)]
#[pyo3(signature = (name, mode, data, namespace=vec![], storage_options=None, storage_options_provider=None, location=None))]
#[pyo3(signature = (name, mode, data, namespace=vec![], storage_options=None))]
pub fn create_table<'a>(
self_: PyRef<'a, Self>,
name: String,
@@ -113,8 +109,6 @@ impl Connection {
data: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -128,13 +122,6 @@ impl Connection {
if let Some(storage_options) = storage_options {
builder = builder.storage_options(storage_options);
}
if let Some(provider_obj) = storage_options_provider {
let provider = py_object_to_storage_options_provider(provider_obj)?;
builder = builder.storage_options_provider(provider);
}
if let Some(location) = location {
builder = builder.location(location);
}
future_into_py(self_.py(), async move {
let table = builder.execute().await.infer_error()?;
@@ -142,8 +129,7 @@ impl Connection {
})
}
#[allow(clippy::too_many_arguments)]
#[pyo3(signature = (name, mode, schema, namespace=vec![], storage_options=None, storage_options_provider=None, location=None))]
#[pyo3(signature = (name, mode, schema, namespace=vec![], storage_options=None))]
pub fn create_empty_table<'a>(
self_: PyRef<'a, Self>,
name: String,
@@ -151,8 +137,6 @@ impl Connection {
schema: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -166,13 +150,6 @@ impl Connection {
if let Some(storage_options) = storage_options {
builder = builder.storage_options(storage_options);
}
if let Some(provider_obj) = storage_options_provider {
let provider = py_object_to_storage_options_provider(provider_obj)?;
builder = builder.storage_options_provider(provider);
}
if let Some(location) = location {
builder = builder.location(location);
}
future_into_py(self_.py(), async move {
let table = builder.execute().await.infer_error()?;
@@ -180,15 +157,13 @@ impl Connection {
})
}
#[pyo3(signature = (name, namespace=vec![], storage_options = None, storage_options_provider=None, index_cache_size = None, location=None))]
#[pyo3(signature = (name, namespace=vec![], storage_options = None, index_cache_size = None))]
pub fn open_table(
self_: PyRef<'_, Self>,
name: String,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
index_cache_size: Option<u32>,
location: Option<String>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -197,16 +172,9 @@ impl Connection {
if let Some(storage_options) = storage_options {
builder = builder.storage_options(storage_options);
}
if let Some(provider_obj) = storage_options_provider {
let provider = py_object_to_storage_options_provider(provider_obj)?;
builder = builder.storage_options_provider(provider);
}
if let Some(index_cache_size) = index_cache_size {
builder = builder.index_cache_size(index_cache_size);
}
if let Some(location) = location {
builder = builder.location(location);
}
future_into_py(self_.py(), async move {
let table = builder.execute().await.infer_error()?;

View File

@@ -26,7 +26,6 @@ pub mod index;
pub mod permutation;
pub mod query;
pub mod session;
pub mod storage_options;
pub mod table;
pub mod util;

View File

@@ -1,150 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
//! PyO3 bindings for StorageOptionsProvider
//!
//! This module provides the bridge between Python StorageOptionsProvider objects
//! and Rust's StorageOptionsProvider trait, enabling automatic credential refresh.
use std::collections::HashMap;
use std::sync::Arc;
use async_trait::async_trait;
use lance_io::object_store::StorageOptionsProvider;
use pyo3::prelude::*;
use pyo3::types::PyDict;
/// Internal wrapper around a Python object implementing StorageOptionsProvider
///
/// Holds the raw Python object so it can be cloned (under the GIL) and
/// invoked from Rust. The object is only required to expose a
/// `fetch_storage_options()` method; validation happens in [`Self::new`].
pub struct PyStorageOptionsProvider {
    /// The Python object implementing fetch_storage_options()
    inner: PyObject,
}
impl Clone for PyStorageOptionsProvider {
    fn clone(&self) -> Self {
        // `clone_ref` bumps the Python refcount, which requires the GIL;
        // acquire it just for the duration of the copy.
        Python::with_gil(|py| {
            let inner = self.inner.clone_ref(py);
            Self { inner }
        })
    }
}
impl PyStorageOptionsProvider {
    /// Wrap a Python object after verifying it exposes the required
    /// `fetch_storage_options()` method.
    ///
    /// Returns a `PyTypeError` if the method is missing.
    pub fn new(obj: PyObject) -> PyResult<Self> {
        Python::with_gil(|py| {
            let has_method = obj.bind(py).hasattr("fetch_storage_options")?;
            if has_method {
                Ok(Self { inner: obj })
            } else {
                Err(pyo3::exceptions::PyTypeError::new_err(
                    "StorageOptionsProvider must implement fetch_storage_options() method",
                ))
            }
        })
    }
}
/// Wrapper that implements the Rust StorageOptionsProvider trait
///
/// Bridges a validated [`PyStorageOptionsProvider`] into the Lance trait
/// system so the rest of the stack can refresh credentials without knowing
/// the provider is backed by Python.
pub struct PyStorageOptionsProviderWrapper {
    // GIL-aware handle to the underlying Python provider object.
    py_provider: PyStorageOptionsProvider,
}

impl PyStorageOptionsProviderWrapper {
    /// Build a trait-implementing wrapper from an already-validated provider.
    pub fn new(py_provider: PyStorageOptionsProvider) -> Self {
        Self { py_provider }
    }
}
#[async_trait]
impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
    /// Fetch fresh storage options by calling the Python object's
    /// `fetch_storage_options()` method.
    ///
    /// The Python call runs inside `spawn_blocking` so that acquiring the
    /// GIL never blocks an async executor thread. Returns `Ok(None)` when
    /// the Python side returns `None`; otherwise the returned dict is
    /// converted to a flat `HashMap<String, String>`. Non-dict results or
    /// non-string keys/values produce `InvalidInput` errors.
    async fn fetch_storage_options(&self) -> lance_core::Result<Option<HashMap<String, String>>> {
        // Call Python method from async context using spawn_blocking
        let py_provider = self.py_provider.clone();
        tokio::task::spawn_blocking(move || {
            Python::with_gil(|py| {
                // Call the Python fetch_storage_options method
                let result = py_provider
                    .inner
                    .bind(py)
                    .call_method0("fetch_storage_options")
                    .map_err(|e| lance_core::Error::IO {
                        source: Box::new(std::io::Error::other(format!(
                            "Failed to call fetch_storage_options: {}",
                            e
                        ))),
                        location: snafu::location!(),
                    })?;

                // If result is None, return None
                if result.is_none() {
                    return Ok(None);
                }

                // Extract the result dict - should be a flat Map<String, String>
                let result_dict = result.downcast::<PyDict>().map_err(|_| {
                    lance_core::Error::InvalidInput {
                        source: "fetch_storage_options() must return None or a dict of string key-value pairs".into(),
                        location: snafu::location!(),
                    }
                })?;

                // Convert all entries to HashMap<String, String>
                let mut storage_options = HashMap::new();
                for (key, value) in result_dict.iter() {
                    let key_str: String = key.extract().map_err(|e| {
                        lance_core::Error::InvalidInput {
                            source: format!("Storage option key must be a string: {}", e).into(),
                            location: snafu::location!(),
                        }
                    })?;
                    let value_str: String = value.extract().map_err(|e| {
                        lance_core::Error::InvalidInput {
                            source: format!("Storage option value must be a string: {}", e).into(),
                            location: snafu::location!(),
                        }
                    })?;
                    storage_options.insert(key_str, value_str);
                }
                Ok(Some(storage_options))
            })
        })
        .await
        // JoinError here means the blocking task panicked or was cancelled;
        // surface it as IO. The trailing `?` then propagates any error the
        // Python call itself produced.
        .map_err(|e| lance_core::Error::IO {
            source: Box::new(std::io::Error::other(format!(
                "Task join error: {}",
                e
            ))),
            location: snafu::location!(),
        })?
    }

    /// Identifier for this provider, delegated to the Python object's
    /// `provider_id()` method. If that call fails or returns a non-string,
    /// a fallback string describing the error is returned instead of
    /// propagating the failure.
    fn provider_id(&self) -> String {
        Python::with_gil(|py| {
            // Call provider_id() method on the Python object
            let obj = self.py_provider.inner.bind(py);
            obj.call_method0("provider_id")
                .and_then(|result| result.extract::<String>())
                .unwrap_or_else(|e| {
                    // If provider_id() fails, construct a fallback ID
                    format!("PyStorageOptionsProvider(error: {})", e)
                })
        })
    }
}
impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
    // Debug output embeds the provider's own id for easier log correlation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let id = self.provider_id();
        write!(f, "PyStorageOptionsProviderWrapper({})", id)
    }
}
/// Convert a Python object to an Arc<dyn StorageOptionsProvider>
///
/// This is the main entry point for converting Python StorageOptionsProvider
/// objects to Rust trait objects usable by the Lance ecosystem. Fails with a
/// `PyTypeError` if the object lacks a `fetch_storage_options()` method.
pub fn py_object_to_storage_options_provider(
    py_obj: PyObject,
) -> PyResult<Arc<dyn StorageOptionsProvider>> {
    let provider = PyStorageOptionsProvider::new(py_obj)?;
    let wrapper = PyStorageOptionsProviderWrapper::new(provider);
    Ok(Arc::new(wrapper))
}

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.22.4-beta.0"
version = "0.22.3"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -9,11 +9,11 @@ all-tests: feature-tests remote-tests
# the environment.
feature-tests:
../../ci/run_with_docker_compose.sh \
cargo test --all-features --tests --locked --examples $(CARGO_ARGS)
cargo test --all-features --tests --locked --examples
.PHONY: feature-tests
# Run tests against remote endpoints.
remote-tests:
../../ci/run_with_test_connection.sh \
cargo test --features remote --locked $(CARGO_ARGS)
cargo test --features remote --locked
.PHONY: remote-tests

View File

@@ -35,7 +35,6 @@ use crate::Table;
pub use lance_encoding::version::LanceFileVersion;
#[cfg(feature = "remote")]
use lance_io::object_store::StorageOptions;
use lance_io::object_store::StorageOptionsProvider;
/// A builder for configuring a [`Connection::table_names`] operation
pub struct TableNamesBuilder {
@@ -361,30 +360,6 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
self.request.namespace = namespace;
self
}
/// Set a custom location for the table.
///
/// If not set, the database will derive a location from its URI and the table name.
/// This is useful when integrating with namespace systems that manage table locations.
pub fn location(mut self, location: impl Into<String>) -> Self {
self.request.location = Some(location.into());
self
}
/// Set a storage options provider for automatic credential refresh.
///
/// This allows tables to automatically refresh cloud storage credentials
/// when they expire, enabling long-running operations on remote storage.
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
self.request
.write_options
.lance_write_params
.get_or_insert(Default::default())
.store_params
.get_or_insert(Default::default())
.storage_options_provider = Some(provider);
self
}
}
#[derive(Clone, Debug)]
@@ -407,7 +382,6 @@ impl OpenTableBuilder {
namespace: vec![],
index_cache_size: None,
lance_read_params: None,
location: None,
},
embedding_registry,
}
@@ -487,29 +461,6 @@ impl OpenTableBuilder {
self
}
/// Set a custom location for the table.
///
/// If not set, the database will derive a location from its URI and the table name.
/// This is useful when integrating with namespace systems that manage table locations.
pub fn location(mut self, location: impl Into<String>) -> Self {
self.request.location = Some(location.into());
self
}
/// Set a storage options provider for automatic credential refresh.
///
/// This allows tables to automatically refresh cloud storage credentials
/// when they expire, enabling long-running operations on remote storage.
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
self.request
.lance_read_params
.get_or_insert(Default::default())
.store_options
.get_or_insert(Default::default())
.storage_options_provider = Some(provider);
self
}
/// Open the table
pub async fn execute(self) -> Result<Table> {
let table = self.parent.open_table(self.request).await?;

View File

@@ -84,9 +84,6 @@ pub struct OpenTableRequest {
pub namespace: Vec<String>,
pub index_cache_size: Option<u32>,
pub lance_read_params: Option<ReadParams>,
/// Optional custom location for the table. If not provided, the database will
/// derive a location based on its URI and the table name.
pub location: Option<String>,
}
pub type TableBuilderCallback = Box<dyn FnOnce(OpenTableRequest) -> OpenTableRequest + Send>;
@@ -167,9 +164,6 @@ pub struct CreateTableRequest {
pub mode: CreateTableMode,
/// Options to use when writing data (only used if `data` is not None)
pub write_options: WriteOptions,
/// Optional custom location for the table. If not provided, the database will
/// derive a location based on its URI and the table name.
pub location: Option<String>,
}
impl CreateTableRequest {
@@ -180,7 +174,6 @@ impl CreateTableRequest {
data,
mode: CreateTableMode::default(),
write_options: WriteOptions::default(),
location: None,
}
}
}

View File

@@ -12,7 +12,6 @@ use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode};
use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
use lance_datafusion::utils::StreamingWriteSource;
use lance_encoding::version::LanceFileVersion;
use lance_io::object_store::StorageOptionsProvider;
use lance_table::io::commit::commit_handler_from_url;
use object_store::local::LocalFileSystem;
use snafu::ResultExt;
@@ -219,9 +218,6 @@ pub struct ListingDatabase {
// Storage options to be inherited by tables created from this connection
storage_options: HashMap<String, String>,
// Dynamic storage options provider for automatic credential refresh
pub(crate) storage_options_provider: Option<Arc<dyn StorageOptionsProvider>>,
// Options for tables created by this connection
new_table_config: NewTableConfig,
@@ -339,9 +335,7 @@ impl ListingDatabase {
)
.await?;
if object_store.is_local() {
Self::try_create_dir(&plain_uri).context(CreateDirSnafu {
path: plain_uri.clone(),
})?;
Self::try_create_dir(&plain_uri).context(CreateDirSnafu { path: plain_uri })?;
}
let write_store_wrapper = match mirrored_store {
@@ -361,7 +355,6 @@ impl ListingDatabase {
store_wrapper: write_store_wrapper,
read_consistency_interval: request.read_consistency_interval,
storage_options: options.storage_options,
storage_options_provider: None,
new_table_config: options.new_table_config,
session,
})
@@ -403,7 +396,6 @@ impl ListingDatabase {
store_wrapper: None,
read_consistency_interval,
storage_options: HashMap::new(),
storage_options_provider: None,
new_table_config,
session,
})
@@ -411,20 +403,7 @@ impl ListingDatabase {
/// Try to create a local directory to store the lancedb dataset
fn try_create_dir(path: &str) -> core::result::Result<(), std::io::Error> {
// Strip file:// or file:/ scheme if present to get the actual filesystem path
// Note: file:///path becomes file:/path after url.to_string(), so we need to handle both
let fs_path = if let Some(stripped) = path.strip_prefix("file://") {
// file:///path or file://host/path format
stripped
} else if let Some(stripped) = path.strip_prefix("file:") {
// file:/path format (from url.to_string() on file:///path)
// The path after "file:" should already start with "/" for absolute paths
stripped
} else {
path
};
let path = Path::new(fs_path);
let path = Path::new(path);
if !path.try_exists()? {
create_dir_all(path)?;
}
@@ -550,14 +529,6 @@ impl ListingDatabase {
self.inherit_storage_options(storage_options);
}
// Set storage options provider if available
if self.storage_options_provider.is_some() {
write_params
.store_params
.get_or_insert_with(Default::default)
.storage_options_provider = self.storage_options_provider.clone();
}
write_params.data_storage_version = self
.new_table_config
.data_storage_version
@@ -598,7 +569,6 @@ impl ListingDatabase {
namespace: namespace.clone(),
index_cache_size: None,
lance_read_params: None,
location: None,
};
let req = (callback)(req);
let table = self.open_table(req).await?;
@@ -694,17 +664,12 @@ impl Database for ListingDatabase {
}
async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
// When namespace is not empty, location must be provided
if !request.namespace.is_empty() && request.location.is_none() {
return Err(Error::InvalidInput {
message: "Location must be provided when namespace is not empty".into(),
if !request.namespace.is_empty() {
return Err(Error::NotSupported {
message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
});
}
// Use provided location if available, otherwise derive from table name
let table_uri = request
.location
.clone()
.unwrap_or_else(|| self.table_uri(&request.name).unwrap());
let table_uri = self.table_uri(&request.name)?;
let (storage_version_override, v2_manifest_override) =
self.extract_storage_overrides(&request)?;
@@ -717,7 +682,6 @@ impl Database for ListingDatabase {
match NativeTable::create(
&table_uri,
&request.name,
request.namespace.clone(),
request.data,
self.store_wrapper.clone(),
Some(write_params),
@@ -789,7 +753,6 @@ impl Database for ListingDatabase {
let cloned_table = NativeTable::open_with_params(
&target_uri,
&request.target_table_name,
request.target_namespace,
self.store_wrapper.clone(),
None,
self.read_consistency_interval,
@@ -800,17 +763,12 @@ impl Database for ListingDatabase {
}
async fn open_table(&self, mut request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
// When namespace is not empty, location must be provided
if !request.namespace.is_empty() && request.location.is_none() {
return Err(Error::InvalidInput {
message: "Location must be provided when namespace is not empty".into(),
if !request.namespace.is_empty() {
return Err(Error::NotSupported {
message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
});
}
// Use provided location if available, otherwise derive from table name
let table_uri = request
.location
.clone()
.unwrap_or_else(|| self.table_uri(&request.name).unwrap());
let table_uri = self.table_uri(&request.name)?;
// Only modify the storage options if we actually have something to
// inherit. There is a difference between storage_options=None and
@@ -830,16 +788,6 @@ impl Database for ListingDatabase {
self.inherit_storage_options(storage_options);
}
// Set storage options provider if available
if self.storage_options_provider.is_some() {
request
.lance_read_params
.get_or_insert_with(Default::default)
.store_options
.get_or_insert_with(Default::default)
.storage_options_provider = self.storage_options_provider.clone();
}
// Some ReadParams are exposed in the OpenTableBuilder, but we also
// let the user provide their own ReadParams.
//
@@ -860,7 +808,6 @@ impl Database for ListingDatabase {
NativeTable::open_with_params(
&table_uri,
&request.name,
request.namespace,
self.store_wrapper.clone(),
Some(read_params),
self.read_consistency_interval,
@@ -964,7 +911,6 @@ mod tests {
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema.clone())),
mode: CreateTableMode::Create,
write_options: Default::default(),
location: None,
})
.await
.unwrap();
@@ -1028,7 +974,6 @@ mod tests {
data: CreateTableData::Data(reader),
mode: CreateTableMode::Create,
write_options: Default::default(),
location: None,
})
.await
.unwrap();
@@ -1086,7 +1031,6 @@ mod tests {
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
mode: CreateTableMode::Create,
write_options: Default::default(),
location: None,
})
.await
.unwrap();
@@ -1121,7 +1065,6 @@ mod tests {
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
mode: CreateTableMode::Create,
write_options: Default::default(),
location: None,
})
.await
.unwrap();
@@ -1160,7 +1103,6 @@ mod tests {
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
mode: CreateTableMode::Create,
write_options: Default::default(),
location: None,
})
.await
.unwrap();
@@ -1199,7 +1141,6 @@ mod tests {
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
mode: CreateTableMode::Create,
write_options: Default::default(),
location: None,
})
.await
.unwrap();
@@ -1253,7 +1194,6 @@ mod tests {
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
mode: CreateTableMode::Create,
write_options: Default::default(),
location: None,
})
.await
.unwrap();
@@ -1310,7 +1250,6 @@ mod tests {
data: CreateTableData::Data(reader),
mode: CreateTableMode::Create,
write_options: Default::default(),
location: None,
})
.await
.unwrap();
@@ -1395,7 +1334,6 @@ mod tests {
data: CreateTableData::Data(reader),
mode: CreateTableMode::Create,
write_options: Default::default(),
location: None,
})
.await
.unwrap();
@@ -1481,7 +1419,6 @@ mod tests {
data: CreateTableData::Data(reader),
mode: CreateTableMode::Create,
write_options: Default::default(),
location: None,
})
.await
.unwrap();
@@ -1574,7 +1511,6 @@ mod tests {
data: CreateTableData::Data(reader),
mode: CreateTableMode::Create,
write_options: Default::default(),
location: None,
})
.await
.unwrap();

View File

@@ -7,7 +7,6 @@ use std::collections::HashMap;
use std::sync::Arc;
use async_trait::async_trait;
use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsProvider};
use lance_namespace::{
models::{
CreateEmptyTableRequest, CreateNamespaceRequest, DescribeTableRequest,
@@ -17,14 +16,13 @@ use lance_namespace::{
};
use lance_namespace_impls::ConnectBuilder;
use crate::connection::ConnectRequest;
use crate::database::ReadConsistency;
use crate::database::listing::ListingDatabase;
use crate::error::{Error, Result};
use crate::{connection::ConnectRequest, database::ReadConsistency};
use super::{
listing::ListingDatabase, BaseTable, CloneTableRequest,
CreateNamespaceRequest as DbCreateNamespaceRequest, CreateTableMode,
CreateTableRequest as DbCreateTableRequest, Database,
BaseTable, CloneTableRequest, CreateNamespaceRequest as DbCreateNamespaceRequest,
CreateTableMode, CreateTableRequest as DbCreateTableRequest, Database,
DropNamespaceRequest as DbDropNamespaceRequest,
ListNamespacesRequest as DbListNamespacesRequest, OpenTableRequest, TableNamesRequest,
};
@@ -69,6 +67,58 @@ impl LanceNamespaceDatabase {
uri: format!("namespace://{}", ns_impl),
})
}
    /// Helper method to create a ListingDatabase from a table location
    ///
    /// This method:
    /// 1. Validates that the location ends with <table_name>.lance
    /// 2. Extracts the parent directory from the location
    /// 3. Creates a ListingDatabase at that parent directory
    ///
    /// Storage options are merged with connection-level options, with
    /// `additional_storage_options` (e.g. options returned by the namespace
    /// service) winning on key conflicts.
    async fn create_listing_database(
        &self,
        table_name: &str,
        location: &str,
        additional_storage_options: Option<HashMap<String, String>>,
    ) -> Result<Arc<ListingDatabase>> {
        // NOTE(review): this is a plain suffix check, so a location like
        // ".../xfoo.lance" also passes for table "foo" — confirm whether a
        // "/<table_name>.lance" separator-aware check is intended.
        let expected_suffix = format!("{}.lance", table_name);
        if !location.ends_with(&expected_suffix) {
            return Err(Error::Runtime {
                message: format!(
                    "Invalid table location '{}': expected to end with '{}'",
                    location, expected_suffix
                ),
            });
        }

        // Everything before the final '/' is treated as the database root.
        let parent_dir = location
            .rsplit_once('/')
            .map(|(parent, _)| parent.to_string())
            .ok_or_else(|| Error::Runtime {
                message: format!("Invalid table location '{}': no parent directory", location),
            })?;

        // Namespace-provided options override connection options.
        let mut merged_storage_options = self.storage_options.clone();
        if let Some(opts) = additional_storage_options {
            merged_storage_options.extend(opts);
        }

        let connect_request = ConnectRequest {
            uri: parent_dir,
            options: merged_storage_options,
            read_consistency_interval: self.read_consistency_interval,
            session: self.session.clone(),
            #[cfg(feature = "remote")]
            client_config: Default::default(),
        };

        let listing_db = ListingDatabase::connect_with_options(&connect_request)
            .await
            .map_err(|e| Error::Runtime {
                message: format!("Failed to create listing database: {}", e),
            })?;

        Ok(Arc::new(listing_db))
    }
}
impl std::fmt::Debug for LanceNamespaceDatabase {
@@ -86,51 +136,6 @@ impl std::fmt::Display for LanceNamespaceDatabase {
}
}
impl LanceNamespaceDatabase {
    /// Create a temporary listing database for the given location
    ///
    /// Merges storage options with priority: connection < user < namespace
    ///
    /// When the namespace service itself returned storage options, a
    /// `LanceNamespaceStorageOptionsProvider` is attached so credentials can
    /// be re-fetched from the namespace when they expire. User-provided
    /// options alone do NOT attach a provider.
    async fn create_listing_database(
        &self,
        location: &str,
        table_id: Vec<String>,
        user_storage_options: Option<&HashMap<String, String>>,
        response_storage_options: Option<&HashMap<String, String>>,
    ) -> Result<ListingDatabase> {
        // Merge storage options: connection < user < namespace
        let mut merged_storage_options = self.storage_options.clone();
        if let Some(opts) = user_storage_options {
            merged_storage_options.extend(opts.clone());
        }
        if let Some(opts) = response_storage_options {
            merged_storage_options.extend(opts.clone());
        }

        let request = ConnectRequest {
            uri: location.to_string(),
            #[cfg(feature = "remote")]
            client_config: Default::default(),
            options: merged_storage_options,
            read_consistency_interval: self.read_consistency_interval,
            session: self.session.clone(),
        };

        let mut listing_db = ListingDatabase::connect_with_options(&request).await?;

        // Create storage options provider only if namespace returned storage options
        // (not just user-provided options)
        if response_storage_options.is_some() {
            let provider = Arc::new(LanceNamespaceStorageOptionsProvider::new(
                self.namespace.clone(),
                table_id,
            )) as Arc<dyn StorageOptionsProvider>;
            listing_db.storage_options_provider = Some(provider);
        }

        Ok(listing_db)
    }
}
#[async_trait]
impl Database for LanceNamespaceDatabase {
fn uri(&self) -> &str {
@@ -236,14 +241,6 @@ impl Database for LanceNamespaceDatabase {
}
async fn create_table(&self, request: DbCreateTableRequest) -> Result<Arc<dyn BaseTable>> {
// Extract user-provided storage options from request
let user_storage_options = request
.write_options
.lance_write_params
.as_ref()
.and_then(|lwp| lwp.store_params.as_ref())
.and_then(|sp| sp.storage_options.as_ref());
let mut table_id = request.namespace.clone();
table_id.push(request.name.clone());
let describe_request = DescribeTableRequest {
@@ -282,21 +279,15 @@ impl Database for LanceNamespaceDatabase {
})?;
let listing_db = self
.create_listing_database(
&location,
table_id.clone(),
user_storage_options,
response.storage_options.as_ref(),
)
.create_listing_database(&request.name, &location, response.storage_options)
.await?;
return listing_db
.open_table(OpenTableRequest {
name: request.name.clone(),
namespace: request.namespace.clone(),
namespace: vec![],
index_cache_size: None,
lance_read_params: None,
location: Some(location),
})
.await;
}
@@ -307,7 +298,7 @@ impl Database for LanceNamespaceDatabase {
table_id.push(request.name.clone());
let create_empty_request = CreateEmptyTableRequest {
id: Some(table_id.clone()),
id: Some(table_id),
location: None,
properties: if self.storage_options.is_empty() {
None
@@ -332,37 +323,28 @@ impl Database for LanceNamespaceDatabase {
let listing_db = self
.create_listing_database(
&request.name,
&location,
table_id,
user_storage_options,
create_empty_response.storage_options.as_ref(),
create_empty_response.storage_options,
)
.await?;
let create_request = DbCreateTableRequest {
name: request.name,
namespace: request.namespace,
namespace: vec![],
data: request.data,
mode: request.mode,
write_options: request.write_options,
location: Some(location),
};
listing_db.create_table(create_request).await
}
async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
// Extract user-provided storage options from request
let user_storage_options = request
.lance_read_params
.as_ref()
.and_then(|lrp| lrp.store_options.as_ref())
.and_then(|so| so.storage_options.as_ref());
let mut table_id = request.namespace.clone();
table_id.push(request.name.clone());
let describe_request = DescribeTableRequest {
id: Some(table_id.clone()),
id: Some(table_id),
version: None,
};
let response = self
@@ -378,20 +360,14 @@ impl Database for LanceNamespaceDatabase {
})?;
let listing_db = self
.create_listing_database(
&location,
table_id,
user_storage_options,
response.storage_options.as_ref(),
)
.create_listing_database(&request.name, &location, response.storage_options)
.await?;
let open_request = OpenTableRequest {
name: request.name.clone(),
namespace: request.namespace.clone(),
namespace: vec![],
index_cache_size: request.index_cache_size,
lance_read_params: request.lance_read_params,
location: Some(location),
};
listing_db.open_table(open_request).await
}
@@ -455,7 +431,6 @@ impl Database for LanceNamespaceDatabase {
mod tests {
use super::*;
use crate::connect_namespace;
use crate::database::CreateNamespaceRequest;
use crate::query::ExecutableQuery;
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray};
use arrow_schema::{DataType, Field, Schema};
@@ -566,18 +541,10 @@ mod tests {
.await
.expect("Failed to connect to namespace");
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
namespace: vec!["test_ns".into()],
})
.await
.expect("Failed to create namespace");
// Test: Create a table in the child namespace
// Test: Create a table
let test_data = create_test_data();
let table = conn
.create_table("test_table", test_data)
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to create table");
@@ -595,15 +562,9 @@ mod tests {
assert_eq!(results.len(), 1);
assert_eq!(results[0].num_rows(), 5);
// Verify: Table namespace is correct
assert_eq!(table.namespace(), &["test_ns"]);
assert_eq!(table.name(), "test_table");
assert_eq!(table.id(), "test_ns$test_table");
// Verify: Table appears in table_names for the child namespace
// Verify: Table appears in table_names
let table_names = conn
.table_names()
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to list tables");
@@ -625,18 +586,10 @@ mod tests {
.await
.expect("Failed to connect to namespace");
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
namespace: vec!["test_ns".into()],
})
.await
.expect("Failed to create namespace");
// Create a table in child namespace
// Create a table first
let test_data = create_test_data();
let _table = conn
.create_table("describe_test", test_data)
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to create table");
@@ -644,7 +597,6 @@ mod tests {
// Test: Open the table (which internally uses describe_table)
let opened_table = conn
.open_table("describe_test")
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to open table");
@@ -667,10 +619,6 @@ mod tests {
assert_eq!(schema.fields.len(), 2);
assert_eq!(schema.field(0).name(), "id");
assert_eq!(schema.field(1).name(), "name");
// Verify namespace and id
assert_eq!(opened_table.namespace(), &["test_ns"]);
assert_eq!(opened_table.id(), "test_ns$describe_test");
}
#[tokio::test]
@@ -687,18 +635,10 @@ mod tests {
.await
.expect("Failed to connect to namespace");
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
namespace: vec!["test_ns".into()],
})
.await
.expect("Failed to create namespace");
// Create initial table with 5 rows in child namespace
// Create initial table with 5 rows
let test_data1 = create_test_data();
let _table1 = conn
.create_table("overwrite_test", test_data1)
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to create table");
@@ -725,7 +665,6 @@ mod tests {
schema,
),
)
.namespace(vec!["test_ns".into()])
.mode(CreateTableMode::Overwrite)
.execute()
.await
@@ -769,18 +708,10 @@ mod tests {
.await
.expect("Failed to connect to namespace");
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
namespace: vec!["test_ns".into()],
})
.await
.expect("Failed to create namespace");
// Create initial table with test data in child namespace
// Create initial table with test data
let test_data1 = create_test_data();
let _table1 = conn
.create_table("exist_ok_test", test_data1)
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to create table");
@@ -789,7 +720,6 @@ mod tests {
let test_data2 = create_test_data();
let table2 = conn
.create_table("exist_ok_test", test_data2)
.namespace(vec!["test_ns".into()])
.mode(CreateTableMode::exist_ok(|req| req))
.execute()
.await
@@ -823,35 +753,25 @@ mod tests {
.await
.expect("Failed to connect to namespace");
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
namespace: vec!["test_ns".into()],
})
.await
.expect("Failed to create namespace");
// Create first table in child namespace
// Create first table
let test_data1 = create_test_data();
let _table1 = conn
.create_table("table1", test_data1)
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to create first table");
// Create second table in child namespace
// Create second table
let test_data2 = create_test_data();
let _table2 = conn
.create_table("table2", test_data2)
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to create second table");
// Verify: Both tables appear in table list for the child namespace
// Verify: Both tables appear in table list
let table_names = conn
.table_names()
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to list tables");
@@ -862,14 +782,12 @@ mod tests {
// Verify: Can open both tables
let opened_table1 = conn
.open_table("table1")
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to open table1");
let opened_table2 = conn
.open_table("table2")
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to open table2");
@@ -902,19 +820,8 @@ mod tests {
.await
.expect("Failed to connect to namespace");
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
namespace: vec!["test_ns".into()],
})
.await
.expect("Failed to create namespace");
// Test: Try to open a non-existent table in the child namespace
let result = conn
.open_table("non_existent_table")
.namespace(vec!["test_ns".into()])
.execute()
.await;
// Test: Try to open a non-existent table
let result = conn.open_table("non_existent_table").execute().await;
// Verify: Should return an error
assert!(result.is_err());
@@ -934,40 +841,30 @@ mod tests {
.await
.expect("Failed to connect to namespace");
// Create a child namespace first
conn.create_namespace(CreateNamespaceRequest {
namespace: vec!["test_ns".into()],
})
.await
.expect("Failed to create namespace");
// Create a table in child namespace
// Create a table first
let test_data = create_test_data();
let _table = conn
.create_table("drop_test", test_data)
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to create table");
// Verify table exists in child namespace
// Verify table exists
let table_names_before = conn
.table_names()
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to list tables");
assert!(table_names_before.contains(&"drop_test".to_string()));
// Test: Drop the table
conn.drop_table("drop_test", &["test_ns".into()])
conn.drop_table("drop_test", &[])
.await
.expect("Failed to drop table");
// Verify: Table no longer exists
let table_names_after = conn
.table_names()
.namespace(vec!["test_ns".into()])
.execute()
.await
.expect("Failed to list tables");

View File

@@ -416,7 +416,6 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
namespace: request.namespace.clone(),
index_cache_size: None,
lance_read_params: None,
location: None,
};
let req = (callback)(req);
self.open_table(req).await

View File

@@ -511,7 +511,7 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
/// Get the id of the table
///
/// This is the namespace of the table concatenated with the name
/// separated by $
/// separated by a dot (".")
fn id(&self) -> &str;
/// Get the arrow [Schema] of the table.
async fn schema(&self) -> Result<SchemaRef>;
@@ -734,16 +734,6 @@ impl Table {
self.inner.name()
}
/// Get the namespace of the table.
pub fn namespace(&self) -> &[String] {
self.inner.namespace()
}
/// Get the ID of the table (namespace + name joined by '$').
pub fn id(&self) -> &str {
self.inner.id()
}
/// Get the dataset of the table if it is a native table
///
/// Returns None otherwise
@@ -1478,8 +1468,6 @@ impl NativeTableExt for Arc<dyn BaseTable> {
#[derive(Debug, Clone)]
pub struct NativeTable {
name: String,
namespace: Vec<String>,
id: String,
uri: String,
pub(crate) dataset: dataset::DatasetConsistencyWrapper,
// This comes from the connection options. We store here so we can pass down
@@ -1519,7 +1507,7 @@ impl NativeTable {
/// * A [NativeTable] object.
pub async fn open(uri: &str) -> Result<Self> {
let name = Self::get_table_name(uri)?;
Self::open_with_params(uri, &name, vec![], None, None, None).await
Self::open_with_params(uri, &name, None, None, None).await
}
/// Opens an existing Table
@@ -1536,7 +1524,6 @@ impl NativeTable {
pub async fn open_with_params(
uri: &str,
name: &str,
namespace: Vec<String>,
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
params: Option<ReadParams>,
read_consistency_interval: Option<std::time::Duration>,
@@ -1561,12 +1548,9 @@ impl NativeTable {
})?;
let dataset = DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval);
let id = Self::build_id(&namespace, name);
Ok(Self {
name: name.to_string(),
namespace,
id,
uri: uri.to_string(),
dataset,
read_consistency_interval,
@@ -1589,24 +1573,12 @@ impl NativeTable {
Ok(name.to_string())
}
/// Build the table id by joining every namespace segment and the table
/// name with the `$` separator. An empty namespace yields just the name.
fn build_id(namespace: &[String], name: &str) -> String {
    // Chain the namespace parts with the name and let `join` handle the
    // separator placement; a lone name produces no separator at all.
    namespace
        .iter()
        .map(String::as_str)
        .chain(std::iter::once(name))
        .collect::<Vec<_>>()
        .join("$")
}
/// Creates a new Table
///
/// # Arguments
///
/// * `uri` - The URI to the table. When namespace is not empty, the caller must
/// provide an explicit URI (location) rather than deriving it from the table name.
/// * `uri` - The URI to the table.
/// * `name` The Table name
/// * `namespace` - The namespace path. When non-empty, an explicit URI must be provided.
/// * `batches` RecordBatch to be saved in the database.
/// * `params` - Write parameters.
///
@@ -1616,7 +1588,6 @@ impl NativeTable {
pub async fn create(
uri: &str,
name: &str,
namespace: Vec<String>,
batches: impl StreamingWriteSource,
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
params: Option<WriteParams>,
@@ -1643,12 +1614,8 @@ impl NativeTable {
source => Error::Lance { source },
})?;
let id = Self::build_id(&namespace, name);
Ok(Self {
name: name.to_string(),
namespace,
id,
uri: uri.to_string(),
dataset: DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval),
read_consistency_interval,
@@ -1658,7 +1625,6 @@ impl NativeTable {
pub async fn create_empty(
uri: &str,
name: &str,
namespace: Vec<String>,
schema: SchemaRef,
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
params: Option<WriteParams>,
@@ -1668,7 +1634,6 @@ impl NativeTable {
Self::create(
uri,
name,
namespace,
batches,
write_store_wrapper,
params,
@@ -1810,17 +1775,8 @@ impl NativeTable {
}
// Helper to get num_sub_vectors with default calculation
fn get_num_sub_vectors(provided: Option<u32>, dim: u32, num_bits: Option<u32>) -> u32 {
if let Some(provided) = provided {
return provided;
}
let suggested = suggested_num_sub_vectors(dim);
if num_bits.is_some_and(|num_bits| num_bits == 4) && suggested % 2 != 0 {
// num_sub_vectors must be even when 4 bits are used
suggested + 1
} else {
suggested
}
/// Resolve the number of PQ sub-vectors: honor the caller-provided value
/// when present, otherwise derive a suggestion from the vector dimension.
fn get_num_sub_vectors(provided: Option<u32>, dim: u32) -> u32 {
    match provided {
        Some(n) => n,
        // Only compute the suggestion when the caller left it unset.
        None => suggested_num_sub_vectors(dim),
    }
}
// Helper to extract vector dimension from field
@@ -1843,7 +1799,7 @@ impl NativeTable {
// Use IvfPq as the default for auto vector indices
let dim = Self::get_vector_dimension(field)?;
let ivf_params = lance_index::vector::ivf::IvfBuildParams::default();
let num_sub_vectors = Self::get_num_sub_vectors(None, dim, None);
let num_sub_vectors = Self::get_num_sub_vectors(None, dim);
let pq_params =
lance_index::vector::pq::PQBuildParams::new(num_sub_vectors as usize, 8);
let lance_idx_params =
@@ -1910,8 +1866,7 @@ impl NativeTable {
index.sample_rate,
index.max_iterations,
);
let num_sub_vectors =
Self::get_num_sub_vectors(index.num_sub_vectors, dim, index.num_bits);
let num_sub_vectors = Self::get_num_sub_vectors(index.num_sub_vectors, dim);
let num_bits = index.num_bits.unwrap_or(8) as usize;
let mut pq_params = PQBuildParams::new(num_sub_vectors as usize, num_bits);
pq_params.max_iters = index.max_iterations as usize;
@@ -1947,8 +1902,7 @@ impl NativeTable {
index.sample_rate,
index.max_iterations,
);
let num_sub_vectors =
Self::get_num_sub_vectors(index.num_sub_vectors, dim, index.num_bits);
let num_sub_vectors = Self::get_num_sub_vectors(index.num_sub_vectors, dim);
let hnsw_params = HnswBuildParams::default()
.num_edges(index.m as usize)
.ef_construction(index.ef_construction as usize);
@@ -2124,11 +2078,13 @@ impl BaseTable for NativeTable {
}
fn namespace(&self) -> &[String] {
&self.namespace
// Native tables don't support namespaces yet, return empty slice for root namespace
&[]
}
fn id(&self) -> &str {
&self.id
// For native tables, id is same as name since no namespace support
self.name.as_str()
}
async fn version(&self) -> Result<u64> {
@@ -2928,7 +2884,7 @@ mod tests {
let batches = make_test_batches();
let batches = Box::new(batches) as Box<dyn RecordBatchReader + Send>;
let table = NativeTable::create(uri, "test", vec![], batches, None, None, None)
let table = NativeTable::create(uri, "test", batches, None, None, None)
.await
.unwrap();
@@ -4133,8 +4089,6 @@ mod tests {
table.prewarm_index("text_idx").await.unwrap();
}
// Windows does not support precise sleep durations due to timer resolution limitations.
#[cfg(not(target_os = "windows"))]
#[tokio::test]
async fn test_read_consistency_interval() {
let intervals = vec![