mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 05:19:58 +00:00
Compare commits
40 Commits
rpgreen/fi
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a497db66f9 | ||
|
|
f5076269fe | ||
|
|
a61461331c | ||
|
|
b0170ea86a | ||
|
|
d1efc6ad8a | ||
|
|
9d638cb3c7 | ||
|
|
3cd73f9f5a | ||
|
|
b2d06a3a73 | ||
|
|
9d129c7e86 | ||
|
|
44878dd9a5 | ||
|
|
4b5bb2d76c | ||
|
|
434f4124fc | ||
|
|
03a1a99270 | ||
|
|
0110e3b6f8 | ||
|
|
f1f85b0a84 | ||
|
|
d6daa08b54 | ||
|
|
17b71de22e | ||
|
|
a250d8e7df | ||
|
|
5a2b33581e | ||
|
|
3d254f61b0 | ||
|
|
d15e380be1 | ||
|
|
0baf807be0 | ||
|
|
76bcc78910 | ||
|
|
135dfdc7ec | ||
|
|
6f39108857 | ||
|
|
bb6b0bea0c | ||
|
|
0084eb238b | ||
|
|
28ab29a3f0 | ||
|
|
7d3f5348a7 | ||
|
|
3531393523 | ||
|
|
93b8ac8e3e | ||
|
|
1b78ccedaf | ||
|
|
ca8d118f78 | ||
|
|
386fc9e466 | ||
|
|
ce1bafec1a | ||
|
|
8f18a7feed | ||
|
|
e2be699544 | ||
|
|
f77b0ef37d | ||
|
|
c41401f20f | ||
|
|
1cf3917a87 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.22.4-beta.0"
|
||||
current_version = "0.22.4-beta.3"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -19,7 +19,7 @@ rustflags = [
|
||||
"-Wclippy::string_add_assign",
|
||||
"-Wclippy::string_add",
|
||||
"-Wclippy::string_lit_as_bytes",
|
||||
"-Wclippy::string_to_string",
|
||||
"-Wclippy::implicit_clone",
|
||||
"-Wclippy::use_self",
|
||||
"-Dclippy::cargo",
|
||||
"-Dclippy::dbg_macro",
|
||||
|
||||
2
.github/ISSUE_TEMPLATE/documentation.yml
vendored
2
.github/ISSUE_TEMPLATE/documentation.yml
vendored
@@ -18,6 +18,6 @@ body:
|
||||
label: Link
|
||||
description: >
|
||||
Provide a link to the existing documentation, if applicable.
|
||||
placeholder: ex. https://lancedb.github.io/lancedb/guides/tables/...
|
||||
placeholder: ex. https://lancedb.com/docs/tables/...
|
||||
validations:
|
||||
required: false
|
||||
|
||||
@@ -31,7 +31,7 @@ runs:
|
||||
with:
|
||||
command: build
|
||||
working-directory: python
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
target: x86_64-unknown-linux-gnu
|
||||
manylinux: ${{ inputs.manylinux }}
|
||||
args: ${{ inputs.args }}
|
||||
@@ -46,7 +46,7 @@ runs:
|
||||
with:
|
||||
command: build
|
||||
working-directory: python
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
target: aarch64-unknown-linux-gnu
|
||||
manylinux: ${{ inputs.manylinux }}
|
||||
args: ${{ inputs.args }}
|
||||
|
||||
2
.github/workflows/build_mac_wheel/action.yml
vendored
2
.github/workflows/build_mac_wheel/action.yml
vendored
@@ -22,5 +22,5 @@ runs:
|
||||
command: build
|
||||
# TODO: pass through interpreter
|
||||
args: ${{ inputs.args }}
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
working-directory: python
|
||||
|
||||
@@ -26,7 +26,7 @@ runs:
|
||||
with:
|
||||
command: build
|
||||
args: ${{ inputs.args }}
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
working-directory: python
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
|
||||
@@ -98,3 +98,30 @@ jobs:
|
||||
|
||||
printenv OPENAI_API_KEY | codex login --with-api-key
|
||||
codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat /tmp/codex-prompt.txt)"
|
||||
|
||||
- name: Trigger sophon dependency update
|
||||
env:
|
||||
TAG: ${{ inputs.tag }}
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
VERSION="${TAG#refs/tags/}"
|
||||
VERSION="${VERSION#v}"
|
||||
LANCEDB_BRANCH="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
|
||||
|
||||
echo "Triggering sophon workflow with:"
|
||||
echo " lance_ref: ${TAG#refs/tags/}"
|
||||
echo " lancedb_ref: ${LANCEDB_BRANCH}"
|
||||
|
||||
gh workflow run codex-bump-lancedb-lance.yml \
|
||||
--repo lancedb/sophon \
|
||||
-f lance_ref="${TAG#refs/tags/}" \
|
||||
-f lancedb_ref="${LANCEDB_BRANCH}"
|
||||
|
||||
- name: Show latest sophon workflow run
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
echo "Latest sophon workflow run:"
|
||||
gh run list --repo lancedb/sophon --workflow codex-bump-lancedb-lance.yml --limit 1 --json databaseId,url,displayTitle
|
||||
|
||||
6
.github/workflows/docs.yml
vendored
6
.github/workflows/docs.yml
vendored
@@ -24,7 +24,7 @@ env:
|
||||
# according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
|
||||
# CI builds are faster with incremental disabled.
|
||||
CARGO_INCREMENTAL: "0"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||
|
||||
jobs:
|
||||
# Single deploy job since we're just deploying
|
||||
@@ -50,8 +50,8 @@ jobs:
|
||||
- name: Build Python
|
||||
working-directory: python
|
||||
run: |
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
|
||||
- name: Set up node
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
|
||||
62
.github/workflows/lance-release-timer.yml
vendored
Normal file
62
.github/workflows/lance-release-timer.yml
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
name: Lance Release Timer
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "*/10 * * * *"
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
actions: write
|
||||
|
||||
concurrency:
|
||||
group: lance-release-timer
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
trigger-update:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Check for new Lance tag
|
||||
id: check
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
run: |
|
||||
python3 ci/check_lance_release.py --github-output "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Look for existing PR
|
||||
if: steps.check.outputs.needs_update == 'true'
|
||||
id: pr
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
TITLE="chore: update lance dependency to v${{ steps.check.outputs.latest_version }}"
|
||||
COUNT=$(gh pr list --search "\"$TITLE\" in:title" --state open --limit 1 --json number --jq 'length')
|
||||
if [ "$COUNT" -gt 0 ]; then
|
||||
echo "Open PR already exists for $TITLE"
|
||||
echo "pr_exists=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "No existing PR for $TITLE"
|
||||
echo "pr_exists=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Trigger codex update workflow
|
||||
if: steps.check.outputs.needs_update == 'true' && steps.pr.outputs.pr_exists != 'true'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
TAG=${{ steps.check.outputs.latest_tag }}
|
||||
gh workflow run codex-update-lance-dependency.yml -f tag=refs/tags/$TAG
|
||||
|
||||
- name: Show latest codex workflow run
|
||||
if: steps.check.outputs.needs_update == 'true' && steps.pr.outputs.pr_exists != 'true'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
gh run list --workflow codex-update-lance-dependency.yml --limit 1 --json databaseId,url,displayTitle
|
||||
25
.github/workflows/nodejs.yml
vendored
25
.github/workflows/nodejs.yml
vendored
@@ -16,9 +16,6 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
# Disable full debug symbol generation to speed up CI build and keep memory down
|
||||
# "1" means line tables only, which is useful for panic tracebacks.
|
||||
RUSTFLAGS: "-C debuginfo=1"
|
||||
RUST_BACKTRACE: "1"
|
||||
|
||||
jobs:
|
||||
@@ -43,18 +40,20 @@ jobs:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
components: rustfmt, clippy
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
components: rustfmt, clippy
|
||||
- name: Lint
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Format Rust
|
||||
run: cargo fmt --all -- --check
|
||||
- name: Lint Rust
|
||||
run: cargo clippy --profile ci --all --all-features -- -D warnings
|
||||
- name: Lint Typescript
|
||||
run: |
|
||||
cargo fmt --all -- --check
|
||||
cargo clippy --all --all-features -- -D warnings
|
||||
npm ci
|
||||
npm run lint-ci
|
||||
- name: Lint examples
|
||||
@@ -90,7 +89,8 @@ jobs:
|
||||
- name: Build
|
||||
run: |
|
||||
npm ci
|
||||
npm run build
|
||||
npm run build:debug -- --profile ci
|
||||
npm run tsc
|
||||
- name: Setup localstack
|
||||
working-directory: .
|
||||
run: docker compose up --detach --wait
|
||||
@@ -147,7 +147,8 @@ jobs:
|
||||
- name: Build
|
||||
run: |
|
||||
npm ci
|
||||
npm run build
|
||||
npm run build:debug -- --profile ci
|
||||
npm run tsc
|
||||
- name: Test
|
||||
run: |
|
||||
npm run test
|
||||
|
||||
6
.github/workflows/npm-publish.yml
vendored
6
.github/workflows/npm-publish.yml
vendored
@@ -97,12 +97,6 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
settings:
|
||||
- target: x86_64-apple-darwin
|
||||
host: macos-latest
|
||||
features: ","
|
||||
pre_build: |-
|
||||
brew install protobuf
|
||||
rustup target add x86_64-apple-darwin
|
||||
- target: aarch64-apple-darwin
|
||||
host: macos-latest
|
||||
features: fp16kernels
|
||||
|
||||
4
.github/workflows/pypi-publish.yml
vendored
4
.github/workflows/pypi-publish.yml
vendored
@@ -11,7 +11,7 @@ on:
|
||||
- Cargo.toml # Change in dependency frequently breaks builds
|
||||
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||
|
||||
jobs:
|
||||
linux:
|
||||
@@ -64,8 +64,6 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
- target: x86_64-apple-darwin
|
||||
runner: macos-13
|
||||
- target: aarch64-apple-darwin
|
||||
runner: warp-macos-14-arm64-6x
|
||||
env:
|
||||
|
||||
38
.github/workflows/python.yml
vendored
38
.github/workflows/python.yml
vendored
@@ -18,7 +18,8 @@ env:
|
||||
# Color output for pytest is off by default.
|
||||
PYTEST_ADDOPTS: "--color=yes"
|
||||
FORCE_COLOR: "1"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||
RUST_BACKTRACE: "1"
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
@@ -78,7 +79,7 @@ jobs:
|
||||
doctest:
|
||||
name: "Doctest"
|
||||
timeout-minutes: 30
|
||||
runs-on: "ubuntu-24.04"
|
||||
runs-on: ubuntu-2404-8x-x64
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@@ -97,12 +98,9 @@ jobs:
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: python
|
||||
- name: Install
|
||||
run: |
|
||||
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
|
||||
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
|
||||
pip install tantivy
|
||||
pip install mlx
|
||||
- name: Doctest
|
||||
@@ -131,10 +129,9 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: 3.${{ matrix.python-minor-version }}
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: python
|
||||
- uses: ./.github/workflows/build_linux_wheel
|
||||
with:
|
||||
args: --profile ci
|
||||
- uses: ./.github/workflows/run_tests
|
||||
with:
|
||||
integration: true
|
||||
@@ -146,16 +143,9 @@ jobs:
|
||||
- name: Delete wheels
|
||||
run: rm -rf target/wheels
|
||||
platform:
|
||||
name: "Mac: ${{ matrix.config.name }}"
|
||||
name: "Mac"
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
- name: x86
|
||||
runner: macos-13
|
||||
- name: Arm
|
||||
runner: macos-14
|
||||
runs-on: "${{ matrix.config.runner }}"
|
||||
runs-on: macos-14
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@@ -169,10 +159,9 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: python
|
||||
- uses: ./.github/workflows/build_mac_wheel
|
||||
with:
|
||||
args: --profile ci
|
||||
- uses: ./.github/workflows/run_tests
|
||||
# Make sure wheels are not included in the Rust cache
|
||||
- name: Delete wheels
|
||||
@@ -199,10 +188,9 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: python
|
||||
- uses: ./.github/workflows/build_windows_wheel
|
||||
with:
|
||||
args: --profile ci
|
||||
- uses: ./.github/workflows/run_tests
|
||||
# Make sure wheels are not included in the Rust cache
|
||||
- name: Delete wheels
|
||||
@@ -231,7 +219,7 @@ jobs:
|
||||
run: |
|
||||
pip install "pydantic<2"
|
||||
pip install pyarrow==16
|
||||
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
||||
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
||||
pip install tantivy
|
||||
- name: Run tests
|
||||
run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests
|
||||
|
||||
2
.github/workflows/run_tests/action.yml
vendored
2
.github/workflows/run_tests/action.yml
vendored
@@ -15,7 +15,7 @@ runs:
|
||||
- name: Install lancedb
|
||||
shell: bash
|
||||
run: |
|
||||
pip3 install --extra-index-url https://pypi.fury.io/lancedb/ $(ls target/wheels/lancedb-*.whl)[tests,dev]
|
||||
pip3 install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ $(ls target/wheels/lancedb-*.whl)[tests,dev]
|
||||
- name: Setup localstack for integration tests
|
||||
if: ${{ inputs.integration == 'true' }}
|
||||
shell: bash
|
||||
|
||||
44
.github/workflows/rust.yml
vendored
44
.github/workflows/rust.yml
vendored
@@ -18,11 +18,7 @@ env:
|
||||
# This env var is used by Swatinem/rust-cache@v2 for the cache
|
||||
# key, so we set it to make sure it is always consistent.
|
||||
CARGO_TERM_COLOR: always
|
||||
# Disable full debug symbol generation to speed up CI build and keep memory down
|
||||
# "1" means line tables only, which is useful for panic tracebacks.
|
||||
RUSTFLAGS: "-C debuginfo=1"
|
||||
RUST_BACKTRACE: "1"
|
||||
CARGO_INCREMENTAL: 0
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
@@ -44,8 +40,6 @@ jobs:
|
||||
with:
|
||||
components: rustfmt, clippy
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: rust
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
@@ -53,7 +47,7 @@ jobs:
|
||||
- name: Run format
|
||||
run: cargo fmt --all -- --check
|
||||
- name: Run clippy
|
||||
run: cargo clippy --workspace --tests --all-features -- -D warnings
|
||||
run: cargo clippy --profile ci --workspace --tests --all-features -- -D warnings
|
||||
|
||||
build-no-lock:
|
||||
runs-on: ubuntu-24.04
|
||||
@@ -80,7 +74,7 @@ jobs:
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Build all
|
||||
run: |
|
||||
cargo build --benches --all-features --tests
|
||||
cargo build --profile ci --benches --all-features --tests
|
||||
|
||||
linux:
|
||||
timeout-minutes: 30
|
||||
@@ -103,14 +97,8 @@ jobs:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: rust
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
# This shaves 2 minutes off this step in CI. This doesn't seem to be
|
||||
# necessary in standard runners, but it is in the 4x runners.
|
||||
sudo rm /var/lib/man-db/auto-update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
run: sudo apt install -y protobuf-compiler libssl-dev
|
||||
- uses: rui314/setup-mold@v1
|
||||
- name: Make Swap
|
||||
run: |
|
||||
@@ -119,22 +107,22 @@ jobs:
|
||||
sudo mkswap /swapfile
|
||||
sudo swapon /swapfile
|
||||
- name: Build
|
||||
run: cargo build --all-features --tests --locked --examples
|
||||
run: cargo build --profile ci --all-features --tests --locked --examples
|
||||
- name: Run feature tests
|
||||
run: make -C ./lancedb feature-tests
|
||||
run: CARGO_ARGS="--profile ci" make -C ./lancedb feature-tests
|
||||
- name: Run examples
|
||||
run: cargo run --example simple --locked
|
||||
run: cargo run --profile ci --example simple --locked
|
||||
- name: Run remote tests
|
||||
# Running this requires access to secrets, so skip if this is
|
||||
# a PR from a fork.
|
||||
if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
|
||||
run: make -C ./lancedb remote-tests
|
||||
run: CARGO_ARGS="--profile ci" make -C ./lancedb remote-tests
|
||||
|
||||
macos:
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
matrix:
|
||||
mac-runner: ["macos-13", "macos-14"]
|
||||
mac-runner: ["macos-14", "macos-15"]
|
||||
runs-on: "${{ matrix.mac-runner }}"
|
||||
defaults:
|
||||
run:
|
||||
@@ -148,8 +136,6 @@ jobs:
|
||||
- name: CPU features
|
||||
run: sysctl -a | grep cpu
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: rust
|
||||
- name: Install dependencies
|
||||
run: brew install protobuf
|
||||
- name: Run tests
|
||||
@@ -159,7 +145,7 @@ jobs:
|
||||
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps \
|
||||
| jq -r '.packages[] | .features | keys | .[]' \
|
||||
| grep -v s3-test | sort | uniq | paste -s -d "," -`
|
||||
cargo test --features $ALL_FEATURES --locked
|
||||
cargo test --profile ci --features $ALL_FEATURES --locked
|
||||
|
||||
windows:
|
||||
runs-on: windows-2022
|
||||
@@ -173,22 +159,21 @@ jobs:
|
||||
working-directory: rust/lancedb
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set target
|
||||
run: rustup target add ${{ matrix.target }}
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: rust
|
||||
- name: Install Protoc v21.12
|
||||
run: choco install --no-progress protoc
|
||||
- name: Build
|
||||
run: |
|
||||
rustup target add ${{ matrix.target }}
|
||||
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
|
||||
cargo build --features remote --tests --locked --target ${{ matrix.target }}
|
||||
cargo build --profile ci --features remote --tests --locked --target ${{ matrix.target }}
|
||||
- name: Run tests
|
||||
# Can only run tests when target matches host
|
||||
if: ${{ matrix.target == 'x86_64-pc-windows-msvc' }}
|
||||
run: |
|
||||
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
|
||||
cargo test --features remote --locked
|
||||
cargo test --profile ci --features remote --locked
|
||||
|
||||
msrv:
|
||||
# Check the minimum supported Rust version
|
||||
@@ -213,6 +198,7 @@ jobs:
|
||||
uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: ${{ matrix.msrv }}
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Downgrade dependencies
|
||||
# These packages have newer requirements for MSRV
|
||||
run: |
|
||||
@@ -226,4 +212,4 @@ jobs:
|
||||
cargo update -p aws-sdk-sts --precise 1.51.0
|
||||
cargo update -p home --precise 0.5.9
|
||||
- name: cargo +${{ matrix.msrv }} check
|
||||
run: cargo check --workspace --tests --benches --all-features
|
||||
run: cargo check --profile ci --workspace --tests --benches --all-features
|
||||
|
||||
774
Cargo.lock
generated
774
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
42
Cargo.toml
42
Cargo.toml
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.78.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=1.0.0-beta.2", default-features = false, "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-core = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-datagen = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-file = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-io = { "version" = "=1.0.0-beta.2", default-features = false, "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-index = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-linalg = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-namespace = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=1.0.0-beta.2", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"], "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-table = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-testing = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-datafusion = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-encoding = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-arrow = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance = { "version" = "=1.1.0-beta.1", default-features = false, "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=1.1.0-beta.1", default-features = false, "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=1.1.0-beta.1", default-features = false, "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=1.1.0-beta.1", "tag" = "v1.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "56.2", optional = false }
|
||||
@@ -63,3 +63,17 @@ regex = "1.10"
|
||||
lazy_static = "1"
|
||||
semver = "1.0.25"
|
||||
chrono = "0.4"
|
||||
|
||||
[profile.ci]
|
||||
debug = "line-tables-only"
|
||||
inherits = "dev"
|
||||
incremental = false
|
||||
|
||||
# This rule applies to every package except workspace members (dependencies
|
||||
# such as `arrow` and `tokio`). It disables debug info and related features on
|
||||
# dependencies so their binaries stay smaller, improving cache reuse.
|
||||
[profile.ci.package."*"]
|
||||
debug = false
|
||||
debug-assertions = false
|
||||
strip = "debuginfo"
|
||||
incremental = false
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# **The Multimodal AI Lakehouse**
|
||||
|
||||
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://lancedb.github.io/lancedb/) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
|
||||
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://lancedb.com/docs) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
|
||||
|
||||
**The ultimate multimodal data platform for AI/ML applications.**
|
||||
|
||||
|
||||
208
ci/check_lance_release.py
Executable file
208
ci/check_lance_release.py
Executable file
@@ -0,0 +1,208 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Determine whether a newer Lance tag exists and expose results for CI."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Sequence, Tuple, Union
|
||||
|
||||
try: # Python >=3.11
|
||||
import tomllib # type: ignore
|
||||
except ModuleNotFoundError: # pragma: no cover - fallback for older Python
|
||||
import tomli as tomllib # type: ignore
|
||||
|
||||
LANCE_REPO = "lance-format/lance"
|
||||
|
||||
SEMVER_RE = re.compile(
|
||||
r"^\s*(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)"
|
||||
r"(?:-(?P<prerelease>[0-9A-Za-z.-]+))?"
|
||||
r"(?:\+[0-9A-Za-z.-]+)?\s*$"
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SemVer:
|
||||
major: int
|
||||
minor: int
|
||||
patch: int
|
||||
prerelease: Tuple[Union[int, str], ...]
|
||||
|
||||
def __lt__(self, other: "SemVer") -> bool: # pragma: no cover - simple comparison
|
||||
if (self.major, self.minor, self.patch) != (other.major, other.minor, other.patch):
|
||||
return (self.major, self.minor, self.patch) < (other.major, other.minor, other.patch)
|
||||
if self.prerelease == other.prerelease:
|
||||
return False
|
||||
if not self.prerelease:
|
||||
return False # release > anything else
|
||||
if not other.prerelease:
|
||||
return True
|
||||
for left, right in zip(self.prerelease, other.prerelease):
|
||||
if left == right:
|
||||
continue
|
||||
if isinstance(left, int) and isinstance(right, int):
|
||||
return left < right
|
||||
if isinstance(left, int):
|
||||
return True
|
||||
if isinstance(right, int):
|
||||
return False
|
||||
return str(left) < str(right)
|
||||
return len(self.prerelease) < len(other.prerelease)
|
||||
|
||||
def __eq__(self, other: object) -> bool: # pragma: no cover - trivial
|
||||
if not isinstance(other, SemVer):
|
||||
return NotImplemented
|
||||
return (
|
||||
self.major == other.major
|
||||
and self.minor == other.minor
|
||||
and self.patch == other.patch
|
||||
and self.prerelease == other.prerelease
|
||||
)
|
||||
|
||||
|
||||
def parse_semver(raw: str) -> SemVer:
|
||||
match = SEMVER_RE.match(raw)
|
||||
if not match:
|
||||
raise ValueError(f"Unsupported version format: {raw}")
|
||||
prerelease = match.group("prerelease")
|
||||
parts: Tuple[Union[int, str], ...] = ()
|
||||
if prerelease:
|
||||
parsed: List[Union[int, str]] = []
|
||||
for piece in prerelease.split("."):
|
||||
if piece.isdigit():
|
||||
parsed.append(int(piece))
|
||||
else:
|
||||
parsed.append(piece)
|
||||
parts = tuple(parsed)
|
||||
return SemVer(
|
||||
major=int(match.group("major")),
|
||||
minor=int(match.group("minor")),
|
||||
patch=int(match.group("patch")),
|
||||
prerelease=parts,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class TagInfo:
|
||||
tag: str # e.g. v1.0.0-beta.2
|
||||
version: str # e.g. 1.0.0-beta.2
|
||||
semver: SemVer
|
||||
|
||||
|
||||
def run_command(cmd: Sequence[str]) -> str:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, check=False)
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(
|
||||
f"Command {' '.join(cmd)} failed with {result.returncode}: {result.stderr.strip()}"
|
||||
)
|
||||
return result.stdout.strip()
|
||||
|
||||
|
||||
def fetch_remote_tags() -> List[TagInfo]:
|
||||
output = run_command(
|
||||
[
|
||||
"gh",
|
||||
"api",
|
||||
"-X",
|
||||
"GET",
|
||||
f"repos/{LANCE_REPO}/git/refs/tags",
|
||||
"--paginate",
|
||||
"--jq",
|
||||
".[].ref",
|
||||
]
|
||||
)
|
||||
tags: List[TagInfo] = []
|
||||
for line in output.splitlines():
|
||||
ref = line.strip()
|
||||
if not ref.startswith("refs/tags/v"):
|
||||
continue
|
||||
tag = ref.split("refs/tags/")[-1]
|
||||
version = tag.lstrip("v")
|
||||
try:
|
||||
tags.append(TagInfo(tag=tag, version=version, semver=parse_semver(version)))
|
||||
except ValueError:
|
||||
continue
|
||||
if not tags:
|
||||
raise RuntimeError("No Lance tags could be parsed from GitHub API output")
|
||||
return tags
|
||||
|
||||
|
||||
def read_current_version(repo_root: Path) -> str:
|
||||
cargo_path = repo_root / "Cargo.toml"
|
||||
with cargo_path.open("rb") as fh:
|
||||
data = tomllib.load(fh)
|
||||
try:
|
||||
deps = data["workspace"]["dependencies"]
|
||||
entry = deps["lance"]
|
||||
except KeyError as exc: # pragma: no cover - configuration guard
|
||||
raise RuntimeError("Failed to locate workspace.dependencies.lance in Cargo.toml") from exc
|
||||
|
||||
if isinstance(entry, str):
|
||||
raw_version = entry
|
||||
elif isinstance(entry, dict):
|
||||
raw_version = entry.get("version", "")
|
||||
else: # pragma: no cover - defensive
|
||||
raise RuntimeError("Unexpected lance dependency format")
|
||||
|
||||
raw_version = raw_version.strip()
|
||||
if not raw_version:
|
||||
raise RuntimeError("lance dependency does not declare a version")
|
||||
return raw_version.lstrip("=")
|
||||
|
||||
|
||||
def determine_latest_tag(tags: Iterable[TagInfo]) -> TagInfo:
|
||||
return max(tags, key=lambda tag: tag.semver)
|
||||
|
||||
|
||||
def write_outputs(args: argparse.Namespace, payload: dict) -> None:
|
||||
target = getattr(args, "github_output", None)
|
||||
if not target:
|
||||
return
|
||||
with open(target, "a", encoding="utf-8") as handle:
|
||||
for key, value in payload.items():
|
||||
handle.write(f"{key}={value}\n")
|
||||
|
||||
|
||||
def main(argv: Sequence[str] | None = None) -> int:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument(
|
||||
"--repo-root",
|
||||
default=Path(__file__).resolve().parents[1],
|
||||
type=Path,
|
||||
help="Path to the lancedb repository root",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--github-output",
|
||||
default=os.environ.get("GITHUB_OUTPUT"),
|
||||
help="Optional file path for writing GitHub Action outputs",
|
||||
)
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
repo_root = Path(args.repo_root)
|
||||
current_version = read_current_version(repo_root)
|
||||
current_semver = parse_semver(current_version)
|
||||
|
||||
tags = fetch_remote_tags()
|
||||
latest = determine_latest_tag(tags)
|
||||
needs_update = latest.semver > current_semver
|
||||
|
||||
payload = {
|
||||
"current_version": current_version,
|
||||
"current_tag": f"v{current_version}",
|
||||
"latest_version": latest.version,
|
||||
"latest_tag": latest.tag,
|
||||
"needs_update": "true" if needs_update else "false",
|
||||
}
|
||||
|
||||
print(json.dumps(payload))
|
||||
write_outputs(args, payload)
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -3,6 +3,8 @@ import re
|
||||
import sys
|
||||
import json
|
||||
|
||||
LANCE_GIT_URL = "https://github.com/lance-format/lance.git"
|
||||
|
||||
|
||||
def run_command(command: str) -> str:
|
||||
"""
|
||||
@@ -29,7 +31,7 @@ def get_latest_stable_version() -> str:
|
||||
|
||||
def get_latest_preview_version() -> str:
|
||||
lance_tags = run_command(
|
||||
"git ls-remote --tags https://github.com/lancedb/lance.git | grep 'refs/tags/v[0-9beta.-]\\+$'"
|
||||
f"git ls-remote --tags {LANCE_GIT_URL} | grep 'refs/tags/v[0-9beta.-]\\+$'"
|
||||
).splitlines()
|
||||
lance_tags = (
|
||||
tag.split("refs/tags/")[1]
|
||||
@@ -176,8 +178,8 @@ def set_stable_version(version: str):
|
||||
def set_preview_version(version: str):
|
||||
"""
|
||||
Sets lines to
|
||||
lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-io = { "version" = "=0.29.0", default-features = false, "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.29.0-beta.2", "git" = LANCE_GIT_URL }
|
||||
lance-io = { "version" = "=0.29.0", default-features = false, "tag" = "v0.29.0-beta.2", "git" = LANCE_GIT_URL }
|
||||
...
|
||||
"""
|
||||
|
||||
@@ -194,7 +196,7 @@ def set_preview_version(version: str):
|
||||
config["features"] = features
|
||||
|
||||
config["tag"] = f"v{version}"
|
||||
config["git"] = "https://github.com/lancedb/lance.git"
|
||||
config["git"] = LANCE_GIT_URL
|
||||
|
||||
return dict_to_toml_line(package_name, config)
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# LanceDB Documentation
|
||||
|
||||
LanceDB docs are deployed to https://lancedb.github.io/lancedb/.
|
||||
LanceDB docs are available at [lancedb.com/docs](https://lancedb.com/docs).
|
||||
|
||||
Docs is built and deployed automatically by [Github Actions](../.github/workflows/docs.yml)
|
||||
The SDK docs are built and deployed automatically by [Github Actions](../.github/workflows/docs.yml)
|
||||
whenever a commit is pushed to the `main` branch. So it is possible for the docs to show
|
||||
unreleased features.
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example.
|
||||
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Contributing to LanceDB Typescript
|
||||
|
||||
This document outlines the process for contributing to LanceDB Typescript.
|
||||
For general contribution guidelines, see [CONTRIBUTING.md](../../../../CONTRIBUTING.md).
|
||||
For general contribution guidelines, see [CONTRIBUTING.md](../CONTRIBUTING.md).
|
||||
|
||||
## Project layout
|
||||
|
||||
|
||||
@@ -147,7 +147,7 @@ A new PermutationBuilder instance
|
||||
#### Example
|
||||
|
||||
```ts
|
||||
builder.splitCalculated("user_id % 3");
|
||||
builder.splitCalculated({ calculation: "user_id % 3" });
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
@@ -89,4 +89,4 @@ optional storageOptions: Record<string, string>;
|
||||
|
||||
(For LanceDB OSS only): configuration for object storage.
|
||||
|
||||
The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
|
||||
@@ -97,4 +97,4 @@ Configuration for object storage.
|
||||
Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
|
||||
@@ -8,6 +8,14 @@
|
||||
|
||||
## Properties
|
||||
|
||||
### numAttempts
|
||||
|
||||
```ts
|
||||
numAttempts: number;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### numDeletedRows
|
||||
|
||||
```ts
|
||||
|
||||
@@ -42,4 +42,4 @@ Configuration for object storage.
|
||||
Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
The available options are described at https://lancedb.com/docs/storage/
|
||||
|
||||
@@ -30,6 +30,12 @@ is also an [asynchronous API client](#connections-asynchronous).
|
||||
|
||||
::: lancedb.table.Table
|
||||
|
||||
::: lancedb.table.FragmentStatistics
|
||||
|
||||
::: lancedb.table.FragmentSummaryStats
|
||||
|
||||
::: lancedb.table.Tags
|
||||
|
||||
## Querying (Synchronous)
|
||||
|
||||
::: lancedb.query.Query
|
||||
@@ -58,6 +64,14 @@ is also an [asynchronous API client](#connections-asynchronous).
|
||||
|
||||
::: lancedb.embeddings.open_clip.OpenClipEmbeddings
|
||||
|
||||
## Remote configuration
|
||||
|
||||
::: lancedb.remote.ClientConfig
|
||||
|
||||
::: lancedb.remote.TimeoutConfig
|
||||
|
||||
::: lancedb.remote.RetryConfig
|
||||
|
||||
## Context
|
||||
|
||||
::: lancedb.context.contextualize
|
||||
@@ -115,6 +129,8 @@ Table hold your actual data as a collection of records / rows.
|
||||
|
||||
::: lancedb.table.AsyncTable
|
||||
|
||||
::: lancedb.table.AsyncTags
|
||||
|
||||
## Indices (Asynchronous)
|
||||
|
||||
Indices can be created on a table to speed up queries. This section
|
||||
@@ -136,6 +152,8 @@ lists the indices that LanceDb supports.
|
||||
|
||||
::: lancedb.index.IvfFlat
|
||||
|
||||
::: lancedb.table.IndexStatistics
|
||||
|
||||
## Querying (Asynchronous)
|
||||
|
||||
Queries allow you to return data from your database. Basic queries can be
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.22.4-beta.0</version>
|
||||
<version>0.22.4-beta.3</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.22.4-beta.0</version>
|
||||
<version>0.22.4-beta.3</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.22.4-beta.0</version>
|
||||
<version>0.22.4-beta.3</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.22.4-beta.0"
|
||||
version = "0.22.4-beta.3"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -30,7 +30,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example.
|
||||
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ export interface CreateTableOptions {
|
||||
* Options already set on the connection will be inherited by the table,
|
||||
* but can be overridden here.
|
||||
*
|
||||
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
* The available options are described at https://lancedb.com/docs/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>;
|
||||
|
||||
@@ -78,7 +78,7 @@ export interface OpenTableOptions {
|
||||
* Options already set on the connection will be inherited by the table,
|
||||
* but can be overridden here.
|
||||
*
|
||||
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
* The available options are described at https://lancedb.com/docs/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>;
|
||||
/**
|
||||
|
||||
@@ -118,7 +118,7 @@ export class PermutationBuilder {
|
||||
* @returns A new PermutationBuilder instance
|
||||
* @example
|
||||
* ```ts
|
||||
* builder.splitCalculated("user_id % 3");
|
||||
* builder.splitCalculated({ calculation: "user_id % 3" });
|
||||
* ```
|
||||
*/
|
||||
splitCalculated(options: SplitCalculatedOptions): PermutationBuilder {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.22.4-beta.0",
|
||||
"version": "0.22.4-beta.3",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.22.4-beta.0",
|
||||
"version": "0.22.4-beta.3",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.22.4-beta.0",
|
||||
"version": "0.22.4-beta.3",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.22.4-beta.0",
|
||||
"version": "0.22.4-beta.3",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.22.4-beta.0",
|
||||
"version": "0.22.4-beta.3",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.22.4-beta.0",
|
||||
"version": "0.22.4-beta.3",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.22.4-beta.0",
|
||||
"version": "0.22.4-beta.3",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.22.4-beta.0",
|
||||
"version": "0.22.4-beta.3",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.22.4-beta.0",
|
||||
"version": "0.22.4-beta.3",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.22.4-beta.0",
|
||||
"version": "0.22.4-beta.3",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.22.4-beta.0",
|
||||
"version": "0.22.4-beta.3",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
@@ -73,8 +73,10 @@
|
||||
"scripts": {
|
||||
"artifacts": "napi artifacts",
|
||||
"build:debug": "napi build --platform --no-const-enum --dts ../lancedb/native.d.ts --js ../lancedb/native.js lancedb",
|
||||
"postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
|
||||
"build:release": "napi build --platform --no-const-enum --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
|
||||
"build": "npm run build:debug && npm run tsc && shx cp lancedb/*.node dist/",
|
||||
"postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
|
||||
"build": "npm run build:debug && npm run tsc",
|
||||
"build-release": "npm run build:release && npm run tsc",
|
||||
"tsc": "tsc -b",
|
||||
"posttsc": "shx cp lancedb/native.d.ts dist/native.d.ts",
|
||||
|
||||
@@ -35,7 +35,7 @@ pub struct ConnectionOptions {
|
||||
pub read_consistency_interval: Option<f64>,
|
||||
/// (For LanceDB OSS only): configuration for object storage.
|
||||
///
|
||||
/// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
/// The available options are described at https://lancedb.com/docs/storage/
|
||||
pub storage_options: Option<HashMap<String, String>>,
|
||||
/// (For LanceDB OSS only): the session to use for this connection. Holds
|
||||
/// shared caches and other session-specific state.
|
||||
|
||||
@@ -740,6 +740,7 @@ pub struct MergeResult {
|
||||
pub num_inserted_rows: i64,
|
||||
pub num_updated_rows: i64,
|
||||
pub num_deleted_rows: i64,
|
||||
pub num_attempts: i64,
|
||||
}
|
||||
|
||||
impl From<lancedb::table::MergeResult> for MergeResult {
|
||||
@@ -749,6 +750,7 @@ impl From<lancedb::table::MergeResult> for MergeResult {
|
||||
num_inserted_rows: value.num_inserted_rows as i64,
|
||||
num_updated_rows: value.num_updated_rows as i64,
|
||||
num_deleted_rows: value.num_deleted_rows as i64,
|
||||
num_attempts: value.num_attempts as i64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.25.4-beta.0"
|
||||
current_version = "0.26.0-beta.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.25.4-beta.0"
|
||||
version = "0.26.0-beta.0"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
@@ -18,6 +18,7 @@ arrow = { version = "56.2", features = ["pyarrow"] }
|
||||
async-trait = "0.1"
|
||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||
lance-core.workspace = true
|
||||
lance-namespace.workspace = true
|
||||
lance-io.workspace = true
|
||||
env_logger.workspace = true
|
||||
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
PIP_EXTRA_INDEX_URL ?= https://pypi.fury.io/lancedb/
|
||||
PIP_EXTRA_INDEX_URL ?= https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/
|
||||
|
||||
help: ## Show this help.
|
||||
@sed -ne '/@sed/!s/## //p' $(MAKEFILE_LIST)
|
||||
|
||||
.PHONY: develop
|
||||
develop: ## Install the package in development mode.
|
||||
PIP_EXTRA_INDEX_URL=$(PIP_EXTRA_INDEX_URL) maturin develop --extras tests,dev,embeddings
|
||||
PIP_EXTRA_INDEX_URL="$(PIP_EXTRA_INDEX_URL)" maturin develop --extras tests,dev,embeddings
|
||||
|
||||
.PHONY: format
|
||||
format: ## Format the code.
|
||||
|
||||
@@ -10,7 +10,7 @@ dependencies = [
|
||||
"pyarrow>=16",
|
||||
"pydantic>=1.10",
|
||||
"tqdm>=4.27.0",
|
||||
"lance-namespace>=0.0.21"
|
||||
"lance-namespace>=0.2.1"
|
||||
]
|
||||
description = "lancedb"
|
||||
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
|
||||
@@ -45,7 +45,7 @@ repository = "https://github.com/lancedb/lancedb"
|
||||
|
||||
[project.optional-dependencies]
|
||||
pylance = [
|
||||
"pylance>=0.25",
|
||||
"pylance>=1.0.0b14",
|
||||
]
|
||||
tests = [
|
||||
"aiohttp",
|
||||
@@ -59,7 +59,7 @@ tests = [
|
||||
"polars>=0.19, <=1.3.0",
|
||||
"tantivy",
|
||||
"pyarrow-stubs",
|
||||
"pylance>=1.0.0b2",
|
||||
"pylance>=1.0.0b14",
|
||||
"requests",
|
||||
"datafusion",
|
||||
]
|
||||
|
||||
@@ -20,7 +20,12 @@ from .remote.db import RemoteDBConnection
|
||||
from .schema import vector
|
||||
from .table import AsyncTable, Table
|
||||
from ._lancedb import Session
|
||||
from .namespace import connect_namespace, LanceNamespaceDBConnection
|
||||
from .namespace import (
|
||||
connect_namespace,
|
||||
connect_namespace_async,
|
||||
LanceNamespaceDBConnection,
|
||||
AsyncLanceNamespaceDBConnection,
|
||||
)
|
||||
|
||||
|
||||
def connect(
|
||||
@@ -36,7 +41,7 @@ def connect(
|
||||
session: Optional[Session] = None,
|
||||
**kwargs: Any,
|
||||
) -> DBConnection:
|
||||
"""Connect to a LanceDB database. YAY!
|
||||
"""Connect to a LanceDB database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -67,7 +72,7 @@ def connect(
|
||||
default configuration is used.
|
||||
storage_options: dict, optional
|
||||
Additional options for the storage backend. See available options at
|
||||
<https://lancedb.github.io/lancedb/guides/storage/>
|
||||
<https://lancedb.com/docs/storage/>
|
||||
session: Session, optional
|
||||
(For LanceDB OSS only)
|
||||
A session to use for this connection. Sessions allow you to configure
|
||||
@@ -169,7 +174,7 @@ async def connect_async(
|
||||
default configuration is used.
|
||||
storage_options: dict, optional
|
||||
Additional options for the storage backend. See available options at
|
||||
<https://lancedb.github.io/lancedb/guides/storage/>
|
||||
<https://lancedb.com/docs/storage/>
|
||||
session: Session, optional
|
||||
(For LanceDB OSS only)
|
||||
A session to use for this connection. Sessions allow you to configure
|
||||
@@ -224,7 +229,9 @@ __all__ = [
|
||||
"connect",
|
||||
"connect_async",
|
||||
"connect_namespace",
|
||||
"connect_namespace_async",
|
||||
"AsyncConnection",
|
||||
"AsyncLanceNamespaceDBConnection",
|
||||
"AsyncTable",
|
||||
"URI",
|
||||
"sanitize_uri",
|
||||
|
||||
@@ -3,10 +3,30 @@ from typing import Dict, List, Optional, Tuple, Any, TypedDict, Union, Literal
|
||||
|
||||
import pyarrow as pa
|
||||
|
||||
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
|
||||
from .index import (
|
||||
BTree,
|
||||
IvfFlat,
|
||||
IvfPq,
|
||||
IvfSq,
|
||||
Bitmap,
|
||||
LabelList,
|
||||
HnswPq,
|
||||
HnswSq,
|
||||
FTS,
|
||||
)
|
||||
from .io import StorageOptionsProvider
|
||||
from lance_namespace import (
|
||||
ListNamespacesResponse,
|
||||
CreateNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
ListTablesResponse,
|
||||
)
|
||||
from .remote import ClientConfig
|
||||
|
||||
IvfHnswPq: type[HnswPq] = HnswPq
|
||||
IvfHnswSq: type[HnswSq] = HnswSq
|
||||
|
||||
class Session:
|
||||
def __init__(
|
||||
self,
|
||||
@@ -26,24 +46,44 @@ class Connection(object):
|
||||
async def close(self): ...
|
||||
async def list_namespaces(
|
||||
self,
|
||||
namespace: List[str],
|
||||
page_token: Optional[str],
|
||||
limit: Optional[int],
|
||||
) -> List[str]: ...
|
||||
async def create_namespace(self, namespace: List[str]) -> None: ...
|
||||
async def drop_namespace(self, namespace: List[str]) -> None: ...
|
||||
async def table_names(
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse: ...
|
||||
async def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse: ...
|
||||
async def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse: ...
|
||||
async def describe_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
) -> DescribeNamespaceResponse: ...
|
||||
async def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse: ...
|
||||
async def table_names(
|
||||
self,
|
||||
namespace: Optional[List[str]],
|
||||
start_after: Optional[str],
|
||||
limit: Optional[int],
|
||||
) -> list[str]: ...
|
||||
) -> list[str]: ... # Deprecated: Use list_tables instead
|
||||
async def create_table(
|
||||
self,
|
||||
name: str,
|
||||
mode: str,
|
||||
data: pa.RecordBatchReader,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
location: Optional[str] = None,
|
||||
@@ -53,7 +93,7 @@ class Connection(object):
|
||||
name: str,
|
||||
mode: str,
|
||||
schema: pa.Schema,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
location: Optional[str] = None,
|
||||
@@ -61,7 +101,7 @@ class Connection(object):
|
||||
async def open_table(
|
||||
self,
|
||||
name: str,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
@@ -71,7 +111,7 @@ class Connection(object):
|
||||
self,
|
||||
target_table_name: str,
|
||||
source_uri: str,
|
||||
target_namespace: List[str] = [],
|
||||
target_namespace: Optional[List[str]] = None,
|
||||
source_version: Optional[int] = None,
|
||||
source_tag: Optional[str] = None,
|
||||
is_shallow: bool = True,
|
||||
@@ -80,11 +120,13 @@ class Connection(object):
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace: List[str] = [],
|
||||
new_namespace: List[str] = [],
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
) -> None: ...
|
||||
async def drop_table(self, name: str, namespace: List[str] = []) -> None: ...
|
||||
async def drop_all_tables(self, namespace: List[str] = []) -> None: ...
|
||||
async def drop_table(
|
||||
self, name: str, namespace: Optional[List[str]] = None
|
||||
) -> None: ...
|
||||
async def drop_all_tables(self, namespace: Optional[List[str]] = None) -> None: ...
|
||||
|
||||
class Table:
|
||||
def name(self) -> str: ...
|
||||
@@ -102,7 +144,17 @@ class Table:
|
||||
async def create_index(
|
||||
self,
|
||||
column: str,
|
||||
index: Union[IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS],
|
||||
index: Union[
|
||||
IvfFlat,
|
||||
IvfSq,
|
||||
IvfPq,
|
||||
HnswPq,
|
||||
HnswSq,
|
||||
BTree,
|
||||
Bitmap,
|
||||
LabelList,
|
||||
FTS,
|
||||
],
|
||||
replace: Optional[bool],
|
||||
wait_timeout: Optional[object],
|
||||
*,
|
||||
@@ -306,6 +358,7 @@ class MergeResult:
|
||||
num_updated_rows: int
|
||||
num_inserted_rows: int
|
||||
num_deleted_rows: int
|
||||
num_attempts: int
|
||||
|
||||
class AddColumnsResult:
|
||||
version: int
|
||||
|
||||
@@ -96,7 +96,7 @@ def data_to_reader(
|
||||
f"Unknown data type {type(data)}. "
|
||||
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
|
||||
"pyarrow Table/RecordBatch, or Pydantic models. "
|
||||
"See https://lancedb.github.io/lancedb/guides/tables/ for examples."
|
||||
"See https://lancedb.com/docs/tables/ for examples."
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -22,6 +22,13 @@ from lancedb.embeddings.registry import EmbeddingFunctionRegistry
|
||||
|
||||
from lancedb.common import data_to_reader, sanitize_uri, validate_schema
|
||||
from lancedb.background_loop import LOOP
|
||||
from lance_namespace import (
|
||||
ListNamespacesResponse,
|
||||
CreateNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
ListTablesResponse,
|
||||
)
|
||||
|
||||
from . import __version__
|
||||
from ._lancedb import connect as lancedb_connect # type: ignore
|
||||
@@ -48,16 +55,22 @@ if TYPE_CHECKING:
|
||||
from .io import StorageOptionsProvider
|
||||
from ._lancedb import Session
|
||||
|
||||
from .namespace_utils import (
|
||||
_normalize_create_namespace_mode,
|
||||
_normalize_drop_namespace_mode,
|
||||
_normalize_drop_namespace_behavior,
|
||||
)
|
||||
|
||||
|
||||
class DBConnection(EnforceOverrides):
|
||||
"""An active LanceDB connection interface."""
|
||||
|
||||
def list_namespaces(
|
||||
self,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
) -> Iterable[str]:
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
"""List immediate child namespace names in the given namespace.
|
||||
|
||||
Parameters
|
||||
@@ -66,48 +79,126 @@ class DBConnection(EnforceOverrides):
|
||||
The parent namespace to list namespaces in.
|
||||
Empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
The token to use for pagination. If not present, start from the beginning.
|
||||
limit: int, default 10
|
||||
The size of the page to return.
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Iterable of str
|
||||
List of immediate child namespace names
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
"""
|
||||
return []
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return ListNamespacesResponse(namespaces=[], page_token=None)
|
||||
|
||||
def create_namespace(self, namespace: List[str]) -> None:
|
||||
def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
"""Create a new namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
or "overwrite" (replace if exists). Case insensitive.
|
||||
properties: Dict[str, str], optional
|
||||
Properties to set on the namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing the properties of the created namespace.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Namespace operations are not supported for this connection type"
|
||||
)
|
||||
|
||||
def drop_namespace(self, namespace: List[str]) -> None:
|
||||
def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
"""Drop a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior: str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Namespace operations are not supported for this connection type"
|
||||
)
|
||||
|
||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Namespace operations are not supported for this connection type"
|
||||
)
|
||||
|
||||
def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
"""List all tables in this database with pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"list_tables is not supported for this connection type"
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
def table_names(
|
||||
self,
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
) -> Iterable[str]:
|
||||
"""List all tables in this database, in sorted order
|
||||
|
||||
@@ -142,7 +233,7 @@ class DBConnection(EnforceOverrides):
|
||||
fill_value: float = 0.0,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
data_storage_version: Optional[str] = None,
|
||||
@@ -191,7 +282,11 @@ class DBConnection(EnforceOverrides):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://lancedb.github.io/lancedb/guides/storage/>
|
||||
<https://lancedb.com/docs/storage/>
|
||||
|
||||
To enable stable row IDs (row IDs remain stable after compaction,
|
||||
update, delete, and merges), set `new_table_enable_stable_row_ids`
|
||||
to `"true"` in storage_options when connecting to the database.
|
||||
data_storage_version: optional, str, default "stable"
|
||||
Deprecated. Set `storage_options` when connecting to the database and set
|
||||
`new_table_data_storage_version` in the options.
|
||||
@@ -308,7 +403,7 @@ class DBConnection(EnforceOverrides):
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
@@ -339,7 +434,7 @@ class DBConnection(EnforceOverrides):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://lancedb.github.io/lancedb/guides/storage/>
|
||||
<https://lancedb.com/docs/storage/>
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -347,7 +442,7 @@ class DBConnection(EnforceOverrides):
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def drop_table(self, name: str, namespace: List[str] = []):
|
||||
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
|
||||
"""Drop a table from the database.
|
||||
|
||||
Parameters
|
||||
@@ -358,14 +453,16 @@ class DBConnection(EnforceOverrides):
|
||||
The namespace to drop the table from.
|
||||
Empty list represents root namespace.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
raise NotImplementedError
|
||||
|
||||
def rename_table(
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace: List[str] = [],
|
||||
new_namespace: List[str] = [],
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
):
|
||||
"""Rename a table in the database.
|
||||
|
||||
@@ -382,6 +479,10 @@ class DBConnection(EnforceOverrides):
|
||||
The namespace to move the table to.
|
||||
If not specified, defaults to the same as cur_namespace.
|
||||
"""
|
||||
if cur_namespace is None:
|
||||
cur_namespace = []
|
||||
if new_namespace is None:
|
||||
new_namespace = []
|
||||
raise NotImplementedError
|
||||
|
||||
def drop_database(self):
|
||||
@@ -391,7 +492,7 @@ class DBConnection(EnforceOverrides):
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def drop_all_tables(self, namespace: List[str] = []):
|
||||
def drop_all_tables(self, namespace: Optional[List[str]] = None):
|
||||
"""
|
||||
Drop all tables from the database
|
||||
|
||||
@@ -401,6 +502,8 @@ class DBConnection(EnforceOverrides):
|
||||
The namespace to drop all tables from.
|
||||
None or empty list represents root namespace.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
raise NotImplementedError
|
||||
|
||||
@property
|
||||
@@ -472,6 +575,12 @@ class LanceDBConnection(DBConnection):
|
||||
uri = uri[7:] # Remove "file://"
|
||||
elif uri.startswith("file:/"):
|
||||
uri = uri[5:] # Remove "file:"
|
||||
|
||||
if sys.platform == "win32":
|
||||
# On Windows, a path like /C:/path should become C:/path
|
||||
if len(uri) >= 3 and uri[0] == "/" and uri[2] == ":":
|
||||
uri = uri[1:]
|
||||
|
||||
uri = Path(uri)
|
||||
uri = uri.expanduser().absolute()
|
||||
Path(uri).mkdir(parents=True, exist_ok=True)
|
||||
@@ -535,10 +644,10 @@ class LanceDBConnection(DBConnection):
|
||||
@override
|
||||
def list_namespaces(
|
||||
self,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
) -> Iterable[str]:
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
"""List immediate child namespace names in the given namespace.
|
||||
|
||||
Parameters
|
||||
@@ -547,15 +656,18 @@ class LanceDBConnection(DBConnection):
|
||||
The parent namespace to list namespaces in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
The token to use for pagination. If not present, start from the beginning.
|
||||
limit: int, default 10
|
||||
The size of the page to return.
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Iterable of str
|
||||
List of immediate child namespace names
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return LOOP.run(
|
||||
self._conn.list_namespaces(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
@@ -563,26 +675,111 @@ class LanceDBConnection(DBConnection):
|
||||
)
|
||||
|
||||
@override
|
||||
def create_namespace(self, namespace: List[str]) -> None:
|
||||
def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
"""Create a new namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
or "overwrite" (replace if exists). Case insensitive.
|
||||
properties: Dict[str, str], optional
|
||||
Properties to set on the namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing the properties of the created namespace.
|
||||
"""
|
||||
LOOP.run(self._conn.create_namespace(namespace=namespace))
|
||||
return LOOP.run(
|
||||
self._conn.create_namespace(
|
||||
namespace=namespace, mode=mode, properties=properties
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def drop_namespace(self, namespace: List[str]) -> None:
|
||||
def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
"""Drop a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior: str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
return LOOP.run(self._conn.drop_namespace(namespace=namespace))
|
||||
return LOOP.run(
|
||||
self._conn.drop_namespace(namespace=namespace, mode=mode, behavior=behavior)
|
||||
)
|
||||
|
||||
@override
|
||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
return LOOP.run(self._conn.describe_namespace(namespace=namespace))
|
||||
|
||||
@override
|
||||
def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
"""List all tables in this database with pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return LOOP.run(
|
||||
self._conn.list_tables(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def table_names(
|
||||
@@ -590,10 +787,13 @@ class LanceDBConnection(DBConnection):
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
) -> Iterable[str]:
|
||||
"""Get the names of all tables in the database. The names are sorted.
|
||||
|
||||
.. deprecated::
|
||||
Use :meth:`list_tables` instead, which provides proper pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
@@ -608,6 +808,15 @@ class LanceDBConnection(DBConnection):
|
||||
Iterator of str.
|
||||
A list of table names.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"table_names() is deprecated, use list_tables() instead",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return LOOP.run(
|
||||
self._conn.table_names(
|
||||
namespace=namespace, start_after=page_token, limit=limit
|
||||
@@ -632,7 +841,7 @@ class LanceDBConnection(DBConnection):
|
||||
fill_value: float = 0.0,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
data_storage_version: Optional[str] = None,
|
||||
@@ -649,6 +858,8 @@ class LanceDBConnection(DBConnection):
|
||||
---
|
||||
DBConnection.create_table
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if mode.lower() not in ["create", "overwrite"]:
|
||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||
validate_table_name(name)
|
||||
@@ -674,7 +885,7 @@ class LanceDBConnection(DBConnection):
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
@@ -692,6 +903,8 @@ class LanceDBConnection(DBConnection):
|
||||
-------
|
||||
A LanceTable object representing the table.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if index_cache_size is not None:
|
||||
import warnings
|
||||
|
||||
@@ -717,7 +930,7 @@ class LanceDBConnection(DBConnection):
|
||||
target_table_name: str,
|
||||
source_uri: str,
|
||||
*,
|
||||
target_namespace: List[str] = [],
|
||||
target_namespace: Optional[List[str]] = None,
|
||||
source_version: Optional[int] = None,
|
||||
source_tag: Optional[str] = None,
|
||||
is_shallow: bool = True,
|
||||
@@ -750,6 +963,8 @@ class LanceDBConnection(DBConnection):
|
||||
-------
|
||||
A LanceTable object representing the cloned table.
|
||||
"""
|
||||
if target_namespace is None:
|
||||
target_namespace = []
|
||||
LOOP.run(
|
||||
self._conn.clone_table(
|
||||
target_table_name,
|
||||
@@ -770,7 +985,7 @@ class LanceDBConnection(DBConnection):
|
||||
def drop_table(
|
||||
self,
|
||||
name: str,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
ignore_missing: bool = False,
|
||||
):
|
||||
"""Drop a table from the database.
|
||||
@@ -784,6 +999,8 @@ class LanceDBConnection(DBConnection):
|
||||
ignore_missing: bool, default False
|
||||
If True, ignore if the table does not exist.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
LOOP.run(
|
||||
self._conn.drop_table(
|
||||
name, namespace=namespace, ignore_missing=ignore_missing
|
||||
@@ -791,7 +1008,9 @@ class LanceDBConnection(DBConnection):
|
||||
)
|
||||
|
||||
@override
|
||||
def drop_all_tables(self, namespace: List[str] = []):
|
||||
def drop_all_tables(self, namespace: Optional[List[str]] = None):
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
LOOP.run(self._conn.drop_all_tables(namespace=namespace))
|
||||
|
||||
@override
|
||||
@@ -799,8 +1018,8 @@ class LanceDBConnection(DBConnection):
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace: List[str] = [],
|
||||
new_namespace: List[str] = [],
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
):
|
||||
"""Rename a table in the database.
|
||||
|
||||
@@ -815,6 +1034,10 @@ class LanceDBConnection(DBConnection):
|
||||
new_namespace: List[str], optional
|
||||
The namespace to move the table to.
|
||||
"""
|
||||
if cur_namespace is None:
|
||||
cur_namespace = []
|
||||
if new_namespace is None:
|
||||
new_namespace = []
|
||||
LOOP.run(
|
||||
self._conn.rename_table(
|
||||
cur_name,
|
||||
@@ -904,10 +1127,10 @@ class AsyncConnection(object):
|
||||
|
||||
async def list_namespaces(
|
||||
self,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
) -> Iterable[str]:
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
"""List immediate child namespace names in the given namespace.
|
||||
|
||||
Parameters
|
||||
@@ -917,47 +1140,141 @@ class AsyncConnection(object):
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
The token to use for pagination. If not present, start from the beginning.
|
||||
limit: int, default 10
|
||||
The size of the page to return.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Iterable of str
|
||||
List of immediate child namespace names (not full paths)
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional pagination token
|
||||
"""
|
||||
return await self._inner.list_namespaces(
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
result = await self._inner.list_namespaces(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
return ListNamespacesResponse(**result)
|
||||
|
||||
async def create_namespace(self, namespace: List[str]) -> None:
|
||||
async def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
"""Create a new namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to create.
|
||||
"""
|
||||
await self._inner.create_namespace(namespace)
|
||||
mode: str, optional
|
||||
Creation mode - "create", "exist_ok", or "overwrite". Case insensitive.
|
||||
properties: Dict[str, str], optional
|
||||
Properties to associate with the namespace
|
||||
|
||||
async def drop_namespace(self, namespace: List[str]) -> None:
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing namespace properties
|
||||
"""
|
||||
result = await self._inner.create_namespace(
|
||||
namespace,
|
||||
mode=_normalize_create_namespace_mode(mode),
|
||||
properties=properties,
|
||||
)
|
||||
return CreateNamespaceResponse(**result)
|
||||
|
||||
async def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
"""Drop a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior: str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
await self._inner.drop_namespace(namespace)
|
||||
result = await self._inner.drop_namespace(
|
||||
namespace,
|
||||
mode=_normalize_drop_namespace_mode(mode),
|
||||
behavior=_normalize_drop_namespace_behavior(behavior),
|
||||
)
|
||||
return DropNamespaceResponse(**result)
|
||||
|
||||
async def describe_namespace(
|
||||
self, namespace: List[str]
|
||||
) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
result = await self._inner.describe_namespace(namespace)
|
||||
return DescribeNamespaceResponse(**result)
|
||||
|
||||
async def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
"""List all tables in this database with pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
result = await self._inner.list_tables(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
return ListTablesResponse(**result)
|
||||
|
||||
async def table_names(
|
||||
self,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
start_after: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> Iterable[str]:
|
||||
"""List all tables in this database, in sorted order
|
||||
|
||||
.. deprecated::
|
||||
Use :meth:`list_tables` instead, which provides proper pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
@@ -976,6 +1293,15 @@ class AsyncConnection(object):
|
||||
-------
|
||||
Iterable of str
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"table_names() is deprecated, use list_tables() instead",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return await self._inner.table_names(
|
||||
namespace=namespace, start_after=start_after, limit=limit
|
||||
)
|
||||
@@ -992,7 +1318,7 @@ class AsyncConnection(object):
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
location: Optional[str] = None,
|
||||
) -> AsyncTable:
|
||||
@@ -1039,7 +1365,11 @@ class AsyncConnection(object):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://lancedb.github.io/lancedb/guides/storage/>
|
||||
<https://lancedb.com/docs/storage/>
|
||||
|
||||
To enable stable row IDs (row IDs remain stable after compaction,
|
||||
update, delete, and merges), set `new_table_enable_stable_row_ids`
|
||||
to `"true"` in storage_options when connecting to the database.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -1149,6 +1479,8 @@ class AsyncConnection(object):
|
||||
... await db.create_table("table4", make_batches(), schema=schema)
|
||||
>>> asyncio.run(iterable_example())
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
metadata = None
|
||||
|
||||
if embedding_functions is not None:
|
||||
@@ -1206,7 +1538,7 @@ class AsyncConnection(object):
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
@@ -1225,7 +1557,7 @@ class AsyncConnection(object):
|
||||
Additional options for the storage backend. Options already set on the
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://lancedb.github.io/lancedb/guides/storage/>
|
||||
<https://lancedb.com/docs/storage/>
|
||||
index_cache_size: int, default 256
|
||||
**Deprecated**: Use session-level cache configuration instead.
|
||||
Create a Session with custom cache sizes and pass it to lancedb.connect().
|
||||
@@ -1248,6 +1580,8 @@ class AsyncConnection(object):
|
||||
-------
|
||||
A LanceTable object representing the table.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
table = await self._inner.open_table(
|
||||
name,
|
||||
namespace=namespace,
|
||||
@@ -1263,7 +1597,7 @@ class AsyncConnection(object):
|
||||
target_table_name: str,
|
||||
source_uri: str,
|
||||
*,
|
||||
target_namespace: List[str] = [],
|
||||
target_namespace: Optional[List[str]] = None,
|
||||
source_version: Optional[int] = None,
|
||||
source_tag: Optional[str] = None,
|
||||
is_shallow: bool = True,
|
||||
@@ -1296,6 +1630,8 @@ class AsyncConnection(object):
|
||||
-------
|
||||
An AsyncTable object representing the cloned table.
|
||||
"""
|
||||
if target_namespace is None:
|
||||
target_namespace = []
|
||||
table = await self._inner.clone_table(
|
||||
target_table_name,
|
||||
source_uri,
|
||||
@@ -1310,8 +1646,8 @@ class AsyncConnection(object):
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace: List[str] = [],
|
||||
new_namespace: List[str] = [],
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
):
|
||||
"""Rename a table in the database.
|
||||
|
||||
@@ -1328,6 +1664,10 @@ class AsyncConnection(object):
|
||||
The namespace to move the table to.
|
||||
If not specified, defaults to the same as cur_namespace.
|
||||
"""
|
||||
if cur_namespace is None:
|
||||
cur_namespace = []
|
||||
if new_namespace is None:
|
||||
new_namespace = []
|
||||
await self._inner.rename_table(
|
||||
cur_name, new_name, cur_namespace=cur_namespace, new_namespace=new_namespace
|
||||
)
|
||||
@@ -1336,7 +1676,7 @@ class AsyncConnection(object):
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
ignore_missing: bool = False,
|
||||
):
|
||||
"""Drop a table from the database.
|
||||
@@ -1351,6 +1691,8 @@ class AsyncConnection(object):
|
||||
ignore_missing: bool, default False
|
||||
If True, ignore if the table does not exist.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
try:
|
||||
await self._inner.drop_table(name, namespace=namespace)
|
||||
except ValueError as e:
|
||||
@@ -1359,7 +1701,7 @@ class AsyncConnection(object):
|
||||
if f"Table '{name}' was not found" not in str(e):
|
||||
raise e
|
||||
|
||||
async def drop_all_tables(self, namespace: List[str] = []):
|
||||
async def drop_all_tables(self, namespace: Optional[List[str]] = None):
|
||||
"""Drop all tables from the database.
|
||||
|
||||
Parameters
|
||||
@@ -1368,6 +1710,8 @@ class AsyncConnection(object):
|
||||
The namespace to drop all tables from.
|
||||
None or empty list represents root namespace.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
await self._inner.drop_all_tables(namespace=namespace)
|
||||
|
||||
@deprecation.deprecated(
|
||||
|
||||
@@ -376,6 +376,11 @@ class HnswSq:
|
||||
target_partition_size: Optional[int] = None
|
||||
|
||||
|
||||
# Backwards-compatible aliases
|
||||
IvfHnswPq = HnswPq
|
||||
IvfHnswSq = HnswSq
|
||||
|
||||
|
||||
@dataclass
|
||||
class IvfFlat:
|
||||
"""Describes an IVF Flat Index
|
||||
@@ -475,6 +480,36 @@ class IvfFlat:
|
||||
target_partition_size: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class IvfSq:
|
||||
"""Describes an IVF Scalar Quantization (SQ) index.
|
||||
|
||||
This index applies scalar quantization to compress vectors and organizes the
|
||||
quantized vectors into IVF partitions. It offers a balance between search
|
||||
speed and storage efficiency while keeping good recall.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
distance_type: str, default "l2"
|
||||
The distance metric used to train and search the index. Supported values
|
||||
are "l2", "cosine", and "dot".
|
||||
num_partitions: int, default sqrt(num_rows)
|
||||
Number of IVF partitions to create.
|
||||
max_iterations: int, default 50
|
||||
Maximum iterations for kmeans during partition training.
|
||||
sample_rate: int, default 256
|
||||
Controls the number of training vectors: sample_rate * num_partitions.
|
||||
target_partition_size: int, optional
|
||||
Target size for each partition; adjusts the balance between speed and accuracy.
|
||||
"""
|
||||
|
||||
distance_type: Literal["l2", "cosine", "dot"] = "l2"
|
||||
num_partitions: Optional[int] = None
|
||||
max_iterations: int = 50
|
||||
sample_rate: int = 256
|
||||
target_partition_size: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class IvfPq:
|
||||
"""Describes an IVF PQ Index
|
||||
@@ -609,9 +644,19 @@ class IvfPq:
|
||||
class IvfRq:
|
||||
"""Describes an IVF RQ Index
|
||||
|
||||
IVF-RQ (Residual Quantization) stores a compressed copy of each vector using
|
||||
residual quantization and organizes them into IVF partitions. Parameters
|
||||
largely mirror IVF-PQ for consistency.
|
||||
IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
|
||||
and organizes them into IVF partitions.
|
||||
|
||||
The compression scheme is called RabitQ quantization. Each dimension is
|
||||
quantized into a small number of bits. The parameters `num_bits` and
|
||||
`num_partitions` control this process, providing a tradeoff between
|
||||
index size (and thus search speed) and index accuracy.
|
||||
|
||||
The partitioning process is called IVF and the `num_partitions` parameter
|
||||
controls how many groups to create.
|
||||
|
||||
Note that training an IVF RQ index on a large dataset is a slow operation
|
||||
and currently is also a memory intensive operation.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
@@ -628,7 +673,7 @@ class IvfRq:
|
||||
Number of IVF partitions to create.
|
||||
|
||||
num_bits: int, default 1
|
||||
Number of bits to encode each dimension.
|
||||
Number of bits to encode each dimension in the RabitQ codebook.
|
||||
|
||||
max_iterations: int, default 50
|
||||
Max iterations to train kmeans when computing IVF partitions.
|
||||
@@ -651,6 +696,9 @@ class IvfRq:
|
||||
__all__ = [
|
||||
"BTree",
|
||||
"IvfPq",
|
||||
"IvfHnswPq",
|
||||
"IvfHnswSq",
|
||||
"IvfSq",
|
||||
"IvfRq",
|
||||
"IvfFlat",
|
||||
"HnswPq",
|
||||
|
||||
@@ -10,6 +10,7 @@ through a namespace abstraction.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from typing import Dict, Iterable, List, Optional, Union
|
||||
|
||||
@@ -22,27 +23,39 @@ from datetime import timedelta
|
||||
import pyarrow as pa
|
||||
|
||||
from lancedb.db import DBConnection, LanceDBConnection
|
||||
from lancedb.namespace_utils import (
|
||||
_normalize_create_namespace_mode,
|
||||
_normalize_drop_namespace_mode,
|
||||
_normalize_drop_namespace_behavior,
|
||||
)
|
||||
from lancedb.io import StorageOptionsProvider
|
||||
from lancedb.table import LanceTable, Table
|
||||
from lance_namespace import (
|
||||
LanceNamespace,
|
||||
connect as namespace_connect,
|
||||
CreateNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
ListNamespacesResponse,
|
||||
ListTablesResponse,
|
||||
ListTablesRequest,
|
||||
DescribeTableRequest,
|
||||
DescribeNamespaceRequest,
|
||||
DropTableRequest,
|
||||
ListNamespacesRequest,
|
||||
CreateNamespaceRequest,
|
||||
DropNamespaceRequest,
|
||||
CreateEmptyTableRequest,
|
||||
)
|
||||
from lancedb.table import AsyncTable, LanceTable, Table
|
||||
from lancedb.util import validate_table_name
|
||||
from lancedb.common import DATA
|
||||
from lancedb.pydantic import LanceModel
|
||||
from lancedb.embeddings import EmbeddingFunctionConfig
|
||||
from ._lancedb import Session
|
||||
|
||||
from lance_namespace import LanceNamespace, connect as namespace_connect
|
||||
from lance_namespace_urllib3_client.models import (
|
||||
ListTablesRequest,
|
||||
DescribeTableRequest,
|
||||
DropTableRequest,
|
||||
ListNamespacesRequest,
|
||||
CreateNamespaceRequest,
|
||||
DropNamespaceRequest,
|
||||
CreateEmptyTableRequest,
|
||||
JsonArrowSchema,
|
||||
JsonArrowField,
|
||||
JsonArrowDataType,
|
||||
)
|
||||
from lance_namespace_urllib3_client.models.json_arrow_schema import JsonArrowSchema
|
||||
from lance_namespace_urllib3_client.models.json_arrow_field import JsonArrowField
|
||||
from lance_namespace_urllib3_client.models.json_arrow_data_type import JsonArrowDataType
|
||||
|
||||
|
||||
def _convert_pyarrow_type_to_json(arrow_type: pa.DataType) -> JsonArrowDataType:
|
||||
@@ -126,13 +139,17 @@ class LanceNamespaceStorageOptionsProvider(StorageOptionsProvider):
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from lance_namespace import connect as namespace_connect
|
||||
>>> namespace = namespace_connect("rest", {"url": "https://..."})
|
||||
>>> provider = LanceNamespaceStorageOptionsProvider(
|
||||
... namespace=namespace,
|
||||
... table_id=["my_namespace", "my_table"]
|
||||
... )
|
||||
>>> options = provider.fetch_storage_options()
|
||||
Create a provider and fetch storage options::
|
||||
|
||||
from lance_namespace import connect as namespace_connect
|
||||
|
||||
# Connect to namespace (requires a running namespace server)
|
||||
namespace = namespace_connect("rest", {"uri": "https://..."})
|
||||
provider = LanceNamespaceStorageOptionsProvider(
|
||||
namespace=namespace,
|
||||
table_id=["my_namespace", "my_table"]
|
||||
)
|
||||
options = provider.fetch_storage_options()
|
||||
"""
|
||||
|
||||
def __init__(self, namespace: LanceNamespace, table_id: List[str]):
|
||||
@@ -234,8 +251,23 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
) -> Iterable[str]:
|
||||
"""
|
||||
List table names in the database.
|
||||
|
||||
.. deprecated::
|
||||
Use :meth:`list_tables` instead, which provides proper pagination support.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"table_names() is deprecated, use list_tables() instead",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
||||
response = self._ns.list_tables(request)
|
||||
return response.tables if response.tables else []
|
||||
@@ -252,12 +284,14 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
fill_value: float = 0.0,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
data_storage_version: Optional[str] = None,
|
||||
enable_v2_manifest_paths: Optional[bool] = None,
|
||||
) -> Table:
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if mode.lower() not in ["create", "overwrite"]:
|
||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||
validate_table_name(name)
|
||||
@@ -346,11 +380,13 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
) -> Table:
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
table_id = namespace + [name]
|
||||
request = DescribeTableRequest(id=table_id)
|
||||
response = self._ns.describe_table(request)
|
||||
@@ -380,8 +416,10 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
)
|
||||
|
||||
@override
|
||||
def drop_table(self, name: str, namespace: List[str] = []):
|
||||
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
|
||||
# Use namespace drop_table directly
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
table_id = namespace + [name]
|
||||
request = DropTableRequest(id=table_id)
|
||||
self._ns.drop_table(request)
|
||||
@@ -391,9 +429,13 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace: List[str] = [],
|
||||
new_namespace: List[str] = [],
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
):
|
||||
if cur_namespace is None:
|
||||
cur_namespace = []
|
||||
if new_namespace is None:
|
||||
new_namespace = []
|
||||
raise NotImplementedError(
|
||||
"rename_table is not supported for namespace connections"
|
||||
)
|
||||
@@ -405,17 +447,19 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
)
|
||||
|
||||
@override
|
||||
def drop_all_tables(self, namespace: List[str] = []):
|
||||
def drop_all_tables(self, namespace: Optional[List[str]] = None):
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
for table_name in self.table_names(namespace=namespace):
|
||||
self.drop_table(table_name, namespace=namespace)
|
||||
|
||||
@override
|
||||
def list_namespaces(
|
||||
self,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
) -> Iterable[str]:
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
"""
|
||||
List child namespaces under the given namespace.
|
||||
|
||||
@@ -425,23 +469,34 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
The parent namespace to list children from.
|
||||
If None, lists root-level namespaces.
|
||||
page_token : Optional[str]
|
||||
Pagination token for listing results.
|
||||
limit : int
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit : int, optional
|
||||
Maximum number of namespaces to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Iterable[str]
|
||||
Names of child namespaces.
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
request = ListNamespacesRequest(
|
||||
id=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
response = self._ns.list_namespaces(request)
|
||||
return response.namespaces if response.namespaces else []
|
||||
return ListNamespacesResponse(
|
||||
namespaces=response.namespaces if response.namespaces else [],
|
||||
page_token=response.page_token,
|
||||
)
|
||||
|
||||
@override
|
||||
def create_namespace(self, namespace: List[str]) -> None:
|
||||
def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
"""
|
||||
Create a new namespace.
|
||||
|
||||
@@ -449,12 +504,34 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
----------
|
||||
namespace : List[str]
|
||||
The namespace path to create.
|
||||
mode : str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
or "overwrite" (replace if exists). Case insensitive.
|
||||
properties : Dict[str, str], optional
|
||||
Properties to set on the namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing the properties of the created namespace.
|
||||
"""
|
||||
request = CreateNamespaceRequest(id=namespace)
|
||||
self._ns.create_namespace(request)
|
||||
request = CreateNamespaceRequest(
|
||||
id=namespace,
|
||||
mode=_normalize_create_namespace_mode(mode),
|
||||
properties=properties,
|
||||
)
|
||||
response = self._ns.create_namespace(request)
|
||||
return CreateNamespaceResponse(
|
||||
properties=response.properties if hasattr(response, "properties") else None
|
||||
)
|
||||
|
||||
@override
|
||||
def drop_namespace(self, namespace: List[str]) -> None:
|
||||
def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
"""
|
||||
Drop a namespace.
|
||||
|
||||
@@ -462,22 +539,102 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
----------
|
||||
namespace : List[str]
|
||||
The namespace path to drop.
|
||||
mode : str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior : str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
request = DropNamespaceRequest(id=namespace)
|
||||
self._ns.drop_namespace(request)
|
||||
request = DropNamespaceRequest(
|
||||
id=namespace,
|
||||
mode=_normalize_drop_namespace_mode(mode),
|
||||
behavior=_normalize_drop_namespace_behavior(behavior),
|
||||
)
|
||||
response = self._ns.drop_namespace(request)
|
||||
return DropNamespaceResponse(
|
||||
properties=(
|
||||
response.properties if hasattr(response, "properties") else None
|
||||
),
|
||||
transaction_id=(
|
||||
response.transaction_id if hasattr(response, "transaction_id") else None
|
||||
),
|
||||
)
|
||||
|
||||
@override
|
||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
||||
"""
|
||||
Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
request = DescribeNamespaceRequest(id=namespace)
|
||||
response = self._ns.describe_namespace(request)
|
||||
return DescribeNamespaceResponse(
|
||||
properties=response.properties if hasattr(response, "properties") else None
|
||||
)
|
||||
|
||||
@override
|
||||
def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
"""
|
||||
List all tables in this database with pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token : str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit : int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
||||
response = self._ns.list_tables(request)
|
||||
return ListTablesResponse(
|
||||
tables=response.tables if response.tables else [],
|
||||
page_token=response.page_token,
|
||||
)
|
||||
|
||||
def _lance_table_from_uri(
|
||||
self,
|
||||
name: str,
|
||||
table_uri: str,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
) -> LanceTable:
|
||||
# Open a table directly from a URI using the location parameter
|
||||
# Note: storage_options should already be merged by the caller
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
temp_conn = LanceDBConnection(
|
||||
table_uri, # Use the table location as the connection URI
|
||||
read_consistency_interval=self.read_consistency_interval,
|
||||
@@ -497,6 +654,431 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
)
|
||||
|
||||
|
||||
class AsyncLanceNamespaceDBConnection:
|
||||
"""
|
||||
An async LanceDB connection that uses a namespace for table management.
|
||||
|
||||
This connection delegates table URI resolution to a lance_namespace instance,
|
||||
while providing async methods for all operations.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
namespace: LanceNamespace,
|
||||
*,
|
||||
read_consistency_interval: Optional[timedelta] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
):
|
||||
"""
|
||||
Initialize an async namespace-based LanceDB connection.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : LanceNamespace
|
||||
The namespace instance to use for table management
|
||||
read_consistency_interval : Optional[timedelta]
|
||||
The interval at which to check for updates to the table from other
|
||||
processes. If None, then consistency is not checked.
|
||||
storage_options : Optional[Dict[str, str]]
|
||||
Additional options for the storage backend
|
||||
session : Optional[Session]
|
||||
A session to use for this connection
|
||||
"""
|
||||
self._ns = namespace
|
||||
self.read_consistency_interval = read_consistency_interval
|
||||
self.storage_options = storage_options or {}
|
||||
self.session = session
|
||||
|
||||
async def table_names(
|
||||
self,
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
) -> Iterable[str]:
|
||||
"""
|
||||
List table names in the namespace.
|
||||
|
||||
.. deprecated::
|
||||
Use :meth:`list_tables` instead, which provides proper pagination support.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"table_names() is deprecated, use list_tables() instead",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
||||
response = self._ns.list_tables(request)
|
||||
return response.tables if response.tables else []
|
||||
|
||||
async def create_table(
|
||||
self,
|
||||
name: str,
|
||||
data: Optional[DATA] = None,
|
||||
schema: Optional[Union[pa.Schema, LanceModel]] = None,
|
||||
mode: str = "create",
|
||||
exist_ok: bool = False,
|
||||
on_bad_vectors: str = "error",
|
||||
fill_value: float = 0.0,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
data_storage_version: Optional[str] = None,
|
||||
enable_v2_manifest_paths: Optional[bool] = None,
|
||||
) -> AsyncTable:
|
||||
"""Create a new table in the namespace."""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if mode.lower() not in ["create", "overwrite"]:
|
||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||
validate_table_name(name)
|
||||
|
||||
# Get location from namespace
|
||||
table_id = namespace + [name]
|
||||
|
||||
# Step 1: Get the table location and storage options from namespace
|
||||
location = None
|
||||
namespace_storage_options = None
|
||||
if mode.lower() == "overwrite":
|
||||
# Try to describe the table first to see if it exists
|
||||
try:
|
||||
describe_request = DescribeTableRequest(id=table_id)
|
||||
describe_response = self._ns.describe_table(describe_request)
|
||||
location = describe_response.location
|
||||
namespace_storage_options = describe_response.storage_options
|
||||
except Exception:
|
||||
# Table doesn't exist, will create a new one below
|
||||
pass
|
||||
|
||||
if location is None:
|
||||
# Table doesn't exist or mode is "create", reserve a new location
|
||||
create_empty_request = CreateEmptyTableRequest(
|
||||
id=table_id,
|
||||
location=None,
|
||||
properties=self.storage_options if self.storage_options else None,
|
||||
)
|
||||
create_empty_response = self._ns.create_empty_table(create_empty_request)
|
||||
|
||||
if not create_empty_response.location:
|
||||
raise ValueError(
|
||||
"Table location is missing from create_empty_table response"
|
||||
)
|
||||
|
||||
location = create_empty_response.location
|
||||
namespace_storage_options = create_empty_response.storage_options
|
||||
|
||||
# Merge storage options: self.storage_options < user options < namespace options
|
||||
merged_storage_options = dict(self.storage_options)
|
||||
if storage_options:
|
||||
merged_storage_options.update(storage_options)
|
||||
if namespace_storage_options:
|
||||
merged_storage_options.update(namespace_storage_options)
|
||||
|
||||
# Step 2: Create table using LanceTable.create with the location
|
||||
# Run the sync operation in a thread
|
||||
def _create_table():
|
||||
temp_conn = LanceDBConnection(
|
||||
location,
|
||||
read_consistency_interval=self.read_consistency_interval,
|
||||
storage_options=merged_storage_options,
|
||||
session=self.session,
|
||||
)
|
||||
|
||||
# Create a storage options provider if not provided by user
|
||||
if (
|
||||
storage_options_provider is None
|
||||
and namespace_storage_options is not None
|
||||
):
|
||||
provider = LanceNamespaceStorageOptionsProvider(
|
||||
namespace=self._ns,
|
||||
table_id=table_id,
|
||||
)
|
||||
else:
|
||||
provider = storage_options_provider
|
||||
|
||||
return LanceTable.create(
|
||||
temp_conn,
|
||||
name,
|
||||
data,
|
||||
schema,
|
||||
mode=mode,
|
||||
exist_ok=exist_ok,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
embedding_functions=embedding_functions,
|
||||
namespace=namespace,
|
||||
storage_options=merged_storage_options,
|
||||
storage_options_provider=provider,
|
||||
location=location,
|
||||
)
|
||||
|
||||
lance_table = await asyncio.to_thread(_create_table)
|
||||
# Get the underlying async table from LanceTable
|
||||
return lance_table._table
|
||||
|
||||
async def open_table(
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
) -> AsyncTable:
|
||||
"""Open an existing table from the namespace."""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
table_id = namespace + [name]
|
||||
request = DescribeTableRequest(id=table_id)
|
||||
response = self._ns.describe_table(request)
|
||||
|
||||
# Merge storage options: self.storage_options < user options < namespace options
|
||||
merged_storage_options = dict(self.storage_options)
|
||||
if storage_options:
|
||||
merged_storage_options.update(storage_options)
|
||||
if response.storage_options:
|
||||
merged_storage_options.update(response.storage_options)
|
||||
|
||||
# Create a storage options provider if not provided by user
|
||||
if storage_options_provider is None and response.storage_options is not None:
|
||||
storage_options_provider = LanceNamespaceStorageOptionsProvider(
|
||||
namespace=self._ns,
|
||||
table_id=table_id,
|
||||
)
|
||||
|
||||
# Open table in a thread
|
||||
def _open_table():
|
||||
temp_conn = LanceDBConnection(
|
||||
response.location,
|
||||
read_consistency_interval=self.read_consistency_interval,
|
||||
storage_options=merged_storage_options,
|
||||
session=self.session,
|
||||
)
|
||||
|
||||
return LanceTable.open(
|
||||
temp_conn,
|
||||
name,
|
||||
namespace=namespace,
|
||||
storage_options=merged_storage_options,
|
||||
storage_options_provider=storage_options_provider,
|
||||
index_cache_size=index_cache_size,
|
||||
location=response.location,
|
||||
)
|
||||
|
||||
lance_table = await asyncio.to_thread(_open_table)
|
||||
return lance_table._table
|
||||
|
||||
async def drop_table(self, name: str, namespace: Optional[List[str]] = None):
|
||||
"""Drop a table from the namespace."""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
table_id = namespace + [name]
|
||||
request = DropTableRequest(id=table_id)
|
||||
self._ns.drop_table(request)
|
||||
|
||||
async def rename_table(
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
):
|
||||
"""Rename is not supported for namespace connections."""
|
||||
if cur_namespace is None:
|
||||
cur_namespace = []
|
||||
if new_namespace is None:
|
||||
new_namespace = []
|
||||
raise NotImplementedError(
|
||||
"rename_table is not supported for namespace connections"
|
||||
)
|
||||
|
||||
async def drop_database(self):
|
||||
"""Deprecated method."""
|
||||
raise NotImplementedError(
|
||||
"drop_database is deprecated, use drop_all_tables instead"
|
||||
)
|
||||
|
||||
async def drop_all_tables(self, namespace: Optional[List[str]] = None):
|
||||
"""Drop all tables in the namespace."""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
table_names = await self.table_names(namespace=namespace)
|
||||
for table_name in table_names:
|
||||
await self.drop_table(table_name, namespace=namespace)
|
||||
|
||||
async def list_namespaces(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
"""
|
||||
List child namespaces under the given namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : Optional[List[str]]
|
||||
The parent namespace to list children from.
|
||||
If None, lists root-level namespaces.
|
||||
page_token : Optional[str]
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit : int, optional
|
||||
Maximum number of namespaces to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
request = ListNamespacesRequest(
|
||||
id=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
response = self._ns.list_namespaces(request)
|
||||
return ListNamespacesResponse(
|
||||
namespaces=response.namespaces if response.namespaces else [],
|
||||
page_token=response.page_token,
|
||||
)
|
||||
|
||||
async def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
"""
|
||||
Create a new namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : List[str]
|
||||
The namespace path to create.
|
||||
mode : str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
or "overwrite" (replace if exists). Case insensitive.
|
||||
properties : Dict[str, str], optional
|
||||
Properties to set on the namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing the properties of the created namespace.
|
||||
"""
|
||||
request = CreateNamespaceRequest(
|
||||
id=namespace,
|
||||
mode=_normalize_create_namespace_mode(mode),
|
||||
properties=properties,
|
||||
)
|
||||
response = self._ns.create_namespace(request)
|
||||
return CreateNamespaceResponse(
|
||||
properties=response.properties if hasattr(response, "properties") else None
|
||||
)
|
||||
|
||||
async def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
"""
|
||||
Drop a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : List[str]
|
||||
The namespace path to drop.
|
||||
mode : str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior : str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
request = DropNamespaceRequest(
|
||||
id=namespace,
|
||||
mode=_normalize_drop_namespace_mode(mode),
|
||||
behavior=_normalize_drop_namespace_behavior(behavior),
|
||||
)
|
||||
response = self._ns.drop_namespace(request)
|
||||
return DropNamespaceResponse(
|
||||
properties=(
|
||||
response.properties if hasattr(response, "properties") else None
|
||||
),
|
||||
transaction_id=(
|
||||
response.transaction_id if hasattr(response, "transaction_id") else None
|
||||
),
|
||||
)
|
||||
|
||||
async def describe_namespace(
|
||||
self, namespace: List[str]
|
||||
) -> DescribeNamespaceResponse:
|
||||
"""
|
||||
Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
request = DescribeNamespaceRequest(id=namespace)
|
||||
response = self._ns.describe_namespace(request)
|
||||
return DescribeNamespaceResponse(
|
||||
properties=response.properties if hasattr(response, "properties") else None
|
||||
)
|
||||
|
||||
async def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
"""
|
||||
List all tables in this database with pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token : str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit : int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
||||
response = self._ns.list_tables(request)
|
||||
return ListTablesResponse(
|
||||
tables=response.tables if response.tables else [],
|
||||
page_token=response.page_token,
|
||||
)
|
||||
|
||||
|
||||
def connect_namespace(
|
||||
impl: str,
|
||||
properties: Dict[str, str],
|
||||
@@ -541,3 +1123,62 @@ def connect_namespace(
|
||||
storage_options=storage_options,
|
||||
session=session,
|
||||
)
|
||||
|
||||
|
||||
def connect_namespace_async(
|
||||
impl: str,
|
||||
properties: Dict[str, str],
|
||||
*,
|
||||
read_consistency_interval: Optional[timedelta] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
) -> AsyncLanceNamespaceDBConnection:
|
||||
"""
|
||||
Connect to a LanceDB database through a namespace (returns async connection).
|
||||
|
||||
This function is synchronous but returns an AsyncLanceNamespaceDBConnection
|
||||
that provides async methods for all database operations.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
impl : str
|
||||
The namespace implementation to use. For examples:
|
||||
- "dir" for DirectoryNamespace
|
||||
- "rest" for REST-based namespace
|
||||
- Full module path for custom implementations
|
||||
properties : Dict[str, str]
|
||||
Configuration properties for the namespace implementation.
|
||||
Different namespace implemenation has different config properties.
|
||||
For example, use DirectoryNamespace with {"root": "/path/to/directory"}
|
||||
read_consistency_interval : Optional[timedelta]
|
||||
The interval at which to check for updates to the table from other
|
||||
processes. If None, then consistency is not checked.
|
||||
storage_options : Optional[Dict[str, str]]
|
||||
Additional options for the storage backend
|
||||
session : Optional[Session]
|
||||
A session to use for this connection
|
||||
|
||||
Returns
|
||||
-------
|
||||
AsyncLanceNamespaceDBConnection
|
||||
An async namespace-based connection to LanceDB
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import lancedb
|
||||
>>> # This function is sync, but returns an async connection
|
||||
>>> db = lancedb.connect_namespace_async("dir", {"root": "/path/to/db"})
|
||||
>>> # Use async methods on the connection
|
||||
>>> async def use_db():
|
||||
... tables = await db.table_names()
|
||||
... table = await db.create_table("my_table", schema=schema)
|
||||
"""
|
||||
namespace = namespace_connect(impl, properties)
|
||||
|
||||
# Return the async namespace-based connection
|
||||
return AsyncLanceNamespaceDBConnection(
|
||||
namespace,
|
||||
read_consistency_interval=read_consistency_interval,
|
||||
storage_options=storage_options,
|
||||
session=session,
|
||||
)
|
||||
|
||||
27
python/python/lancedb/namespace_utils.py
Normal file
27
python/python/lancedb/namespace_utils.py
Normal file
@@ -0,0 +1,27 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
"""Utility functions for namespace operations."""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _normalize_create_namespace_mode(mode: Optional[str]) -> Optional[str]:
|
||||
"""Normalize create namespace mode to lowercase (API expects lowercase)."""
|
||||
if mode is None:
|
||||
return None
|
||||
return mode.lower()
|
||||
|
||||
|
||||
def _normalize_drop_namespace_mode(mode: Optional[str]) -> Optional[str]:
|
||||
"""Normalize drop namespace mode to uppercase (API expects uppercase)."""
|
||||
if mode is None:
|
||||
return None
|
||||
return mode.upper()
|
||||
|
||||
|
||||
def _normalize_drop_namespace_behavior(behavior: Optional[str]) -> Optional[str]:
|
||||
"""Normalize drop namespace behavior to uppercase (API expects uppercase)."""
|
||||
if behavior is None:
|
||||
return None
|
||||
return behavior.upper()
|
||||
@@ -14,6 +14,7 @@ from typing import (
|
||||
Literal,
|
||||
Optional,
|
||||
Tuple,
|
||||
Type,
|
||||
TypeVar,
|
||||
Union,
|
||||
Any,
|
||||
@@ -786,10 +787,7 @@ class LanceQueryBuilder(ABC):
|
||||
-------
|
||||
List[LanceModel]
|
||||
"""
|
||||
return [
|
||||
model(**{k: v for k, v in row.items() if k in model.field_names()})
|
||||
for row in self.to_arrow(timeout=timeout).to_pylist()
|
||||
]
|
||||
return [model(**row) for row in self.to_arrow(timeout=timeout).to_pylist()]
|
||||
|
||||
def to_polars(self, *, timeout: Optional[timedelta] = None) -> "pl.DataFrame":
|
||||
"""
|
||||
@@ -885,7 +883,7 @@ class LanceQueryBuilder(ABC):
|
||||
----------
|
||||
where: str
|
||||
The where clause which is a valid SQL where clause. See
|
||||
`Lance filter pushdown <https://lancedb.github.io/lance/read_and_write.html#filter-push-down>`_
|
||||
`Lance filter pushdown <https://lance.org/guide/read_and_write#filter-push-down>`_
|
||||
for valid SQL expressions.
|
||||
prefilter: bool, default True
|
||||
If True, apply the filter before vector search, otherwise the
|
||||
@@ -1358,7 +1356,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
----------
|
||||
where: str
|
||||
The where clause which is a valid SQL where clause. See
|
||||
`Lance filter pushdown <https://lancedb.github.io/lance/read_and_write.html#filter-push-down>`_
|
||||
`Lance filter pushdown <https://lance.org/guide/read_and_write#filter-push-down>`_
|
||||
for valid SQL expressions.
|
||||
prefilter: bool, default True
|
||||
If True, apply the filter before vector search, otherwise the
|
||||
@@ -1497,7 +1495,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
if self._phrase_query:
|
||||
if isinstance(query, str):
|
||||
if not query.startswith('"') or not query.endswith('"'):
|
||||
query = f'"{query}"'
|
||||
self._query = f'"{query}"'
|
||||
elif isinstance(query, FullTextQuery) and not isinstance(
|
||||
query, PhraseQuery
|
||||
):
|
||||
@@ -2400,6 +2398,28 @@ class AsyncQueryBase(object):
|
||||
|
||||
return pl.from_arrow(await self.to_arrow(timeout=timeout))
|
||||
|
||||
async def to_pydantic(
|
||||
self, model: Type[LanceModel], *, timeout: Optional[timedelta] = None
|
||||
) -> List[LanceModel]:
|
||||
"""
|
||||
Convert results to a list of pydantic models.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
model : Type[LanceModel]
|
||||
The pydantic model to use.
|
||||
timeout : timedelta, optional
|
||||
The maximum time to wait for the query to complete.
|
||||
If None, wait indefinitely.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list[LanceModel]
|
||||
"""
|
||||
return [
|
||||
model(**row) for row in (await self.to_arrow(timeout=timeout)).to_pylist()
|
||||
]
|
||||
|
||||
async def explain_plan(self, verbose: Optional[bool] = False):
|
||||
"""Return the execution plan for this query.
|
||||
|
||||
@@ -2409,9 +2429,8 @@ class AsyncQueryBase(object):
|
||||
>>> from lancedb import connect_async
|
||||
>>> async def doctest_example():
|
||||
... conn = await connect_async("./.lancedb")
|
||||
... table = await conn.create_table("my_table", [{"vector": [99, 99]}])
|
||||
... query = [100, 100]
|
||||
... plan = await table.query().nearest_to([1, 2]).explain_plan(True)
|
||||
... table = await conn.create_table("my_table", [{"vector": [99.0, 99.0]}])
|
||||
... plan = await table.query().nearest_to([1.0, 2.0]).explain_plan(True)
|
||||
... print(plan)
|
||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
||||
@@ -2420,6 +2439,7 @@ class AsyncQueryBase(object):
|
||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
|
||||
KNNVectorDistance: metric=l2
|
||||
LanceRead: uri=..., projection=[vector], ...
|
||||
<BLANKLINE>
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -3121,10 +3141,9 @@ class AsyncHybridQuery(AsyncStandardQuery, AsyncVectorQueryBase):
|
||||
>>> from lancedb.index import FTS
|
||||
>>> async def doctest_example():
|
||||
... conn = await connect_async("./.lancedb")
|
||||
... table = await conn.create_table("my_table", [{"vector": [99, 99], "text": "hello world"}])
|
||||
... table = await conn.create_table("my_table", [{"vector": [99.0, 99.0], "text": "hello world"}])
|
||||
... await table.create_index("text", config=FTS(with_position=False))
|
||||
... query = [100, 100]
|
||||
... plan = await table.query().nearest_to([1, 2]).nearest_to_text("hello").explain_plan(True)
|
||||
... plan = await table.query().nearest_to([1.0, 2.0]).nearest_to_text("hello").explain_plan(True)
|
||||
... print(plan)
|
||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||
Vector Search Plan:
|
||||
@@ -3398,9 +3417,8 @@ class BaseQueryBuilder(object):
|
||||
>>> from lancedb import connect_async
|
||||
>>> async def doctest_example():
|
||||
... conn = await connect_async("./.lancedb")
|
||||
... table = await conn.create_table("my_table", [{"vector": [99, 99]}])
|
||||
... query = [100, 100]
|
||||
... plan = await table.query().nearest_to([1, 2]).explain_plan(True)
|
||||
... table = await conn.create_table("my_table", [{"vector": [99.0, 99.0]}])
|
||||
... plan = await table.query().nearest_to([1.0, 2.0]).explain_plan(True)
|
||||
... print(plan)
|
||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
||||
@@ -3409,6 +3427,7 @@ class BaseQueryBuilder(object):
|
||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
|
||||
KNNVectorDistance: metric=l2
|
||||
LanceRead: uri=..., projection=[vector], ...
|
||||
<BLANKLINE>
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
@@ -23,6 +23,13 @@ import pyarrow as pa
|
||||
from ..common import DATA
|
||||
from ..db import DBConnection, LOOP
|
||||
from ..embeddings import EmbeddingFunctionConfig
|
||||
from lance_namespace import (
|
||||
CreateNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
ListNamespacesResponse,
|
||||
ListTablesResponse,
|
||||
)
|
||||
from ..pydantic import LanceModel
|
||||
from ..table import Table
|
||||
from ..util import validate_table_name
|
||||
@@ -104,10 +111,10 @@ class RemoteDBConnection(DBConnection):
|
||||
@override
|
||||
def list_namespaces(
|
||||
self,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
) -> Iterable[str]:
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
"""List immediate child namespace names in the given namespace.
|
||||
|
||||
Parameters
|
||||
@@ -116,15 +123,18 @@ class RemoteDBConnection(DBConnection):
|
||||
The parent namespace to list namespaces in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
The token to use for pagination. If not present, start from the beginning.
|
||||
limit: int, default 10
|
||||
The size of the page to return.
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Iterable of str
|
||||
List of immediate child namespace names
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return LOOP.run(
|
||||
self._conn.list_namespaces(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
@@ -132,26 +142,111 @@ class RemoteDBConnection(DBConnection):
|
||||
)
|
||||
|
||||
@override
|
||||
def create_namespace(self, namespace: List[str]) -> None:
|
||||
def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
"""Create a new namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
or "overwrite" (replace if exists). Case insensitive.
|
||||
properties: Dict[str, str], optional
|
||||
Properties to set on the namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CreateNamespaceResponse
|
||||
Response containing the properties of the created namespace.
|
||||
"""
|
||||
LOOP.run(self._conn.create_namespace(namespace=namespace))
|
||||
return LOOP.run(
|
||||
self._conn.create_namespace(
|
||||
namespace=namespace, mode=mode, properties=properties
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def drop_namespace(self, namespace: List[str]) -> None:
|
||||
def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
"""Drop a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
behavior: str, optional
|
||||
Whether to restrict drop if not empty ("RESTRICT") or cascade ("CASCADE").
|
||||
Case insensitive.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DropNamespaceResponse
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
return LOOP.run(self._conn.drop_namespace(namespace=namespace))
|
||||
return LOOP.run(
|
||||
self._conn.drop_namespace(namespace=namespace, mode=mode, behavior=behavior)
|
||||
)
|
||||
|
||||
@override
|
||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
return LOOP.run(self._conn.describe_namespace(namespace=namespace))
|
||||
|
||||
@override
|
||||
def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
"""List all tables in this database with pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
Token for pagination. Use the token from a previous response
|
||||
to get the next page of results.
|
||||
limit: int, optional
|
||||
The maximum number of results to return.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return LOOP.run(
|
||||
self._conn.list_tables(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def table_names(
|
||||
@@ -159,10 +254,13 @@ class RemoteDBConnection(DBConnection):
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
) -> Iterable[str]:
|
||||
"""List the names of all tables in the database.
|
||||
|
||||
.. deprecated::
|
||||
Use :meth:`list_tables` instead, which provides proper pagination support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], default []
|
||||
@@ -177,6 +275,15 @@ class RemoteDBConnection(DBConnection):
|
||||
-------
|
||||
An iterator of table names.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"table_names() is deprecated, use list_tables() instead",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
return LOOP.run(
|
||||
self._conn.table_names(
|
||||
namespace=namespace, start_after=page_token, limit=limit
|
||||
@@ -188,7 +295,7 @@ class RemoteDBConnection(DBConnection):
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
) -> Table:
|
||||
@@ -208,6 +315,8 @@ class RemoteDBConnection(DBConnection):
|
||||
"""
|
||||
from .table import RemoteTable
|
||||
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if index_cache_size is not None:
|
||||
logging.info(
|
||||
"index_cache_size is ignored in LanceDb Cloud"
|
||||
@@ -222,7 +331,7 @@ class RemoteDBConnection(DBConnection):
|
||||
target_table_name: str,
|
||||
source_uri: str,
|
||||
*,
|
||||
target_namespace: List[str] = [],
|
||||
target_namespace: Optional[List[str]] = None,
|
||||
source_version: Optional[int] = None,
|
||||
source_tag: Optional[str] = None,
|
||||
is_shallow: bool = True,
|
||||
@@ -252,6 +361,8 @@ class RemoteDBConnection(DBConnection):
|
||||
"""
|
||||
from .table import RemoteTable
|
||||
|
||||
if target_namespace is None:
|
||||
target_namespace = []
|
||||
table = LOOP.run(
|
||||
self._conn.clone_table(
|
||||
target_table_name,
|
||||
@@ -275,7 +386,7 @@ class RemoteDBConnection(DBConnection):
|
||||
mode: Optional[str] = None,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
) -> Table:
|
||||
"""Create a [Table][lancedb.table.Table] in the database.
|
||||
|
||||
@@ -372,6 +483,8 @@ class RemoteDBConnection(DBConnection):
|
||||
LanceTable(table4)
|
||||
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
validate_table_name(name)
|
||||
if embedding_functions is not None:
|
||||
logging.warning(
|
||||
@@ -396,7 +509,7 @@ class RemoteDBConnection(DBConnection):
|
||||
return RemoteTable(table, self.db_name)
|
||||
|
||||
@override
|
||||
def drop_table(self, name: str, namespace: List[str] = []):
|
||||
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
|
||||
"""Drop a table from the database.
|
||||
|
||||
Parameters
|
||||
@@ -407,6 +520,8 @@ class RemoteDBConnection(DBConnection):
|
||||
The namespace to drop the table from.
|
||||
None or empty list represents root namespace.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
LOOP.run(self._conn.drop_table(name, namespace=namespace))
|
||||
|
||||
@override
|
||||
@@ -414,8 +529,8 @@ class RemoteDBConnection(DBConnection):
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace: List[str] = [],
|
||||
new_namespace: List[str] = [],
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
):
|
||||
"""Rename a table in the database.
|
||||
|
||||
@@ -426,6 +541,10 @@ class RemoteDBConnection(DBConnection):
|
||||
new_name: str
|
||||
The new name of the table.
|
||||
"""
|
||||
if cur_namespace is None:
|
||||
cur_namespace = []
|
||||
if new_namespace is None:
|
||||
new_namespace = []
|
||||
LOOP.run(
|
||||
self._conn.rename_table(
|
||||
cur_name,
|
||||
|
||||
@@ -18,7 +18,7 @@ from lancedb._lancedb import (
|
||||
UpdateResult,
|
||||
)
|
||||
from lancedb.embeddings.base import EmbeddingFunctionConfig
|
||||
from lancedb.index import FTS, BTree, Bitmap, HnswSq, IvfFlat, IvfPq, LabelList
|
||||
from lancedb.index import FTS, BTree, Bitmap, HnswSq, IvfFlat, IvfPq, IvfSq, LabelList
|
||||
from lancedb.remote.db import LOOP
|
||||
import pyarrow as pa
|
||||
|
||||
@@ -265,6 +265,8 @@ class RemoteTable(Table):
|
||||
num_sub_vectors=num_sub_vectors,
|
||||
num_bits=num_bits,
|
||||
)
|
||||
elif index_type == "IVF_SQ":
|
||||
config = IvfSq(distance_type=metric, num_partitions=num_partitions)
|
||||
elif index_type == "IVF_HNSW_PQ":
|
||||
raise ValueError(
|
||||
"IVF_HNSW_PQ is not supported on LanceDB cloud."
|
||||
@@ -277,7 +279,7 @@ class RemoteTable(Table):
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unknown vector index type: {index_type}. Valid options are"
|
||||
" 'IVF_FLAT', 'IVF_PQ', 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
|
||||
" 'IVF_FLAT', 'IVF_SQ', 'IVF_PQ', 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
|
||||
)
|
||||
|
||||
LOOP.run(
|
||||
@@ -652,6 +654,17 @@ class RemoteTable(Table):
|
||||
"migrate_v2_manifest_paths() is not supported on the LanceDB Cloud"
|
||||
)
|
||||
|
||||
def head(self, n=5) -> pa.Table:
|
||||
"""
|
||||
Return the first `n` rows of the table.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n: int, default 5
|
||||
The number of rows to return.
|
||||
"""
|
||||
return LOOP.run(self._table.query().limit(n).to_arrow())
|
||||
|
||||
|
||||
def add_index(tbl: pa.Table, i: int) -> pa.Table:
|
||||
return tbl.add_column(
|
||||
|
||||
@@ -44,7 +44,18 @@ import numpy as np
|
||||
|
||||
from .common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||
from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
|
||||
from .index import BTree, IvfFlat, IvfPq, Bitmap, IvfRq, LabelList, HnswPq, HnswSq, FTS
|
||||
from .index import (
|
||||
BTree,
|
||||
IvfFlat,
|
||||
IvfPq,
|
||||
IvfSq,
|
||||
Bitmap,
|
||||
IvfRq,
|
||||
LabelList,
|
||||
HnswPq,
|
||||
HnswSq,
|
||||
FTS,
|
||||
)
|
||||
from .merge import LanceMergeInsertBuilder
|
||||
from .pydantic import LanceModel, model_to_dict
|
||||
from .query import (
|
||||
@@ -178,7 +189,7 @@ def _into_pyarrow_reader(
|
||||
f"Unknown data type {type(data)}. "
|
||||
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
|
||||
"pyarrow Table/RecordBatch, or Pydantic models. "
|
||||
"See https://lancedb.github.io/lancedb/guides/tables/ for examples."
|
||||
"See https://lancedb.com/docs/tables/ for examples."
|
||||
)
|
||||
|
||||
|
||||
@@ -1018,7 +1029,7 @@ class Table(ABC):
|
||||
... .when_not_matched_insert_all() \\
|
||||
... .execute(new_data)
|
||||
>>> res
|
||||
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0)
|
||||
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0, num_attempts=1)
|
||||
>>> # The order of new rows is non-deterministic since we use
|
||||
>>> # a hash-join as part of this operation and so we sort here
|
||||
>>> table.to_arrow().sort_by("a").to_pandas()
|
||||
@@ -1708,15 +1719,18 @@ class LanceTable(Table):
|
||||
connection: "LanceDBConnection",
|
||||
name: str,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
location: Optional[str] = None,
|
||||
_async: AsyncTable = None,
|
||||
):
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
self._conn = connection
|
||||
self._namespace = namespace
|
||||
self._location = location # Store location for use in _dataset_path
|
||||
if _async is not None:
|
||||
self._table = _async
|
||||
else:
|
||||
@@ -1765,12 +1779,14 @@ class LanceTable(Table):
|
||||
db,
|
||||
name,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
location: Optional[str] = None,
|
||||
):
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
tbl = cls(
|
||||
db,
|
||||
name,
|
||||
@@ -1794,6 +1810,10 @@ class LanceTable(Table):
|
||||
@cached_property
|
||||
def _dataset_path(self) -> str:
|
||||
# Cacheable since it's deterministic
|
||||
# If table was opened with explicit location (e.g., from namespace),
|
||||
# use that location directly instead of constructing from base URI
|
||||
if self._location is not None:
|
||||
return self._location
|
||||
return _table_path(self._conn.uri, self.name)
|
||||
|
||||
def to_lance(self, **kwargs) -> lance.LanceDataset:
|
||||
@@ -2045,7 +2065,7 @@ class LanceTable(Table):
|
||||
index_cache_size: Optional[int] = None,
|
||||
num_bits: int = 8,
|
||||
index_type: Literal[
|
||||
"IVF_FLAT", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
|
||||
"IVF_FLAT", "IVF_SQ", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
|
||||
] = "IVF_PQ",
|
||||
max_iterations: int = 50,
|
||||
sample_rate: int = 256,
|
||||
@@ -2083,6 +2103,14 @@ class LanceTable(Table):
|
||||
sample_rate=sample_rate,
|
||||
target_partition_size=target_partition_size,
|
||||
)
|
||||
elif index_type == "IVF_SQ":
|
||||
config = IvfSq(
|
||||
distance_type=metric,
|
||||
num_partitions=num_partitions,
|
||||
max_iterations=max_iterations,
|
||||
sample_rate=sample_rate,
|
||||
target_partition_size=target_partition_size,
|
||||
)
|
||||
elif index_type == "IVF_PQ":
|
||||
config = IvfPq(
|
||||
distance_type=metric,
|
||||
@@ -2618,7 +2646,7 @@ class LanceTable(Table):
|
||||
fill_value: float = 0.0,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
*,
|
||||
namespace: List[str] = [],
|
||||
namespace: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str | bool]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
data_storage_version: Optional[str] = None,
|
||||
@@ -2678,9 +2706,12 @@ class LanceTable(Table):
|
||||
Deprecated. Set `storage_options` when connecting to the database and set
|
||||
`new_table_enable_v2_manifest_paths` in the options.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
self = cls.__new__(cls)
|
||||
self._conn = db
|
||||
self._namespace = namespace
|
||||
self._location = location
|
||||
|
||||
if data_storage_version is not None:
|
||||
warnings.warn(
|
||||
@@ -3444,11 +3475,22 @@ class AsyncTable:
|
||||
if config is not None:
|
||||
if not isinstance(
|
||||
config,
|
||||
(IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS),
|
||||
(
|
||||
IvfFlat,
|
||||
IvfSq,
|
||||
IvfPq,
|
||||
IvfRq,
|
||||
HnswPq,
|
||||
HnswSq,
|
||||
BTree,
|
||||
Bitmap,
|
||||
LabelList,
|
||||
FTS,
|
||||
),
|
||||
):
|
||||
raise TypeError(
|
||||
"config must be an instance of IvfPq, IvfRq, HnswPq, HnswSq, BTree,"
|
||||
" Bitmap, LabelList, or FTS, but got " + str(type(config))
|
||||
"config must be an instance of IvfSq, IvfPq, IvfRq, HnswPq, HnswSq,"
|
||||
" BTree, Bitmap, LabelList, or FTS, but got " + str(type(config))
|
||||
)
|
||||
try:
|
||||
await self._inner.create_index(
|
||||
@@ -3622,7 +3664,7 @@ class AsyncTable:
|
||||
... .when_not_matched_insert_all() \\
|
||||
... .execute(new_data)
|
||||
>>> res
|
||||
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0)
|
||||
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0, num_attempts=1)
|
||||
>>> # The order of new rows is non-deterministic since we use
|
||||
>>> # a hash-join as part of this operation and so we sort here
|
||||
>>> table.to_arrow().sort_by("a").to_pandas()
|
||||
|
||||
@@ -18,12 +18,20 @@ AddMode = Literal["append", "overwrite"]
|
||||
CreateMode = Literal["create", "overwrite"]
|
||||
|
||||
# Index type literals
|
||||
VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ", "IVF_RQ"]
|
||||
VectorIndexType = Literal[
|
||||
"IVF_FLAT",
|
||||
"IVF_SQ",
|
||||
"IVF_PQ",
|
||||
"IVF_HNSW_SQ",
|
||||
"IVF_HNSW_PQ",
|
||||
"IVF_RQ",
|
||||
]
|
||||
ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"]
|
||||
IndexType = Literal[
|
||||
"IVF_PQ",
|
||||
"IVF_HNSW_PQ",
|
||||
"IVF_HNSW_SQ",
|
||||
"IVF_SQ",
|
||||
"FTS",
|
||||
"BTREE",
|
||||
"BITMAP",
|
||||
|
||||
@@ -441,6 +441,150 @@ async def test_create_table_v2_manifest_paths_async(tmp_path):
|
||||
assert re.match(r"\d{20}\.manifest", manifest)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_table_stable_row_ids_via_storage_options(tmp_path):
|
||||
"""Test stable_row_ids via storage_options at connect time."""
|
||||
import lance
|
||||
|
||||
# Connect with stable row IDs enabled as default for new tables
|
||||
db_with = await lancedb.connect_async(
|
||||
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
|
||||
)
|
||||
# Connect without stable row IDs (default)
|
||||
db_without = await lancedb.connect_async(
|
||||
tmp_path, storage_options={"new_table_enable_stable_row_ids": "false"}
|
||||
)
|
||||
|
||||
# Create table using connection with stable row IDs enabled
|
||||
await db_with.create_table(
|
||||
"with_stable_via_opts",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
)
|
||||
lance_ds_with = lance.dataset(tmp_path / "with_stable_via_opts.lance")
|
||||
fragments_with = lance_ds_with.get_fragments()
|
||||
assert len(fragments_with) > 0
|
||||
assert fragments_with[0].metadata.row_id_meta is not None
|
||||
|
||||
# Create table using connection without stable row IDs
|
||||
await db_without.create_table(
|
||||
"without_stable_via_opts",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
)
|
||||
lance_ds_without = lance.dataset(tmp_path / "without_stable_via_opts.lance")
|
||||
fragments_without = lance_ds_without.get_fragments()
|
||||
assert len(fragments_without) > 0
|
||||
assert fragments_without[0].metadata.row_id_meta is None
|
||||
|
||||
|
||||
def test_create_table_stable_row_ids_via_storage_options_sync(tmp_path):
|
||||
"""Test that enable_stable_row_ids can be set via storage_options (sync API)."""
|
||||
# Connect with stable row IDs enabled as default for new tables
|
||||
db_with = lancedb.connect(
|
||||
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
|
||||
)
|
||||
# Connect without stable row IDs (default)
|
||||
db_without = lancedb.connect(
|
||||
tmp_path, storage_options={"new_table_enable_stable_row_ids": "false"}
|
||||
)
|
||||
|
||||
# Create table using connection with stable row IDs enabled
|
||||
tbl_with = db_with.create_table(
|
||||
"with_stable_sync",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
)
|
||||
lance_ds_with = tbl_with.to_lance()
|
||||
fragments_with = lance_ds_with.get_fragments()
|
||||
assert len(fragments_with) > 0
|
||||
assert fragments_with[0].metadata.row_id_meta is not None
|
||||
|
||||
# Create table using connection without stable row IDs
|
||||
tbl_without = db_without.create_table(
|
||||
"without_stable_sync",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
)
|
||||
lance_ds_without = tbl_without.to_lance()
|
||||
fragments_without = lance_ds_without.get_fragments()
|
||||
assert len(fragments_without) > 0
|
||||
assert fragments_without[0].metadata.row_id_meta is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_table_stable_row_ids_table_level_override(tmp_path):
|
||||
"""Test that stable_row_ids can be enabled/disabled at create_table level."""
|
||||
import lance
|
||||
|
||||
# Connect without any stable row ID setting
|
||||
db_default = await lancedb.connect_async(tmp_path)
|
||||
|
||||
# Connect with stable row IDs enabled at connection level
|
||||
db_with_stable = await lancedb.connect_async(
|
||||
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
|
||||
)
|
||||
|
||||
# Case 1: No connection setting, enable at table level
|
||||
await db_default.create_table(
|
||||
"table_level_enabled",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
storage_options={"new_table_enable_stable_row_ids": "true"},
|
||||
)
|
||||
lance_ds = lance.dataset(tmp_path / "table_level_enabled.lance")
|
||||
fragments = lance_ds.get_fragments()
|
||||
assert len(fragments) > 0
|
||||
assert fragments[0].metadata.row_id_meta is not None, (
|
||||
"Table should have stable row IDs when enabled at table level"
|
||||
)
|
||||
|
||||
# Case 2: Connection has stable row IDs, override with false at table level
|
||||
await db_with_stable.create_table(
|
||||
"table_level_disabled",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
storage_options={"new_table_enable_stable_row_ids": "false"},
|
||||
)
|
||||
lance_ds = lance.dataset(tmp_path / "table_level_disabled.lance")
|
||||
fragments = lance_ds.get_fragments()
|
||||
assert len(fragments) > 0
|
||||
assert fragments[0].metadata.row_id_meta is None, (
|
||||
"Table should NOT have stable row IDs when disabled at table level"
|
||||
)
|
||||
|
||||
|
||||
def test_create_table_stable_row_ids_table_level_override_sync(tmp_path):
|
||||
"""Test that stable_row_ids can be enabled/disabled at create_table level (sync)."""
|
||||
# Connect without any stable row ID setting
|
||||
db_default = lancedb.connect(tmp_path)
|
||||
|
||||
# Connect with stable row IDs enabled at connection level
|
||||
db_with_stable = lancedb.connect(
|
||||
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
|
||||
)
|
||||
|
||||
# Case 1: No connection setting, enable at table level
|
||||
tbl = db_default.create_table(
|
||||
"table_level_enabled_sync",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
storage_options={"new_table_enable_stable_row_ids": "true"},
|
||||
)
|
||||
lance_ds = tbl.to_lance()
|
||||
fragments = lance_ds.get_fragments()
|
||||
assert len(fragments) > 0
|
||||
assert fragments[0].metadata.row_id_meta is not None, (
|
||||
"Table should have stable row IDs when enabled at table level"
|
||||
)
|
||||
|
||||
# Case 2: Connection has stable row IDs, override with false at table level
|
||||
tbl = db_with_stable.create_table(
|
||||
"table_level_disabled_sync",
|
||||
data=[{"id": i} for i in range(10)],
|
||||
storage_options={"new_table_enable_stable_row_ids": "false"},
|
||||
)
|
||||
lance_ds = tbl.to_lance()
|
||||
fragments = lance_ds.get_fragments()
|
||||
assert len(fragments) > 0
|
||||
assert fragments[0].metadata.row_id_meta is None, (
|
||||
"Table should NOT have stable row IDs when disabled at table level"
|
||||
)
|
||||
|
||||
|
||||
def test_open_table_sync(tmp_db: lancedb.DBConnection):
|
||||
tmp_db.create_table("test", data=[{"id": 0}])
|
||||
assert tmp_db.open_table("test").count_rows() == 1
|
||||
@@ -748,7 +892,7 @@ def test_local_namespace_operations(tmp_path):
|
||||
db = lancedb.connect(tmp_path)
|
||||
|
||||
# Test list_namespaces returns empty list for root namespace
|
||||
namespaces = list(db.list_namespaces())
|
||||
namespaces = db.list_namespaces().namespaces
|
||||
assert namespaces == []
|
||||
|
||||
# Test list_namespaces with non-empty namespace raises NotImplementedError
|
||||
@@ -756,7 +900,7 @@ def test_local_namespace_operations(tmp_path):
|
||||
NotImplementedError,
|
||||
match="Namespace operations are not supported for listing database",
|
||||
):
|
||||
list(db.list_namespaces(namespace=["test"]))
|
||||
db.list_namespaces(namespace=["test"])
|
||||
|
||||
|
||||
def test_local_create_namespace_not_supported(tmp_path):
|
||||
|
||||
@@ -32,6 +32,7 @@ import numpy as np
|
||||
import pyarrow as pa
|
||||
import pandas as pd
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from utils import exception_output
|
||||
|
||||
pytest.importorskip("lancedb.fts")
|
||||
@@ -90,7 +91,7 @@ def table(tmp_path) -> ldb.table.LanceTable:
|
||||
return table
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@pytest_asyncio.fixture
|
||||
async def async_table(tmp_path) -> ldb.table.AsyncTable:
|
||||
# Use local random state to avoid affecting other tests
|
||||
rng = np.random.RandomState(42)
|
||||
@@ -253,7 +254,7 @@ def test_search_fts(table, use_tantivy):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_fts_select_async(async_table):
|
||||
tbl = await async_table
|
||||
tbl = async_table
|
||||
await tbl.create_index("text", config=FTS())
|
||||
await tbl.create_index("text2", config=FTS())
|
||||
results = (
|
||||
@@ -324,11 +325,18 @@ def test_search_fts_phrase_query(table):
|
||||
pass
|
||||
table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True)
|
||||
results = table.search("puppy").limit(100).to_list()
|
||||
|
||||
# Test with quotation marks
|
||||
phrase_results = table.search('"puppy runs"').limit(100).to_list()
|
||||
assert len(results) > len(phrase_results)
|
||||
assert len(phrase_results) > 0
|
||||
|
||||
# Test with a query
|
||||
# Test with .phrase_query()
|
||||
phrase_results = table.search("puppy runs").phrase_query().limit(100).to_list()
|
||||
assert len(results) > len(phrase_results)
|
||||
assert len(phrase_results) > 0
|
||||
|
||||
# Test with PhraseQuery()
|
||||
phrase_results = (
|
||||
table.search(PhraseQuery("puppy runs", "text")).limit(100).to_list()
|
||||
)
|
||||
@@ -338,7 +346,6 @@ def test_search_fts_phrase_query(table):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_fts_phrase_query_async(async_table):
|
||||
async_table = await async_table
|
||||
await async_table.create_index("text", config=FTS(with_position=False))
|
||||
try:
|
||||
phrase_results = (
|
||||
@@ -393,7 +400,6 @@ def test_search_fts_specify_column(table):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_fts_async(async_table):
|
||||
async_table = await async_table
|
||||
await async_table.create_index("text", config=FTS())
|
||||
results = await async_table.query().nearest_to_text("puppy").limit(5).to_list()
|
||||
assert len(results) == 5
|
||||
@@ -424,7 +430,6 @@ async def test_search_fts_async(async_table):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_fts_specify_column_async(async_table):
|
||||
async_table = await async_table
|
||||
await async_table.create_index("text", config=FTS())
|
||||
await async_table.create_index("text2", config=FTS())
|
||||
|
||||
|
||||
@@ -12,6 +12,9 @@ from lancedb.index import (
|
||||
BTree,
|
||||
IvfFlat,
|
||||
IvfPq,
|
||||
IvfSq,
|
||||
IvfHnswPq,
|
||||
IvfHnswSq,
|
||||
IvfRq,
|
||||
Bitmap,
|
||||
LabelList,
|
||||
@@ -229,6 +232,35 @@ async def test_create_hnswsq_index(some_table: AsyncTable):
|
||||
assert len(indices) == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_hnswsq_alias_index(some_table: AsyncTable):
|
||||
await some_table.create_index("vector", config=IvfHnswSq(num_partitions=5))
|
||||
indices = await some_table.list_indices()
|
||||
assert len(indices) == 1
|
||||
assert indices[0].index_type in {"HnswSq", "IvfHnswSq"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_hnswpq_alias_index(some_table: AsyncTable):
|
||||
await some_table.create_index("vector", config=IvfHnswPq(num_partitions=5))
|
||||
indices = await some_table.list_indices()
|
||||
assert len(indices) == 1
|
||||
assert indices[0].index_type in {"HnswPq", "IvfHnswPq"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_ivfsq_index(some_table: AsyncTable):
|
||||
await some_table.create_index("vector", config=IvfSq(num_partitions=10))
|
||||
indices = await some_table.list_indices()
|
||||
assert len(indices) == 1
|
||||
assert indices[0].index_type == "IvfSq"
|
||||
stats = await some_table.index_stats(indices[0].name)
|
||||
assert stats.index_type == "IVF_SQ"
|
||||
assert stats.distance_type == "l2"
|
||||
assert stats.num_indexed_rows == await some_table.count_rows()
|
||||
assert stats.num_unindexed_rows == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_index_with_binary_vectors(binary_table: AsyncTable):
|
||||
await binary_table.create_index(
|
||||
|
||||
@@ -279,13 +279,13 @@ class TestNamespaceConnection:
|
||||
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||
|
||||
# Initially no namespaces
|
||||
assert len(list(db.list_namespaces())) == 0
|
||||
assert len(db.list_namespaces().namespaces) == 0
|
||||
|
||||
# Create a namespace
|
||||
db.create_namespace(["test_namespace"])
|
||||
|
||||
# Verify namespace exists
|
||||
namespaces = list(db.list_namespaces())
|
||||
namespaces = db.list_namespaces().namespaces
|
||||
assert "test_namespace" in namespaces
|
||||
assert len(namespaces) == 1
|
||||
|
||||
@@ -322,7 +322,7 @@ class TestNamespaceConnection:
|
||||
db.drop_namespace(["test_namespace"])
|
||||
|
||||
# Verify namespace no longer exists
|
||||
namespaces = list(db.list_namespaces())
|
||||
namespaces = db.list_namespaces().namespaces
|
||||
assert len(namespaces) == 0
|
||||
|
||||
def test_namespace_with_tables_cannot_be_dropped(self):
|
||||
@@ -423,3 +423,218 @@ class TestNamespaceConnection:
|
||||
db.drop_table("same_name_table", namespace=["namespace_b"])
|
||||
db.drop_namespace(["namespace_a"])
|
||||
db.drop_namespace(["namespace_b"])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestAsyncNamespaceConnection:
|
||||
"""Test async namespace-based LanceDB connection using DirectoryNamespace."""
|
||||
|
||||
def setup_method(self):
|
||||
"""Set up test fixtures."""
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
|
||||
def teardown_method(self):
|
||||
"""Clean up test fixtures."""
|
||||
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
||||
|
||||
async def test_connect_namespace_async(self):
|
||||
"""Test connecting to LanceDB through DirectoryNamespace asynchronously."""
|
||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||
|
||||
# Should be an AsyncLanceNamespaceDBConnection
|
||||
assert isinstance(db, lancedb.AsyncLanceNamespaceDBConnection)
|
||||
|
||||
# Initially no tables in root
|
||||
table_names = await db.table_names()
|
||||
assert len(list(table_names)) == 0
|
||||
|
||||
async def test_create_table_async(self):
|
||||
"""Test creating a table asynchronously through namespace."""
|
||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||
|
||||
# Create a child namespace first
|
||||
await db.create_namespace(["test_ns"])
|
||||
|
||||
# Define schema for empty table
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
pa.field("text", pa.string()),
|
||||
]
|
||||
)
|
||||
|
||||
# Create empty table in child namespace
|
||||
table = await db.create_table(
|
||||
"test_table", schema=schema, namespace=["test_ns"]
|
||||
)
|
||||
assert table is not None
|
||||
assert isinstance(table, lancedb.AsyncTable)
|
||||
|
||||
# Table should appear in child namespace
|
||||
table_names = await db.table_names(namespace=["test_ns"])
|
||||
assert "test_table" in list(table_names)
|
||||
|
||||
async def test_open_table_async(self):
|
||||
"""Test opening an existing table asynchronously through namespace."""
|
||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||
|
||||
# Create a child namespace first
|
||||
await db.create_namespace(["test_ns"])
|
||||
|
||||
# Create a table with schema in child namespace
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
]
|
||||
)
|
||||
await db.create_table("test_table", schema=schema, namespace=["test_ns"])
|
||||
|
||||
# Open the table
|
||||
table = await db.open_table("test_table", namespace=["test_ns"])
|
||||
assert table is not None
|
||||
assert isinstance(table, lancedb.AsyncTable)
|
||||
|
||||
# Test write operation - add data to the table
|
||||
test_data = [
|
||||
{"id": 1, "vector": [1.0, 2.0]},
|
||||
{"id": 2, "vector": [3.0, 4.0]},
|
||||
{"id": 3, "vector": [5.0, 6.0]},
|
||||
]
|
||||
await table.add(test_data)
|
||||
|
||||
# Test read operation - query the table
|
||||
result = await table.to_arrow()
|
||||
assert len(result) == 3
|
||||
assert result.schema.field("id").type == pa.int64()
|
||||
assert result.schema.field("vector").type == pa.list_(pa.float32(), 2)
|
||||
|
||||
# Verify data content
|
||||
result_df = result.to_pandas()
|
||||
assert result_df["id"].tolist() == [1, 2, 3]
|
||||
assert [v.tolist() for v in result_df["vector"]] == [
|
||||
[1.0, 2.0],
|
||||
[3.0, 4.0],
|
||||
[5.0, 6.0],
|
||||
]
|
||||
|
||||
# Test update operation
|
||||
await table.update({"id": 20}, where="id = 2")
|
||||
result = await table.to_arrow()
|
||||
result_df = result.to_pandas().sort_values("id").reset_index(drop=True)
|
||||
assert result_df["id"].tolist() == [1, 3, 20]
|
||||
|
||||
# Test delete operation
|
||||
await table.delete("id = 1")
|
||||
result = await table.to_arrow()
|
||||
assert len(result) == 2
|
||||
result_df = result.to_pandas().sort_values("id").reset_index(drop=True)
|
||||
assert result_df["id"].tolist() == [3, 20]
|
||||
|
||||
async def test_drop_table_async(self):
|
||||
"""Test dropping a table asynchronously through namespace."""
|
||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||
|
||||
# Create a child namespace first
|
||||
await db.create_namespace(["test_ns"])
|
||||
|
||||
# Create tables in child namespace
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
]
|
||||
)
|
||||
await db.create_table("table1", schema=schema, namespace=["test_ns"])
|
||||
await db.create_table("table2", schema=schema, namespace=["test_ns"])
|
||||
|
||||
# Verify both tables exist in child namespace
|
||||
table_names = list(await db.table_names(namespace=["test_ns"]))
|
||||
assert "table1" in table_names
|
||||
assert "table2" in table_names
|
||||
assert len(table_names) == 2
|
||||
|
||||
# Drop one table
|
||||
await db.drop_table("table1", namespace=["test_ns"])
|
||||
|
||||
# Verify only table2 remains
|
||||
table_names = list(await db.table_names(namespace=["test_ns"]))
|
||||
assert "table1" not in table_names
|
||||
assert "table2" in table_names
|
||||
assert len(table_names) == 1
|
||||
|
||||
async def test_namespace_operations_async(self):
|
||||
"""Test namespace management operations asynchronously."""
|
||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||
|
||||
# Initially no namespaces
|
||||
namespaces = await db.list_namespaces()
|
||||
assert len(namespaces.namespaces) == 0
|
||||
|
||||
# Create a namespace
|
||||
await db.create_namespace(["test_namespace"])
|
||||
|
||||
# Verify namespace exists
|
||||
namespaces = (await db.list_namespaces()).namespaces
|
||||
assert "test_namespace" in namespaces
|
||||
assert len(namespaces) == 1
|
||||
|
||||
# Create table in namespace
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
]
|
||||
)
|
||||
table = await db.create_table(
|
||||
"test_table", schema=schema, namespace=["test_namespace"]
|
||||
)
|
||||
assert table is not None
|
||||
|
||||
# Verify table exists in namespace
|
||||
tables_in_namespace = list(await db.table_names(namespace=["test_namespace"]))
|
||||
assert "test_table" in tables_in_namespace
|
||||
assert len(tables_in_namespace) == 1
|
||||
|
||||
# Drop table from namespace
|
||||
await db.drop_table("test_table", namespace=["test_namespace"])
|
||||
|
||||
# Verify table no longer exists in namespace
|
||||
tables_in_namespace = list(await db.table_names(namespace=["test_namespace"]))
|
||||
assert len(tables_in_namespace) == 0
|
||||
|
||||
# Drop namespace
|
||||
await db.drop_namespace(["test_namespace"])
|
||||
|
||||
# Verify namespace no longer exists
|
||||
namespaces = (await db.list_namespaces()).namespaces
|
||||
assert len(namespaces) == 0
|
||||
|
||||
async def test_drop_all_tables_async(self):
|
||||
"""Test dropping all tables asynchronously through namespace."""
|
||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||
|
||||
# Create a child namespace first
|
||||
await db.create_namespace(["test_ns"])
|
||||
|
||||
# Create multiple tables in child namespace
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
]
|
||||
)
|
||||
for i in range(3):
|
||||
await db.create_table(f"table{i}", schema=schema, namespace=["test_ns"])
|
||||
|
||||
# Verify tables exist in child namespace
|
||||
table_names = await db.table_names(namespace=["test_ns"])
|
||||
assert len(list(table_names)) == 3
|
||||
|
||||
# Drop all tables in child namespace
|
||||
await db.drop_all_tables(namespace=["test_ns"])
|
||||
|
||||
# Verify all tables are gone from child namespace
|
||||
table_names = await db.table_names(namespace=["test_ns"])
|
||||
assert len(list(table_names)) == 0
|
||||
|
||||
@@ -412,3 +412,50 @@ def test_multi_vector_in_lance_model():
|
||||
|
||||
t = TestModel(id=1)
|
||||
assert t.vectors == [[0.0] * 16]
|
||||
|
||||
|
||||
def test_aliases_in_lance_model(mem_db):
|
||||
data = [
|
||||
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
{"vector": [5.9, 6.5], "item": "bar", "price": 20.0},
|
||||
]
|
||||
tbl = mem_db.create_table("items", data=data)
|
||||
|
||||
class TestModel(LanceModel):
|
||||
name: str = Field(alias="item")
|
||||
price: float
|
||||
distance: float = Field(alias="_distance")
|
||||
|
||||
model = (
|
||||
tbl.search([5.9, 6.5])
|
||||
.distance_type("cosine")
|
||||
.limit(1)
|
||||
.to_pydantic(TestModel)[0]
|
||||
)
|
||||
assert hasattr(model, "name")
|
||||
assert hasattr(model, "distance")
|
||||
assert model.distance < 0.01
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_aliases_in_lance_model_async(mem_db_async):
|
||||
data = [
|
||||
{"vector": [8.3, 2.5], "item": "foo", "price": 12.0},
|
||||
{"vector": [7.7, 3.9], "item": "bar", "price": 11.2},
|
||||
]
|
||||
tbl = await mem_db_async.create_table("items", data=data)
|
||||
|
||||
class TestModel(LanceModel):
|
||||
name: str = Field(alias="item")
|
||||
price: float
|
||||
distance: float = Field(alias="_distance")
|
||||
|
||||
model = (
|
||||
await tbl.vector_search([7.7, 3.9])
|
||||
.distance_type("cosine")
|
||||
.limit(1)
|
||||
.to_pydantic(TestModel)
|
||||
)[0]
|
||||
assert hasattr(model, "name")
|
||||
assert hasattr(model, "distance")
|
||||
assert model.distance < 0.01
|
||||
|
||||
@@ -546,6 +546,22 @@ def query_test_table(query_handler, *, server_version=Version("0.1.0")):
|
||||
yield table
|
||||
|
||||
|
||||
def test_head():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
"k": 5,
|
||||
"prefilter": True,
|
||||
"vector": [],
|
||||
"version": None,
|
||||
}
|
||||
|
||||
return pa.table({"id": [1, 2, 3]})
|
||||
|
||||
with query_test_table(handler) as table:
|
||||
data = table.head(5)
|
||||
assert data == pa.table({"id": [1, 2, 3]})
|
||||
|
||||
|
||||
def test_query_sync_minimal():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
|
||||
@@ -1487,7 +1487,7 @@ def setup_hybrid_search_table(db: DBConnection, embedding_func):
|
||||
table.add([{"text": p} for p in phrases])
|
||||
|
||||
# Create a fts index
|
||||
table.create_fts_index("text")
|
||||
table.create_fts_index("text", with_position=True)
|
||||
|
||||
return table, MyTable, emb
|
||||
|
||||
|
||||
@@ -10,8 +10,9 @@ use lancedb::{
|
||||
};
|
||||
use pyo3::{
|
||||
exceptions::{PyRuntimeError, PyValueError},
|
||||
pyclass, pyfunction, pymethods, Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult,
|
||||
Python,
|
||||
pyclass, pyfunction, pymethods,
|
||||
types::{PyDict, PyDictMethods},
|
||||
Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
|
||||
};
|
||||
use pyo3_async_runtimes::tokio::future_into_py;
|
||||
|
||||
@@ -292,40 +293,155 @@ impl Connection {
|
||||
limit: Option<u32>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
use lancedb::database::ListNamespacesRequest;
|
||||
let py = self_.py();
|
||||
future_into_py(py, async move {
|
||||
use lance_namespace::models::ListNamespacesRequest;
|
||||
let request = ListNamespacesRequest {
|
||||
namespace,
|
||||
id: if namespace.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(namespace)
|
||||
},
|
||||
page_token,
|
||||
limit,
|
||||
limit: limit.map(|l| l as i32),
|
||||
};
|
||||
inner.list_namespaces(request).await.infer_error()
|
||||
let response = inner.list_namespaces(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("namespaces", response.namespaces)?;
|
||||
dict.set_item("page_token", response.page_token)?;
|
||||
Ok(dict.unbind())
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (namespace,))]
|
||||
#[pyo3(signature = (namespace, mode=None, properties=None))]
|
||||
pub fn create_namespace(
|
||||
self_: PyRef<'_, Self>,
|
||||
namespace: Vec<String>,
|
||||
mode: Option<String>,
|
||||
properties: Option<std::collections::HashMap<String, String>>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
use lancedb::database::CreateNamespaceRequest;
|
||||
let request = CreateNamespaceRequest { namespace };
|
||||
inner.create_namespace(request).await.infer_error()
|
||||
let py = self_.py();
|
||||
future_into_py(py, async move {
|
||||
use lance_namespace::models::{create_namespace_request, CreateNamespaceRequest};
|
||||
let mode_enum = mode.and_then(|m| match m.to_lowercase().as_str() {
|
||||
"create" => Some(create_namespace_request::Mode::Create),
|
||||
"exist_ok" => Some(create_namespace_request::Mode::ExistOk),
|
||||
"overwrite" => Some(create_namespace_request::Mode::Overwrite),
|
||||
_ => None,
|
||||
});
|
||||
let request = CreateNamespaceRequest {
|
||||
id: if namespace.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(namespace)
|
||||
},
|
||||
mode: mode_enum,
|
||||
properties,
|
||||
};
|
||||
let response = inner.create_namespace(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
Ok(dict.unbind())
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (namespace, mode=None, behavior=None))]
|
||||
pub fn drop_namespace(
|
||||
self_: PyRef<'_, Self>,
|
||||
namespace: Vec<String>,
|
||||
mode: Option<String>,
|
||||
behavior: Option<String>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
let py = self_.py();
|
||||
future_into_py(py, async move {
|
||||
use lance_namespace::models::{drop_namespace_request, DropNamespaceRequest};
|
||||
let mode_enum = mode.and_then(|m| match m.to_uppercase().as_str() {
|
||||
"SKIP" => Some(drop_namespace_request::Mode::Skip),
|
||||
"FAIL" => Some(drop_namespace_request::Mode::Fail),
|
||||
_ => None,
|
||||
});
|
||||
let behavior_enum = behavior.and_then(|b| match b.to_uppercase().as_str() {
|
||||
"RESTRICT" => Some(drop_namespace_request::Behavior::Restrict),
|
||||
"CASCADE" => Some(drop_namespace_request::Behavior::Cascade),
|
||||
_ => None,
|
||||
});
|
||||
let request = DropNamespaceRequest {
|
||||
id: if namespace.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(namespace)
|
||||
},
|
||||
mode: mode_enum,
|
||||
behavior: behavior_enum,
|
||||
};
|
||||
let response = inner.drop_namespace(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
dict.set_item("transaction_id", response.transaction_id)?;
|
||||
Ok(dict.unbind())
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (namespace,))]
|
||||
pub fn drop_namespace(
|
||||
pub fn describe_namespace(
|
||||
self_: PyRef<'_, Self>,
|
||||
namespace: Vec<String>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
use lancedb::database::DropNamespaceRequest;
|
||||
let request = DropNamespaceRequest { namespace };
|
||||
inner.drop_namespace(request).await.infer_error()
|
||||
let py = self_.py();
|
||||
future_into_py(py, async move {
|
||||
use lance_namespace::models::DescribeNamespaceRequest;
|
||||
let request = DescribeNamespaceRequest {
|
||||
id: if namespace.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(namespace)
|
||||
},
|
||||
};
|
||||
let response = inner.describe_namespace(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
Ok(dict.unbind())
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (namespace=vec![], page_token=None, limit=None))]
|
||||
pub fn list_tables(
|
||||
self_: PyRef<'_, Self>,
|
||||
namespace: Vec<String>,
|
||||
page_token: Option<String>,
|
||||
limit: Option<u32>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
let py = self_.py();
|
||||
future_into_py(py, async move {
|
||||
use lance_namespace::models::ListTablesRequest;
|
||||
let request = ListTablesRequest {
|
||||
id: if namespace.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(namespace)
|
||||
},
|
||||
page_token,
|
||||
limit: limit.map(|l| l as i32),
|
||||
};
|
||||
let response = inner.list_tables(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("tables", response.tables)?;
|
||||
dict.set_item("page_token", response.page_token)?;
|
||||
Ok(dict.unbind())
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder};
|
||||
use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder};
|
||||
use lancedb::index::{
|
||||
scalar::{BTreeIndexBuilder, FtsIndexBuilder},
|
||||
vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
|
||||
@@ -87,6 +87,21 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
||||
}
|
||||
Ok(LanceDbIndex::IvfPq(ivf_pq_builder))
|
||||
},
|
||||
"IvfSq" => {
|
||||
let params = source.extract::<IvfSqParams>()?;
|
||||
let distance_type = parse_distance_type(params.distance_type)?;
|
||||
let mut ivf_sq_builder = IvfSqIndexBuilder::default()
|
||||
.distance_type(distance_type)
|
||||
.max_iterations(params.max_iterations)
|
||||
.sample_rate(params.sample_rate);
|
||||
if let Some(num_partitions) = params.num_partitions {
|
||||
ivf_sq_builder = ivf_sq_builder.num_partitions(num_partitions);
|
||||
}
|
||||
if let Some(target_partition_size) = params.target_partition_size {
|
||||
ivf_sq_builder = ivf_sq_builder.target_partition_size(target_partition_size);
|
||||
}
|
||||
Ok(LanceDbIndex::IvfSq(ivf_sq_builder))
|
||||
},
|
||||
"IvfRq" => {
|
||||
let params = source.extract::<IvfRqParams>()?;
|
||||
let distance_type = parse_distance_type(params.distance_type)?;
|
||||
@@ -142,7 +157,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
||||
Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
|
||||
},
|
||||
not_supported => Err(PyValueError::new_err(format!(
|
||||
"Invalid index type '{}'. Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfHnswPq, or IvfHnswSq",
|
||||
"Invalid index type '{}'. Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, or IvfHnswSq",
|
||||
not_supported
|
||||
))),
|
||||
}
|
||||
@@ -186,6 +201,15 @@ struct IvfPqParams {
|
||||
target_partition_size: Option<u32>,
|
||||
}
|
||||
|
||||
#[derive(FromPyObject)]
|
||||
struct IvfSqParams {
|
||||
distance_type: String,
|
||||
num_partitions: Option<u32>,
|
||||
max_iterations: u32,
|
||||
sample_rate: u32,
|
||||
target_partition_size: Option<u32>,
|
||||
}
|
||||
|
||||
#[derive(FromPyObject)]
|
||||
struct IvfRqParams {
|
||||
distance_type: String,
|
||||
|
||||
@@ -690,7 +690,7 @@ impl FTSQuery {
|
||||
}
|
||||
|
||||
pub fn get_query(&self) -> String {
|
||||
self.fts_query.query.query().to_owned()
|
||||
self.fts_query.query.query().clone()
|
||||
}
|
||||
|
||||
pub fn to_query_request(&self) -> PyQueryRequest {
|
||||
|
||||
@@ -134,17 +134,19 @@ pub struct MergeResult {
|
||||
pub num_updated_rows: u64,
|
||||
pub num_inserted_rows: u64,
|
||||
pub num_deleted_rows: u64,
|
||||
pub num_attempts: u32,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl MergeResult {
|
||||
pub fn __repr__(&self) -> String {
|
||||
format!(
|
||||
"MergeResult(version={}, num_updated_rows={}, num_inserted_rows={}, num_deleted_rows={})",
|
||||
"MergeResult(version={}, num_updated_rows={}, num_inserted_rows={}, num_deleted_rows={}, num_attempts={})",
|
||||
self.version,
|
||||
self.num_updated_rows,
|
||||
self.num_inserted_rows,
|
||||
self.num_deleted_rows
|
||||
self.num_deleted_rows,
|
||||
self.num_attempts
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -156,6 +158,7 @@ impl From<lancedb::table::MergeResult> for MergeResult {
|
||||
num_updated_rows: result.num_updated_rows,
|
||||
num_inserted_rows: result.num_inserted_rows,
|
||||
num_deleted_rows: result.num_deleted_rows,
|
||||
num_attempts: result.num_attempts,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.22.4-beta.0"
|
||||
version = "0.22.4-beta.3"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
@@ -105,12 +105,12 @@ test-log = "0.2"
|
||||
|
||||
[features]
|
||||
default = ["aws", "gcs", "azure", "dynamodb", "oss"]
|
||||
aws = ["lance/aws", "lance-io/aws"]
|
||||
oss = ["lance/oss", "lance-io/oss"]
|
||||
gcs = ["lance/gcp", "lance-io/gcp"]
|
||||
azure = ["lance/azure", "lance-io/azure"]
|
||||
aws = ["lance/aws", "lance-io/aws", "lance-namespace-impls/dir-aws"]
|
||||
oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"]
|
||||
gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"]
|
||||
azure = ["lance/azure", "lance-io/azure", "lance-namespace-impls/dir-azure"]
|
||||
dynamodb = ["lance/dynamodb", "aws"]
|
||||
remote = ["dep:reqwest", "dep:http"]
|
||||
remote = ["dep:reqwest", "dep:http", "lance-namespace-impls/rest"]
|
||||
fp16kernels = ["lance-linalg/fp16kernels"]
|
||||
s3-test = []
|
||||
bedrock = ["dep:aws-sdk-bedrockruntime"]
|
||||
|
||||
@@ -9,11 +9,11 @@ all-tests: feature-tests remote-tests
|
||||
# the environment.
|
||||
feature-tests:
|
||||
../../ci/run_with_docker_compose.sh \
|
||||
cargo test --all-features --tests --locked --examples
|
||||
cargo test --all-features --tests --locked --examples $(CARGO_ARGS)
|
||||
.PHONY: feature-tests
|
||||
|
||||
# Run tests against remote endpoints.
|
||||
remote-tests:
|
||||
../../ci/run_with_test_connection.sh \
|
||||
cargo test --features remote --locked
|
||||
cargo test --features remote --locked $(CARGO_ARGS)
|
||||
.PHONY: remote-tests
|
||||
|
||||
@@ -9,6 +9,11 @@ use std::sync::Arc;
|
||||
use arrow_array::RecordBatchReader;
|
||||
use arrow_schema::{Field, SchemaRef};
|
||||
use lance::dataset::ReadParams;
|
||||
use lance_namespace::models::{
|
||||
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
|
||||
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
|
||||
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||
};
|
||||
#[cfg(feature = "aws")]
|
||||
use object_store::aws::AwsCredential;
|
||||
|
||||
@@ -17,9 +22,8 @@ use crate::database::listing::{
|
||||
ListingDatabase, OPT_NEW_TABLE_STORAGE_VERSION, OPT_NEW_TABLE_V2_MANIFEST_PATHS,
|
||||
};
|
||||
use crate::database::{
|
||||
CloneTableRequest, CreateNamespaceRequest, CreateTableData, CreateTableMode,
|
||||
CreateTableRequest, Database, DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest,
|
||||
OpenTableRequest, ReadConsistency, TableNamesRequest,
|
||||
CloneTableRequest, CreateTableData, CreateTableMode, CreateTableRequest, Database,
|
||||
DatabaseOptions, OpenTableRequest, ReadConsistency, TableNamesRequest,
|
||||
};
|
||||
use crate::embeddings::{
|
||||
EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry, MemoryRegistry, WithEmbeddings,
|
||||
@@ -74,6 +78,7 @@ impl TableNamesBuilder {
|
||||
}
|
||||
|
||||
/// Execute the table names operation
|
||||
#[allow(deprecated)]
|
||||
pub async fn execute(self) -> Result<Vec<String>> {
|
||||
self.parent.clone().table_names(self.request).await
|
||||
}
|
||||
@@ -239,7 +244,7 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
let store_options = self
|
||||
.request
|
||||
@@ -259,7 +264,7 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -408,6 +413,7 @@ impl OpenTableBuilder {
|
||||
index_cache_size: None,
|
||||
lance_read_params: None,
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
},
|
||||
embedding_registry,
|
||||
}
|
||||
@@ -442,7 +448,7 @@ impl OpenTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
let storage_options = self
|
||||
.request
|
||||
@@ -461,7 +467,7 @@ impl OpenTableBuilder {
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -767,20 +773,42 @@ impl Connection {
|
||||
}
|
||||
|
||||
/// List immediate child namespace names in the given namespace
|
||||
pub async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>> {
|
||||
pub async fn list_namespaces(
|
||||
&self,
|
||||
request: ListNamespacesRequest,
|
||||
) -> Result<ListNamespacesResponse> {
|
||||
self.internal.list_namespaces(request).await
|
||||
}
|
||||
|
||||
/// Create a new namespace
|
||||
pub async fn create_namespace(&self, request: CreateNamespaceRequest) -> Result<()> {
|
||||
pub async fn create_namespace(
|
||||
&self,
|
||||
request: CreateNamespaceRequest,
|
||||
) -> Result<CreateNamespaceResponse> {
|
||||
self.internal.create_namespace(request).await
|
||||
}
|
||||
|
||||
/// Drop a namespace
|
||||
pub async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<()> {
|
||||
pub async fn drop_namespace(
|
||||
&self,
|
||||
request: DropNamespaceRequest,
|
||||
) -> Result<DropNamespaceResponse> {
|
||||
self.internal.drop_namespace(request).await
|
||||
}
|
||||
|
||||
/// Describe a namespace
|
||||
pub async fn describe_namespace(
|
||||
&self,
|
||||
request: DescribeNamespaceRequest,
|
||||
) -> Result<DescribeNamespaceResponse> {
|
||||
self.internal.describe_namespace(request).await
|
||||
}
|
||||
|
||||
/// List tables with pagination support
|
||||
pub async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
|
||||
self.internal.list_tables(request).await
|
||||
}
|
||||
|
||||
/// Get the in-memory embedding registry.
|
||||
/// It's important to note that the embedding registry is not persisted across connections.
|
||||
/// So if a table contains embeddings, you will need to make sure that you are using a connection that has the same embedding functions registered
|
||||
@@ -959,7 +987,7 @@ impl ConnectBuilder {
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.request.options.insert(key.into(), value.into());
|
||||
self
|
||||
@@ -967,7 +995,7 @@ impl ConnectBuilder {
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -1086,6 +1114,7 @@ pub struct ConnectNamespaceBuilder {
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
server_side_query_enabled: bool,
|
||||
}
|
||||
|
||||
impl ConnectNamespaceBuilder {
|
||||
@@ -1097,12 +1126,13 @@ impl ConnectNamespaceBuilder {
|
||||
read_consistency_interval: None,
|
||||
embedding_registry: None,
|
||||
session: None,
|
||||
server_side_query_enabled: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.storage_options.insert(key.into(), value.into());
|
||||
self
|
||||
@@ -1110,7 +1140,7 @@ impl ConnectNamespaceBuilder {
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -1151,6 +1181,18 @@ impl ConnectNamespaceBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable server-side query execution.
|
||||
///
|
||||
/// When enabled, queries will be executed on the namespace server instead of
|
||||
/// locally. This can improve performance by reducing data transfer and
|
||||
/// leveraging server-side compute resources.
|
||||
///
|
||||
/// Default is `false` (queries executed locally).
|
||||
pub fn server_side_query(mut self, enabled: bool) -> Self {
|
||||
self.server_side_query_enabled = enabled;
|
||||
self
|
||||
}
|
||||
|
||||
/// Execute the connection
|
||||
pub async fn execute(self) -> Result<Connection> {
|
||||
use crate::database::namespace::LanceNamespaceDatabase;
|
||||
@@ -1162,6 +1204,7 @@ impl ConnectNamespaceBuilder {
|
||||
self.storage_options,
|
||||
self.read_consistency_interval,
|
||||
self.session,
|
||||
self.server_side_query_enabled,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
|
||||
@@ -24,6 +24,12 @@ use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
|
||||
use futures::stream;
|
||||
use lance::dataset::ReadParams;
|
||||
use lance_datafusion::utils::StreamingWriteSource;
|
||||
use lance_namespace::models::{
|
||||
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
|
||||
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
|
||||
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||
};
|
||||
use lance_namespace::LanceNamespace;
|
||||
|
||||
use crate::arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt};
|
||||
use crate::error::Result;
|
||||
@@ -36,32 +42,7 @@ pub trait DatabaseOptions {
|
||||
fn serialize_into_map(&self, map: &mut HashMap<String, String>);
|
||||
}
|
||||
|
||||
/// A request to list namespaces in the database
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct ListNamespacesRequest {
|
||||
/// The parent namespace to list namespaces in. Empty list represents root namespace.
|
||||
pub namespace: Vec<String>,
|
||||
/// If present, only return names that come lexicographically after the supplied value.
|
||||
pub page_token: Option<String>,
|
||||
/// The maximum number of namespace names to return
|
||||
pub limit: Option<u32>,
|
||||
}
|
||||
|
||||
/// A request to create a namespace
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CreateNamespaceRequest {
|
||||
/// The namespace identifier to create
|
||||
pub namespace: Vec<String>,
|
||||
}
|
||||
|
||||
/// A request to drop a namespace
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DropNamespaceRequest {
|
||||
/// The namespace identifier to drop
|
||||
pub namespace: Vec<String>,
|
||||
}
|
||||
|
||||
/// A request to list names of tables in the database
|
||||
/// A request to list names of tables in the database (deprecated, use ListTablesRequest)
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct TableNamesRequest {
|
||||
/// The namespace to list tables in. Empty list represents root namespace.
|
||||
@@ -77,7 +58,7 @@ pub struct TableNamesRequest {
|
||||
}
|
||||
|
||||
/// A request to open a table
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone)]
|
||||
pub struct OpenTableRequest {
|
||||
pub name: String,
|
||||
/// The namespace to open the table from. Empty list represents root namespace.
|
||||
@@ -87,6 +68,22 @@ pub struct OpenTableRequest {
|
||||
/// Optional custom location for the table. If not provided, the database will
|
||||
/// derive a location based on its URI and the table name.
|
||||
pub location: Option<String>,
|
||||
/// Optional namespace client for server-side query execution.
|
||||
/// When set, queries will be executed on the namespace server instead of locally.
|
||||
pub namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for OpenTableRequest {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("OpenTableRequest")
|
||||
.field("name", &self.name)
|
||||
.field("namespace", &self.namespace)
|
||||
.field("index_cache_size", &self.index_cache_size)
|
||||
.field("lance_read_params", &self.lance_read_params)
|
||||
.field("location", &self.location)
|
||||
.field("namespace_client", &self.namespace_client)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
pub type TableBuilderCallback = Box<dyn FnOnce(OpenTableRequest) -> OpenTableRequest + Send>;
|
||||
@@ -170,6 +167,9 @@ pub struct CreateTableRequest {
|
||||
/// Optional custom location for the table. If not provided, the database will
|
||||
/// derive a location based on its URI and the table name.
|
||||
pub location: Option<String>,
|
||||
/// Optional namespace client for server-side query execution.
|
||||
/// When set, queries will be executed on the namespace server instead of locally.
|
||||
pub namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
}
|
||||
|
||||
impl CreateTableRequest {
|
||||
@@ -181,6 +181,7 @@ impl CreateTableRequest {
|
||||
mode: CreateTableMode::default(),
|
||||
write_options: WriteOptions::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -247,13 +248,30 @@ pub trait Database:
|
||||
/// Get the read consistency of the database
|
||||
async fn read_consistency(&self) -> Result<ReadConsistency>;
|
||||
/// List immediate child namespace names in the given namespace
|
||||
async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>>;
|
||||
async fn list_namespaces(
|
||||
&self,
|
||||
request: ListNamespacesRequest,
|
||||
) -> Result<ListNamespacesResponse>;
|
||||
/// Create a new namespace
|
||||
async fn create_namespace(&self, request: CreateNamespaceRequest) -> Result<()>;
|
||||
async fn create_namespace(
|
||||
&self,
|
||||
request: CreateNamespaceRequest,
|
||||
) -> Result<CreateNamespaceResponse>;
|
||||
/// Drop a namespace
|
||||
async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<()>;
|
||||
async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<DropNamespaceResponse>;
|
||||
/// Describe a namespace (get its properties)
|
||||
async fn describe_namespace(
|
||||
&self,
|
||||
request: DescribeNamespaceRequest,
|
||||
) -> Result<DescribeNamespaceResponse>;
|
||||
/// List the names of tables in the database
|
||||
///
|
||||
/// # Deprecated
|
||||
/// Use `list_tables` instead for pagination support
|
||||
#[deprecated(note = "Use list_tables instead")]
|
||||
async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>>;
|
||||
/// List tables in the database with pagination support
|
||||
async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse>;
|
||||
/// Create a table in the database
|
||||
async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>>;
|
||||
/// Clone a table in the database.
|
||||
|
||||
@@ -24,10 +24,15 @@ use crate::io::object_store::MirroringObjectStoreWrapper;
|
||||
use crate::table::NativeTable;
|
||||
use crate::utils::validate_table_name;
|
||||
|
||||
use lance_namespace::models::{
|
||||
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
|
||||
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
|
||||
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||
};
|
||||
|
||||
use super::{
|
||||
BaseTable, CloneTableRequest, CreateNamespaceRequest, CreateTableMode, CreateTableRequest,
|
||||
Database, DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest, OpenTableRequest,
|
||||
TableNamesRequest,
|
||||
BaseTable, CloneTableRequest, CreateTableMode, CreateTableRequest, Database, DatabaseOptions,
|
||||
OpenTableRequest, TableNamesRequest,
|
||||
};
|
||||
|
||||
/// File extension to indicate a lance table
|
||||
@@ -35,6 +40,7 @@ pub const LANCE_FILE_EXTENSION: &str = "lance";
|
||||
|
||||
pub const OPT_NEW_TABLE_STORAGE_VERSION: &str = "new_table_data_storage_version";
|
||||
pub const OPT_NEW_TABLE_V2_MANIFEST_PATHS: &str = "new_table_enable_v2_manifest_paths";
|
||||
pub const OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS: &str = "new_table_enable_stable_row_ids";
|
||||
|
||||
/// Controls how new tables should be created
|
||||
#[derive(Clone, Debug, Default)]
|
||||
@@ -48,6 +54,12 @@ pub struct NewTableConfig {
|
||||
/// V2 manifest paths are more efficient than V2 manifest paths but are not
|
||||
/// supported by old clients.
|
||||
pub enable_v2_manifest_paths: Option<bool>,
|
||||
/// Whether to enable stable row IDs for new tables
|
||||
///
|
||||
/// When enabled, row IDs remain stable after compaction, update, delete,
|
||||
/// and merges. This is useful for materialized views and other use cases
|
||||
/// that need to track source rows across these operations.
|
||||
pub enable_stable_row_ids: Option<bool>,
|
||||
}
|
||||
|
||||
/// Options specific to the listing database
|
||||
@@ -60,7 +72,7 @@ pub struct ListingDatabaseOptions {
|
||||
/// These are used to create/list tables and they are inherited by all tables
|
||||
/// opened by this database.
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub storage_options: HashMap<String, String>,
|
||||
}
|
||||
|
||||
@@ -87,6 +99,14 @@ impl ListingDatabaseOptions {
|
||||
})
|
||||
})
|
||||
.transpose()?,
|
||||
enable_stable_row_ids: map
|
||||
.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS)
|
||||
.map(|s| {
|
||||
s.parse::<bool>().map_err(|_| Error::InvalidInput {
|
||||
message: format!("enable_stable_row_ids must be a boolean, received {}", s),
|
||||
})
|
||||
})
|
||||
.transpose()?,
|
||||
};
|
||||
// We just assume that any options that are not new table config options are storage options
|
||||
let storage_options = map
|
||||
@@ -94,6 +114,7 @@ impl ListingDatabaseOptions {
|
||||
.filter(|(key, _)| {
|
||||
key.as_str() != OPT_NEW_TABLE_STORAGE_VERSION
|
||||
&& key.as_str() != OPT_NEW_TABLE_V2_MANIFEST_PATHS
|
||||
&& key.as_str() != OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS
|
||||
})
|
||||
.map(|(key, value)| (key.clone(), value.clone()))
|
||||
.collect();
|
||||
@@ -118,6 +139,12 @@ impl DatabaseOptions for ListingDatabaseOptions {
|
||||
enable_v2_manifest_paths.to_string(),
|
||||
);
|
||||
}
|
||||
if let Some(enable_stable_row_ids) = self.new_table_config.enable_stable_row_ids {
|
||||
map.insert(
|
||||
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||
enable_stable_row_ids.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -157,7 +184,7 @@ impl ListingDatabaseOptionsBuilder {
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.options
|
||||
.storage_options
|
||||
@@ -167,7 +194,7 @@ impl ListingDatabaseOptionsBuilder {
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
@@ -475,7 +502,7 @@ impl ListingDatabase {
|
||||
// this error is not lance::Error::DatasetNotFound, as the method
|
||||
// `remove_dir_all` may be used to remove something not be a dataset
|
||||
lance::Error::NotFound { .. } => Error::TableNotFound {
|
||||
name: name.to_owned(),
|
||||
name: name.clone(),
|
||||
source: Box::new(err),
|
||||
},
|
||||
_ => Error::from(err),
|
||||
@@ -497,7 +524,7 @@ impl ListingDatabase {
|
||||
fn extract_storage_overrides(
|
||||
&self,
|
||||
request: &CreateTableRequest,
|
||||
) -> Result<(Option<LanceFileVersion>, Option<bool>)> {
|
||||
) -> Result<(Option<LanceFileVersion>, Option<bool>, Option<bool>)> {
|
||||
let storage_options = request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
@@ -518,7 +545,19 @@ impl ListingDatabase {
|
||||
message: "enable_v2_manifest_paths must be a boolean".to_string(),
|
||||
})?;
|
||||
|
||||
Ok((storage_version_override, v2_manifest_override))
|
||||
let stable_row_ids_override = storage_options
|
||||
.and_then(|opts| opts.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS))
|
||||
.map(|s| s.parse::<bool>())
|
||||
.transpose()
|
||||
.map_err(|_| Error::InvalidInput {
|
||||
message: "enable_stable_row_ids must be a boolean".to_string(),
|
||||
})?;
|
||||
|
||||
Ok((
|
||||
storage_version_override,
|
||||
v2_manifest_override,
|
||||
stable_row_ids_override,
|
||||
))
|
||||
}
|
||||
|
||||
/// Prepare write parameters for table creation
|
||||
@@ -527,6 +566,7 @@ impl ListingDatabase {
|
||||
request: &CreateTableRequest,
|
||||
storage_version_override: Option<LanceFileVersion>,
|
||||
v2_manifest_override: Option<bool>,
|
||||
stable_row_ids_override: Option<bool>,
|
||||
) -> lance::dataset::WriteParams {
|
||||
let mut write_params = request
|
||||
.write_options
|
||||
@@ -571,6 +611,13 @@ impl ListingDatabase {
|
||||
write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
|
||||
}
|
||||
|
||||
// Apply enable_stable_row_ids: table-level override takes precedence over connection config
|
||||
if let Some(enable_stable_row_ids) =
|
||||
stable_row_ids_override.or(self.new_table_config.enable_stable_row_ids)
|
||||
{
|
||||
write_params.enable_stable_row_ids = enable_stable_row_ids;
|
||||
}
|
||||
|
||||
if matches!(&request.mode, CreateTableMode::Overwrite) {
|
||||
write_params.mode = WriteMode::Overwrite;
|
||||
}
|
||||
@@ -599,6 +646,7 @@ impl ListingDatabase {
|
||||
index_cache_size: None,
|
||||
lance_read_params: None,
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
};
|
||||
let req = (callback)(req);
|
||||
let table = self.open_table(req).await?;
|
||||
@@ -620,14 +668,20 @@ impl ListingDatabase {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Database for ListingDatabase {
|
||||
async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>> {
|
||||
if !request.namespace.is_empty() {
|
||||
async fn list_namespaces(
|
||||
&self,
|
||||
request: ListNamespacesRequest,
|
||||
) -> Result<ListNamespacesResponse> {
|
||||
if request.id.as_ref().map(|v| !v.is_empty()).unwrap_or(false) {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(Vec::new())
|
||||
Ok(ListNamespacesResponse {
|
||||
namespaces: Vec::new(),
|
||||
page_token: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn uri(&self) -> &str {
|
||||
@@ -646,13 +700,28 @@ impl Database for ListingDatabase {
|
||||
}
|
||||
}
|
||||
|
||||
async fn create_namespace(&self, _request: CreateNamespaceRequest) -> Result<()> {
|
||||
async fn create_namespace(
|
||||
&self,
|
||||
_request: CreateNamespaceRequest,
|
||||
) -> Result<CreateNamespaceResponse> {
|
||||
Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn drop_namespace(&self, _request: DropNamespaceRequest) -> Result<()> {
|
||||
async fn drop_namespace(
|
||||
&self,
|
||||
_request: DropNamespaceRequest,
|
||||
) -> Result<DropNamespaceResponse> {
|
||||
Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn describe_namespace(
|
||||
&self,
|
||||
_request: DescribeNamespaceRequest,
|
||||
) -> Result<DescribeNamespaceResponse> {
|
||||
Err(Error::NotSupported {
|
||||
message: "Namespace operations are not supported for listing database".into(),
|
||||
})
|
||||
@@ -693,6 +762,57 @@ impl Database for ListingDatabase {
|
||||
Ok(f)
|
||||
}
|
||||
|
||||
async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
|
||||
if request.id.as_ref().map(|v| !v.is_empty()).unwrap_or(false) {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
|
||||
});
|
||||
}
|
||||
let mut f = self
|
||||
.object_store
|
||||
.read_dir(self.base_path.clone())
|
||||
.await?
|
||||
.iter()
|
||||
.map(Path::new)
|
||||
.filter(|path| {
|
||||
let is_lance = path
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.map(|e| e == LANCE_EXTENSION);
|
||||
is_lance.unwrap_or(false)
|
||||
})
|
||||
.filter_map(|p| p.file_stem().and_then(|s| s.to_str().map(String::from)))
|
||||
.collect::<Vec<String>>();
|
||||
f.sort();
|
||||
|
||||
// Handle pagination with page_token
|
||||
if let Some(ref page_token) = request.page_token {
|
||||
let index = f
|
||||
.iter()
|
||||
.position(|name| name.as_str() > page_token.as_str())
|
||||
.unwrap_or(f.len());
|
||||
f.drain(0..index);
|
||||
}
|
||||
|
||||
// Determine if there's a next page
|
||||
let next_page_token = if let Some(limit) = request.limit {
|
||||
if f.len() > limit as usize {
|
||||
let token = f[limit as usize].clone();
|
||||
f.truncate(limit as usize);
|
||||
Some(token)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(ListTablesResponse {
|
||||
tables: f,
|
||||
page_token: next_page_token,
|
||||
})
|
||||
}
|
||||
|
||||
async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
// When namespace is not empty, location must be provided
|
||||
if !request.namespace.is_empty() && request.location.is_none() {
|
||||
@@ -706,11 +826,15 @@ impl Database for ListingDatabase {
|
||||
.clone()
|
||||
.unwrap_or_else(|| self.table_uri(&request.name).unwrap());
|
||||
|
||||
let (storage_version_override, v2_manifest_override) =
|
||||
let (storage_version_override, v2_manifest_override, stable_row_ids_override) =
|
||||
self.extract_storage_overrides(&request)?;
|
||||
|
||||
let write_params =
|
||||
self.prepare_write_params(&request, storage_version_override, v2_manifest_override);
|
||||
let write_params = self.prepare_write_params(
|
||||
&request,
|
||||
storage_version_override,
|
||||
v2_manifest_override,
|
||||
stable_row_ids_override,
|
||||
);
|
||||
|
||||
let data_schema = request.data.arrow_schema();
|
||||
|
||||
@@ -722,6 +846,7 @@ impl Database for ListingDatabase {
|
||||
self.store_wrapper.clone(),
|
||||
Some(write_params),
|
||||
self.read_consistency_interval,
|
||||
request.namespace_client,
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -793,6 +918,7 @@ impl Database for ListingDatabase {
|
||||
self.store_wrapper.clone(),
|
||||
None,
|
||||
self.read_consistency_interval,
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -864,6 +990,7 @@ impl Database for ListingDatabase {
|
||||
self.store_wrapper.clone(),
|
||||
Some(read_params),
|
||||
self.read_consistency_interval,
|
||||
request.namespace_client,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
@@ -901,6 +1028,7 @@ impl Database for ListingDatabase {
|
||||
self.drop_tables(vec![name.to_string()]).await
|
||||
}
|
||||
|
||||
#[allow(deprecated)]
|
||||
async fn drop_all_tables(&self, namespace: &[String]) -> Result<()> {
|
||||
// Check if namespace parameter is provided
|
||||
if !namespace.is_empty() {
|
||||
@@ -921,7 +1049,7 @@ impl Database for ListingDatabase {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::connection::ConnectRequest;
|
||||
use crate::database::{CreateTableData, CreateTableMode, CreateTableRequest};
|
||||
use crate::database::{CreateTableData, CreateTableMode, CreateTableRequest, WriteOptions};
|
||||
use crate::table::{Table, TableDefinition};
|
||||
use arrow_array::{Int32Array, RecordBatch, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
@@ -965,6 +1093,7 @@ mod tests {
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -986,6 +1115,7 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
// Verify both tables exist
|
||||
#[allow(deprecated)]
|
||||
let table_names = db.table_names(TableNamesRequest::default()).await.unwrap();
|
||||
assert!(table_names.contains(&"source_table".to_string()));
|
||||
assert!(table_names.contains(&"cloned_table".to_string()));
|
||||
@@ -1029,6 +1159,7 @@ mod tests {
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1087,6 +1218,7 @@ mod tests {
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1122,6 +1254,7 @@ mod tests {
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1161,6 +1294,7 @@ mod tests {
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1200,6 +1334,7 @@ mod tests {
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1254,6 +1389,7 @@ mod tests {
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1311,6 +1447,7 @@ mod tests {
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1396,6 +1533,7 @@ mod tests {
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1482,6 +1620,7 @@ mod tests {
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1575,6 +1714,7 @@ mod tests {
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1621,4 +1761,270 @@ mod tests {
|
||||
// Cloned table should have all 8 rows from the latest version
|
||||
assert_eq!(cloned_table.count_rows(None).await.unwrap(), 8);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_with_stable_row_ids_connection_level() {
|
||||
let tempdir = tempdir().unwrap();
|
||||
let uri = tempdir.path().to_str().unwrap();
|
||||
|
||||
// Create database with stable row IDs enabled at connection level
|
||||
let mut options = HashMap::new();
|
||||
options.insert(
|
||||
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||
"true".to_string(),
|
||||
);
|
||||
|
||||
let request = ConnectRequest {
|
||||
uri: uri.to_string(),
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
|
||||
let db = ListingDatabase::connect_with_options(&request)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Verify the config was parsed correctly
|
||||
assert_eq!(db.new_table_config.enable_stable_row_ids, Some(true));
|
||||
|
||||
// Create a table - it should inherit the stable row IDs setting
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
|
||||
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch)],
|
||||
schema.clone(),
|
||||
));
|
||||
|
||||
let table = db
|
||||
.create_table(CreateTableRequest {
|
||||
name: "test_stable".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Data(reader),
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Verify table was created successfully
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 3);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_with_stable_row_ids_table_level() {
|
||||
let (_tempdir, db) = setup_database().await;
|
||||
|
||||
// Verify connection has no stable row IDs config
|
||||
assert_eq!(db.new_table_config.enable_stable_row_ids, None);
|
||||
|
||||
// Create a table with stable row IDs enabled at table level via storage_options
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
|
||||
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch)],
|
||||
schema.clone(),
|
||||
));
|
||||
|
||||
let mut storage_options = HashMap::new();
|
||||
storage_options.insert(
|
||||
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||
"true".to_string(),
|
||||
);
|
||||
|
||||
let write_options = WriteOptions {
|
||||
lance_write_params: Some(lance::dataset::WriteParams {
|
||||
store_params: Some(lance::io::ObjectStoreParams {
|
||||
storage_options: Some(storage_options),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
}),
|
||||
};
|
||||
|
||||
let table = db
|
||||
.create_table(CreateTableRequest {
|
||||
name: "test_stable_table_level".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Data(reader),
|
||||
mode: CreateTableMode::Create,
|
||||
write_options,
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Verify table was created successfully
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 3);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_stable_row_ids_table_overrides_connection() {
|
||||
let tempdir = tempdir().unwrap();
|
||||
let uri = tempdir.path().to_str().unwrap();
|
||||
|
||||
// Create database with stable row IDs enabled at connection level
|
||||
let mut options = HashMap::new();
|
||||
options.insert(
|
||||
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||
"true".to_string(),
|
||||
);
|
||||
|
||||
let request = ConnectRequest {
|
||||
uri: uri.to_string(),
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
|
||||
let db = ListingDatabase::connect_with_options(&request)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(db.new_table_config.enable_stable_row_ids, Some(true));
|
||||
|
||||
// Create table with stable row IDs disabled at table level (overrides connection)
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
|
||||
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch)],
|
||||
schema.clone(),
|
||||
));
|
||||
|
||||
let mut storage_options = HashMap::new();
|
||||
storage_options.insert(
|
||||
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||
"false".to_string(),
|
||||
);
|
||||
|
||||
let write_options = WriteOptions {
|
||||
lance_write_params: Some(lance::dataset::WriteParams {
|
||||
store_params: Some(lance::io::ObjectStoreParams {
|
||||
storage_options: Some(storage_options),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
}),
|
||||
};
|
||||
|
||||
let table = db
|
||||
.create_table(CreateTableRequest {
|
||||
name: "test_override".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Data(reader),
|
||||
mode: CreateTableMode::Create,
|
||||
write_options,
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Verify table was created successfully
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 3);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_stable_row_ids_invalid_value() {
|
||||
let tempdir = tempdir().unwrap();
|
||||
let uri = tempdir.path().to_str().unwrap();
|
||||
|
||||
// Try to create database with invalid stable row IDs value
|
||||
let mut options = HashMap::new();
|
||||
options.insert(
|
||||
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||
"not_a_boolean".to_string(),
|
||||
);
|
||||
|
||||
let request = ConnectRequest {
|
||||
uri: uri.to_string(),
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
options,
|
||||
read_consistency_interval: None,
|
||||
session: None,
|
||||
};
|
||||
|
||||
let result = ListingDatabase::connect_with_options(&request).await;
|
||||
|
||||
assert!(result.is_err());
|
||||
assert!(matches!(
|
||||
result.unwrap_err(),
|
||||
Error::InvalidInput { message } if message.contains("enable_stable_row_ids must be a boolean")
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_stable_row_ids_config_serialization() {
|
||||
// Test that ListingDatabaseOptions correctly serializes stable_row_ids
|
||||
let mut options = HashMap::new();
|
||||
options.insert(
|
||||
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||
"true".to_string(),
|
||||
);
|
||||
|
||||
// Parse the options
|
||||
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
|
||||
assert_eq!(
|
||||
db_options.new_table_config.enable_stable_row_ids,
|
||||
Some(true)
|
||||
);
|
||||
|
||||
// Serialize back to map
|
||||
let mut serialized = HashMap::new();
|
||||
db_options.serialize_into_map(&mut serialized);
|
||||
|
||||
assert_eq!(
|
||||
serialized.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS),
|
||||
Some(&"true".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_stable_row_ids_config_parse_false() {
|
||||
let mut options = HashMap::new();
|
||||
options.insert(
|
||||
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||
"false".to_string(),
|
||||
);
|
||||
|
||||
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
|
||||
assert_eq!(
|
||||
db_options.new_table_config.enable_stable_row_ids,
|
||||
Some(false)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_stable_row_ids_config_not_set() {
|
||||
let options = HashMap::new();
|
||||
|
||||
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
|
||||
assert_eq!(db_options.new_table_config.enable_stable_row_ids, None);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,8 +10,10 @@ use async_trait::async_trait;
|
||||
use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsProvider};
|
||||
use lance_namespace::{
|
||||
models::{
|
||||
CreateEmptyTableRequest, CreateNamespaceRequest, DescribeTableRequest,
|
||||
DropNamespaceRequest, DropTableRequest, ListNamespacesRequest, ListTablesRequest,
|
||||
CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
|
||||
DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest,
|
||||
DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest,
|
||||
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||
},
|
||||
LanceNamespace,
|
||||
};
|
||||
@@ -22,11 +24,8 @@ use crate::database::ReadConsistency;
|
||||
use crate::error::{Error, Result};
|
||||
|
||||
use super::{
|
||||
listing::ListingDatabase, BaseTable, CloneTableRequest,
|
||||
CreateNamespaceRequest as DbCreateNamespaceRequest, CreateTableMode,
|
||||
CreateTableRequest as DbCreateTableRequest, Database,
|
||||
DropNamespaceRequest as DbDropNamespaceRequest,
|
||||
ListNamespacesRequest as DbListNamespacesRequest, OpenTableRequest, TableNamesRequest,
|
||||
listing::ListingDatabase, BaseTable, CloneTableRequest, CreateTableMode,
|
||||
CreateTableRequest as DbCreateTableRequest, Database, OpenTableRequest, TableNamesRequest,
|
||||
};
|
||||
|
||||
/// A database implementation that uses lance-namespace for table management
|
||||
@@ -40,6 +39,8 @@ pub struct LanceNamespaceDatabase {
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
// database URI
|
||||
uri: String,
|
||||
// Whether to enable server-side query execution
|
||||
server_side_query_enabled: bool,
|
||||
}
|
||||
|
||||
impl LanceNamespaceDatabase {
|
||||
@@ -49,6 +50,7 @@ impl LanceNamespaceDatabase {
|
||||
storage_options: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
server_side_query_enabled: bool,
|
||||
) -> Result<Self> {
|
||||
let mut builder = ConnectBuilder::new(ns_impl);
|
||||
for (key, value) in ns_properties.clone() {
|
||||
@@ -67,6 +69,7 @@ impl LanceNamespaceDatabase {
|
||||
read_consistency_interval,
|
||||
session,
|
||||
uri: format!("namespace://{}", ns_impl),
|
||||
server_side_query_enabled,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -76,6 +79,7 @@ impl std::fmt::Debug for LanceNamespaceDatabase {
|
||||
f.debug_struct("LanceNamespaceDatabase")
|
||||
.field("storage_options", &self.storage_options)
|
||||
.field("read_consistency_interval", &self.read_consistency_interval)
|
||||
.field("server_side_query_enabled", &self.server_side_query_enabled)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
@@ -149,92 +153,47 @@ impl Database for LanceNamespaceDatabase {
|
||||
}
|
||||
}
|
||||
|
||||
async fn list_namespaces(&self, request: DbListNamespacesRequest) -> Result<Vec<String>> {
|
||||
let ns_request = ListNamespacesRequest {
|
||||
id: if request.namespace.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(request.namespace)
|
||||
},
|
||||
page_token: request.page_token,
|
||||
limit: request.limit.map(|l| l as i32),
|
||||
};
|
||||
|
||||
let response = self
|
||||
.namespace
|
||||
.list_namespaces(ns_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to list namespaces: {}", e),
|
||||
})?;
|
||||
|
||||
Ok(response.namespaces)
|
||||
async fn list_namespaces(
|
||||
&self,
|
||||
request: ListNamespacesRequest,
|
||||
) -> Result<ListNamespacesResponse> {
|
||||
Ok(self.namespace.list_namespaces(request).await?)
|
||||
}
|
||||
|
||||
async fn create_namespace(&self, request: DbCreateNamespaceRequest) -> Result<()> {
|
||||
let ns_request = CreateNamespaceRequest {
|
||||
id: if request.namespace.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(request.namespace)
|
||||
},
|
||||
mode: None,
|
||||
properties: None,
|
||||
};
|
||||
|
||||
self.namespace
|
||||
.create_namespace(ns_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to create namespace: {}", e),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
async fn create_namespace(
|
||||
&self,
|
||||
request: CreateNamespaceRequest,
|
||||
) -> Result<CreateNamespaceResponse> {
|
||||
Ok(self.namespace.create_namespace(request).await?)
|
||||
}
|
||||
|
||||
async fn drop_namespace(&self, request: DbDropNamespaceRequest) -> Result<()> {
|
||||
let ns_request = DropNamespaceRequest {
|
||||
id: if request.namespace.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(request.namespace)
|
||||
},
|
||||
mode: None,
|
||||
behavior: None,
|
||||
};
|
||||
async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<DropNamespaceResponse> {
|
||||
Ok(self.namespace.drop_namespace(request).await?)
|
||||
}
|
||||
|
||||
self.namespace
|
||||
.drop_namespace(ns_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to drop namespace: {}", e),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
async fn describe_namespace(
|
||||
&self,
|
||||
request: DescribeNamespaceRequest,
|
||||
) -> Result<DescribeNamespaceResponse> {
|
||||
Ok(self.namespace.describe_namespace(request).await?)
|
||||
}
|
||||
|
||||
async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>> {
|
||||
let ns_request = ListTablesRequest {
|
||||
id: if request.namespace.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(request.namespace)
|
||||
},
|
||||
id: Some(request.namespace),
|
||||
page_token: request.start_after,
|
||||
limit: request.limit.map(|l| l as i32),
|
||||
};
|
||||
|
||||
let response =
|
||||
self.namespace
|
||||
.list_tables(ns_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to list tables: {}", e),
|
||||
})?;
|
||||
let response = self.namespace.list_tables(ns_request).await?;
|
||||
|
||||
Ok(response.tables)
|
||||
}
|
||||
|
||||
async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
|
||||
Ok(self.namespace.list_tables(request).await?)
|
||||
}
|
||||
|
||||
async fn create_table(&self, request: DbCreateTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
// Extract user-provided storage options from request
|
||||
let user_storage_options = request
|
||||
@@ -290,6 +249,10 @@ impl Database for LanceNamespaceDatabase {
|
||||
)
|
||||
.await?;
|
||||
|
||||
let namespace_client = self
|
||||
.server_side_query_enabled
|
||||
.then(|| self.namespace.clone());
|
||||
|
||||
return listing_db
|
||||
.open_table(OpenTableRequest {
|
||||
name: request.name.clone(),
|
||||
@@ -297,6 +260,7 @@ impl Database for LanceNamespaceDatabase {
|
||||
index_cache_size: None,
|
||||
lance_read_params: None,
|
||||
location: Some(location),
|
||||
namespace_client,
|
||||
})
|
||||
.await;
|
||||
}
|
||||
@@ -333,12 +297,16 @@ impl Database for LanceNamespaceDatabase {
|
||||
let listing_db = self
|
||||
.create_listing_database(
|
||||
&location,
|
||||
table_id,
|
||||
table_id.clone(),
|
||||
user_storage_options,
|
||||
create_empty_response.storage_options.as_ref(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let namespace_client = self
|
||||
.server_side_query_enabled
|
||||
.then(|| self.namespace.clone());
|
||||
|
||||
let create_request = DbCreateTableRequest {
|
||||
name: request.name,
|
||||
namespace: request.namespace,
|
||||
@@ -346,7 +314,9 @@ impl Database for LanceNamespaceDatabase {
|
||||
mode: request.mode,
|
||||
write_options: request.write_options,
|
||||
location: Some(location),
|
||||
namespace_client,
|
||||
};
|
||||
|
||||
listing_db.create_table(create_request).await
|
||||
}
|
||||
|
||||
@@ -380,19 +350,25 @@ impl Database for LanceNamespaceDatabase {
|
||||
let listing_db = self
|
||||
.create_listing_database(
|
||||
&location,
|
||||
table_id,
|
||||
table_id.clone(),
|
||||
user_storage_options,
|
||||
response.storage_options.as_ref(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let namespace_client = self
|
||||
.server_side_query_enabled
|
||||
.then(|| self.namespace.clone());
|
||||
|
||||
let open_request = OpenTableRequest {
|
||||
name: request.name.clone(),
|
||||
namespace: request.namespace.clone(),
|
||||
index_cache_size: request.index_cache_size,
|
||||
lance_read_params: request.lance_read_params,
|
||||
location: Some(location),
|
||||
namespace_client,
|
||||
};
|
||||
|
||||
listing_db.open_table(open_request).await
|
||||
}
|
||||
|
||||
@@ -429,6 +405,7 @@ impl Database for LanceNamespaceDatabase {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(deprecated)]
|
||||
async fn drop_all_tables(&self, namespace: &[String]) -> Result<()> {
|
||||
let tables = self
|
||||
.table_names(TableNamesRequest {
|
||||
@@ -455,7 +432,6 @@ impl Database for LanceNamespaceDatabase {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::connect_namespace;
|
||||
use crate::database::CreateNamespaceRequest;
|
||||
use crate::query::ExecutableQuery;
|
||||
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
@@ -568,7 +544,9 @@ mod tests {
|
||||
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
namespace: vec!["test_ns".into()],
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to create namespace");
|
||||
@@ -627,7 +605,9 @@ mod tests {
|
||||
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
namespace: vec!["test_ns".into()],
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to create namespace");
|
||||
@@ -689,7 +669,9 @@ mod tests {
|
||||
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
namespace: vec!["test_ns".into()],
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to create namespace");
|
||||
@@ -771,7 +753,9 @@ mod tests {
|
||||
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
namespace: vec!["test_ns".into()],
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to create namespace");
|
||||
@@ -825,7 +809,9 @@ mod tests {
|
||||
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
namespace: vec!["test_ns".into()],
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to create namespace");
|
||||
@@ -904,7 +890,9 @@ mod tests {
|
||||
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
namespace: vec!["test_ns".into()],
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to create namespace");
|
||||
@@ -936,7 +924,9 @@ mod tests {
|
||||
|
||||
// Create a child namespace first
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
namespace: vec!["test_ns".into()],
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
mode: None,
|
||||
properties: None,
|
||||
})
|
||||
.await
|
||||
.expect("Failed to create namespace");
|
||||
@@ -977,4 +967,46 @@ mod tests {
|
||||
let open_result = conn.open_table("drop_test").execute().await;
|
||||
assert!(open_result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_table_names_at_root() {
|
||||
// Test that table_names at root (empty namespace) works correctly
|
||||
// This is a regression test for a bug where empty namespace was converted to None
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let conn = connect_namespace("dir", properties)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to connect to namespace");
|
||||
|
||||
// Create multiple tables at root namespace
|
||||
let test_data1 = create_test_data();
|
||||
let _table1 = conn
|
||||
.create_table("table1", test_data1)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create table1 at root");
|
||||
|
||||
let test_data2 = create_test_data();
|
||||
let _table2 = conn
|
||||
.create_table("table2", test_data2)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create table2 at root");
|
||||
|
||||
// List tables at root using table_names (empty namespace means root)
|
||||
let table_names = conn
|
||||
.table_names()
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to list tables at root");
|
||||
|
||||
assert!(table_names.contains(&"table1".to_string()));
|
||||
assert!(table_names.contains(&"table2".to_string()));
|
||||
assert_eq!(table_names.len(), 2);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ use crate::{table::BaseTable, DistanceType, Error, Result};
|
||||
|
||||
use self::{
|
||||
scalar::{BTreeIndexBuilder, BitmapIndexBuilder, LabelListIndexBuilder},
|
||||
vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
|
||||
vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder, IvfSqIndexBuilder},
|
||||
};
|
||||
|
||||
pub mod scalar;
|
||||
@@ -54,6 +54,9 @@ pub enum Index {
|
||||
/// IVF index with Product Quantization
|
||||
IvfPq(IvfPqIndexBuilder),
|
||||
|
||||
/// IVF index with Scalar Quantization
|
||||
IvfSq(IvfSqIndexBuilder),
|
||||
|
||||
/// IVF index with RabitQ Quantization
|
||||
IvfRq(IvfRqIndexBuilder),
|
||||
|
||||
@@ -277,6 +280,8 @@ pub enum IndexType {
|
||||
// Vector
|
||||
#[serde(alias = "IVF_FLAT")]
|
||||
IvfFlat,
|
||||
#[serde(alias = "IVF_SQ")]
|
||||
IvfSq,
|
||||
#[serde(alias = "IVF_PQ")]
|
||||
IvfPq,
|
||||
#[serde(alias = "IVF_RQ")]
|
||||
@@ -301,6 +306,7 @@ impl std::fmt::Display for IndexType {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::IvfFlat => write!(f, "IVF_FLAT"),
|
||||
Self::IvfSq => write!(f, "IVF_SQ"),
|
||||
Self::IvfPq => write!(f, "IVF_PQ"),
|
||||
Self::IvfRq => write!(f, "IVF_RQ"),
|
||||
Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
|
||||
@@ -323,6 +329,7 @@ impl std::str::FromStr for IndexType {
|
||||
"LABEL_LIST" | "LABELLIST" => Ok(Self::LabelList),
|
||||
"FTS" | "INVERTED" => Ok(Self::FTS),
|
||||
"IVF_FLAT" => Ok(Self::IvfFlat),
|
||||
"IVF_SQ" => Ok(Self::IvfSq),
|
||||
"IVF_PQ" => Ok(Self::IvfPq),
|
||||
"IVF_RQ" => Ok(Self::IvfRq),
|
||||
"IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
|
||||
|
||||
@@ -209,6 +209,38 @@ impl IvfFlatIndexBuilder {
|
||||
impl_ivf_params_setter!();
|
||||
}
|
||||
|
||||
/// Builder for an IVF SQ index.
|
||||
///
|
||||
/// This index compresses vectors using scalar quantization and groups them into IVF partitions.
|
||||
/// It offers a balance between search performance and storage footprint.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IvfSqIndexBuilder {
|
||||
pub(crate) distance_type: DistanceType,
|
||||
|
||||
// IVF
|
||||
pub(crate) num_partitions: Option<u32>,
|
||||
pub(crate) sample_rate: u32,
|
||||
pub(crate) max_iterations: u32,
|
||||
pub(crate) target_partition_size: Option<u32>,
|
||||
}
|
||||
|
||||
impl Default for IvfSqIndexBuilder {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
distance_type: DistanceType::L2,
|
||||
num_partitions: None,
|
||||
sample_rate: 256,
|
||||
max_iterations: 50,
|
||||
target_partition_size: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IvfSqIndexBuilder {
|
||||
impl_distance_type_setter!();
|
||||
impl_ivf_params_setter!();
|
||||
}
|
||||
|
||||
/// Builder for an IVF PQ index.
|
||||
///
|
||||
/// This index stores a compressed (quantized) copy of every vector. These vectors
|
||||
|
||||
@@ -71,7 +71,7 @@
|
||||
//! It treats [`FixedSizeList<Float16/Float32>`](https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeListArray.html)
|
||||
//! columns as vector columns.
|
||||
//!
|
||||
//! For more details, please refer to [LanceDB documentation](https://lancedb.github.io/lancedb/).
|
||||
//! For more details, please refer to the [LanceDB documentation](https://lancedb.com/docs).
|
||||
//!
|
||||
//! #### Create a table
|
||||
//!
|
||||
|
||||
@@ -10,13 +10,17 @@ use http::StatusCode;
|
||||
use lance_io::object_store::StorageOptions;
|
||||
use moka::future::Cache;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use serde::Deserialize;
|
||||
use tokio::task::spawn_blocking;
|
||||
|
||||
use lance_namespace::models::{
|
||||
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
|
||||
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
|
||||
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||
};
|
||||
|
||||
use crate::database::{
|
||||
CloneTableRequest, CreateNamespaceRequest, CreateTableData, CreateTableMode,
|
||||
CreateTableRequest, Database, DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest,
|
||||
OpenTableRequest, ReadConsistency, TableNamesRequest,
|
||||
CloneTableRequest, CreateTableData, CreateTableMode, CreateTableRequest, Database,
|
||||
DatabaseOptions, OpenTableRequest, ReadConsistency, TableNamesRequest,
|
||||
};
|
||||
use crate::error::Result;
|
||||
use crate::table::BaseTable;
|
||||
@@ -90,7 +94,7 @@ pub struct RemoteDatabaseOptions {
|
||||
pub host_override: Option<String>,
|
||||
/// Storage options configure the storage layer (e.g. S3, GCS, Azure, etc.)
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
///
|
||||
/// These options are only used for LanceDB Enterprise and only a subset of options
|
||||
/// are supported.
|
||||
@@ -180,11 +184,6 @@ impl RemoteDatabaseOptionsBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ListTablesResponse {
|
||||
tables: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RemoteDatabase<S: HttpSend = Sender> {
|
||||
client: RestfulLanceDbClient<S>,
|
||||
@@ -337,7 +336,6 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
|
||||
self.client
|
||||
.get(&format!("/v1/namespace/{}/table/list", namespace_id))
|
||||
} else {
|
||||
// TODO: use new API for all listing operations once stable
|
||||
self.client.get("/v1/table/")
|
||||
};
|
||||
|
||||
@@ -371,6 +369,44 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
|
||||
Ok(tables)
|
||||
}
|
||||
|
||||
async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
|
||||
let namespace_parts = request.id.as_deref().unwrap_or(&[]);
|
||||
let namespace_id = build_namespace_identifier(namespace_parts, &self.client.id_delimiter);
|
||||
let mut req = self
|
||||
.client
|
||||
.get(&format!("/v1/namespace/{}/table/list", namespace_id));
|
||||
|
||||
if let Some(limit) = request.limit {
|
||||
req = req.query(&[("limit", limit)]);
|
||||
}
|
||||
if let Some(ref page_token) = request.page_token {
|
||||
req = req.query(&[("page_token", page_token)]);
|
||||
}
|
||||
|
||||
let (request_id, rsp) = self.client.send_with_retry(req, None, true).await?;
|
||||
let rsp = self.client.check_response(&request_id, rsp).await?;
|
||||
let version = parse_server_version(&request_id, &rsp)?;
|
||||
let response: ListTablesResponse = rsp.json().await.err_to_http(request_id)?;
|
||||
|
||||
// Cache the tables for future use
|
||||
let namespace_vec = namespace_parts.to_vec();
|
||||
for table in &response.tables {
|
||||
let table_identifier =
|
||||
build_table_identifier(table, &namespace_vec, &self.client.id_delimiter);
|
||||
let cache_key = build_cache_key(table, &namespace_vec);
|
||||
let remote_table = Arc::new(RemoteTable::new(
|
||||
self.client.clone(),
|
||||
table.clone(),
|
||||
namespace_vec.clone(),
|
||||
table_identifier.clone(),
|
||||
version.clone(),
|
||||
));
|
||||
self.table_cache.insert(cache_key, remote_table).await;
|
||||
}
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
let data = match request.data {
|
||||
CreateTableData::Data(data) => data,
|
||||
@@ -417,6 +453,7 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
|
||||
index_cache_size: None,
|
||||
lance_read_params: None,
|
||||
location: None,
|
||||
namespace_client: None,
|
||||
};
|
||||
let req = (callback)(req);
|
||||
self.open_table(req).await
|
||||
@@ -590,53 +627,101 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
|
||||
})
|
||||
}
|
||||
|
||||
async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>> {
|
||||
let namespace_id =
|
||||
build_namespace_identifier(request.namespace.as_slice(), &self.client.id_delimiter);
|
||||
async fn list_namespaces(
|
||||
&self,
|
||||
request: ListNamespacesRequest,
|
||||
) -> Result<ListNamespacesResponse> {
|
||||
let namespace_parts = request.id.as_deref().unwrap_or(&[]);
|
||||
let namespace_id = build_namespace_identifier(namespace_parts, &self.client.id_delimiter);
|
||||
let mut req = self
|
||||
.client
|
||||
.get(&format!("/v1/namespace/{}/list", namespace_id));
|
||||
if let Some(limit) = request.limit {
|
||||
req = req.query(&[("limit", limit)]);
|
||||
}
|
||||
if let Some(page_token) = request.page_token {
|
||||
if let Some(ref page_token) = request.page_token {
|
||||
req = req.query(&[("page_token", page_token)]);
|
||||
}
|
||||
|
||||
let (request_id, resp) = self.client.send(req).await?;
|
||||
let resp = self.client.check_response(&request_id, resp).await?;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ListNamespacesResponse {
|
||||
namespaces: Vec<String>,
|
||||
}
|
||||
|
||||
let parsed: ListNamespacesResponse = resp.json().await.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to parse namespace response: {}", e),
|
||||
})?;
|
||||
Ok(parsed.namespaces)
|
||||
resp.json().await.err_to_http(request_id)
|
||||
}
|
||||
|
||||
async fn create_namespace(&self, request: CreateNamespaceRequest) -> Result<()> {
|
||||
let namespace_id =
|
||||
build_namespace_identifier(request.namespace.as_slice(), &self.client.id_delimiter);
|
||||
let req = self
|
||||
async fn create_namespace(
|
||||
&self,
|
||||
request: CreateNamespaceRequest,
|
||||
) -> Result<CreateNamespaceResponse> {
|
||||
let namespace_parts = request.id.as_deref().unwrap_or(&[]);
|
||||
let namespace_id = build_namespace_identifier(namespace_parts, &self.client.id_delimiter);
|
||||
let mut req = self
|
||||
.client
|
||||
.post(&format!("/v1/namespace/{}/create", namespace_id));
|
||||
|
||||
// Build request body with mode and properties if present
|
||||
#[derive(serde::Serialize)]
|
||||
struct CreateNamespaceRequestBody {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
mode: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
properties: Option<HashMap<String, String>>,
|
||||
}
|
||||
|
||||
let body = CreateNamespaceRequestBody {
|
||||
mode: request.mode.as_ref().map(|m| format!("{:?}", m)),
|
||||
properties: request.properties,
|
||||
};
|
||||
|
||||
req = req.json(&body);
|
||||
let (request_id, resp) = self.client.send(req).await?;
|
||||
self.client.check_response(&request_id, resp).await?;
|
||||
Ok(())
|
||||
let resp = self.client.check_response(&request_id, resp).await?;
|
||||
|
||||
resp.json().await.err_to_http(request_id)
|
||||
}
|
||||
|
||||
async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<()> {
|
||||
let namespace_id =
|
||||
build_namespace_identifier(request.namespace.as_slice(), &self.client.id_delimiter);
|
||||
let req = self
|
||||
async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<DropNamespaceResponse> {
|
||||
let namespace_parts = request.id.as_deref().unwrap_or(&[]);
|
||||
let namespace_id = build_namespace_identifier(namespace_parts, &self.client.id_delimiter);
|
||||
let mut req = self
|
||||
.client
|
||||
.post(&format!("/v1/namespace/{}/drop", namespace_id));
|
||||
|
||||
// Build request body with mode and behavior if present
|
||||
#[derive(serde::Serialize)]
|
||||
struct DropNamespaceRequestBody {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
mode: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
behavior: Option<String>,
|
||||
}
|
||||
|
||||
let body = DropNamespaceRequestBody {
|
||||
mode: request.mode.as_ref().map(|m| format!("{:?}", m)),
|
||||
behavior: request.behavior.as_ref().map(|b| format!("{:?}", b)),
|
||||
};
|
||||
|
||||
req = req.json(&body);
|
||||
let (request_id, resp) = self.client.send(req).await?;
|
||||
self.client.check_response(&request_id, resp).await?;
|
||||
Ok(())
|
||||
let resp = self.client.check_response(&request_id, resp).await?;
|
||||
|
||||
resp.json().await.err_to_http(request_id)
|
||||
}
|
||||
|
||||
async fn describe_namespace(
|
||||
&self,
|
||||
request: DescribeNamespaceRequest,
|
||||
) -> Result<DescribeNamespaceResponse> {
|
||||
let namespace_parts = request.id.as_deref().unwrap_or(&[]);
|
||||
let namespace_id = build_namespace_identifier(namespace_parts, &self.client.id_delimiter);
|
||||
let req = self
|
||||
.client
|
||||
.get(&format!("/v1/namespace/{}/describe", namespace_id));
|
||||
|
||||
let (request_id, resp) = self.client.send(req).await?;
|
||||
let resp = self.client.check_response(&request_id, resp).await?;
|
||||
|
||||
resp.json().await.err_to_http(request_id)
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
|
||||
@@ -1072,6 +1072,14 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
body["num_bits"] = serde_json::Value::Number(num_bits.into());
|
||||
}
|
||||
}
|
||||
Index::IvfSq(index) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_SQ".to_string());
|
||||
body[METRIC_TYPE_KEY] =
|
||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
||||
if let Some(num_partitions) = index.num_partitions {
|
||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
||||
}
|
||||
}
|
||||
Index::IvfHnswSq(index) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_HNSW_SQ".to_string());
|
||||
body[METRIC_TYPE_KEY] =
|
||||
@@ -1183,6 +1191,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
num_deleted_rows: 0,
|
||||
num_inserted_rows: 0,
|
||||
num_updated_rows: 0,
|
||||
num_attempts: 0,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -40,6 +40,11 @@ use lance_index::vector::pq::PQBuildParams;
|
||||
use lance_index::vector::sq::builder::SQBuildParams;
|
||||
use lance_index::DatasetIndexExt;
|
||||
use lance_index::IndexType;
|
||||
use lance_namespace::models::{
|
||||
QueryTableRequest as NsQueryTableRequest, QueryTableRequestFullTextQuery,
|
||||
QueryTableRequestVector, StringFtsQuery,
|
||||
};
|
||||
use lance_namespace::LanceNamespace;
|
||||
use lance_table::format::Manifest;
|
||||
use lance_table::io::commit::ManifestNamingScheme;
|
||||
use log::info;
|
||||
@@ -467,6 +472,11 @@ pub struct MergeResult {
|
||||
/// However those rows are not shared with the user.
|
||||
#[serde(default)]
|
||||
pub num_deleted_rows: u64,
|
||||
/// Number of attempts performed during the merge operation.
|
||||
/// This includes the initial attempt plus any retries due to transaction conflicts.
|
||||
/// A value of 1 means the operation succeeded on the first try.
|
||||
#[serde(default)]
|
||||
pub num_attempts: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||
@@ -1475,7 +1485,7 @@ impl NativeTableExt for Arc<dyn BaseTable> {
|
||||
}
|
||||
|
||||
/// A table in a LanceDB database.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Clone)]
|
||||
pub struct NativeTable {
|
||||
name: String,
|
||||
namespace: Vec<String>,
|
||||
@@ -1485,6 +1495,22 @@ pub struct NativeTable {
|
||||
// This comes from the connection options. We store here so we can pass down
|
||||
// to the dataset when we recreate it (for example, in checkout_latest).
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
// Optional namespace client for server-side query execution.
|
||||
// When set, queries will be executed on the namespace server instead of locally.
|
||||
namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for NativeTable {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("NativeTable")
|
||||
.field("name", &self.name)
|
||||
.field("namespace", &self.namespace)
|
||||
.field("id", &self.id)
|
||||
.field("uri", &self.uri)
|
||||
.field("read_consistency_interval", &self.read_consistency_interval)
|
||||
.field("namespace_client", &self.namespace_client)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for NativeTable {
|
||||
@@ -1519,7 +1545,7 @@ impl NativeTable {
|
||||
/// * A [NativeTable] object.
|
||||
pub async fn open(uri: &str) -> Result<Self> {
|
||||
let name = Self::get_table_name(uri)?;
|
||||
Self::open_with_params(uri, &name, vec![], None, None, None).await
|
||||
Self::open_with_params(uri, &name, vec![], None, None, None, None).await
|
||||
}
|
||||
|
||||
/// Opens an existing Table
|
||||
@@ -1529,10 +1555,12 @@ impl NativeTable {
|
||||
/// * `base_path` - The base path where the table is located
|
||||
/// * `name` The Table name
|
||||
/// * `params` The [ReadParams] to use when opening the table
|
||||
/// * `namespace_client` - Optional namespace client for server-side query execution
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * A [NativeTable] object.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn open_with_params(
|
||||
uri: &str,
|
||||
name: &str,
|
||||
@@ -1540,6 +1568,7 @@ impl NativeTable {
|
||||
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
||||
params: Option<ReadParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
) -> Result<Self> {
|
||||
let params = params.unwrap_or_default();
|
||||
// patch the params if we have a write store wrapper
|
||||
@@ -1570,9 +1599,18 @@ impl NativeTable {
|
||||
uri: uri.to_string(),
|
||||
dataset,
|
||||
read_consistency_interval,
|
||||
namespace_client,
|
||||
})
|
||||
}
|
||||
|
||||
/// Set the namespace client for server-side query execution.
|
||||
///
|
||||
/// When set, queries will be executed on the namespace server instead of locally.
|
||||
pub fn with_namespace_client(mut self, namespace_client: Arc<dyn LanceNamespace>) -> Self {
|
||||
self.namespace_client = Some(namespace_client);
|
||||
self
|
||||
}
|
||||
|
||||
fn get_table_name(uri: &str) -> Result<String> {
|
||||
let path = Path::new(uri);
|
||||
let name = path
|
||||
@@ -1609,10 +1647,12 @@ impl NativeTable {
|
||||
/// * `namespace` - The namespace path. When non-empty, an explicit URI must be provided.
|
||||
/// * `batches` RecordBatch to be saved in the database.
|
||||
/// * `params` - Write parameters.
|
||||
/// * `namespace_client` - Optional namespace client for server-side query execution
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * A [TableImpl] object.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn create(
|
||||
uri: &str,
|
||||
name: &str,
|
||||
@@ -1621,6 +1661,7 @@ impl NativeTable {
|
||||
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
||||
params: Option<WriteParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
) -> Result<Self> {
|
||||
// Default params uses format v1.
|
||||
let params = params.unwrap_or(WriteParams {
|
||||
@@ -1652,9 +1693,11 @@ impl NativeTable {
|
||||
uri: uri.to_string(),
|
||||
dataset: DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval),
|
||||
read_consistency_interval,
|
||||
namespace_client,
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn create_empty(
|
||||
uri: &str,
|
||||
name: &str,
|
||||
@@ -1663,6 +1706,7 @@ impl NativeTable {
|
||||
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
||||
params: Option<WriteParams>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||
) -> Result<Self> {
|
||||
let batches = RecordBatchIterator::new(vec![], schema);
|
||||
Self::create(
|
||||
@@ -1673,6 +1717,7 @@ impl NativeTable {
|
||||
write_store_wrapper,
|
||||
params,
|
||||
read_consistency_interval,
|
||||
namespace_client,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -1810,8 +1855,17 @@ impl NativeTable {
|
||||
}
|
||||
|
||||
// Helper to get num_sub_vectors with default calculation
|
||||
fn get_num_sub_vectors(provided: Option<u32>, dim: u32) -> u32 {
|
||||
provided.unwrap_or_else(|| suggested_num_sub_vectors(dim))
|
||||
fn get_num_sub_vectors(provided: Option<u32>, dim: u32, num_bits: Option<u32>) -> u32 {
|
||||
if let Some(provided) = provided {
|
||||
return provided;
|
||||
}
|
||||
let suggested = suggested_num_sub_vectors(dim);
|
||||
if num_bits.is_some_and(|num_bits| num_bits == 4) && suggested % 2 != 0 {
|
||||
// num_sub_vectors must be even when 4 bits are used
|
||||
suggested + 1
|
||||
} else {
|
||||
suggested
|
||||
}
|
||||
}
|
||||
|
||||
// Helper to extract vector dimension from field
|
||||
@@ -1834,7 +1888,7 @@ impl NativeTable {
|
||||
// Use IvfPq as the default for auto vector indices
|
||||
let dim = Self::get_vector_dimension(field)?;
|
||||
let ivf_params = lance_index::vector::ivf::IvfBuildParams::default();
|
||||
let num_sub_vectors = Self::get_num_sub_vectors(None, dim);
|
||||
let num_sub_vectors = Self::get_num_sub_vectors(None, dim, None);
|
||||
let pq_params =
|
||||
lance_index::vector::pq::PQBuildParams::new(num_sub_vectors as usize, 8);
|
||||
let lance_idx_params =
|
||||
@@ -1892,6 +1946,25 @@ impl NativeTable {
|
||||
VectorIndexParams::with_ivf_flat_params(index.distance_type.into(), ivf_params);
|
||||
Ok(Box::new(lance_idx_params))
|
||||
}
|
||||
Index::IvfSq(index) => {
|
||||
Self::validate_index_type(field, "IVF SQ", supported_vector_data_type)?;
|
||||
let ivf_params = Self::build_ivf_params(
|
||||
index.num_partitions,
|
||||
index.target_partition_size,
|
||||
index.sample_rate,
|
||||
index.max_iterations,
|
||||
);
|
||||
let sq_params = SQBuildParams {
|
||||
sample_rate: index.sample_rate as usize,
|
||||
..Default::default()
|
||||
};
|
||||
let lance_idx_params = VectorIndexParams::with_ivf_sq_params(
|
||||
index.distance_type.into(),
|
||||
ivf_params,
|
||||
sq_params,
|
||||
);
|
||||
Ok(Box::new(lance_idx_params))
|
||||
}
|
||||
Index::IvfPq(index) => {
|
||||
Self::validate_index_type(field, "IVF PQ", supported_vector_data_type)?;
|
||||
let dim = Self::get_vector_dimension(field)?;
|
||||
@@ -1901,7 +1974,8 @@ impl NativeTable {
|
||||
index.sample_rate,
|
||||
index.max_iterations,
|
||||
);
|
||||
let num_sub_vectors = Self::get_num_sub_vectors(index.num_sub_vectors, dim);
|
||||
let num_sub_vectors =
|
||||
Self::get_num_sub_vectors(index.num_sub_vectors, dim, index.num_bits);
|
||||
let num_bits = index.num_bits.unwrap_or(8) as usize;
|
||||
let mut pq_params = PQBuildParams::new(num_sub_vectors as usize, num_bits);
|
||||
pq_params.max_iters = index.max_iterations as usize;
|
||||
@@ -1937,7 +2011,8 @@ impl NativeTable {
|
||||
index.sample_rate,
|
||||
index.max_iterations,
|
||||
);
|
||||
let num_sub_vectors = Self::get_num_sub_vectors(index.num_sub_vectors, dim);
|
||||
let num_sub_vectors =
|
||||
Self::get_num_sub_vectors(index.num_sub_vectors, dim, index.num_bits);
|
||||
let hnsw_params = HnswBuildParams::default()
|
||||
.num_edges(index.m as usize)
|
||||
.ef_construction(index.ef_construction as usize);
|
||||
@@ -1997,6 +2072,7 @@ impl NativeTable {
|
||||
Index::LabelList(_) => IndexType::LabelList,
|
||||
Index::FTS(_) => IndexType::Inverted,
|
||||
Index::IvfFlat(_)
|
||||
| Index::IvfSq(_)
|
||||
| Index::IvfPq(_)
|
||||
| Index::IvfRq(_)
|
||||
| Index::IvfHnswPq(_)
|
||||
@@ -2019,6 +2095,278 @@ impl NativeTable {
|
||||
Ok(DatasetRecordBatchStream::new(inner))
|
||||
}
|
||||
|
||||
/// Execute a query on the namespace server instead of locally.
|
||||
async fn namespace_query(
|
||||
&self,
|
||||
namespace_client: Arc<dyn LanceNamespace>,
|
||||
query: &AnyQuery,
|
||||
_options: QueryExecutionOptions,
|
||||
) -> Result<DatasetRecordBatchStream> {
|
||||
// Build table_id from namespace + table name
|
||||
let mut table_id = self.namespace.clone();
|
||||
table_id.push(self.name.clone());
|
||||
|
||||
// Convert AnyQuery to namespace QueryTableRequest
|
||||
let mut ns_request = self.convert_to_namespace_query(query)?;
|
||||
// Set the table ID on the request
|
||||
ns_request.id = Some(table_id);
|
||||
|
||||
// Call the namespace query_table API
|
||||
let response_bytes = namespace_client
|
||||
.query_table(ns_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to execute server-side query: {}", e),
|
||||
})?;
|
||||
|
||||
// Parse the Arrow IPC response into a RecordBatchStream
|
||||
self.parse_arrow_ipc_response(response_bytes).await
|
||||
}
|
||||
|
||||
/// Convert a QueryFilter to a SQL string for the namespace API.
|
||||
fn filter_to_sql(&self, filter: &QueryFilter) -> Result<String> {
|
||||
match filter {
|
||||
QueryFilter::Sql(sql) => Ok(sql.clone()),
|
||||
QueryFilter::Substrait(_) => Err(Error::NotSupported {
|
||||
message: "Substrait filters are not supported for server-side queries".to_string(),
|
||||
}),
|
||||
QueryFilter::Datafusion(_) => Err(Error::NotSupported {
|
||||
message: "Datafusion expression filters are not supported for server-side queries. Use SQL filter instead.".to_string(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an AnyQuery to the namespace QueryTableRequest format.
|
||||
fn convert_to_namespace_query(&self, query: &AnyQuery) -> Result<NsQueryTableRequest> {
|
||||
match query {
|
||||
AnyQuery::VectorQuery(vq) => {
|
||||
// Extract the query vector(s)
|
||||
let vector = self.extract_query_vector(&vq.query_vector)?;
|
||||
|
||||
// Convert filter to SQL string
|
||||
let filter = match &vq.base.filter {
|
||||
Some(f) => Some(self.filter_to_sql(f)?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
// Convert select to columns list
|
||||
let columns = match &vq.base.select {
|
||||
Select::All => None,
|
||||
Select::Columns(cols) => Some(cols.clone()),
|
||||
Select::Dynamic(_) => {
|
||||
return Err(Error::NotSupported {
|
||||
message:
|
||||
"Dynamic column selection is not supported for server-side queries"
|
||||
.to_string(),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
// Check for unsupported features
|
||||
if vq.base.reranker.is_some() {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Reranker is not supported for server-side queries".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Convert FTS query if present
|
||||
let full_text_query = vq.base.full_text_search.as_ref().map(|fts| {
|
||||
let columns = fts.columns();
|
||||
let columns_vec = if columns.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(columns.into_iter().collect())
|
||||
};
|
||||
Box::new(QueryTableRequestFullTextQuery {
|
||||
string_query: Some(Box::new(StringFtsQuery {
|
||||
query: fts.query.to_string(),
|
||||
columns: columns_vec,
|
||||
})),
|
||||
structured_query: None,
|
||||
})
|
||||
});
|
||||
|
||||
Ok(NsQueryTableRequest {
|
||||
id: None, // Will be set in namespace_query
|
||||
k: vq.base.limit.unwrap_or(10) as i32,
|
||||
vector: Box::new(vector),
|
||||
vector_column: vq.column.clone(),
|
||||
filter,
|
||||
columns,
|
||||
offset: vq.base.offset.map(|o| o as i32),
|
||||
distance_type: vq.distance_type.map(|dt| dt.to_string()),
|
||||
nprobes: Some(vq.minimum_nprobes as i32),
|
||||
ef: vq.ef.map(|e| e as i32),
|
||||
refine_factor: vq.refine_factor.map(|r| r as i32),
|
||||
lower_bound: vq.lower_bound,
|
||||
upper_bound: vq.upper_bound,
|
||||
prefilter: Some(vq.base.prefilter),
|
||||
fast_search: Some(vq.base.fast_search),
|
||||
with_row_id: Some(vq.base.with_row_id),
|
||||
bypass_vector_index: Some(!vq.use_index),
|
||||
full_text_query,
|
||||
version: None,
|
||||
})
|
||||
}
|
||||
AnyQuery::Query(q) => {
|
||||
// For non-vector queries, pass an empty vector (similar to remote table implementation)
|
||||
if q.reranker.is_some() {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Reranker is not supported for server-side query execution"
|
||||
.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
let filter = q
|
||||
.filter
|
||||
.as_ref()
|
||||
.map(|f| self.filter_to_sql(f))
|
||||
.transpose()?;
|
||||
|
||||
let columns = match &q.select {
|
||||
Select::All => None,
|
||||
Select::Columns(cols) => Some(cols.clone()),
|
||||
Select::Dynamic(_) => {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Dynamic columns are not supported for server-side query"
|
||||
.to_string(),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
// Handle full text search if present
|
||||
let full_text_query = q.full_text_search.as_ref().map(|fts| {
|
||||
let columns_vec = if fts.columns().is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(fts.columns().iter().cloned().collect())
|
||||
};
|
||||
Box::new(QueryTableRequestFullTextQuery {
|
||||
string_query: Some(Box::new(StringFtsQuery {
|
||||
query: fts.query.to_string(),
|
||||
columns: columns_vec,
|
||||
})),
|
||||
structured_query: None,
|
||||
})
|
||||
});
|
||||
|
||||
// Empty vector for non-vector queries
|
||||
let vector = Box::new(QueryTableRequestVector {
|
||||
single_vector: Some(vec![]),
|
||||
multi_vector: None,
|
||||
});
|
||||
|
||||
Ok(NsQueryTableRequest {
|
||||
id: None, // Will be set by caller
|
||||
vector,
|
||||
k: q.limit.unwrap_or(10) as i32,
|
||||
filter,
|
||||
columns,
|
||||
prefilter: Some(q.prefilter),
|
||||
offset: q.offset.map(|o| o as i32),
|
||||
ef: None,
|
||||
refine_factor: None,
|
||||
distance_type: None,
|
||||
nprobes: None,
|
||||
vector_column: None, // No vector column for plain queries
|
||||
with_row_id: Some(q.with_row_id),
|
||||
bypass_vector_index: Some(true), // No vector index for plain queries
|
||||
full_text_query,
|
||||
version: None,
|
||||
fast_search: None,
|
||||
lower_bound: None,
|
||||
upper_bound: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract query vector(s) from Arrow arrays into the namespace format.
|
||||
fn extract_query_vector(
|
||||
&self,
|
||||
query_vectors: &[Arc<dyn arrow_array::Array>],
|
||||
) -> Result<QueryTableRequestVector> {
|
||||
if query_vectors.is_empty() {
|
||||
return Err(Error::InvalidInput {
|
||||
message: "Query vector is required for vector search".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Handle single vector case
|
||||
if query_vectors.len() == 1 {
|
||||
let arr = &query_vectors[0];
|
||||
let single_vector = self.array_to_f32_vec(arr)?;
|
||||
Ok(QueryTableRequestVector {
|
||||
single_vector: Some(single_vector),
|
||||
multi_vector: None,
|
||||
})
|
||||
} else {
|
||||
// Handle multi-vector case
|
||||
let multi_vector: Result<Vec<Vec<f32>>> = query_vectors
|
||||
.iter()
|
||||
.map(|arr| self.array_to_f32_vec(arr))
|
||||
.collect();
|
||||
Ok(QueryTableRequestVector {
|
||||
single_vector: None,
|
||||
multi_vector: Some(multi_vector?),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an Arrow array to a Vec<f32>.
|
||||
fn array_to_f32_vec(&self, arr: &Arc<dyn arrow_array::Array>) -> Result<Vec<f32>> {
|
||||
// Handle FixedSizeList (common for vectors)
|
||||
if let Some(fsl) = arr
|
||||
.as_any()
|
||||
.downcast_ref::<arrow_array::FixedSizeListArray>()
|
||||
{
|
||||
let values = fsl.values();
|
||||
if let Some(f32_arr) = values.as_any().downcast_ref::<arrow_array::Float32Array>() {
|
||||
return Ok(f32_arr.values().to_vec());
|
||||
}
|
||||
}
|
||||
|
||||
// Handle direct Float32Array
|
||||
if let Some(f32_arr) = arr.as_any().downcast_ref::<arrow_array::Float32Array>() {
|
||||
return Ok(f32_arr.values().to_vec());
|
||||
}
|
||||
|
||||
Err(Error::InvalidInput {
|
||||
message: "Query vector must be Float32 type".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse Arrow IPC response from the namespace server.
|
||||
async fn parse_arrow_ipc_response(
|
||||
&self,
|
||||
bytes: bytes::Bytes,
|
||||
) -> Result<DatasetRecordBatchStream> {
|
||||
use arrow_ipc::reader::StreamReader;
|
||||
use std::io::Cursor;
|
||||
|
||||
let cursor = Cursor::new(bytes);
|
||||
let reader = StreamReader::try_new(cursor, None).map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to parse Arrow IPC response: {}", e),
|
||||
})?;
|
||||
|
||||
// Collect all record batches
|
||||
let schema = reader.schema();
|
||||
let batches: Vec<_> = reader
|
||||
.into_iter()
|
||||
.collect::<std::result::Result<Vec<_>, _>>()
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to read Arrow IPC batches: {}", e),
|
||||
})?;
|
||||
|
||||
// Create a stream from the batches
|
||||
let stream = futures::stream::iter(batches.into_iter().map(Ok));
|
||||
let record_batch_stream = Box::pin(
|
||||
datafusion_physical_plan::stream::RecordBatchStreamAdapter::new(schema, stream),
|
||||
);
|
||||
|
||||
Ok(DatasetRecordBatchStream::new(record_batch_stream))
|
||||
}
|
||||
|
||||
/// Check whether the table uses V2 manifest paths.
|
||||
///
|
||||
/// See [Self::migrate_manifest_paths_v2] and [ManifestNamingScheme] for
|
||||
@@ -2450,6 +2798,12 @@ impl BaseTable for NativeTable {
|
||||
query: &AnyQuery,
|
||||
options: QueryExecutionOptions,
|
||||
) -> Result<DatasetRecordBatchStream> {
|
||||
// If namespace client is configured, use server-side query execution
|
||||
if let Some(ref namespace_client) = self.namespace_client {
|
||||
return self
|
||||
.namespace_query(namespace_client.clone(), query, options)
|
||||
.await;
|
||||
}
|
||||
self.generic_query(query, options).await
|
||||
}
|
||||
|
||||
@@ -2520,6 +2874,7 @@ impl BaseTable for NativeTable {
|
||||
num_updated_rows: stats.num_updated_rows,
|
||||
num_inserted_rows: stats.num_inserted_rows,
|
||||
num_deleted_rows: stats.num_deleted_rows,
|
||||
num_attempts: stats.num_attempts,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -2917,7 +3272,7 @@ mod tests {
|
||||
|
||||
let batches = make_test_batches();
|
||||
let batches = Box::new(batches) as Box<dyn RecordBatchReader + Send>;
|
||||
let table = NativeTable::create(uri, "test", vec![], batches, None, None, None)
|
||||
let table = NativeTable::create(uri, "test", vec![], batches, None, None, None, None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -2979,9 +3334,13 @@ mod tests {
|
||||
// Perform a "insert if not exists"
|
||||
let mut merge_insert_builder = table.merge_insert(&["i"]);
|
||||
merge_insert_builder.when_not_matched_insert_all();
|
||||
merge_insert_builder.execute(new_batches).await.unwrap();
|
||||
let result = merge_insert_builder.execute(new_batches).await.unwrap();
|
||||
// Only 5 rows should actually be inserted
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 15);
|
||||
assert_eq!(result.num_inserted_rows, 5);
|
||||
assert_eq!(result.num_updated_rows, 0);
|
||||
assert_eq!(result.num_deleted_rows, 0);
|
||||
assert_eq!(result.num_attempts, 1);
|
||||
|
||||
// Create new data with i=15..25 (no id matches)
|
||||
let new_batches = Box::new(merge_insert_test_batches(15, 2));
|
||||
@@ -4122,6 +4481,8 @@ mod tests {
|
||||
table.prewarm_index("text_idx").await.unwrap();
|
||||
}
|
||||
|
||||
// Windows does not support precise sleep durations due to timer resolution limitations.
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
#[tokio::test]
|
||||
async fn test_read_consistency_interval() {
|
||||
let intervals = vec![
|
||||
@@ -4551,4 +4912,91 @@ mod tests {
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].index_type, crate::index::IndexType::Bitmap);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_convert_to_namespace_query_vector() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let dataset_path = tmp_dir.path().join("test_ns_query.lance");
|
||||
|
||||
let batches = make_test_batches();
|
||||
Dataset::write(batches, dataset_path.to_str().unwrap(), None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let table = NativeTable::open(dataset_path.to_str().unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Create a vector query
|
||||
let query_vector = Arc::new(Float32Array::from(vec![1.0, 2.0, 3.0, 4.0]));
|
||||
let vq = VectorQueryRequest {
|
||||
base: QueryRequest {
|
||||
limit: Some(10),
|
||||
offset: Some(5),
|
||||
filter: Some(QueryFilter::Sql("id > 0".to_string())),
|
||||
select: Select::Columns(vec!["id".to_string()]),
|
||||
..Default::default()
|
||||
},
|
||||
column: Some("vector".to_string()),
|
||||
query_vector: vec![query_vector as Arc<dyn Array>],
|
||||
minimum_nprobes: 20,
|
||||
distance_type: Some(crate::DistanceType::L2),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let any_query = AnyQuery::VectorQuery(vq);
|
||||
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
|
||||
|
||||
assert_eq!(ns_request.k, 10);
|
||||
assert_eq!(ns_request.offset, Some(5));
|
||||
assert_eq!(ns_request.filter, Some("id > 0".to_string()));
|
||||
assert_eq!(ns_request.columns, Some(vec!["id".to_string()]));
|
||||
assert_eq!(ns_request.vector_column, Some("vector".to_string()));
|
||||
assert_eq!(ns_request.distance_type, Some("l2".to_string()));
|
||||
assert!(ns_request.vector.single_vector.is_some());
|
||||
assert_eq!(
|
||||
ns_request.vector.single_vector.as_ref().unwrap(),
|
||||
&vec![1.0, 2.0, 3.0, 4.0]
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_convert_to_namespace_query_plain_query() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let dataset_path = tmp_dir.path().join("test_ns_plain.lance");
|
||||
|
||||
let batches = make_test_batches();
|
||||
Dataset::write(batches, dataset_path.to_str().unwrap(), None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let table = NativeTable::open(dataset_path.to_str().unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Create a plain (non-vector) query with filter and select
|
||||
let q = QueryRequest {
|
||||
limit: Some(20),
|
||||
offset: Some(5),
|
||||
filter: Some(QueryFilter::Sql("id > 5".to_string())),
|
||||
select: Select::Columns(vec!["id".to_string()]),
|
||||
with_row_id: true,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let any_query = AnyQuery::Query(q);
|
||||
let ns_request = table.convert_to_namespace_query(&any_query).unwrap();
|
||||
|
||||
// Plain queries should pass an empty vector
|
||||
assert_eq!(ns_request.k, 20);
|
||||
assert_eq!(ns_request.offset, Some(5));
|
||||
assert_eq!(ns_request.filter, Some("id > 5".to_string()));
|
||||
assert_eq!(ns_request.columns, Some(vec!["id".to_string()]));
|
||||
assert_eq!(ns_request.with_row_id, Some(true));
|
||||
assert_eq!(ns_request.bypass_vector_index, Some(true));
|
||||
assert!(ns_request.vector_column.is_none()); // No vector column for plain queries
|
||||
|
||||
// Should have an empty vector
|
||||
assert!(ns_request.vector.single_vector.as_ref().unwrap().is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user