Compare commits

...

10 Commits

Author SHA1 Message Date
Lance Release
93b8ac8e3e Bump version: 0.25.4-beta.1 → 0.25.4-beta.2 2025-11-19 20:24:46 +00:00
Jack Ye
1b78ccedaf feat: support async namespace connection (#2788)
Also fix 2 bugs:
1. make storage options provider serializable in ray
2. fix table.to_table() uri is wrong for namespace-backed tables
2025-11-19 12:23:50 -08:00
Mykola Skrynnyk
ca8d118f78 feat(python): support to_pydantic in async (#2438)
This request improves support for `pydantic` integration by adding
`to_pydantic` method to asynchronous queries and handling models that
use `alias` in field definitions. Fixes #2436 and closes #2437 .

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Added support for converting asynchronous query results to Pydantic
models.
- **Bug Fixes**
- Simplified conversion of query results to Pydantic models for improved
reliability.
- Improved handling of field aliases and computed fields when mapping
query results to Pydantic models.
- **Tests**
- Added tests to verify correct mapping of aliased and computed fields
in both synchronous and asynchronous scenarios.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2025-11-19 11:20:14 -08:00
Wyatt Alt
386fc9e466 feat: add num_attempts to merge insert result (#2795)
This pipes the num_attempts field from lance's merge insert result
through lancedb. This allows callers of merge_insert to get a better
idea of whether transaction conflicts are occurring.
2025-11-19 09:32:57 -08:00
Lance Release
ce1bafec1a Bump version: 0.22.4-beta.0 → 0.22.4-beta.1 2025-11-19 12:58:59 +00:00
Lance Release
8f18a7feed Bump version: 0.25.4-beta.0 → 0.25.4-beta.1 2025-11-19 12:58:16 +00:00
LanceDB Robot
e2be699544 chore: update lance dependency to v1.0.0-beta.3 (#2794)
## Summary
- update Lance dependencies to v1.0.0-beta.3 using
ci/set_lance_version.py
- verified with `cargo clippy --workspace --tests --all-features -- -D
warnings`
- formatted with `cargo fmt --all`

Refs: refs/tags/v1.0.0-beta.3
2025-11-19 09:25:45 -03:30
Xuanwo
f77b0ef37d ci: add timely lance release check (#2790)
This PR will add a timely lance release check for lancedb which will
auto bump lance while new tags released.

---

**This PR was primarily authored with Codex using GPT-5-Codex and then
hand-reviewed by me. I AM responsible for every change made in this PR.
I aimed to keep it aligned with our goals, though I may have missed
minor issues. Please flag anything that feels off, I'll fix it
quickly.**

---------

Signed-off-by: Xuanwo <github@xuanwo.io>
2025-11-19 16:43:41 +08:00
Wyatt Alt
c41401f20f chore: repath lance dependencies to lance-format (#2787) 2025-11-18 12:25:58 -08:00
Will Jones
1cf3917a87 ci: make rust ci faster, get ci green (#2782)
* Add `ci` profile for smaller build caches. This had a meaningful
impact in Lance, and I expect a similar impact here.
https://github.com/lancedb/lance/pull/5236
* Get caching working in Rust. Previously was not working due to
`workspaces: rust`.
* Get caching working in NodeJs lint job. Previously wasn't working
because we installed the toolchain **after** we called `- uses:
Swatinem/rust-cache@v2`, which invalidates the cache locally.
* Fix broken pytest from async io transition
(`pytest.PytestRemovedIn9Warning`)
* Altered `get_num_sub_vectors` to handle bug in case of 4-bit PQ. This
was cause of `rust future panicked: unknown error`. Raised an issue
upstream to change panic to error:
https://github.com/lancedb/lance/issues/5257
* Call `npm run docs` to fix doc issue.
* Disable flakey Windows test for consistency. It's just an OS-specific
timer issue, not our fault.
* Fix Windows absolute path handling in namespaces. Was causing CI
failure `OSError: [WinError 123] The filename, directory name, or volume
label syntax is incorrect: `
2025-11-18 09:04:56 -08:00
43 changed files with 1104 additions and 149 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.22.4-beta.0"
current_version = "0.22.4-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -0,0 +1,62 @@
name: Lance Release Timer
on:
schedule:
- cron: "*/10 * * * *"
workflow_dispatch:
permissions:
contents: read
actions: write
concurrency:
group: lance-release-timer
cancel-in-progress: false
jobs:
trigger-update:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Check for new Lance tag
id: check
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: |
python3 ci/check_lance_release.py --github-output "$GITHUB_OUTPUT"
- name: Look for existing PR
if: steps.check.outputs.needs_update == 'true'
id: pr
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: |
set -euo pipefail
TITLE="chore: update lance dependency to v${{ steps.check.outputs.latest_version }}"
COUNT=$(gh pr list --search "\"$TITLE\" in:title" --state open --limit 1 --json number --jq 'length')
if [ "$COUNT" -gt 0 ]; then
echo "Open PR already exists for $TITLE"
echo "pr_exists=true" >> "$GITHUB_OUTPUT"
else
echo "No existing PR for $TITLE"
echo "pr_exists=false" >> "$GITHUB_OUTPUT"
fi
- name: Trigger codex update workflow
if: steps.check.outputs.needs_update == 'true' && steps.pr.outputs.pr_exists != 'true'
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: |
set -euo pipefail
TAG=${{ steps.check.outputs.latest_tag }}
gh workflow run codex-update-lance-dependency.yml -f tag=refs/tags/$TAG
- name: Show latest codex workflow run
if: steps.check.outputs.needs_update == 'true' && steps.pr.outputs.pr_exists != 'true'
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: |
set -euo pipefail
gh run list --workflow codex-update-lance-dependency.yml --limit 1 --json databaseId,htmlUrl,displayTitle

View File

@@ -16,9 +16,6 @@ concurrency:
cancel-in-progress: true
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
RUST_BACKTRACE: "1"
jobs:
@@ -43,18 +40,20 @@ jobs:
node-version: 20
cache: 'npm'
cache-dependency-path: nodejs/package-lock.json
- uses: Swatinem/rust-cache@v2
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt, clippy
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt, clippy
- name: Lint
- uses: Swatinem/rust-cache@v2
- name: Format Rust
run: cargo fmt --all -- --check
- name: Lint Rust
run: cargo clippy --profile ci --all --all-features -- -D warnings
- name: Lint Typescript
run: |
cargo fmt --all -- --check
cargo clippy --all --all-features -- -D warnings
npm ci
npm run lint-ci
- name: Lint examples
@@ -90,7 +89,8 @@ jobs:
- name: Build
run: |
npm ci
npm run build
npm run build:debug -- --profile ci
npm run tsc
- name: Setup localstack
working-directory: .
run: docker compose up --detach --wait
@@ -147,7 +147,8 @@ jobs:
- name: Build
run: |
npm ci
npm run build
npm run build:debug -- --profile ci
npm run tsc
- name: Test
run: |
npm run test

View File

@@ -19,6 +19,7 @@ env:
PYTEST_ADDOPTS: "--color=yes"
FORCE_COLOR: "1"
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
RUST_BACKTRACE: "1"
jobs:
lint:
@@ -97,9 +98,6 @@ jobs:
run: |
sudo apt update
sudo apt install -y protobuf-compiler
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
- name: Install
run: |
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
@@ -131,10 +129,9 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: 3.${{ matrix.python-minor-version }}
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
- uses: ./.github/workflows/build_linux_wheel
with:
args: --profile ci
- uses: ./.github/workflows/run_tests
with:
integration: true
@@ -169,10 +166,9 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: "3.12"
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
- uses: ./.github/workflows/build_mac_wheel
with:
args: --profile ci
- uses: ./.github/workflows/run_tests
# Make sure wheels are not included in the Rust cache
- name: Delete wheels
@@ -199,10 +195,9 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: "3.12"
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
- uses: ./.github/workflows/build_windows_wheel
with:
args: --profile ci
- uses: ./.github/workflows/run_tests
# Make sure wheels are not included in the Rust cache
- name: Delete wheels

View File

@@ -18,11 +18,7 @@ env:
# This env var is used by Swatinem/rust-cache@v2 for the cache
# key, so we set it to make sure it is always consistent.
CARGO_TERM_COLOR: always
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
RUST_BACKTRACE: "1"
CARGO_INCREMENTAL: 0
jobs:
lint:
@@ -44,8 +40,6 @@ jobs:
with:
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install dependencies
run: |
sudo apt update
@@ -53,7 +47,7 @@ jobs:
- name: Run format
run: cargo fmt --all -- --check
- name: Run clippy
run: cargo clippy --workspace --tests --all-features -- -D warnings
run: cargo clippy --profile ci --workspace --tests --all-features -- -D warnings
build-no-lock:
runs-on: ubuntu-24.04
@@ -80,7 +74,7 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev
- name: Build all
run: |
cargo build --benches --all-features --tests
cargo build --profile ci --benches --all-features --tests
linux:
timeout-minutes: 30
@@ -103,14 +97,8 @@ jobs:
fetch-depth: 0
lfs: true
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install dependencies
run: |
# This shaves 2 minutes off this step in CI. This doesn't seem to be
# necessary in standard runners, but it is in the 4x runners.
sudo rm /var/lib/man-db/auto-update
sudo apt install -y protobuf-compiler libssl-dev
run: sudo apt install -y protobuf-compiler libssl-dev
- uses: rui314/setup-mold@v1
- name: Make Swap
run: |
@@ -119,16 +107,16 @@ jobs:
sudo mkswap /swapfile
sudo swapon /swapfile
- name: Build
run: cargo build --all-features --tests --locked --examples
run: cargo build --profile ci --all-features --tests --locked --examples
- name: Run feature tests
run: make -C ./lancedb feature-tests
run: CARGO_ARGS="--profile ci" make -C ./lancedb feature-tests
- name: Run examples
run: cargo run --example simple --locked
run: cargo run --profile ci --example simple --locked
- name: Run remote tests
# Running this requires access to secrets, so skip if this is
# a PR from a fork.
if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
run: make -C ./lancedb remote-tests
run: CARGO_ARGS="--profile ci" make -C ./lancedb remote-tests
macos:
timeout-minutes: 30
@@ -148,8 +136,6 @@ jobs:
- name: CPU features
run: sysctl -a | grep cpu
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install dependencies
run: brew install protobuf
- name: Run tests
@@ -159,7 +145,7 @@ jobs:
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps \
| jq -r '.packages[] | .features | keys | .[]' \
| grep -v s3-test | sort | uniq | paste -s -d "," -`
cargo test --features $ALL_FEATURES --locked
cargo test --profile ci --features $ALL_FEATURES --locked
windows:
runs-on: windows-2022
@@ -173,22 +159,21 @@ jobs:
working-directory: rust/lancedb
steps:
- uses: actions/checkout@v4
- name: Set target
run: rustup target add ${{ matrix.target }}
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install Protoc v21.12
run: choco install --no-progress protoc
- name: Build
run: |
rustup target add ${{ matrix.target }}
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
cargo build --features remote --tests --locked --target ${{ matrix.target }}
cargo build --profile ci --features remote --tests --locked --target ${{ matrix.target }}
- name: Run tests
# Can only run tests when target matches host
if: ${{ matrix.target == 'x86_64-pc-windows-msvc' }}
run: |
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
cargo test --features remote --locked
cargo test --profile ci --features remote --locked
msrv:
# Check the minimum supported Rust version
@@ -213,6 +198,7 @@ jobs:
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ matrix.msrv }}
- uses: Swatinem/rust-cache@v2
- name: Downgrade dependencies
# These packages have newer requirements for MSRV
run: |
@@ -226,4 +212,4 @@ jobs:
cargo update -p aws-sdk-sts --precise 1.51.0
cargo update -p home --precise 0.5.9
- name: cargo +${{ matrix.msrv }} check
run: cargo check --workspace --tests --benches --all-features
run: cargo check --profile ci --workspace --tests --benches --all-features

70
Cargo.lock generated
View File

@@ -3032,8 +3032,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4217,8 +4217,8 @@ dependencies = [
[[package]]
name = "lance"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow",
"arrow-arith",
@@ -4282,8 +4282,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4301,8 +4301,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrayref",
"paste",
@@ -4311,8 +4311,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4348,8 +4348,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow",
"arrow-array",
@@ -4378,8 +4378,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow",
"arrow-array",
@@ -4396,8 +4396,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4434,8 +4434,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4467,8 +4467,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow",
"arrow-arith",
@@ -4529,8 +4529,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow",
"arrow-arith",
@@ -4570,8 +4570,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4587,8 +4587,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow",
"async-trait",
@@ -4600,8 +4600,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4639,8 +4639,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow",
"arrow-array",
@@ -4679,8 +4679,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "1.0.0-beta.2"
source = "git+https://github.com/lancedb/lance.git?tag=v1.0.0-beta.2#254a8217ac26666585983aa7ec8c4234f4c3f99f"
version = "1.0.0-beta.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0-beta.3#95ff7f6e684c1911fc00c9c2811cce1a61c06ff5"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4691,7 +4691,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.22.4-beta.0"
version = "0.22.4-beta.1"
dependencies = [
"ahash",
"anyhow",
@@ -4786,7 +4786,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.22.4-beta.0"
version = "0.22.4-beta.1"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4806,7 +4806,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.25.4-beta.0"
version = "0.25.4-beta.1"
dependencies = [
"arrow",
"async-trait",

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=1.0.0-beta.2", default-features = false, "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-core = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-datagen = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-file = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=1.0.0-beta.2", default-features = false, "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-namespace = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-namespace-impls = { "version" = "=1.0.0-beta.2", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"], "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-arrow = { "version" = "=1.0.0-beta.2", "tag" = "v1.0.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance = { "version" = "=1.0.0-beta.3", default-features = false, "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=1.0.0-beta.3", default-features = false, "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=1.0.0-beta.3", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"], "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "56.2", optional = false }
@@ -63,3 +63,17 @@ regex = "1.10"
lazy_static = "1"
semver = "1.0.25"
chrono = "0.4"
[profile.ci]
debug = "line-tables-only"
inherits = "dev"
incremental = false
# This rule applies to every package except workspace members (dependencies
# such as `arrow` and `tokio`). It disables debug info and related features on
# dependencies so their binaries stay smaller, improving cache reuse.
[profile.ci.package."*"]
debug = false
debug-assertions = false
strip = "debuginfo"
incremental = false

208
ci/check_lance_release.py Executable file
View File

@@ -0,0 +1,208 @@
#!/usr/bin/env python3
"""Determine whether a newer Lance tag exists and expose results for CI."""
from __future__ import annotations
import argparse
import json
import os
import re
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, List, Sequence, Tuple, Union
try: # Python >=3.11
import tomllib # type: ignore
except ModuleNotFoundError: # pragma: no cover - fallback for older Python
import tomli as tomllib # type: ignore
LANCE_REPO = "lance-format/lance"
SEMVER_RE = re.compile(
r"^\s*(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)"
r"(?:-(?P<prerelease>[0-9A-Za-z.-]+))?"
r"(?:\+[0-9A-Za-z.-]+)?\s*$"
)
@dataclass(frozen=True)
class SemVer:
major: int
minor: int
patch: int
prerelease: Tuple[Union[int, str], ...]
def __lt__(self, other: "SemVer") -> bool: # pragma: no cover - simple comparison
if (self.major, self.minor, self.patch) != (other.major, other.minor, other.patch):
return (self.major, self.minor, self.patch) < (other.major, other.minor, other.patch)
if self.prerelease == other.prerelease:
return False
if not self.prerelease:
return False # release > anything else
if not other.prerelease:
return True
for left, right in zip(self.prerelease, other.prerelease):
if left == right:
continue
if isinstance(left, int) and isinstance(right, int):
return left < right
if isinstance(left, int):
return True
if isinstance(right, int):
return False
return str(left) < str(right)
return len(self.prerelease) < len(other.prerelease)
def __eq__(self, other: object) -> bool: # pragma: no cover - trivial
if not isinstance(other, SemVer):
return NotImplemented
return (
self.major == other.major
and self.minor == other.minor
and self.patch == other.patch
and self.prerelease == other.prerelease
)
def parse_semver(raw: str) -> SemVer:
match = SEMVER_RE.match(raw)
if not match:
raise ValueError(f"Unsupported version format: {raw}")
prerelease = match.group("prerelease")
parts: Tuple[Union[int, str], ...] = ()
if prerelease:
parsed: List[Union[int, str]] = []
for piece in prerelease.split("."):
if piece.isdigit():
parsed.append(int(piece))
else:
parsed.append(piece)
parts = tuple(parsed)
return SemVer(
major=int(match.group("major")),
minor=int(match.group("minor")),
patch=int(match.group("patch")),
prerelease=parts,
)
@dataclass
class TagInfo:
tag: str # e.g. v1.0.0-beta.2
version: str # e.g. 1.0.0-beta.2
semver: SemVer
def run_command(cmd: Sequence[str]) -> str:
result = subprocess.run(cmd, capture_output=True, text=True, check=False)
if result.returncode != 0:
raise RuntimeError(
f"Command {' '.join(cmd)} failed with {result.returncode}: {result.stderr.strip()}"
)
return result.stdout.strip()
def fetch_remote_tags() -> List[TagInfo]:
output = run_command(
[
"gh",
"api",
"-X",
"GET",
f"repos/{LANCE_REPO}/git/refs/tags",
"--paginate",
"--jq",
".[].ref",
]
)
tags: List[TagInfo] = []
for line in output.splitlines():
ref = line.strip()
if not ref.startswith("refs/tags/v"):
continue
tag = ref.split("refs/tags/")[-1]
version = tag.lstrip("v")
try:
tags.append(TagInfo(tag=tag, version=version, semver=parse_semver(version)))
except ValueError:
continue
if not tags:
raise RuntimeError("No Lance tags could be parsed from GitHub API output")
return tags
def read_current_version(repo_root: Path) -> str:
cargo_path = repo_root / "Cargo.toml"
with cargo_path.open("rb") as fh:
data = tomllib.load(fh)
try:
deps = data["workspace"]["dependencies"]
entry = deps["lance"]
except KeyError as exc: # pragma: no cover - configuration guard
raise RuntimeError("Failed to locate workspace.dependencies.lance in Cargo.toml") from exc
if isinstance(entry, str):
raw_version = entry
elif isinstance(entry, dict):
raw_version = entry.get("version", "")
else: # pragma: no cover - defensive
raise RuntimeError("Unexpected lance dependency format")
raw_version = raw_version.strip()
if not raw_version:
raise RuntimeError("lance dependency does not declare a version")
return raw_version.lstrip("=")
def determine_latest_tag(tags: Iterable[TagInfo]) -> TagInfo:
return max(tags, key=lambda tag: tag.semver)
def write_outputs(args: argparse.Namespace, payload: dict) -> None:
target = getattr(args, "github_output", None)
if not target:
return
with open(target, "a", encoding="utf-8") as handle:
for key, value in payload.items():
handle.write(f"{key}={value}\n")
def main(argv: Sequence[str] | None = None) -> int:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--repo-root",
default=Path(__file__).resolve().parents[1],
type=Path,
help="Path to the lancedb repository root",
)
parser.add_argument(
"--github-output",
default=os.environ.get("GITHUB_OUTPUT"),
help="Optional file path for writing GitHub Action outputs",
)
args = parser.parse_args(argv)
repo_root = Path(args.repo_root)
current_version = read_current_version(repo_root)
current_semver = parse_semver(current_version)
tags = fetch_remote_tags()
latest = determine_latest_tag(tags)
needs_update = latest.semver > current_semver
payload = {
"current_version": current_version,
"current_tag": f"v{current_version}",
"latest_version": latest.version,
"latest_tag": latest.tag,
"needs_update": "true" if needs_update else "false",
}
print(json.dumps(payload))
write_outputs(args, payload)
return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -3,6 +3,8 @@ import re
import sys
import json
LANCE_GIT_URL = "https://github.com/lance-format/lance.git"
def run_command(command: str) -> str:
"""
@@ -29,7 +31,7 @@ def get_latest_stable_version() -> str:
def get_latest_preview_version() -> str:
lance_tags = run_command(
"git ls-remote --tags https://github.com/lancedb/lance.git | grep 'refs/tags/v[0-9beta.-]\\+$'"
f"git ls-remote --tags {LANCE_GIT_URL} | grep 'refs/tags/v[0-9beta.-]\\+$'"
).splitlines()
lance_tags = (
tag.split("refs/tags/")[1]
@@ -176,8 +178,8 @@ def set_stable_version(version: str):
def set_preview_version(version: str):
"""
Sets lines to
lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.29.0", default-features = false, "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.29.0-beta.2", "git" = LANCE_GIT_URL }
lance-io = { "version" = "=0.29.0", default-features = false, "tag" = "v0.29.0-beta.2", "git" = LANCE_GIT_URL }
...
"""
@@ -194,7 +196,7 @@ def set_preview_version(version: str):
config["features"] = features
config["tag"] = f"v{version}"
config["git"] = "https://github.com/lancedb/lance.git"
config["git"] = LANCE_GIT_URL
return dict_to_toml_line(package_name, config)

View File

@@ -1,7 +1,7 @@
# Contributing to LanceDB Typescript
This document outlines the process for contributing to LanceDB Typescript.
For general contribution guidelines, see [CONTRIBUTING.md](../../../../CONTRIBUTING.md).
For general contribution guidelines, see [CONTRIBUTING.md](../CONTRIBUTING.md).
## Project layout

View File

@@ -8,6 +8,14 @@
## Properties
### numAttempts
```ts
numAttempts: number;
```
***
### numDeletedRows
```ts

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.4-beta.0</version>
<version>0.22.4-beta.1</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.4-beta.0</version>
<version>0.22.4-beta.1</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.4-beta.0</version>
<version>0.22.4-beta.1</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.22.4-beta.0"
version = "0.22.4-beta.1"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.22.4-beta.0",
"version": "0.22.4-beta.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.22.4-beta.0",
"version": "0.22.4-beta.1",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.22.4-beta.0",
"version": "0.22.4-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.22.4-beta.0",
"version": "0.22.4-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.22.4-beta.0",
"version": "0.22.4-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.22.4-beta.0",
"version": "0.22.4-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.22.4-beta.0",
"version": "0.22.4-beta.1",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.22.4-beta.0",
"version": "0.22.4-beta.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.22.4-beta.0",
"version": "0.22.4-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.22.4-beta.0",
"version": "0.22.4-beta.1",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.22.4-beta.0",
"version": "0.22.4-beta.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",
@@ -73,8 +73,10 @@
"scripts": {
"artifacts": "napi artifacts",
"build:debug": "napi build --platform --no-const-enum --dts ../lancedb/native.d.ts --js ../lancedb/native.js lancedb",
"postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
"build:release": "napi build --platform --no-const-enum --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
"build": "npm run build:debug && npm run tsc && shx cp lancedb/*.node dist/",
"postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
"build": "npm run build:debug && npm run tsc",
"build-release": "npm run build:release && npm run tsc",
"tsc": "tsc -b",
"posttsc": "shx cp lancedb/native.d.ts dist/native.d.ts",

View File

@@ -740,6 +740,7 @@ pub struct MergeResult {
pub num_inserted_rows: i64,
pub num_updated_rows: i64,
pub num_deleted_rows: i64,
pub num_attempts: i64,
}
impl From<lancedb::table::MergeResult> for MergeResult {
@@ -749,6 +750,7 @@ impl From<lancedb::table::MergeResult> for MergeResult {
num_inserted_rows: value.num_inserted_rows as i64,
num_updated_rows: value.num_updated_rows as i64,
num_deleted_rows: value.num_deleted_rows as i64,
num_attempts: value.num_attempts as i64,
}
}
}

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.25.4-beta.0"
current_version = "0.25.4-beta.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.25.4-beta.0"
version = "0.25.4-beta.2"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -59,7 +59,7 @@ tests = [
"polars>=0.19, <=1.3.0",
"tantivy",
"pyarrow-stubs",
"pylance>=1.0.0b2",
"pylance>=1.0.0b4",
"requests",
"datafusion",
]

View File

@@ -20,7 +20,12 @@ from .remote.db import RemoteDBConnection
from .schema import vector
from .table import AsyncTable, Table
from ._lancedb import Session
from .namespace import connect_namespace, LanceNamespaceDBConnection
from .namespace import (
connect_namespace,
connect_namespace_async,
LanceNamespaceDBConnection,
AsyncLanceNamespaceDBConnection,
)
def connect(
@@ -36,7 +41,7 @@ def connect(
session: Optional[Session] = None,
**kwargs: Any,
) -> DBConnection:
"""Connect to a LanceDB database. YAY!
"""Connect to a LanceDB database.
Parameters
----------
@@ -224,7 +229,9 @@ __all__ = [
"connect",
"connect_async",
"connect_namespace",
"connect_namespace_async",
"AsyncConnection",
"AsyncLanceNamespaceDBConnection",
"AsyncTable",
"URI",
"sanitize_uri",

View File

@@ -306,6 +306,7 @@ class MergeResult:
num_updated_rows: int
num_inserted_rows: int
num_deleted_rows: int
num_attempts: int
class AddColumnsResult:
version: int

View File

@@ -472,6 +472,12 @@ class LanceDBConnection(DBConnection):
uri = uri[7:] # Remove "file://"
elif uri.startswith("file:/"):
uri = uri[5:] # Remove "file:"
if sys.platform == "win32":
# On Windows, a path like /C:/path should become C:/path
if len(uri) >= 3 and uri[0] == "/" and uri[2] == ":":
uri = uri[1:]
uri = Path(uri)
uri = uri.expanduser().absolute()
Path(uri).mkdir(parents=True, exist_ok=True)

View File

@@ -10,6 +10,7 @@ through a namespace abstraction.
from __future__ import annotations
import asyncio
import sys
from typing import Dict, Iterable, List, Optional, Union
@@ -23,7 +24,7 @@ import pyarrow as pa
from lancedb.db import DBConnection, LanceDBConnection
from lancedb.io import StorageOptionsProvider
from lancedb.table import LanceTable, Table
from lancedb.table import AsyncTable, LanceTable, Table
from lancedb.util import validate_table_name
from lancedb.common import DATA
from lancedb.pydantic import LanceModel
@@ -497,6 +498,294 @@ class LanceNamespaceDBConnection(DBConnection):
)
class AsyncLanceNamespaceDBConnection:
"""
An async LanceDB connection that uses a namespace for table management.
This connection delegates table URI resolution to a lance_namespace instance,
while providing async methods for all operations.
"""
def __init__(
self,
namespace: LanceNamespace,
*,
read_consistency_interval: Optional[timedelta] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
):
"""
Initialize an async namespace-based LanceDB connection.
Parameters
----------
namespace : LanceNamespace
The namespace instance to use for table management
read_consistency_interval : Optional[timedelta]
The interval at which to check for updates to the table from other
processes. If None, then consistency is not checked.
storage_options : Optional[Dict[str, str]]
Additional options for the storage backend
session : Optional[Session]
A session to use for this connection
"""
self._ns = namespace
self.read_consistency_interval = read_consistency_interval
self.storage_options = storage_options or {}
self.session = session
async def table_names(
self,
page_token: Optional[str] = None,
limit: int = 10,
*,
namespace: List[str] = [],
) -> Iterable[str]:
"""List table names in the namespace."""
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
response = self._ns.list_tables(request)
return response.tables if response.tables else []
async def create_table(
self,
name: str,
data: Optional[DATA] = None,
schema: Optional[Union[pa.Schema, LanceModel]] = None,
mode: str = "create",
exist_ok: bool = False,
on_bad_vectors: str = "error",
fill_value: float = 0.0,
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
*,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
data_storage_version: Optional[str] = None,
enable_v2_manifest_paths: Optional[bool] = None,
) -> AsyncTable:
"""Create a new table in the namespace."""
if mode.lower() not in ["create", "overwrite"]:
raise ValueError("mode must be either 'create' or 'overwrite'")
validate_table_name(name)
# Get location from namespace
table_id = namespace + [name]
# Step 1: Get the table location and storage options from namespace
location = None
namespace_storage_options = None
if mode.lower() == "overwrite":
# Try to describe the table first to see if it exists
try:
describe_request = DescribeTableRequest(id=table_id)
describe_response = self._ns.describe_table(describe_request)
location = describe_response.location
namespace_storage_options = describe_response.storage_options
except Exception:
# Table doesn't exist, will create a new one below
pass
if location is None:
# Table doesn't exist or mode is "create", reserve a new location
create_empty_request = CreateEmptyTableRequest(
id=table_id,
location=None,
properties=self.storage_options if self.storage_options else None,
)
create_empty_response = self._ns.create_empty_table(create_empty_request)
if not create_empty_response.location:
raise ValueError(
"Table location is missing from create_empty_table response"
)
location = create_empty_response.location
namespace_storage_options = create_empty_response.storage_options
# Merge storage options: self.storage_options < user options < namespace options
merged_storage_options = dict(self.storage_options)
if storage_options:
merged_storage_options.update(storage_options)
if namespace_storage_options:
merged_storage_options.update(namespace_storage_options)
# Step 2: Create table using LanceTable.create with the location
# Run the sync operation in a thread
def _create_table():
temp_conn = LanceDBConnection(
location,
read_consistency_interval=self.read_consistency_interval,
storage_options=merged_storage_options,
session=self.session,
)
# Create a storage options provider if not provided by user
if (
storage_options_provider is None
and namespace_storage_options is not None
):
provider = LanceNamespaceStorageOptionsProvider(
namespace=self._ns,
table_id=table_id,
)
else:
provider = storage_options_provider
return LanceTable.create(
temp_conn,
name,
data,
schema,
mode=mode,
exist_ok=exist_ok,
on_bad_vectors=on_bad_vectors,
fill_value=fill_value,
embedding_functions=embedding_functions,
namespace=namespace,
storage_options=merged_storage_options,
storage_options_provider=provider,
location=location,
)
lance_table = await asyncio.to_thread(_create_table)
# Get the underlying async table from LanceTable
return lance_table._table
async def open_table(
self,
name: str,
*,
namespace: List[str] = [],
storage_options: Optional[Dict[str, str]] = None,
storage_options_provider: Optional[StorageOptionsProvider] = None,
index_cache_size: Optional[int] = None,
) -> AsyncTable:
"""Open an existing table from the namespace."""
table_id = namespace + [name]
request = DescribeTableRequest(id=table_id)
response = self._ns.describe_table(request)
# Merge storage options: self.storage_options < user options < namespace options
merged_storage_options = dict(self.storage_options)
if storage_options:
merged_storage_options.update(storage_options)
if response.storage_options:
merged_storage_options.update(response.storage_options)
# Create a storage options provider if not provided by user
if storage_options_provider is None and response.storage_options is not None:
storage_options_provider = LanceNamespaceStorageOptionsProvider(
namespace=self._ns,
table_id=table_id,
)
# Open table in a thread
def _open_table():
temp_conn = LanceDBConnection(
response.location,
read_consistency_interval=self.read_consistency_interval,
storage_options=merged_storage_options,
session=self.session,
)
return LanceTable.open(
temp_conn,
name,
namespace=namespace,
storage_options=merged_storage_options,
storage_options_provider=storage_options_provider,
index_cache_size=index_cache_size,
location=response.location,
)
lance_table = await asyncio.to_thread(_open_table)
return lance_table._table
async def drop_table(self, name: str, namespace: List[str] = []):
"""Drop a table from the namespace."""
table_id = namespace + [name]
request = DropTableRequest(id=table_id)
self._ns.drop_table(request)
async def rename_table(
self,
cur_name: str,
new_name: str,
cur_namespace: List[str] = [],
new_namespace: List[str] = [],
):
"""Rename is not supported for namespace connections."""
raise NotImplementedError(
"rename_table is not supported for namespace connections"
)
async def drop_database(self):
"""Deprecated method."""
raise NotImplementedError(
"drop_database is deprecated, use drop_all_tables instead"
)
async def drop_all_tables(self, namespace: List[str] = []):
"""Drop all tables in the namespace."""
table_names = await self.table_names(namespace=namespace)
for table_name in table_names:
await self.drop_table(table_name, namespace=namespace)
async def list_namespaces(
self,
namespace: List[str] = [],
page_token: Optional[str] = None,
limit: int = 10,
) -> Iterable[str]:
"""
List child namespaces under the given namespace.
Parameters
----------
namespace : Optional[List[str]]
The parent namespace to list children from.
If None, lists root-level namespaces.
page_token : Optional[str]
Pagination token for listing results.
limit : int
Maximum number of namespaces to return.
Returns
-------
Iterable[str]
Names of child namespaces.
"""
request = ListNamespacesRequest(
id=namespace, page_token=page_token, limit=limit
)
response = self._ns.list_namespaces(request)
return response.namespaces if response.namespaces else []
async def create_namespace(self, namespace: List[str]) -> None:
"""
Create a new namespace.
Parameters
----------
namespace : List[str]
The namespace path to create.
"""
request = CreateNamespaceRequest(id=namespace)
self._ns.create_namespace(request)
async def drop_namespace(self, namespace: List[str]) -> None:
"""
Drop a namespace.
Parameters
----------
namespace : List[str]
The namespace path to drop.
"""
request = DropNamespaceRequest(id=namespace)
self._ns.drop_namespace(request)
def connect_namespace(
impl: str,
properties: Dict[str, str],
@@ -541,3 +830,62 @@ def connect_namespace(
storage_options=storage_options,
session=session,
)
def connect_namespace_async(
impl: str,
properties: Dict[str, str],
*,
read_consistency_interval: Optional[timedelta] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
) -> AsyncLanceNamespaceDBConnection:
"""
Connect to a LanceDB database through a namespace (returns async connection).
This function is synchronous but returns an AsyncLanceNamespaceDBConnection
that provides async methods for all database operations.
Parameters
----------
impl : str
The namespace implementation to use. For examples:
- "dir" for DirectoryNamespace
- "rest" for REST-based namespace
- Full module path for custom implementations
properties : Dict[str, str]
Configuration properties for the namespace implementation.
Different namespace implemenation has different config properties.
For example, use DirectoryNamespace with {"root": "/path/to/directory"}
read_consistency_interval : Optional[timedelta]
The interval at which to check for updates to the table from other
processes. If None, then consistency is not checked.
storage_options : Optional[Dict[str, str]]
Additional options for the storage backend
session : Optional[Session]
A session to use for this connection
Returns
-------
AsyncLanceNamespaceDBConnection
An async namespace-based connection to LanceDB
Examples
--------
>>> import lancedb
>>> # This function is sync, but returns an async connection
>>> db = lancedb.connect_namespace_async("dir", {"root": "/path/to/db"})
>>> # Use async methods on the connection
>>> async def use_db():
... tables = await db.table_names()
... table = await db.create_table("my_table", schema=schema)
"""
namespace = namespace_connect(impl, properties)
# Return the async namespace-based connection
return AsyncLanceNamespaceDBConnection(
namespace,
read_consistency_interval=read_consistency_interval,
storage_options=storage_options,
session=session,
)

View File

@@ -14,6 +14,7 @@ from typing import (
Literal,
Optional,
Tuple,
Type,
TypeVar,
Union,
Any,
@@ -786,10 +787,7 @@ class LanceQueryBuilder(ABC):
-------
List[LanceModel]
"""
return [
model(**{k: v for k, v in row.items() if k in model.field_names()})
for row in self.to_arrow(timeout=timeout).to_pylist()
]
return [model(**row) for row in self.to_arrow(timeout=timeout).to_pylist()]
def to_polars(self, *, timeout: Optional[timedelta] = None) -> "pl.DataFrame":
"""
@@ -2400,6 +2398,28 @@ class AsyncQueryBase(object):
return pl.from_arrow(await self.to_arrow(timeout=timeout))
async def to_pydantic(
self, model: Type[LanceModel], *, timeout: Optional[timedelta] = None
) -> List[LanceModel]:
"""
Convert results to a list of pydantic models.
Parameters
----------
model : Type[LanceModel]
The pydantic model to use.
timeout : timedelta, optional
The maximum time to wait for the query to complete.
If None, wait indefinitely.
Returns
-------
list[LanceModel]
"""
return [
model(**row) for row in (await self.to_arrow(timeout=timeout)).to_pylist()
]
async def explain_plan(self, verbose: Optional[bool] = False):
"""Return the execution plan for this query.

View File

@@ -1717,6 +1717,7 @@ class LanceTable(Table):
):
self._conn = connection
self._namespace = namespace
self._location = location # Store location for use in _dataset_path
if _async is not None:
self._table = _async
else:
@@ -1794,6 +1795,10 @@ class LanceTable(Table):
@cached_property
def _dataset_path(self) -> str:
# Cacheable since it's deterministic
# If table was opened with explicit location (e.g., from namespace),
# use that location directly instead of constructing from base URI
if self._location is not None:
return self._location
return _table_path(self._conn.uri, self.name)
def to_lance(self, **kwargs) -> lance.LanceDataset:
@@ -2681,6 +2686,7 @@ class LanceTable(Table):
self = cls.__new__(cls)
self._conn = db
self._namespace = namespace
self._location = location
if data_storage_version is not None:
warnings.warn(

View File

@@ -32,6 +32,7 @@ import numpy as np
import pyarrow as pa
import pandas as pd
import pytest
import pytest_asyncio
from utils import exception_output
pytest.importorskip("lancedb.fts")
@@ -90,7 +91,7 @@ def table(tmp_path) -> ldb.table.LanceTable:
return table
@pytest.fixture
@pytest_asyncio.fixture
async def async_table(tmp_path) -> ldb.table.AsyncTable:
# Use local random state to avoid affecting other tests
rng = np.random.RandomState(42)
@@ -253,7 +254,7 @@ def test_search_fts(table, use_tantivy):
@pytest.mark.asyncio
async def test_fts_select_async(async_table):
tbl = await async_table
tbl = async_table
await tbl.create_index("text", config=FTS())
await tbl.create_index("text2", config=FTS())
results = (
@@ -338,7 +339,6 @@ def test_search_fts_phrase_query(table):
@pytest.mark.asyncio
async def test_search_fts_phrase_query_async(async_table):
async_table = await async_table
await async_table.create_index("text", config=FTS(with_position=False))
try:
phrase_results = (
@@ -393,7 +393,6 @@ def test_search_fts_specify_column(table):
@pytest.mark.asyncio
async def test_search_fts_async(async_table):
async_table = await async_table
await async_table.create_index("text", config=FTS())
results = await async_table.query().nearest_to_text("puppy").limit(5).to_list()
assert len(results) == 5
@@ -424,7 +423,6 @@ async def test_search_fts_async(async_table):
@pytest.mark.asyncio
async def test_search_fts_specify_column_async(async_table):
async_table = await async_table
await async_table.create_index("text", config=FTS())
await async_table.create_index("text2", config=FTS())

View File

@@ -423,3 +423,218 @@ class TestNamespaceConnection:
db.drop_table("same_name_table", namespace=["namespace_b"])
db.drop_namespace(["namespace_a"])
db.drop_namespace(["namespace_b"])
@pytest.mark.asyncio
class TestAsyncNamespaceConnection:
"""Test async namespace-based LanceDB connection using DirectoryNamespace."""
def setup_method(self):
"""Set up test fixtures."""
self.temp_dir = tempfile.mkdtemp()
def teardown_method(self):
"""Clean up test fixtures."""
shutil.rmtree(self.temp_dir, ignore_errors=True)
async def test_connect_namespace_async(self):
"""Test connecting to LanceDB through DirectoryNamespace asynchronously."""
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
# Should be an AsyncLanceNamespaceDBConnection
assert isinstance(db, lancedb.AsyncLanceNamespaceDBConnection)
# Initially no tables in root
table_names = await db.table_names()
assert len(list(table_names)) == 0
async def test_create_table_async(self):
"""Test creating a table asynchronously through namespace."""
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
# Create a child namespace first
await db.create_namespace(["test_ns"])
# Define schema for empty table
schema = pa.schema(
[
pa.field("id", pa.int64()),
pa.field("vector", pa.list_(pa.float32(), 2)),
pa.field("text", pa.string()),
]
)
# Create empty table in child namespace
table = await db.create_table(
"test_table", schema=schema, namespace=["test_ns"]
)
assert table is not None
assert isinstance(table, lancedb.AsyncTable)
# Table should appear in child namespace
table_names = await db.table_names(namespace=["test_ns"])
assert "test_table" in list(table_names)
async def test_open_table_async(self):
"""Test opening an existing table asynchronously through namespace."""
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
# Create a child namespace first
await db.create_namespace(["test_ns"])
# Create a table with schema in child namespace
schema = pa.schema(
[
pa.field("id", pa.int64()),
pa.field("vector", pa.list_(pa.float32(), 2)),
]
)
await db.create_table("test_table", schema=schema, namespace=["test_ns"])
# Open the table
table = await db.open_table("test_table", namespace=["test_ns"])
assert table is not None
assert isinstance(table, lancedb.AsyncTable)
# Test write operation - add data to the table
test_data = [
{"id": 1, "vector": [1.0, 2.0]},
{"id": 2, "vector": [3.0, 4.0]},
{"id": 3, "vector": [5.0, 6.0]},
]
await table.add(test_data)
# Test read operation - query the table
result = await table.to_arrow()
assert len(result) == 3
assert result.schema.field("id").type == pa.int64()
assert result.schema.field("vector").type == pa.list_(pa.float32(), 2)
# Verify data content
result_df = result.to_pandas()
assert result_df["id"].tolist() == [1, 2, 3]
assert [v.tolist() for v in result_df["vector"]] == [
[1.0, 2.0],
[3.0, 4.0],
[5.0, 6.0],
]
# Test update operation
await table.update({"id": 20}, where="id = 2")
result = await table.to_arrow()
result_df = result.to_pandas().sort_values("id").reset_index(drop=True)
assert result_df["id"].tolist() == [1, 3, 20]
# Test delete operation
await table.delete("id = 1")
result = await table.to_arrow()
assert len(result) == 2
result_df = result.to_pandas().sort_values("id").reset_index(drop=True)
assert result_df["id"].tolist() == [3, 20]
async def test_drop_table_async(self):
"""Test dropping a table asynchronously through namespace."""
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
# Create a child namespace first
await db.create_namespace(["test_ns"])
# Create tables in child namespace
schema = pa.schema(
[
pa.field("id", pa.int64()),
pa.field("vector", pa.list_(pa.float32(), 2)),
]
)
await db.create_table("table1", schema=schema, namespace=["test_ns"])
await db.create_table("table2", schema=schema, namespace=["test_ns"])
# Verify both tables exist in child namespace
table_names = list(await db.table_names(namespace=["test_ns"]))
assert "table1" in table_names
assert "table2" in table_names
assert len(table_names) == 2
# Drop one table
await db.drop_table("table1", namespace=["test_ns"])
# Verify only table2 remains
table_names = list(await db.table_names(namespace=["test_ns"]))
assert "table1" not in table_names
assert "table2" in table_names
assert len(table_names) == 1
async def test_namespace_operations_async(self):
"""Test namespace management operations asynchronously."""
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
# Initially no namespaces
namespaces = await db.list_namespaces()
assert len(list(namespaces)) == 0
# Create a namespace
await db.create_namespace(["test_namespace"])
# Verify namespace exists
namespaces = list(await db.list_namespaces())
assert "test_namespace" in namespaces
assert len(namespaces) == 1
# Create table in namespace
schema = pa.schema(
[
pa.field("id", pa.int64()),
pa.field("vector", pa.list_(pa.float32(), 2)),
]
)
table = await db.create_table(
"test_table", schema=schema, namespace=["test_namespace"]
)
assert table is not None
# Verify table exists in namespace
tables_in_namespace = list(await db.table_names(namespace=["test_namespace"]))
assert "test_table" in tables_in_namespace
assert len(tables_in_namespace) == 1
# Drop table from namespace
await db.drop_table("test_table", namespace=["test_namespace"])
# Verify table no longer exists in namespace
tables_in_namespace = list(await db.table_names(namespace=["test_namespace"]))
assert len(tables_in_namespace) == 0
# Drop namespace
await db.drop_namespace(["test_namespace"])
# Verify namespace no longer exists
namespaces = list(await db.list_namespaces())
assert len(namespaces) == 0
async def test_drop_all_tables_async(self):
"""Test dropping all tables asynchronously through namespace."""
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
# Create a child namespace first
await db.create_namespace(["test_ns"])
# Create multiple tables in child namespace
schema = pa.schema(
[
pa.field("id", pa.int64()),
pa.field("vector", pa.list_(pa.float32(), 2)),
]
)
for i in range(3):
await db.create_table(f"table{i}", schema=schema, namespace=["test_ns"])
# Verify tables exist in child namespace
table_names = await db.table_names(namespace=["test_ns"])
assert len(list(table_names)) == 3
# Drop all tables in child namespace
await db.drop_all_tables(namespace=["test_ns"])
# Verify all tables are gone from child namespace
table_names = await db.table_names(namespace=["test_ns"])
assert len(list(table_names)) == 0

View File

@@ -412,3 +412,50 @@ def test_multi_vector_in_lance_model():
t = TestModel(id=1)
assert t.vectors == [[0.0] * 16]
def test_aliases_in_lance_model(mem_db):
data = [
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 6.5], "item": "bar", "price": 20.0},
]
tbl = mem_db.create_table("items", data=data)
class TestModel(LanceModel):
name: str = Field(alias="item")
price: float
distance: float = Field(alias="_distance")
model = (
tbl.search([5.9, 6.5])
.distance_type("cosine")
.limit(1)
.to_pydantic(TestModel)[0]
)
assert hasattr(model, "name")
assert hasattr(model, "distance")
assert model.distance < 0.01
@pytest.mark.asyncio
async def test_aliases_in_lance_model_async(mem_db_async):
data = [
{"vector": [8.3, 2.5], "item": "foo", "price": 12.0},
{"vector": [7.7, 3.9], "item": "bar", "price": 11.2},
]
tbl = await mem_db_async.create_table("items", data=data)
class TestModel(LanceModel):
name: str = Field(alias="item")
price: float
distance: float = Field(alias="_distance")
model = (
await tbl.vector_search([7.7, 3.9])
.distance_type("cosine")
.limit(1)
.to_pydantic(TestModel)
)[0]
assert hasattr(model, "name")
assert hasattr(model, "distance")
assert model.distance < 0.01

View File

@@ -134,17 +134,19 @@ pub struct MergeResult {
pub num_updated_rows: u64,
pub num_inserted_rows: u64,
pub num_deleted_rows: u64,
pub num_attempts: u32,
}
#[pymethods]
impl MergeResult {
pub fn __repr__(&self) -> String {
format!(
"MergeResult(version={}, num_updated_rows={}, num_inserted_rows={}, num_deleted_rows={})",
"MergeResult(version={}, num_updated_rows={}, num_inserted_rows={}, num_deleted_rows={}, num_attempts={})",
self.version,
self.num_updated_rows,
self.num_inserted_rows,
self.num_deleted_rows
self.num_deleted_rows,
self.num_attempts
)
}
}
@@ -156,6 +158,7 @@ impl From<lancedb::table::MergeResult> for MergeResult {
num_updated_rows: result.num_updated_rows,
num_inserted_rows: result.num_inserted_rows,
num_deleted_rows: result.num_deleted_rows,
num_attempts: result.num_attempts,
}
}
}

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.22.4-beta.0"
version = "0.22.4-beta.1"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -9,11 +9,11 @@ all-tests: feature-tests remote-tests
# the environment.
feature-tests:
../../ci/run_with_docker_compose.sh \
cargo test --all-features --tests --locked --examples
cargo test --all-features --tests --locked --examples $(CARGO_ARGS)
.PHONY: feature-tests
# Run tests against remote endpoints.
remote-tests:
../../ci/run_with_test_connection.sh \
cargo test --features remote --locked
cargo test --features remote --locked $(CARGO_ARGS)
.PHONY: remote-tests

View File

@@ -1183,6 +1183,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
num_deleted_rows: 0,
num_inserted_rows: 0,
num_updated_rows: 0,
num_attempts: 0,
});
}

View File

@@ -467,6 +467,11 @@ pub struct MergeResult {
/// However those rows are not shared with the user.
#[serde(default)]
pub num_deleted_rows: u64,
/// Number of attempts performed during the merge operation.
/// This includes the initial attempt plus any retries due to transaction conflicts.
/// A value of 1 means the operation succeeded on the first try.
#[serde(default)]
pub num_attempts: u32,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
@@ -1810,8 +1815,17 @@ impl NativeTable {
}
// Helper to get num_sub_vectors with default calculation
fn get_num_sub_vectors(provided: Option<u32>, dim: u32) -> u32 {
provided.unwrap_or_else(|| suggested_num_sub_vectors(dim))
fn get_num_sub_vectors(provided: Option<u32>, dim: u32, num_bits: Option<u32>) -> u32 {
if let Some(provided) = provided {
return provided;
}
let suggested = suggested_num_sub_vectors(dim);
if num_bits.is_some_and(|num_bits| num_bits == 4) && suggested % 2 != 0 {
// num_sub_vectors must be even when 4 bits are used
suggested + 1
} else {
suggested
}
}
// Helper to extract vector dimension from field
@@ -1834,7 +1848,7 @@ impl NativeTable {
// Use IvfPq as the default for auto vector indices
let dim = Self::get_vector_dimension(field)?;
let ivf_params = lance_index::vector::ivf::IvfBuildParams::default();
let num_sub_vectors = Self::get_num_sub_vectors(None, dim);
let num_sub_vectors = Self::get_num_sub_vectors(None, dim, None);
let pq_params =
lance_index::vector::pq::PQBuildParams::new(num_sub_vectors as usize, 8);
let lance_idx_params =
@@ -1901,7 +1915,8 @@ impl NativeTable {
index.sample_rate,
index.max_iterations,
);
let num_sub_vectors = Self::get_num_sub_vectors(index.num_sub_vectors, dim);
let num_sub_vectors =
Self::get_num_sub_vectors(index.num_sub_vectors, dim, index.num_bits);
let num_bits = index.num_bits.unwrap_or(8) as usize;
let mut pq_params = PQBuildParams::new(num_sub_vectors as usize, num_bits);
pq_params.max_iters = index.max_iterations as usize;
@@ -1937,7 +1952,8 @@ impl NativeTable {
index.sample_rate,
index.max_iterations,
);
let num_sub_vectors = Self::get_num_sub_vectors(index.num_sub_vectors, dim);
let num_sub_vectors =
Self::get_num_sub_vectors(index.num_sub_vectors, dim, index.num_bits);
let hnsw_params = HnswBuildParams::default()
.num_edges(index.m as usize)
.ef_construction(index.ef_construction as usize);
@@ -2520,6 +2536,7 @@ impl BaseTable for NativeTable {
num_updated_rows: stats.num_updated_rows,
num_inserted_rows: stats.num_inserted_rows,
num_deleted_rows: stats.num_deleted_rows,
num_attempts: stats.num_attempts,
})
}
@@ -2979,9 +2996,13 @@ mod tests {
// Perform a "insert if not exists"
let mut merge_insert_builder = table.merge_insert(&["i"]);
merge_insert_builder.when_not_matched_insert_all();
merge_insert_builder.execute(new_batches).await.unwrap();
let result = merge_insert_builder.execute(new_batches).await.unwrap();
// Only 5 rows should actually be inserted
assert_eq!(table.count_rows(None).await.unwrap(), 15);
assert_eq!(result.num_inserted_rows, 5);
assert_eq!(result.num_updated_rows, 0);
assert_eq!(result.num_deleted_rows, 0);
assert_eq!(result.num_attempts, 1);
// Create new data with i=15..25 (no id matches)
let new_batches = Box::new(merge_insert_test_batches(15, 2));
@@ -4122,6 +4143,8 @@ mod tests {
table.prewarm_index("text_idx").await.unwrap();
}
// Windows does not support precise sleep durations due to timer resolution limitations.
#[cfg(not(target_os = "windows"))]
#[tokio::test]
async fn test_read_consistency_interval() {
let intervals = vec![