Compare commits

...

21 Commits

Author SHA1 Message Date
Lei Xu
b57faf9835 rename gha task 2026-01-29 16:26:28 -08:00
Lei Xu
4800f31479 fixlint 2026-01-29 16:21:23 -08:00
Lei Xu
ec464ad01e remove pydantic 1 support 2026-01-29 16:18:56 -08:00
Weston Pace
9be28448f5 fix: don't store all columns in the permutation table (#2957)
The permutation table was always intended to be a small table of row id
pointers (and split id). However, it was accidentally doing a full
materialization of the base table 🤦

This PR changes the permutation builder to only store row id and split
id.
2026-01-29 16:06:36 -08:00
Lei Xu
357197bacc chore!: change support python version from 3.10 to 3.13 (#2955)
Python 3.9 is EOL since Oct 2025. and last two pyarrow builts were
against python3.10-3.13.

* This PR is contributed by codex-gpt5.2
2026-01-30 01:47:50 +08:00
Lei Xu
ad51e2dd1f fix: support pydantic list of structs or optional struct (#2953)
Closes #2950

*This code is generated by codex-gpt5.2*
2026-01-28 21:08:18 -08:00
Weston Pace
e9e904783c feat: allow the permutation builder memory limit to be configured by env var (#2946)
Running into issues with DF sorting again. This will at least allow the
memory limit to be set large to bypass problems.
2026-01-28 09:02:59 +05:30
Lance Release
8500b16eca Bump version: 0.24.1-beta.0 → 0.24.1 2026-01-26 23:39:18 +00:00
Lance Release
57e7282342 Bump version: 0.24.0 → 0.24.1-beta.0 2026-01-26 23:38:50 +00:00
Lance Release
cc5f8070d7 Bump version: 0.27.1-beta.0 → 0.27.1 2026-01-26 23:38:24 +00:00
Lance Release
dc0fb01f6b Bump version: 0.27.0 → 0.27.1-beta.0 2026-01-26 23:38:23 +00:00
LanceDB Robot
94b7781551 feat: update lance dependency to v1.0.4 (#2944)
## Summary
- bump Lance dependencies to v1.0.4
- run `cargo clippy --workspace --tests --all-features -- -D warnings`
- run `cargo fmt --all`

## Testing
- `cargo clippy --workspace --tests --all-features -- -D warnings`

## Reference
- https://github.com/lance-format/lance/releases/tag/v1.0.4
2026-01-26 15:37:28 -08:00
Jack Ye
7bf020b3d5 chore: fix clippy when remote flag is not set (#2943)
Also add a step in CI to ensure this does not happen in the future
2026-01-26 13:59:31 -08:00
LanceDB Robot
12a98479dc chore: update lance dependency to v1.0.4-rc.1 (#2942)
## Summary
- bump Lance dependencies to v1.0.4-rc.1
- verified `cargo clippy --workspace --tests --all-features -- -D
warnings`
- ran `cargo fmt --all`

## References
- https://github.com/lance-format/lance/releases/tag/v1.0.4-rc.1
2026-01-26 12:17:22 -08:00
Jack Ye
e4552e577a chore(revert): revert update lance dependency to v2.0.0-rc.1 (#2936) (#2941)
This reverts commit bd84bba14d, so that we
can bump version to 1.0.4-rc.1
2026-01-26 11:13:59 -08:00
Will Jones
f979a902ad ci(rust): fix MSRV check (#2940)
Realized our MSRV check was inert because `rust-toolchain.toml` was
overriding the Rust version. We set the `RUSTUP_TOOLCHAIN` environment
variable, which overrides that.

Also needed to update to MSRV 1.88 (due to dependencies like Lance and
DataFusion) and fix some clippy warnings.
2026-01-23 15:57:09 -08:00
Colin Patrick McCabe
5a7a8da567 feat: check AZURE_STORAGE_ACCOUNT_NAME in remote conns (#2918)
Unlike in Amazon S3, in Azure bucket names are not globally unique.
Instead, the combination of (storage_account_name, bucket_name) is
unique.

Therefore, when using Azure blob store, we always need a way to
configure the storage account name. One way is to use the
storage_options hash map and set azure_storage_account_name. Another way
is to set an environment variable, AZURE_STORAGE_ACCOUNT_NAME.

Prior to this PR, the second way (environment variable) did not work
with remote connections. This is because the existing code that checks
for these environment variables happens inside the Azure object store
implementation itself, which does not run locally when using remote
connections.

This PR addresses that situation by adding a check of the environment
variable. This functions as a default if the relevant storage option is
not set in the storage_options hash map.
2026-01-22 13:36:05 -08:00
Jack Ye
0db8176445 test: fix failing remote doctest reference to aws feature (#2935)
Closes https://github.com/lancedb/lancedb/issues/2933
2026-01-22 13:17:03 -08:00
LanceDB Robot
bd84bba14d chore: update lance dependency to v2.0.0-rc.1 (#2936)
## Summary
- bump Lance dependencies to v2.0.0-rc.1 (git tag)
- align Arrow/DataFusion/PyO3 versions for the new Lance release
- update Python bindings for PyO3 0.26 (attach API + Py<PyAny>)

## Verification
- `cargo clippy --workspace --tests --all-features -- -D warnings`
- `cargo fmt --all`

## Reference
- https://github.com/lance-format/lance/releases/tag/v2.0.0-rc.1

---------

Co-authored-by: Jack Ye <yezhaoqin@gmail.com>
Co-authored-by: Will Jones <willjones127@gmail.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: BubbleCal <bubble_cal@outlook.com>
2026-01-22 13:14:38 -08:00
Lance Release
ac07f8068c Bump version: 0.24.0-beta.1 → 0.24.0 2026-01-22 01:10:15 +00:00
Lance Release
bba362d372 Bump version: 0.24.0-beta.0 → 0.24.0-beta.1 2026-01-22 01:09:53 +00:00
39 changed files with 518 additions and 234 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.24.0-beta.0"
current_version = "0.24.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -3,7 +3,7 @@ name: build-linux-wheel
description: "Build a manylinux wheel for lance"
inputs:
python-minor-version:
description: "8, 9, 10, 11, 12"
description: "10, 11, 12, 13"
required: true
args:
description: "--release"

View File

@@ -3,7 +3,7 @@ name: build_wheel
description: "Build a lance wheel"
inputs:
python-minor-version:
description: "8, 9, 10, 11"
description: "10, 11, 12, 13"
required: true
args:
description: "--release"

View File

@@ -3,7 +3,7 @@ name: build_wheel
description: "Build a lance wheel"
inputs:
python-minor-version:
description: "8, 9, 10, 11"
description: "10, 11, 12, 13, 14"
required: true
args:
description: "--release"

View File

@@ -41,7 +41,7 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev
rustup update && rustup default
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: "3.10"
cache: "pip"

View File

@@ -44,12 +44,12 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v6
with:
python-version: 3.8
python-version: "3.10"
- uses: ./.github/workflows/build_linux_wheel
with:
python-minor-version: 8
python-minor-version: 10
args: "--release --strip ${{ matrix.config.extra_args }}"
arm-build: ${{ matrix.config.platform == 'aarch64' }}
manylinux: ${{ matrix.config.manylinux }}
@@ -74,12 +74,12 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v6
with:
python-version: 3.12
python-version: "3.13"
- uses: ./.github/workflows/build_mac_wheel
with:
python-minor-version: 8
python-minor-version: 10
args: "--release --strip --target ${{ matrix.config.target }} --features fp16kernels"
- uses: ./.github/workflows/upload_wheel
if: startsWith(github.ref, 'refs/tags/python-v')
@@ -95,12 +95,12 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v6
with:
python-version: 3.12
python-version: "3.13"
- uses: ./.github/workflows/build_windows_wheel
with:
python-minor-version: 8
python-minor-version: 10
args: "--release --strip"
vcpkg_token: ${{ secrets.VCPKG_GITHUB_PACKAGES }}
- uses: ./.github/workflows/upload_wheel

View File

@@ -25,7 +25,7 @@ jobs:
lint:
name: "Lint"
timeout-minutes: 30
runs-on: "ubuntu-22.04"
runs-on: "ubuntu-24.04"
defaults:
run:
shell: bash
@@ -36,9 +36,9 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: "3.12"
python-version: "3.13"
- name: Install ruff
run: |
pip install ruff==0.9.9
@@ -61,9 +61,9 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: "3.12"
python-version: "3.13"
- name: Install protobuf compiler
run: |
sudo apt update
@@ -90,9 +90,9 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: "3.12"
python-version: "3.13"
cache: "pip"
- name: Install protobuf
run: |
@@ -110,7 +110,7 @@ jobs:
timeout-minutes: 30
strategy:
matrix:
python-minor-version: ["9", "12"]
python-minor-version: ["10", "13"]
runs-on: "ubuntu-24.04"
defaults:
run:
@@ -126,7 +126,7 @@ jobs:
sudo apt update
sudo apt install -y protobuf-compiler
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: 3.${{ matrix.python-minor-version }}
- uses: ./.github/workflows/build_linux_wheel
@@ -156,9 +156,9 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: "3.12"
python-version: "3.13"
- uses: ./.github/workflows/build_mac_wheel
with:
args: --profile ci
@@ -185,9 +185,9 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: "3.12"
python-version: "3.13"
- uses: ./.github/workflows/build_windows_wheel
with:
args: --profile ci
@@ -195,7 +195,7 @@ jobs:
# Make sure wheels are not included in the Rust cache
- name: Delete wheels
run: rm -rf target/wheels
pydantic1x:
min-deps:
timeout-minutes: 30
runs-on: "ubuntu-24.04"
defaults:
@@ -212,12 +212,11 @@ jobs:
sudo apt update
sudo apt install -y protobuf-compiler
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: 3.9
python-version: "3.10"
- name: Install lancedb
run: |
pip install "pydantic<2"
pip install pyarrow==16
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
pip install tantivy

View File

@@ -48,6 +48,8 @@ jobs:
run: cargo fmt --all -- --check
- name: Run clippy
run: cargo clippy --profile ci --workspace --tests --all-features -- -D warnings
- name: Run clippy (without remote feature)
run: cargo clippy --profile ci --workspace --tests -- -D warnings
build-no-lock:
runs-on: ubuntu-24.04
@@ -181,7 +183,7 @@ jobs:
runs-on: ubuntu-24.04
strategy:
matrix:
msrv: ["1.78.0"] # This should match up with rust-version in Cargo.toml
msrv: ["1.88.0"] # This should match up with rust-version in Cargo.toml
env:
# Need up-to-date compilers for kernels
CC: clang-18
@@ -212,4 +214,6 @@ jobs:
cargo update -p aws-sdk-sts --precise 1.51.0
cargo update -p home --precise 0.5.9
- name: cargo +${{ matrix.msrv }} check
env:
RUSTUP_TOOLCHAIN: ${{ matrix.msrv }}
run: cargo check --profile ci --workspace --tests --benches --all-features

76
Cargo.lock generated
View File

@@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "adler2"
@@ -3141,8 +3141,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4478,8 +4478,8 @@ dependencies = [
[[package]]
name = "lance"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow",
"arrow-arith",
@@ -4544,8 +4544,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4563,8 +4563,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrayref",
"paste",
@@ -4573,8 +4573,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4610,8 +4610,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow",
"arrow-array",
@@ -4641,8 +4641,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow",
"arrow-array",
@@ -4659,8 +4659,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4697,8 +4697,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4730,8 +4730,8 @@ dependencies = [
[[package]]
name = "lance-geo"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"datafusion",
"geo-types",
@@ -4742,8 +4742,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow",
"arrow-arith",
@@ -4804,8 +4804,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow",
"arrow-arith",
@@ -4845,8 +4845,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4862,8 +4862,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow",
"async-trait",
@@ -4875,8 +4875,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4919,8 +4919,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow",
"arrow-array",
@@ -4959,8 +4959,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "1.0.3"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.3#dd66ca47101ccd51e59677f5385158f05cabd6f9"
version = "1.0.4"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.4#a93eaad1f6909a843cf8aa00d5530359012a7aaa"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4971,7 +4971,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.24.0-beta.0"
version = "0.24.1"
dependencies = [
"ahash",
"anyhow",
@@ -5050,7 +5050,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.24.0-beta.0"
version = "0.24.1"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -5070,7 +5070,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.27.0-beta.0"
version = "0.27.1"
dependencies = [
"arrow",
"async-trait",

View File

@@ -12,23 +12,23 @@ repository = "https://github.com/lancedb/lancedb"
description = "Serverless, low-latency vector database for AI applications"
keywords = ["lancedb", "lance", "database", "vector", "search"]
categories = ["database-implementations"]
rust-version = "1.78.0"
rust-version = "1.88.0"
[workspace.dependencies]
lance = { "version" = "=1.0.3", default-features = false, "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=1.0.3", default-features = false, "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=1.0.3", default-features = false, "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=1.0.3", "tag" = "v1.0.3", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "56.2", optional = false }

View File

@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId>
<version>0.24.0-beta.0</version>
<version>0.24.1</version>
</dependency>
```

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.24.0-beta.0</version>
<version>0.24.1-final.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.24.0-beta.0</version>
<version>0.24.1-final.0</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.24.0-beta.0"
version = "0.24.1"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.24.0-beta.0",
"version": "0.24.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.24.0-beta.0",
"version": "0.24.1",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.24.0-beta.0",
"version": "0.24.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.24.0-beta.0",
"version": "0.24.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.24.0-beta.0",
"version": "0.24.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.24.0-beta.0",
"version": "0.24.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.24.0-beta.0",
"version": "0.24.1",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.24.0-beta.0",
"version": "0.24.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.24.0-beta.0",
"version": "0.24.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.24.0-beta.0",
"version": "0.24.1",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.24.0-beta.0",
"version": "0.24.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.27.0"
current_version = "0.27.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -16,7 +16,7 @@ The Python package is a wrapper around the Rust library, `lancedb`. We use
To set up your development environment, you will need to install the following:
1. Python 3.9 or later
1. Python 3.10 or later
2. Cargo (Rust's package manager). Use [rustup](https://rustup.rs/) to install.
3. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)

View File

@@ -1,13 +1,13 @@
[package]
name = "lancedb-python"
version = "0.27.0"
version = "0.27.1"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true
repository.workspace = true
keywords.workspace = true
categories.workspace = true
rust-version = "1.75.0"
rust-version = "1.88.0"
[lib]
name = "_lancedb"
@@ -21,7 +21,7 @@ lance-core.workspace = true
lance-namespace.workspace = true
lance-io.workspace = true
env_logger.workspace = true
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py310"] }
pyo3-async-runtimes = { version = "0.25", features = [
"attributes",
"tokio-runtime",
@@ -34,7 +34,7 @@ tokio = { version = "1.40", features = ["sync"] }
[build-dependencies]
pyo3-build-config = { version = "0.25", features = [
"extension-module",
"abi3-py39",
"abi3-py310",
] }
[features]

View File

@@ -8,7 +8,7 @@ dependencies = [
"overrides>=0.7; python_version<'3.12'",
"packaging",
"pyarrow>=16",
"pydantic>=1.10",
"pydantic>=2",
"tqdm>=4.27.0",
"lance-namespace>=0.3.2"
]
@@ -16,7 +16,7 @@ description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
license = { file = "LICENSE" }
readme = "README.md"
requires-python = ">=3.9"
requires-python = ">=3.10"
keywords = [
"data-format",
"data-science",
@@ -33,10 +33,10 @@ classifiers = [
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Scientific/Engineering",
]
@@ -137,4 +137,4 @@ include = [
"python/lancedb/_lancedb.pyi",
]
exclude = ["python/tests/"]
pythonVersion = "3.12"
pythonVersion = "3.13"

View File

@@ -6,7 +6,6 @@
from __future__ import annotations
import inspect
import sys
import types
from abc import ABC, abstractmethod
from datetime import date, datetime
@@ -141,14 +140,6 @@ def Vector(
raise TypeError("A list of numbers or numpy.ndarray is needed")
return cls(v)
if PYDANTIC_VERSION.major < 2:
@classmethod
def __modify_schema__(cls, field_schema: Dict[str, Any]):
field_schema["items"] = {"type": "number"}
field_schema["maxItems"] = dim
field_schema["minItems"] = dim
return FixedSizeList
@@ -226,26 +217,14 @@ def MultiVector(
def __get_validators__(cls) -> Generator[Callable, None, None]:
yield cls.validate
# For pydantic v1
@classmethod
def validate(cls, v):
if not isinstance(v, (list, range)):
raise TypeError("A list of vectors is needed")
for vec in v:
if not isinstance(vec, (list, range, np.ndarray)) or len(vec) != dim:
raise TypeError(f"Each vector must be a list of {dim} numbers")
return cls(v)
if PYDANTIC_VERSION.major < 2:
@classmethod
def __modify_schema__(cls, field_schema: Dict[str, Any]):
field_schema["items"] = {
"type": "array",
"items": {"type": "number"},
"minItems": dim,
"maxItems": dim,
}
def __modify_schema__(cls, field_schema: Dict[str, Any]):
field_schema["items"] = {
"type": "array",
"items": {"type": "number"},
"minItems": dim,
"maxItems": dim,
}
return MultiVectorList
@@ -275,35 +254,31 @@ def _py_type_to_arrow_type(py_type: Type[Any], field: FieldInfo) -> pa.DataType:
return pa.timestamp("us", tz=tz)
elif getattr(py_type, "__origin__", None) in (list, tuple):
child = py_type.__args__[0]
return pa.list_(_py_type_to_arrow_type(child, field))
return _pydantic_list_child_to_arrow(child, field)
raise TypeError(
f"Converting Pydantic type to Arrow Type: unsupported type {py_type}."
)
if PYDANTIC_VERSION.major < 2:
def _pydantic_model_to_fields(model: pydantic.BaseModel) -> List[pa.Field]:
return [
_pydantic_to_field(name, field) for name, field in model.__fields__.items()
]
else:
def _pydantic_model_to_fields(model: pydantic.BaseModel) -> List[pa.Field]:
return [
_pydantic_to_field(name, field)
for name, field in model.model_fields.items()
]
def _pydantic_model_to_fields(model: pydantic.BaseModel) -> List[pa.Field]:
return [
_pydantic_to_field(name, field) for name, field in model.model_fields.items()
]
def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
def _safe_issubclass(candidate: Any, base: type) -> bool:
try:
return issubclass(candidate, base)
except TypeError:
return False
if inspect.isclass(tp):
if issubclass(tp, pydantic.BaseModel):
if _safe_issubclass(tp, pydantic.BaseModel):
# Struct
fields = _pydantic_model_to_fields(tp)
return pa.struct(fields)
if issubclass(tp, FixedSizeListMixin):
if _safe_issubclass(tp, FixedSizeListMixin):
if getattr(tp, "is_multi_vector", lambda: False)():
return pa.list_(pa.list_(tp.value_arrow_type(), tp.dim()))
# For regular Vector
@@ -311,45 +286,67 @@ def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
return _py_type_to_arrow_type(tp, field)
def _pydantic_list_child_to_arrow(child: Any, field: FieldInfo) -> pa.DataType:
unwrapped = _unwrap_optional_annotation(child)
if unwrapped is not None:
return pa.list_(
pa.field("item", _pydantic_type_to_arrow_type(unwrapped, field), True)
)
return pa.list_(_pydantic_type_to_arrow_type(child, field))
def _unwrap_optional_annotation(annotation: Any) -> Any | None:
if isinstance(annotation, (_GenericAlias, GenericAlias)):
origin = annotation.__origin__
args = annotation.__args__
if origin == Union:
non_none = [arg for arg in args if arg is not type(None)]
if len(non_none) == 1 and len(non_none) != len(args):
return non_none[0]
elif isinstance(annotation, types.UnionType):
args = annotation.__args__
non_none = [arg for arg in args if arg is not type(None)]
if len(non_none) == 1 and len(non_none) != len(args):
return non_none[0]
return None
def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
"""Convert a Pydantic FieldInfo to Arrow DataType"""
unwrapped = _unwrap_optional_annotation(field.annotation)
if unwrapped is not None:
return _pydantic_type_to_arrow_type(unwrapped, field)
if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
origin = field.annotation.__origin__
args = field.annotation.__args__
if origin is list:
child = args[0]
return pa.list_(_py_type_to_arrow_type(child, field))
elif origin == Union:
if len(args) == 2 and args[1] is type(None):
return _pydantic_type_to_arrow_type(args[0], field)
elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType):
args = field.annotation.__args__
if len(args) == 2:
for typ in args:
if typ is type(None):
continue
return _py_type_to_arrow_type(typ, field)
return _pydantic_list_child_to_arrow(child, field)
return _pydantic_type_to_arrow_type(field.annotation, field)
def is_nullable(field: FieldInfo) -> bool:
"""Check if a Pydantic FieldInfo is nullable."""
if _unwrap_optional_annotation(field.annotation) is not None:
return True
if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
origin = field.annotation.__origin__
args = field.annotation.__args__
if origin == Union:
if len(args) == 2 and args[1] is type(None):
if any(typ is type(None) for typ in args):
return True
elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType):
elif isinstance(field.annotation, types.UnionType):
args = field.annotation.__args__
for typ in args:
if typ is type(None):
return True
elif inspect.isclass(field.annotation) and issubclass(
field.annotation, FixedSizeListMixin
):
return field.annotation.nullable()
elif inspect.isclass(field.annotation):
try:
if issubclass(field.annotation, FixedSizeListMixin):
return field.annotation.nullable()
except TypeError:
return False
return False
@@ -446,8 +443,6 @@ class LanceModel(pydantic.BaseModel):
@classmethod
def safe_get_fields(cls):
if PYDANTIC_VERSION.major < 2:
return cls.__fields__
return cls.model_fields
@classmethod
@@ -490,18 +485,8 @@ def get_extras(field_info: FieldInfo, key: str) -> Any:
return (field_info.field_info.extra or {}).get("json_schema_extra", {}).get(key)
if PYDANTIC_VERSION.major < 2:
def model_to_dict(model: pydantic.BaseModel) -> Dict[str, Any]:
"""
Convert a Pydantic model to a dictionary.
"""
return model.dict()
else:
def model_to_dict(model: pydantic.BaseModel) -> Dict[str, Any]:
"""
Convert a Pydantic model to a dictionary.
"""
return model.model_dump()
def model_to_dict(model: pydantic.BaseModel) -> Dict[str, Any]:
"""
Convert a Pydantic model to a dictionary.
"""
return model.model_dump()

View File

@@ -2,7 +2,6 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import json
import sys
from datetime import date, datetime
from typing import List, Optional, Tuple
@@ -20,10 +19,6 @@ from pydantic import BaseModel
from pydantic import Field
@pytest.mark.skipif(
sys.version_info < (3, 9),
reason="using native type alias requires python3.9 or higher",
)
def test_pydantic_to_arrow():
class StructModel(pydantic.BaseModel):
a: str
@@ -83,10 +78,6 @@ def test_pydantic_to_arrow():
assert schema == expect_schema
@pytest.mark.skipif(
sys.version_info < (3, 10),
reason="using | type syntax requires python3.10 or higher",
)
def test_optional_types_py310():
class TestModel(pydantic.BaseModel):
a: str | None
@@ -105,10 +96,233 @@ def test_optional_types_py310():
assert schema == expect_schema
@pytest.mark.skipif(
sys.version_info > (3, 8),
reason="using native type alias requires python3.9 or higher",
)
def test_optional_structs():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
split: SplitInfo | None = None
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"split",
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
),
True,
),
]
)
assert schema == expect_schema
def test_optional_struct_list_py310():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
splits: list[SplitInfo] | None = None
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"splits",
pa.list_(
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
)
),
True,
),
]
)
assert schema == expect_schema
def test_nested_struct_list():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
splits: list[SplitInfo]
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"splits",
pa.list_(
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
)
),
False,
),
]
)
assert schema == expect_schema
def test_nested_struct_list_optional():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
splits: Optional[list[SplitInfo]] = None
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"splits",
pa.list_(
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
)
),
True,
),
]
)
assert schema == expect_schema
def test_nested_struct_list_optional_items():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
splits: list[Optional[SplitInfo]]
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"splits",
pa.list_(
pa.field(
"item",
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
),
True,
)
),
False,
),
]
)
assert schema == expect_schema
def test_nested_struct_list_optional_container_and_items():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
splits: Optional[list[Optional[SplitInfo]]] = None
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"splits",
pa.list_(
pa.field(
"item",
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
),
True,
)
),
True,
),
]
)
assert schema == expect_schema
def test_nested_struct_list_optional_items_pep604():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
splits: list[SplitInfo | None]
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"splits",
pa.list_(
pa.field(
"item",
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
),
True,
)
),
False,
),
]
)
assert schema == expect_schema
def test_pydantic_to_arrow_py38():
class StructModel(pydantic.BaseModel):
a: str

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.24.0-beta.0"
version = "0.24.1"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -892,6 +892,10 @@ pub struct ConnectBuilder {
embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
}
#[cfg(feature = "remote")]
const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 1] =
[("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name")];
impl ConnectBuilder {
/// Create a new [`ConnectOptions`] with the given database URI.
pub fn new(uri: &str) -> Self {
@@ -1075,11 +1079,27 @@ impl ConnectBuilder {
self
}
#[cfg(feature = "remote")]
fn apply_env_defaults(
env_var_to_remote_storage_option: &[(&str, &str)],
options: &mut HashMap<String, String>,
) {
for (env_key, opt_key) in env_var_to_remote_storage_option {
if let Ok(env_value) = std::env::var(env_key) {
if !options.contains_key(*opt_key) {
options.insert((*opt_key).to_string(), env_value);
}
}
}
}
#[cfg(feature = "remote")]
fn execute_remote(self) -> Result<Connection> {
use crate::remote::db::RemoteDatabaseOptions;
let options = RemoteDatabaseOptions::parse_from_map(&self.request.options)?;
let mut merged_options = self.request.options.clone();
Self::apply_env_defaults(&ENV_VARS_TO_STORAGE_OPTS, &mut merged_options);
let options = RemoteDatabaseOptions::parse_from_map(&merged_options)?;
let region = options.region.ok_or_else(|| Error::InvalidInput {
message: "A region is required when connecting to LanceDb Cloud".to_string(),
@@ -1324,6 +1344,23 @@ mod tests {
assert_eq!(tc.connection.uri(), tc.uri);
}
#[cfg(feature = "remote")]
#[test]
fn test_apply_env_defaults() {
let env_key = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_KEY";
let env_val = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_VAL";
let opts_key = "test_apply_env_defaults_environment_variable_opts_key";
std::env::set_var(env_key, env_val);
let mut options = HashMap::new();
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
assert_eq!(Some(&env_val.to_string()), options.get(opts_key));
options.insert(opts_key.to_string(), "EXPLICIT-VALUE".to_string());
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
assert_eq!(Some(&"EXPLICIT-VALUE".to_string()), options.get(opts_key));
}
#[cfg(not(windows))]
#[tokio::test]
async fn test_connect_relative() {

View File

@@ -19,7 +19,7 @@ use crate::{
split::{SplitStrategy, Splitter, SPLIT_ID_COLUMN},
util::{rename_column, TemporaryDirectory},
},
query::{ExecutableQuery, QueryBase},
query::{ExecutableQuery, QueryBase, Select},
Error, Result, Table,
};
@@ -27,6 +27,8 @@ pub const SRC_ROW_ID_COL: &str = "row_id";
pub const SPLIT_NAMES_CONFIG_KEY: &str = "split_names";
pub const DEFAULT_MEMORY_LIMIT: usize = 100 * 1024 * 1024;
/// Where to store the permutation table
#[derive(Debug, Clone, Default)]
enum PermutationDestination {
@@ -167,10 +169,20 @@ impl PermutationBuilder {
&self,
data: SendableRecordBatchStream,
) -> Result<SendableRecordBatchStream> {
let memory_limit = std::env::var("LANCEDB_PERM_BUILDER_MEMORY_LIMIT")
.unwrap_or_else(|_| DEFAULT_MEMORY_LIMIT.to_string())
.parse::<usize>()
.unwrap_or_else(|_| {
log::error!(
"Failed to parse LANCEDB_PERM_BUILDER_MEMORY_LIMIT, using default: {}",
DEFAULT_MEMORY_LIMIT
);
DEFAULT_MEMORY_LIMIT
});
let ctx = SessionContext::new_with_config_rt(
SessionConfig::default(),
RuntimeEnvBuilder::new()
.with_memory_limit(100 * 1024 * 1024, 1.0)
.with_memory_limit(memory_limit, 1.0)
.with_disk_manager_builder(
DiskManagerBuilder::default()
.with_mode(self.config.temp_dir.to_disk_manager_mode()),
@@ -232,7 +244,7 @@ impl PermutationBuilder {
/// Builds the permutation table and stores it in the given database.
pub async fn build(self) -> Result<Table> {
// First pass, apply filter and load row ids
let mut rows = self.base_table.query().with_row_id();
let mut rows = self.base_table.query().select(Select::columns(&[ROW_ID]));
if let Some(filter) = &self.config.filter {
rows = rows.only_if(filter);
@@ -321,6 +333,47 @@ mod tests {
use super::*;
#[tokio::test]
async fn test_permutation_table_only_stores_row_id_and_split_id() {
let temp_dir = tempfile::tempdir().unwrap();
let db = connect(temp_dir.path().to_str().unwrap())
.execute()
.await
.unwrap();
let initial_data = lance_datagen::gen_batch()
.col("col_a", lance_datagen::array::step::<Int32Type>())
.col("col_b", lance_datagen::array::step::<Int32Type>())
.into_ldb_stream(RowCount::from(100), BatchCount::from(10));
let data_table = db
.create_table_streaming("base_tbl", initial_data)
.execute()
.await
.unwrap();
let permutation_table = PermutationBuilder::new(data_table.clone())
.with_split_strategy(
SplitStrategy::Sequential {
sizes: SplitSizes::Percentages(vec![0.5, 0.5]),
},
None,
)
.with_filter("col_a > 57".to_string())
.build()
.await
.unwrap();
let schema = permutation_table.schema().await.unwrap();
let field_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect();
assert_eq!(
field_names,
vec!["row_id", "split_id"],
"Permutation table should only contain row_id and split_id columns, but found: {:?}",
field_names,
);
}
#[tokio::test]
async fn test_permutation_builder() {
let temp_dir = tempfile::tempdir().unwrap();
@@ -352,8 +405,6 @@ mod tests {
.await
.unwrap();
println!("permutation_table: {:?}", permutation_table);
// Potentially brittle seed-dependent values below
assert_eq!(permutation_table.count_rows(None).await.unwrap(), 330);
assert_eq!(

View File

@@ -171,7 +171,7 @@ impl Shuffler {
// This is kind of an annoying limitation but if we allow runt clumps from batches then
// clumps will get unaligned and we will mess up the clumps when we do the in-memory
// shuffle step. If this is a problem we can probably figure out a better way to do this.
if !is_last && batch.num_rows() as u64 % clump_size != 0 {
if !is_last && !(batch.num_rows() as u64).is_multiple_of(clump_size) {
return Err(Error::Runtime {
message: format!(
"Expected batch size ({}) to be divisible by clump size ({})",

View File

@@ -1,12 +1,9 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::{
iter,
sync::{
atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
Arc,
},
use std::sync::{
atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
Arc,
};
use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array};
@@ -158,7 +155,7 @@ impl Splitter {
remaining_in_split
};
split_ids.extend(iter::repeat(split_id as u64).take(rows_to_add as usize));
split_ids.extend(std::iter::repeat_n(split_id as u64, rows_to_add as usize));
if done {
// Quit early if we've run out of splits
break;
@@ -662,7 +659,7 @@ mod tests {
assert_eq!(split_batch.num_rows(), total_split_sizes as usize);
let mut expected = Vec::with_capacity(total_split_sizes as usize);
for (i, size) in expected_split_sizes.iter().enumerate() {
expected.extend(iter::repeat(i as u64).take(*size as usize));
expected.extend(std::iter::repeat_n(i as u64, *size as usize));
}
let expected = Arc::new(UInt64Array::from(expected)) as Arc<dyn Array>;

View File

@@ -297,10 +297,10 @@ impl IvfPqIndexBuilder {
}
pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
if dim % 16 == 0 {
if dim.is_multiple_of(16) {
// Should be more aggressive than this default.
dim / 16
} else if dim % 8 == 0 {
} else if dim.is_multiple_of(8) {
dim / 8
} else {
log::warn!(

View File

@@ -51,17 +51,15 @@
//! - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud object store
//! - `db://dbname` - Lance Cloud
//!
//! You can also use [`ConnectOptions`] to configure the connection to the database.
//! You can also use [`ConnectBuilder`] to configure the connection to the database.
//!
//! ```rust
//! use object_store::aws::AwsCredential;
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
//! let db = lancedb::connect("data/sample-lancedb")
//! .aws_creds(AwsCredential {
//! key_id: "some_key".to_string(),
//! secret_key: "some_secret".to_string(),
//! token: None,
//! })
//! .storage_options([
//! ("aws_access_key_id", "some_key"),
//! ("aws_secret_access_key", "some_secret"),
//! ])
//! .execute()
//! .await
//! .unwrap();

View File

@@ -2059,7 +2059,7 @@ impl NativeTable {
return provided;
}
let suggested = suggested_num_sub_vectors(dim);
if num_bits.is_some_and(|num_bits| num_bits == 4) && suggested % 2 != 0 {
if num_bits.is_some_and(|num_bits| num_bits == 4) && !suggested.is_multiple_of(2) {
// num_sub_vectors must be even when 4 bits are used
suggested + 1
} else {
@@ -3400,7 +3400,6 @@ pub struct FragmentSummaryStats {
#[cfg(test)]
#[allow(deprecated)]
mod tests {
use std::iter;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
@@ -4017,7 +4016,7 @@ mod tests {
schema.clone(),
vec![
Arc::new(Int32Array::from_iter_values(offset..(offset + 10))),
Arc::new(Int32Array::from_iter_values(iter::repeat(age).take(10))),
Arc::new(Int32Array::from_iter_values(std::iter::repeat_n(age, 10))),
],
)],
schema,

View File

@@ -4,7 +4,6 @@
use std::{
borrow::Cow,
collections::{HashMap, HashSet},
iter::repeat,
sync::Arc,
};
@@ -268,9 +267,10 @@ fn create_some_records() -> Result<impl IntoArrow> {
schema.clone(),
vec![
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
Arc::new(StringArray::from_iter(
repeat(Some("hello world".to_string())).take(TOTAL),
)),
Arc::new(StringArray::from_iter(std::iter::repeat_n(
Some("hello world".to_string()),
TOTAL,
))),
],
)
.unwrap()]