Compare commits

..

1 Commits

Author SHA1 Message Date
Lance Release
adc8abc203 Bump version: 0.21.2-beta.0 → 0.21.2-beta.1 2025-07-22 15:40:41 +00:00
66 changed files with 281 additions and 1071 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.21.2" current_version = "0.21.2-beta.1"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -5,8 +5,8 @@ on:
tags-ignore: tags-ignore:
# We don't publish pre-releases for Rust. Crates.io is just a source # We don't publish pre-releases for Rust. Crates.io is just a source
# distribution, so we don't need to publish pre-releases. # distribution, so we don't need to publish pre-releases.
- "v*-beta*" - 'v*-beta*'
- "*-v*" # for example, python-vX.Y.Z - '*-v*' # for example, python-vX.Y.Z
env: env:
# This env var is used by Swatinem/rust-cache@v2 for the cache # This env var is used by Swatinem/rust-cache@v2 for the cache
@@ -19,8 +19,6 @@ env:
jobs: jobs:
build: build:
runs-on: ubuntu-22.04 runs-on: ubuntu-22.04
permissions:
id-token: write
timeout-minutes: 30 timeout-minutes: 30
# Only runs on tags that match the make-release action # Only runs on tags that match the make-release action
if: startsWith(github.ref, 'refs/tags/v') if: startsWith(github.ref, 'refs/tags/v')
@@ -33,8 +31,6 @@ jobs:
run: | run: |
sudo apt update sudo apt update
sudo apt install -y protobuf-compiler libssl-dev sudo apt install -y protobuf-compiler libssl-dev
- uses: rust-lang/crates-io-auth-action@v1
id: auth
- name: Publish the package - name: Publish the package
run: | run: |
cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }} cargo publish -p lancedb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}

View File

@@ -1,24 +0,0 @@
LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
remote (against LanceDB Cloud).
The core of LanceDB is written in Rust. There are bindings in Python, TypeScript, and Java.
Project layout:
* `rust/lancedb`: The LanceDB core Rust implementation.
* `python`: The Python bindings, using PyO3.
* `nodejs`: The TypeScript bindings, using napi-rs.
* `java`: The Java bindings
(`rust/ffi` and `node/` are for a deprecated package. You can ignore them.)
Common commands:
* Check for compiler errors: `cargo check --features remote --tests --examples`
* Run tests: `cargo test --features remote --tests`
* Run specific test: `cargo test --features remote -p <package_name> --test <test_name>`
* Lint: `cargo clippy --features remote --tests --examples`
* Format: `cargo fmt --all`
Before committing changes, run formatting.

174
Cargo.lock generated
View File

@@ -1039,17 +1039,6 @@ dependencies = [
"tokio", "tokio",
] ]
[[package]]
name = "backon"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "302eaff5357a264a2c42f127ecb8bac761cf99749fc3dc95677e2743991f99e7"
dependencies = [
"fastrand",
"gloo-timers",
"tokio",
]
[[package]] [[package]]
name = "backtrace" name = "backtrace"
version = "0.3.75" version = "0.3.75"
@@ -2488,7 +2477,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [ dependencies = [
"block-buffer", "block-buffer",
"const-oid",
"crypto-common", "crypto-common",
"subtle", "subtle",
] ]
@@ -2852,10 +2840,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]] [[package]]
name = "fsst" name = "fsst"
version = "0.32.1" version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf" source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [ dependencies = [
"arrow-array",
"rand 0.8.5", "rand 0.8.5",
] ]
@@ -3269,18 +3256,6 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
[[package]]
name = "gloo-timers"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994"
dependencies = [
"futures-channel",
"futures-core",
"js-sys",
"wasm-bindgen",
]
[[package]] [[package]]
name = "group" name = "group"
version = "0.12.1" version = "0.12.1"
@@ -3817,17 +3792,6 @@ dependencies = [
"cfg-if", "cfg-if",
] ]
[[package]]
name = "io-uring"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
dependencies = [
"bitflags 2.9.1",
"cfg-if",
"libc",
]
[[package]] [[package]]
name = "ipnet" name = "ipnet"
version = "2.11.0" version = "2.11.0"
@@ -3966,8 +3930,8 @@ dependencies = [
[[package]] [[package]]
name = "lance" name = "lance"
version = "0.32.1" version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf" source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-arith", "arrow-arith",
@@ -4029,8 +3993,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-arrow" name = "lance-arrow"
version = "0.32.1" version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf" source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-buffer", "arrow-buffer",
@@ -4047,8 +4011,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-core" name = "lance-core"
version = "0.32.1" version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf" source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-buffer", "arrow-buffer",
@@ -4083,8 +4047,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-datafusion" name = "lance-datafusion"
version = "0.32.1" version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf" source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4112,8 +4076,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-datagen" name = "lance-datagen"
version = "0.32.1" version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf" source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4129,8 +4093,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-encoding" name = "lance-encoding"
version = "0.32.1" version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf" source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [ dependencies = [
"arrayref", "arrayref",
"arrow", "arrow",
@@ -4169,8 +4133,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-file" name = "lance-file"
version = "0.32.1" version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf" source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [ dependencies = [
"arrow-arith", "arrow-arith",
"arrow-array", "arrow-array",
@@ -4204,8 +4168,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-index" name = "lance-index"
version = "0.32.1" version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf" source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4239,6 +4203,7 @@ dependencies = [
"lance-linalg", "lance-linalg",
"lance-table", "lance-table",
"log", "log",
"moka",
"num-traits", "num-traits",
"object_store", "object_store",
"prost", "prost",
@@ -4258,8 +4223,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-io" name = "lance-io"
version = "0.32.1" version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf" source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-arith", "arrow-arith",
@@ -4283,8 +4248,6 @@ dependencies = [
"lance-core", "lance-core",
"log", "log",
"object_store", "object_store",
"object_store_opendal",
"opendal",
"path_abs", "path_abs",
"pin-project", "pin-project",
"prost", "prost",
@@ -4299,8 +4262,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-linalg" name = "lance-linalg"
version = "0.32.1" version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf" source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-buffer", "arrow-buffer",
@@ -4323,8 +4286,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-table" name = "lance-table"
version = "0.32.1" version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf" source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4362,8 +4325,8 @@ dependencies = [
[[package]] [[package]]
name = "lance-testing" name = "lance-testing"
version = "0.32.1" version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf" source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-schema", "arrow-schema",
@@ -4374,7 +4337,7 @@ dependencies = [
[[package]] [[package]]
name = "lancedb" name = "lancedb"
version = "0.21.2" version = "0.21.2-beta.0"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4461,7 +4424,7 @@ dependencies = [
[[package]] [[package]]
name = "lancedb-node" name = "lancedb-node"
version = "0.21.2" version = "0.21.2-beta.0"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-ipc", "arrow-ipc",
@@ -4486,7 +4449,7 @@ dependencies = [
[[package]] [[package]]
name = "lancedb-nodejs" name = "lancedb-nodejs"
version = "0.21.2" version = "0.21.2-beta.0"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-ipc", "arrow-ipc",
@@ -4506,7 +4469,7 @@ dependencies = [
[[package]] [[package]]
name = "lancedb-python" name = "lancedb-python"
version = "0.24.2" version = "0.24.2-beta.0"
dependencies = [ dependencies = [
"arrow", "arrow",
"env_logger", "env_logger",
@@ -5252,21 +5215,6 @@ dependencies = [
"web-time", "web-time",
] ]
[[package]]
name = "object_store_opendal"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ce697ee723fdc3eaf6c457abf4059034be15167022b18b619993802cd1443d5"
dependencies = [
"async-trait",
"bytes",
"futures",
"object_store",
"opendal",
"pin-project",
"tokio",
]
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.21.3" version = "1.21.3"
@@ -5307,33 +5255,6 @@ dependencies = [
"pkg-config", "pkg-config",
] ]
[[package]]
name = "opendal"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffb9838d0575c6dbaf3fcec7255af8d5771996d4af900bbb6fa9a314dec00a1a"
dependencies = [
"anyhow",
"backon",
"base64 0.22.1",
"bytes",
"chrono",
"futures",
"getrandom 0.2.16",
"http 1.3.1",
"http-body 1.0.1",
"log",
"md-5",
"percent-encoding",
"quick-xml",
"reqsign",
"reqwest",
"serde",
"serde_json",
"tokio",
"uuid",
]
[[package]] [[package]]
name = "openssl-probe" name = "openssl-probe"
version = "0.1.6" version = "0.1.6"
@@ -6539,33 +6460,6 @@ version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
[[package]]
name = "reqsign"
version = "0.16.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701"
dependencies = [
"anyhow",
"async-trait",
"base64 0.22.1",
"chrono",
"form_urlencoded",
"getrandom 0.2.16",
"hex",
"hmac",
"home",
"http 1.3.1",
"log",
"once_cell",
"percent-encoding",
"rand 0.8.5",
"reqwest",
"serde",
"serde_json",
"sha1",
"sha2",
]
[[package]] [[package]]
name = "reqwest" name = "reqwest"
version = "0.12.20" version = "0.12.20"
@@ -7838,18 +7732,16 @@ dependencies = [
[[package]] [[package]]
name = "tokio" name = "tokio"
version = "1.46.1" version = "1.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17" checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779"
dependencies = [ dependencies = [
"backtrace", "backtrace",
"bytes", "bytes",
"io-uring",
"libc", "libc",
"mio", "mio",
"pin-project-lite", "pin-project-lite",
"signal-hook-registry", "signal-hook-registry",
"slab",
"socket2", "socket2",
"tokio-macros", "tokio-macros",
"windows-sys 0.52.0", "windows-sys 0.52.0",

View File

@@ -21,16 +21,16 @@ categories = ["database-implementations"]
rust-version = "1.78.0" rust-version = "1.78.0"
[workspace.dependencies] [workspace.dependencies]
lance = { "version" = "=0.32.1", "features" = [ lance = { "version" = "=0.31.2", "features" = [
"dynamodb", "dynamodb",
], "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" } ], "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" } lance-io = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" } lance-index = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" } lance-linalg = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" } lance-table = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" } lance-testing = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" } lance-datafusion = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" } lance-encoding = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
# Note that this one does not include pyarrow # Note that this one does not include pyarrow
arrow = { version = "55.1", optional = false } arrow = { version = "55.1", optional = false }
arrow-array = "55.1" arrow-array = "55.1"

View File

@@ -1,84 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / Session
# Class: Session
A session for managing caches and object stores across LanceDB operations.
Sessions allow you to configure cache sizes for index and metadata caches,
which can significantly impact performance for large datasets.
## Constructors
### new Session()
```ts
new Session(indexCacheSizeBytes?, metadataCacheSizeBytes?): Session
```
Create a new session with custom cache sizes.
# Parameters
- `index_cache_size_bytes`: The size of the index cache in bytes.
Defaults to 6GB if not specified.
- `metadata_cache_size_bytes`: The size of the metadata cache in bytes.
Defaults to 1GB if not specified.
#### Parameters
* **indexCacheSizeBytes?**: `null` \| `bigint`
* **metadataCacheSizeBytes?**: `null` \| `bigint`
#### Returns
[`Session`](Session.md)
## Methods
### approxNumItems()
```ts
approxNumItems(): number
```
Get the approximate number of items cached in the session.
#### Returns
`number`
***
### sizeBytes()
```ts
sizeBytes(): bigint
```
Get the current size of the session caches in bytes.
#### Returns
`bigint`
***
### default()
```ts
static default(): Session
```
Create a session with default cache sizes.
This is equivalent to creating a session with 6GB index cache
and 1GB metadata cache.
#### Returns
[`Session`](Session.md)

View File

@@ -6,13 +6,10 @@
# Function: connect() # Function: connect()
## connect(uri, options, session) ## connect(uri, options)
```ts ```ts
function connect( function connect(uri, options?): Promise<Connection>
uri,
options?,
session?): Promise<Connection>
``` ```
Connect to a LanceDB instance at the given URI. Connect to a LanceDB instance at the given URI.
@@ -32,8 +29,6 @@ Accepted formats:
* **options?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt; * **options?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
The options to use when connecting to the database The options to use when connecting to the database
* **session?**: [`Session`](../classes/Session.md)
### Returns ### Returns
`Promise`&lt;[`Connection`](../classes/Connection.md)&gt; `Promise`&lt;[`Connection`](../classes/Connection.md)&gt;
@@ -82,7 +77,7 @@ Accepted formats:
[ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format. [ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.
### Examples ### Example
```ts ```ts
const conn = await connect({ const conn = await connect({
@@ -90,11 +85,3 @@ const conn = await connect({
storageOptions: {timeout: "60s"} storageOptions: {timeout: "60s"}
}); });
``` ```
```ts
const session = Session.default();
const conn = await connect({
uri: "/path/to/database",
session: session
});
```

View File

@@ -29,7 +29,6 @@
- [Query](classes/Query.md) - [Query](classes/Query.md)
- [QueryBase](classes/QueryBase.md) - [QueryBase](classes/QueryBase.md)
- [RecordBatchIterator](classes/RecordBatchIterator.md) - [RecordBatchIterator](classes/RecordBatchIterator.md)
- [Session](classes/Session.md)
- [Table](classes/Table.md) - [Table](classes/Table.md)
- [TagContents](classes/TagContents.md) - [TagContents](classes/TagContents.md)
- [Tags](classes/Tags.md) - [Tags](classes/Tags.md)

View File

@@ -70,17 +70,6 @@ Defaults to 'us-east-1'.
*** ***
### session?
```ts
optional session: Session;
```
(For LanceDB OSS only): the session to use for this connection. Holds
shared caches and other session-specific state.
***
### storageOptions? ### storageOptions?
```ts ```ts

View File

@@ -8,7 +8,7 @@
## Properties ## Properties
### ~~indexCacheSize?~~ ### indexCacheSize?
```ts ```ts
optional indexCacheSize: number; optional indexCacheSize: number;
@@ -16,11 +16,6 @@ optional indexCacheSize: number;
Set the size of the index cache, specified as a number of entries Set the size of the index cache, specified as a number of entries
#### Deprecated
Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to the connect() function.
The exact meaning of an "entry" will depend on the type of index: The exact meaning of an "entry" will depend on the type of index:
- IVF: there is one entry for each IVF partition - IVF: there is one entry for each IVF partition
- BTREE: there is one entry for the entire index - BTREE: there is one entry for the entire index

View File

@@ -19,7 +19,7 @@ lancedb = { path = "../../../rust/lancedb" }
lance = { workspace = true } lance = { workspace = true }
arrow = { workspace = true, features = ["ffi"] } arrow = { workspace = true, features = ["ffi"] }
arrow-schema.workspace = true arrow-schema.workspace = true
tokio = "1.46" tokio = "1.23"
jni = "0.21.1" jni = "0.21.1"
snafu.workspace = true snafu.workspace = true
lazy_static.workspace = true lazy_static.workspace = true

View File

@@ -8,7 +8,7 @@
<parent> <parent>
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.21.2-final.0</version> <version>0.21.2-beta.1</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@@ -8,7 +8,7 @@
<parent> <parent>
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.21.2-final.0</version> <version>0.21.2-beta.1</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.21.2-final.0</version> <version>0.21.2-beta.1</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>${project.artifactId}</name> <name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description> <description>LanceDB Java SDK Parent POM</description>

44
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.21.2", "version": "0.21.2-beta.0",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "vectordb", "name": "vectordb",
"version": "0.21.2", "version": "0.21.2-beta.0",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"
@@ -52,11 +52,11 @@
"uuid": "^9.0.0" "uuid": "^9.0.0"
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.21.2", "@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
"@lancedb/vectordb-darwin-x64": "0.21.2", "@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2", "@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.21.2", "@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.21.2" "@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
}, },
"peerDependencies": { "peerDependencies": {
"@apache-arrow/ts": "^14.0.2", "@apache-arrow/ts": "^14.0.2",
@@ -327,9 +327,9 @@
} }
}, },
"node_modules/@lancedb/vectordb-darwin-arm64": { "node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.21.2", "version": "0.21.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.2.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.2-beta.0.tgz",
"integrity": "sha512-NAQnIKLw9K33KMODNXBEW0qC8/safWzZtqbVC7j1GcE7PSk0Uc6x7w5nrH5gvleZggjaxY9jaRVTqmtg7PNmqw==", "integrity": "sha512-RiYqpKuq9v8A4wFuHt1iPNFYjWJ1KgGFLJwQO4ajp9Hee84sDHq8mP0ATgMcc24hiaOUQ1lRRTULjGbHn4NIYw==",
"cpu": [ "cpu": [
"arm64" "arm64"
], ],
@@ -340,9 +340,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-darwin-x64": { "node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.21.2", "version": "0.21.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.2.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.2-beta.0.tgz",
"integrity": "sha512-PudbltlbRiXvBf/bkAaDPL8+RqcI4TG69u00rQHxwkhH7PgPYRTUjfzfaQfiDXZuLXuZHQq703RyoHOqzsHN0Q==", "integrity": "sha512-togdP0YIjMYg/hBRMMxW434i5VB789JWU5o3hWrodbX8olEc0Txqw5Dg9CgIOldBIiCti6uTSQiTo6uldZon1w==",
"cpu": [ "cpu": [
"x64" "x64"
], ],
@@ -353,9 +353,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-linux-arm64-gnu": { "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.21.2", "version": "0.21.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.2.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.2-beta.0.tgz",
"integrity": "sha512-3lJ8lootlwLmhqabCdg0DKftv0Ujep6NTWAoLWK/6VQe2IgHmu/ZPRNQkOSZ5tnYlmRyDiMDMB2tlAzo45sV8Q==", "integrity": "sha512-ErS4IQDQVTYVATPeOj/dZXQR34eZQ5rAXm3vJdQi5K6X4zCDaIjOhpmnwzPBGT9W1idaBAoDJhtNfsFaJ6/PQQ==",
"cpu": [ "cpu": [
"arm64" "arm64"
], ],
@@ -366,9 +366,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-linux-x64-gnu": { "node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.21.2", "version": "0.21.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.2.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.2-beta.0.tgz",
"integrity": "sha512-5I2drMOIyRODlAHPsipQBTrRRgcOZ45N5GsuhqcKnz3Tg8GAdc1MQKyK3BrdJzKHLPdRtIyRJ6QTLB3wZvDsQQ==", "integrity": "sha512-ycDpyBGbfxtnGGa/RQo5+So6dHALiem1pbYc/LDKKluUJpadtXtEwC61o6hZTcejoYjhEE8ET7vA3OCEJfMFaw==",
"cpu": [ "cpu": [
"x64" "x64"
], ],
@@ -379,9 +379,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-win32-x64-msvc": { "node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.21.2", "version": "0.21.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.2.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.2-beta.0.tgz",
"integrity": "sha512-gjpFukq0NTQSRpWPNIpq4XFtaudjSNBT6DMsagC61D2nx9ZLEdSAdU0wdkeluQwhoMvNnXEPdP9HxDSFUXk+Ww==", "integrity": "sha512-IgVkAP/LiNIQD5P6n/9x3bgQOt5pGJarjtSF8r+ialD95QHmo6tcxrwTy/DlA+H1uI6B6h+sbN0c1KXTh1rYcg==",
"cpu": [ "cpu": [
"x64" "x64"
], ],

View File

@@ -1,6 +1,6 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.21.2", "version": "0.21.2-beta.1",
"description": " Serverless, low-latency vector database for AI applications", "description": " Serverless, low-latency vector database for AI applications",
"private": false, "private": false,
"main": "dist/index.js", "main": "dist/index.js",
@@ -89,10 +89,10 @@
} }
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.21.2", "@lancedb/vectordb-darwin-x64": "0.21.2-beta.1",
"@lancedb/vectordb-darwin-arm64": "0.21.2", "@lancedb/vectordb-darwin-arm64": "0.21.2-beta.1",
"@lancedb/vectordb-linux-x64-gnu": "0.21.2", "@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2", "@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.1",
"@lancedb/vectordb-win32-x64-msvc": "0.21.2" "@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.1"
} }
} }

View File

@@ -49,7 +49,7 @@ describe('LanceDB Mirrored Store Integration test', function () {
it('s3://...?mirroredStore=... param is processed correctly', async function () { it('s3://...?mirroredStore=... param is processed correctly', async function () {
this.timeout(600000) this.timeout(600000)
const dir = await fs.promises.mkdtemp(path.join(tmpdir(), 'lancedb-mirror-')) const dir = tmpdir()
console.log(dir) console.log(dir)
const conn = await lancedb.connect({ uri: `s3://lancedb-integtest?mirroredStore=${dir}`, storageOptions: { allowHttp: 'true' } }) const conn = await lancedb.connect({ uri: `s3://lancedb-integtest?mirroredStore=${dir}`, storageOptions: { allowHttp: 'true' } })
const data = Array(200).fill({ vector: Array(128).fill(1.0), id: 0 }) const data = Array(200).fill({ vector: Array(128).fill(1.0), id: 0 })
@@ -63,93 +63,118 @@ describe('LanceDB Mirrored Store Integration test', function () {
const t = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite }) const t = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite })
const mirroredPath = path.join(dir, `${tableName}.lance`) const mirroredPath = path.join(dir, `${tableName}.lance`)
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
if (err != null) throw err
// there should be three dirs
assert.equal(files.length, 3)
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
const files = await fs.promises.readdir(mirroredPath, { withFileTypes: true }) fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
// there should be three dirs if (err != null) throw err
assert.equal(files.length, 3, 'files after table creation') assert.equal(files.length, 1)
assert.isTrue(files[0].isDirectory()) assert.isTrue(files[0].name.endsWith('.txn'))
assert.isTrue(files[1].isDirectory()) })
const transactionFiles = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }) fs.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true }, (err, files) => {
assert.equal(transactionFiles.length, 1, 'transactionFiles after table creation') if (err != null) throw err
assert.isTrue(transactionFiles[0].name.endsWith('.txn')) assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.manifest'))
})
const versionFiles = await fs.promises.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true }) fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
assert.equal(versionFiles.length, 1, 'versionFiles after table creation') if (err != null) throw err
assert.isTrue(versionFiles[0].name.endsWith('.manifest')) assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.lance'))
const dataFiles = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }) })
assert.equal(dataFiles.length, 1, 'dataFiles after table creation') })
assert.isTrue(dataFiles[0].name.endsWith('.lance'))
// try create index and check if it's mirrored // try create index and check if it's mirrored
await t.createIndex({ column: 'vector', type: 'ivf_pq' }) await t.createIndex({ column: 'vector', type: 'ivf_pq' })
const filesAfterIndex = await fs.promises.readdir(mirroredPath, { withFileTypes: true }) fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
// there should be four dirs if (err != null) throw err
assert.equal(filesAfterIndex.length, 4, 'filesAfterIndex') // there should be four dirs
assert.isTrue(filesAfterIndex[0].isDirectory()) assert.equal(files.length, 4)
assert.isTrue(filesAfterIndex[1].isDirectory()) assert.isTrue(files[0].isDirectory())
assert.isTrue(filesAfterIndex[2].isDirectory()) assert.isTrue(files[1].isDirectory())
assert.isTrue(files[2].isDirectory())
// Two TXs now // Two TXs now
const transactionFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }) fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
assert.equal(transactionFilesAfterIndex.length, 2, 'transactionFilesAfterIndex') if (err != null) throw err
assert.isTrue(transactionFilesAfterIndex[0].name.endsWith('.txn')) assert.equal(files.length, 2)
assert.isTrue(transactionFilesAfterIndex[1].name.endsWith('.txn')) assert.isTrue(files[0].name.endsWith('.txn'))
assert.isTrue(files[1].name.endsWith('.txn'))
})
const dataFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }) fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
assert.equal(dataFilesAfterIndex.length, 1, 'dataFilesAfterIndex') if (err != null) throw err
assert.isTrue(dataFilesAfterIndex[0].name.endsWith('.lance')) assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.lance'))
})
const indicesFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }) fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => {
assert.equal(indicesFiles.length, 1, 'indicesFiles') if (err != null) throw err
assert.isTrue(indicesFiles[0].isDirectory()) assert.equal(files.length, 1)
assert.isTrue(files[0].isDirectory())
const indexFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFiles[0].name), { withFileTypes: true }) fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => {
console.log(`DEBUG indexFiles in ${indicesFiles[0].name}:`, indexFiles.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`)) if (err != null) throw err
assert.equal(indexFiles.length, 2, 'indexFiles')
const fileNames = indexFiles.map(f => f.name).sort() assert.equal(files.length, 1)
assert.isTrue(fileNames.includes('auxiliary.idx'), 'auxiliary.idx should be present') assert.isTrue(files[0].isFile())
assert.isTrue(fileNames.includes('index.idx'), 'index.idx should be present') assert.isTrue(files[0].name.endsWith('.idx'))
assert.isTrue(indexFiles.every(f => f.isFile()), 'all index files should be files') })
})
})
// try delete and check if it's mirrored // try delete and check if it's mirrored
await t.delete('id = 0') await t.delete('id = 0')
const filesAfterDelete = await fs.promises.readdir(mirroredPath, { withFileTypes: true }) fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
// there should be five dirs if (err != null) throw err
assert.equal(filesAfterDelete.length, 5, 'filesAfterDelete') // there should be five dirs
assert.isTrue(filesAfterDelete[0].isDirectory()) assert.equal(files.length, 5)
assert.isTrue(filesAfterDelete[1].isDirectory()) assert.isTrue(files[0].isDirectory())
assert.isTrue(filesAfterDelete[2].isDirectory()) assert.isTrue(files[1].isDirectory())
assert.isTrue(filesAfterDelete[3].isDirectory()) assert.isTrue(files[2].isDirectory())
assert.isTrue(filesAfterDelete[4].isDirectory()) assert.isTrue(files[3].isDirectory())
assert.isTrue(files[4].isDirectory())
// Three TXs now // Three TXs now
const transactionFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }) fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
assert.equal(transactionFilesAfterDelete.length, 3, 'transactionFilesAfterDelete') if (err != null) throw err
assert.isTrue(transactionFilesAfterDelete[0].name.endsWith('.txn')) assert.equal(files.length, 3)
assert.isTrue(transactionFilesAfterDelete[1].name.endsWith('.txn')) assert.isTrue(files[0].name.endsWith('.txn'))
assert.isTrue(files[1].name.endsWith('.txn'))
})
const dataFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }) fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
assert.equal(dataFilesAfterDelete.length, 1, 'dataFilesAfterDelete') if (err != null) throw err
assert.isTrue(dataFilesAfterDelete[0].name.endsWith('.lance')) assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.lance'))
})
const indicesFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }) fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => {
assert.equal(indicesFilesAfterDelete.length, 1, 'indicesFilesAfterDelete') if (err != null) throw err
assert.isTrue(indicesFilesAfterDelete[0].isDirectory()) assert.equal(files.length, 1)
assert.isTrue(files[0].isDirectory())
const indexFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFilesAfterDelete[0].name), { withFileTypes: true }) fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => {
console.log(`DEBUG indexFilesAfterDelete in ${indicesFilesAfterDelete[0].name}:`, indexFilesAfterDelete.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`)) if (err != null) throw err
assert.equal(indexFilesAfterDelete.length, 2, 'indexFilesAfterDelete')
const fileNamesAfterDelete = indexFilesAfterDelete.map(f => f.name).sort()
assert.isTrue(fileNamesAfterDelete.includes('auxiliary.idx'), 'auxiliary.idx should be present after delete')
assert.isTrue(fileNamesAfterDelete.includes('index.idx'), 'index.idx should be present after delete')
assert.isTrue(indexFilesAfterDelete.every(f => f.isFile()), 'all index files should be files after delete')
const deletionFiles = await fs.promises.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true }) assert.equal(files.length, 1)
assert.equal(deletionFiles.length, 1, 'deletionFiles') assert.isTrue(files[0].isFile())
assert.isTrue(deletionFiles[0].name.endsWith('.arrow')) assert.isTrue(files[0].name.endsWith('.idx'))
})
})
fs.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.arrow'))
})
})
}) })
}) })

View File

@@ -1,13 +0,0 @@
These are the TypeScript bindings of LanceDB.
The core Rust library is in the `../rust/lancedb` directory; the Rust binding
code is in the `src/` directory, and the TypeScript bindings are in
the `lancedb/` directory.
Whenever you change the Rust code, you will need to recompile: `npm run build`.
Common commands:
* Build: `npm run build`
* Lint: `npm run lint`
* Fix lints: `npm run lint-fix`
* Test: `npm test`
* Run single test file: `npm test __test__/arrow.test.ts`

View File

@@ -1,7 +1,7 @@
[package] [package]
name = "lancedb-nodejs" name = "lancedb-nodejs"
edition.workspace = true edition.workspace = true
version = "0.21.2" version = "0.21.2-beta.1"
license.workspace = true license.workspace = true
description.workspace = true description.workspace = true
repository.workspace = true repository.workspace = true

View File

@@ -108,10 +108,7 @@ describe("remote connection", () => {
it("should pass on requested extra headers", async () => { it("should pass on requested extra headers", async () => {
await withMockDatabase( await withMockDatabase(
(req, res) => { (req, res) => {
expect(req.headers["foo"]).toEqual("1"); expect(req.headers["x-my-header"]).toEqual("my-value");
expect(req.headers["bar"]).toEqual("2");
expect(req.headers["baz"]).toEqual("3");
expect(req.headers["x-log-attrs"]).toEqual("foo, bar, baz");
const body = JSON.stringify({ tables: [] }); const body = JSON.stringify({ tables: [] });
res.writeHead(200, { "Content-Type": "application/json" }).end(body); res.writeHead(200, { "Content-Type": "application/json" }).end(body);
@@ -122,12 +119,9 @@ describe("remote connection", () => {
}, },
{ {
clientConfig: { clientConfig: {
extraHeaders: { extraHeaders: {
"x-log-attrs": "foo, bar, baz", "x-my-header": "my-value",
foo: "1", },
bar: "2",
baz: "3",
},
}, },
}, },
); );

View File

@@ -1,46 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import * as tmp from "tmp";
import { Session, connect } from "../lancedb";
describe("Session", () => {
  // Fresh temporary directory per test; removed again in afterEach.
  let tmpDir: tmp.DirResult;

  beforeEach(() => {
    tmpDir = tmp.dirSync({ unsafeCleanup: true });
  });

  afterEach(() => tmpDir.removeCallback());

  it("should configure cache sizes and work with database operations", async () => {
    // Deliberately small cache limits so the test stays cheap.
    const indexLimit = BigInt(1024 * 1024); // 1MB
    const metadataLimit = BigInt(512 * 1024); // 512KB
    const session = new Session(indexLimit, metadataLimit);

    // Snapshot the cache counters before any database work happens.
    const sizeBefore = session.sizeBytes();
    const itemsBefore = session.approxNumItems();

    // Hand the session to a connection and run a small write + read through it.
    const db = await connect({ uri: tmpDir.name, session });
    const rows = Array.from({ length: 100 }, (_unused, idx) => ({
      id: idx,
      text: `item ${idx}`,
    }));
    const table = await db.createTable("test", rows);
    const firstFive = await table.query().limit(5).toArray();
    expect(firstFive).toHaveLength(5);

    // Snapshot the counters again after the operations.
    const sizeAfter = session.sizeBytes();
    const itemsAfter = session.approxNumItems();

    // The cache should have grown, and the item count should not have shrunk.
    expect(sizeAfter).toBeGreaterThan(sizeBefore);
    expect(itemsAfter).toBeGreaterThanOrEqual(itemsBefore);
    // NOTE(review): this compares the size measured *before* the operations
    // against the configured limits; if the intent is to check that the
    // populated cache stays within limits, `sizeAfter` may be the value
    // meant here — confirm before changing.
    expect(sizeBefore).toBeLessThan(indexLimit + metadataLimit);
  });
});

View File

@@ -582,7 +582,7 @@ describe("When creating an index", () => {
"Invalid input, minimum_nprobes must be greater than 0", "Invalid input, minimum_nprobes must be greater than 0",
); );
expect(() => tbl.query().nearestTo(queryVec).maximumNprobes(5)).toThrow( expect(() => tbl.query().nearestTo(queryVec).maximumNprobes(5)).toThrow(
"Invalid input, maximum_nprobes must be greater than or equal to minimum_nprobes", "Invalid input, maximum_nprobes must be greater than minimum_nprobes",
); );
await tbl.dropIndex("vec_idx"); await tbl.dropIndex("vec_idx");

View File

@@ -85,9 +85,6 @@ export interface OpenTableOptions {
/** /**
* Set the size of the index cache, specified as a number of entries * Set the size of the index cache, specified as a number of entries
* *
* @deprecated Use session-level cache configuration instead.
* Create a Session with custom cache sizes and pass it to the connect() function.
*
* The exact meaning of an "entry" will depend on the type of index: * The exact meaning of an "entry" will depend on the type of index:
* - IVF: there is one entry for each IVF partition * - IVF: there is one entry for each IVF partition
* - BTREE: there is one entry for the entire index * - BTREE: there is one entry for the entire index

View File

@@ -10,7 +10,6 @@ import {
import { import {
ConnectionOptions, ConnectionOptions,
Connection as LanceDbConnection, Connection as LanceDbConnection,
Session,
} from "./native.js"; } from "./native.js";
export { export {
@@ -52,8 +51,6 @@ export {
OpenTableOptions, OpenTableOptions,
} from "./connection"; } from "./connection";
export { Session } from "./native.js";
export { export {
ExecutableQuery, ExecutableQuery,
Query, Query,
@@ -134,7 +131,6 @@ export { IntoSql, packBits } from "./util";
export async function connect( export async function connect(
uri: string, uri: string,
options?: Partial<ConnectionOptions>, options?: Partial<ConnectionOptions>,
session?: Session,
): Promise<Connection>; ): Promise<Connection>;
/** /**
* Connect to a LanceDB instance at the given URI. * Connect to a LanceDB instance at the given URI.
@@ -153,43 +149,31 @@ export async function connect(
* storageOptions: {timeout: "60s"} * storageOptions: {timeout: "60s"}
* }); * });
* ``` * ```
*
* @example
* ```ts
* const session = Session.default();
* const conn = await connect({
* uri: "/path/to/database",
* session: session
* });
* ```
*/ */
export async function connect( export async function connect(
options: Partial<ConnectionOptions> & { uri: string }, options: Partial<ConnectionOptions> & { uri: string },
): Promise<Connection>; ): Promise<Connection>;
export async function connect( export async function connect(
uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }), uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }),
options?: Partial<ConnectionOptions>, options: Partial<ConnectionOptions> = {},
): Promise<Connection> { ): Promise<Connection> {
let uri: string | undefined; let uri: string | undefined;
let finalOptions: Partial<ConnectionOptions> = {};
if (typeof uriOrOptions !== "string") { if (typeof uriOrOptions !== "string") {
const { uri: uri_, ...opts } = uriOrOptions; const { uri: uri_, ...opts } = uriOrOptions;
uri = uri_; uri = uri_;
finalOptions = opts; options = opts;
} else { } else {
uri = uriOrOptions; uri = uriOrOptions;
finalOptions = options || {};
} }
if (!uri) { if (!uri) {
throw new Error("uri is required"); throw new Error("uri is required");
} }
finalOptions = (finalOptions as ConnectionOptions) ?? {}; options = (options as ConnectionOptions) ?? {};
(<ConnectionOptions>finalOptions).storageOptions = cleanseStorageOptions( (<ConnectionOptions>options).storageOptions = cleanseStorageOptions(
(<ConnectionOptions>finalOptions).storageOptions, (<ConnectionOptions>options).storageOptions,
); );
const nativeConn = await LanceDbConnection.new(uri, finalOptions); const nativeConn = await LanceDbConnection.new(uri, options);
return new LocalConnection(nativeConn); return new LocalConnection(nativeConn);
} }

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-arm64", "name": "@lancedb/lancedb-darwin-arm64",
"version": "0.21.2", "version": "0.21.2-beta.1",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node", "main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-x64", "name": "@lancedb/lancedb-darwin-x64",
"version": "0.21.2", "version": "0.21.2-beta.1",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.darwin-x64.node", "main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-gnu", "name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.21.2", "version": "0.21.2-beta.1",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node", "main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-musl", "name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.21.2", "version": "0.21.2-beta.1",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node", "main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-gnu", "name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.21.2", "version": "0.21.2-beta.1",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node", "main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-musl", "name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.21.2", "version": "0.21.2-beta.1",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node", "main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-arm64-msvc", "name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.21.2", "version": "0.21.2-beta.1",
"os": [ "os": [
"win32" "win32"
], ],

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-x64-msvc", "name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.21.2", "version": "0.21.2-beta.1",
"os": ["win32"], "os": ["win32"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node", "main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{ {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.21.2", "version": "0.21.2-beta.0",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.21.2", "version": "0.21.2-beta.0",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"

View File

@@ -11,7 +11,7 @@
"ann" "ann"
], ],
"private": false, "private": false,
"version": "0.21.2", "version": "0.21.2-beta.1",
"main": "dist/index.js", "main": "dist/index.js",
"exports": { "exports": {
".": "./dist/index.js", ".": "./dist/index.js",

View File

@@ -74,10 +74,6 @@ impl Connection {
builder = builder.host_override(&host_override); builder = builder.host_override(&host_override);
} }
if let Some(session) = options.session {
builder = builder.session(session.inner.clone());
}
Ok(Self::inner_new(builder.execute().await.default_error()?)) Ok(Self::inner_new(builder.execute().await.default_error()?))
} }

View File

@@ -14,7 +14,6 @@ pub mod merge;
mod query; mod query;
pub mod remote; pub mod remote;
mod rerankers; mod rerankers;
mod session;
mod table; mod table;
mod util; mod util;
@@ -35,9 +34,6 @@ pub struct ConnectionOptions {
/// ///
/// The available options are described at https://lancedb.github.io/lancedb/guides/storage/ /// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
pub storage_options: Option<HashMap<String, String>>, pub storage_options: Option<HashMap<String, String>>,
/// (For LanceDB OSS only): the session to use for this connection. Holds
/// shared caches and other session-specific state.
pub session: Option<session::Session>,
/// (For LanceDB cloud only): configuration for the remote HTTP client. /// (For LanceDB cloud only): configuration for the remote HTTP client.
pub client_config: Option<remote::ClientConfig>, pub client_config: Option<remote::ClientConfig>,

View File

@@ -1,102 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::sync::Arc;
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
use napi::bindgen_prelude::*;
use napi_derive::*;
/// A session for managing caches and object stores across LanceDB operations.
///
/// Sessions allow you to configure cache sizes for index and metadata caches,
/// which can significantly impact memory use and performance. They can
/// also be re-used across multiple connections to share the same cache state.
///
/// Cloning a `Session` clones the inner `Arc`, so every clone refers to the
/// same underlying Lance session rather than an independent copy.
#[napi]
#[derive(Clone)]
pub struct Session {
// Reference-counted handle to the wrapped Lance session; crate-visible so
// other modules of this binding crate can read it.
pub(crate) inner: Arc<LanceSession>,
}
impl std::fmt::Debug for Session {
    /// Renders the session as a `Session { .. }` struct showing the current
    /// `size_bytes` and `approx_num_items` values reported by the wrapped
    /// Lance session.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let session = &self.inner;
        let mut out = f.debug_struct("Session");
        out.field("size_bytes", &session.size_bytes());
        out.field("approx_num_items", &session.approx_num_items());
        out.finish()
    }
}
#[napi]
impl Session {
/// Create a new session with custom cache sizes.
///
/// # Parameters
///
/// - `index_cache_size_bytes`: The size of the index cache in bytes.
/// Index data is stored in memory in this cache to speed up queries.
/// Defaults to 6GB if not specified.
/// - `metadata_cache_size_bytes`: The size of the metadata cache in bytes.
/// The metadata cache stores file metadata and schema information in memory.
/// This cache improves scan and write performance.
/// Defaults to 1GB if not specified.
#[napi(constructor)]
pub fn new(
index_cache_size_bytes: Option<BigInt>,
metadata_cache_size_bytes: Option<BigInt>,
) -> napi::Result<Self> {
let index_cache_size = index_cache_size_bytes
.map(|size| size.get_u64().1 as usize)
.unwrap_or(6 * 1024 * 1024 * 1024); // 6GB default
let metadata_cache_size = metadata_cache_size_bytes
.map(|size| size.get_u64().1 as usize)
.unwrap_or(1024 * 1024 * 1024); // 1GB default
let session = LanceSession::new(
index_cache_size,
metadata_cache_size,
Arc::new(ObjectStoreRegistry::default()),
);
Ok(Self {
inner: Arc::new(session),
})
}
/// Create a session with default cache sizes.
///
/// This is equivalent to creating a session with 6GB index cache
/// and 1GB metadata cache.
#[napi(factory)]
pub fn default() -> Self {
Self {
inner: Arc::new(LanceSession::default()),
}
}
/// Get the current size of the session caches in bytes.
#[napi]
pub fn size_bytes(&self) -> BigInt {
BigInt::from(self.inner.size_bytes())
}
/// Get the approximate number of items cached in the session.
#[napi]
pub fn approx_num_items(&self) -> u32 {
self.inner.approx_num_items() as u32
}
}
// Implement FromNapiValue for Session to work with napi(object)
impl napi::bindgen_prelude::FromNapiValue for Session {
unsafe fn from_napi_value(
env: napi::sys::napi_env,
napi_val: napi::sys::napi_value,
) -> napi::Result<Self> {
let object: napi::bindgen_prelude::ClassInstance<Session> =
napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?;
let copy = object.clone();
Ok(copy)
}
}

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.24.2" current_version = "0.24.2-beta.1"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,19 +0,0 @@
These are the Python bindings of LanceDB.
The core Rust library is in the `../rust/lancedb` directory; the Rust binding
code is in the `src/` directory, and the Python bindings are in the `lancedb/` directory.
Common commands:
* Build: `make develop`
* Format: `make format`
* Lint: `make check`
* Fix lints: `make fix`
* Test: `make test`
* Doc test: `make doctest`
Before committing changes, run lints and then formatting.
When you change the Rust code, you will need to recompile the Python bindings: `make develop`.
When you export new types from Rust to Python, you must manually update `python/lancedb/_lancedb.pyi`
with the corresponding type hints. You can run `pyright` to check for type errors in the Python code.

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-python" name = "lancedb-python"
version = "0.24.2" version = "0.24.2-beta.1"
edition.workspace = true edition.workspace = true
description = "Python bindings for LanceDB" description = "Python bindings for LanceDB"
license.workspace = true license.workspace = true

View File

@@ -85,8 +85,8 @@ embeddings = [
"boto3>=1.28.57", "boto3>=1.28.57",
"awscli>=1.29.57", "awscli>=1.29.57",
"botocore>=1.31.57", "botocore>=1.31.57",
'ibm-watsonx-ai>=1.1.2; python_version >= "3.10"',
"ollama>=0.3.0", "ollama>=0.3.0",
"ibm-watsonx-ai>=1.1.2",
] ]
azure = ["adlfs>=2024.2.0"] azure = ["adlfs>=2024.2.0"]

View File

@@ -18,7 +18,6 @@ from .remote import ClientConfig
from .remote.db import RemoteDBConnection from .remote.db import RemoteDBConnection
from .schema import vector from .schema import vector
from .table import AsyncTable from .table import AsyncTable
from ._lancedb import Session
def connect( def connect(
@@ -31,7 +30,6 @@ def connect(
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None, request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
client_config: Union[ClientConfig, Dict[str, Any], None] = None, client_config: Union[ClientConfig, Dict[str, Any], None] = None,
storage_options: Optional[Dict[str, str]] = None, storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
**kwargs: Any, **kwargs: Any,
) -> DBConnection: ) -> DBConnection:
"""Connect to a LanceDB database. """Connect to a LanceDB database.
@@ -66,12 +64,6 @@ def connect(
storage_options: dict, optional storage_options: dict, optional
Additional options for the storage backend. See available options at Additional options for the storage backend. See available options at
<https://lancedb.github.io/lancedb/guides/storage/> <https://lancedb.github.io/lancedb/guides/storage/>
session: Session, optional
(For LanceDB OSS only)
A session to use for this connection. Sessions allow you to configure
cache sizes for index and metadata caches, which can significantly
impact memory use and performance. They can also be re-used across
multiple connections to share the same cache state.
Examples Examples
-------- --------
@@ -100,7 +92,7 @@ def connect(
if api_key is None: if api_key is None:
api_key = os.environ.get("LANCEDB_API_KEY") api_key = os.environ.get("LANCEDB_API_KEY")
if api_key is None: if api_key is None:
raise ValueError(f"api_key is required to connect to LanceDB cloud: {uri}") raise ValueError(f"api_key is required to connected LanceDB cloud: {uri}")
if isinstance(request_thread_pool, int): if isinstance(request_thread_pool, int):
request_thread_pool = ThreadPoolExecutor(request_thread_pool) request_thread_pool = ThreadPoolExecutor(request_thread_pool)
return RemoteDBConnection( return RemoteDBConnection(
@@ -121,7 +113,6 @@ def connect(
uri, uri,
read_consistency_interval=read_consistency_interval, read_consistency_interval=read_consistency_interval,
storage_options=storage_options, storage_options=storage_options,
session=session,
) )
@@ -134,7 +125,6 @@ async def connect_async(
read_consistency_interval: Optional[timedelta] = None, read_consistency_interval: Optional[timedelta] = None,
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None, client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
storage_options: Optional[Dict[str, str]] = None, storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
) -> AsyncConnection: ) -> AsyncConnection:
"""Connect to a LanceDB database. """Connect to a LanceDB database.
@@ -168,12 +158,6 @@ async def connect_async(
storage_options: dict, optional storage_options: dict, optional
Additional options for the storage backend. See available options at Additional options for the storage backend. See available options at
<https://lancedb.github.io/lancedb/guides/storage/> <https://lancedb.github.io/lancedb/guides/storage/>
session: Session, optional
(For LanceDB OSS only)
A session to use for this connection. Sessions allow you to configure
cache sizes for index and metadata caches, which can significantly
impact memory use and performance. They can also be re-used across
multiple connections to share the same cache state.
Examples Examples
-------- --------
@@ -213,7 +197,6 @@ async def connect_async(
read_consistency_interval_secs, read_consistency_interval_secs,
client_config, client_config,
storage_options, storage_options,
session,
) )
) )
@@ -229,7 +212,6 @@ __all__ = [
"DBConnection", "DBConnection",
"LanceDBConnection", "LanceDBConnection",
"RemoteDBConnection", "RemoteDBConnection",
"Session",
"__version__", "__version__",
] ]

View File

@@ -6,19 +6,6 @@ import pyarrow as pa
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
from .remote import ClientConfig from .remote import ClientConfig
class Session:
def __init__(
self,
index_cache_size_bytes: Optional[int] = None,
metadata_cache_size_bytes: Optional[int] = None,
): ...
@staticmethod
def default() -> "Session": ...
@property
def size_bytes(self) -> int: ...
@property
def approx_num_items(self) -> int: ...
class Connection(object): class Connection(object):
uri: str uri: str
async def table_names( async def table_names(
@@ -102,7 +89,6 @@ async def connect(
read_consistency_interval: Optional[float], read_consistency_interval: Optional[float],
client_config: Optional[Union[ClientConfig, Dict[str, Any]]], client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
storage_options: Optional[Dict[str, str]], storage_options: Optional[Dict[str, str]],
session: Optional[Session],
) -> Connection: ... ) -> Connection: ...
class RecordBatchStream: class RecordBatchStream:

View File

@@ -94,9 +94,9 @@ def data_to_reader(
else: else:
raise TypeError( raise TypeError(
f"Unknown data type {type(data)}. " f"Unknown data type {type(data)}. "
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, " "Please check "
"pyarrow Table/RecordBatch, or Pydantic models. " "https://lancedb.github.io/lance/read_and_write.html "
"See https://lancedb.github.io/lancedb/guides/tables/ for examples." "to see supported types."
) )

View File

@@ -37,7 +37,6 @@ if TYPE_CHECKING:
from ._lancedb import Connection as LanceDbConnection from ._lancedb import Connection as LanceDbConnection
from .common import DATA, URI from .common import DATA, URI
from .embeddings import EmbeddingFunctionConfig from .embeddings import EmbeddingFunctionConfig
from ._lancedb import Session
class DBConnection(EnforceOverrides): class DBConnection(EnforceOverrides):
@@ -248,9 +247,6 @@ class DBConnection(EnforceOverrides):
name: str name: str
The name of the table. The name of the table.
index_cache_size: int, default 256 index_cache_size: int, default 256
**Deprecated**: Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to lancedb.connect().
Set the size of the index cache, specified as a number of entries Set the size of the index cache, specified as a number of entries
The exact meaning of an "entry" will depend on the type of index: The exact meaning of an "entry" will depend on the type of index:
@@ -358,7 +354,6 @@ class LanceDBConnection(DBConnection):
*, *,
read_consistency_interval: Optional[timedelta] = None, read_consistency_interval: Optional[timedelta] = None,
storage_options: Optional[Dict[str, str]] = None, storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
): ):
if not isinstance(uri, Path): if not isinstance(uri, Path):
scheme = get_uri_scheme(uri) scheme = get_uri_scheme(uri)
@@ -372,7 +367,6 @@ class LanceDBConnection(DBConnection):
self._entered = False self._entered = False
self.read_consistency_interval = read_consistency_interval self.read_consistency_interval = read_consistency_interval
self.storage_options = storage_options self.storage_options = storage_options
self.session = session
if read_consistency_interval is not None: if read_consistency_interval is not None:
read_consistency_interval_secs = read_consistency_interval.total_seconds() read_consistency_interval_secs = read_consistency_interval.total_seconds()
@@ -388,7 +382,6 @@ class LanceDBConnection(DBConnection):
read_consistency_interval_secs, read_consistency_interval_secs,
None, None,
storage_options, storage_options,
session,
) )
self._conn = AsyncConnection(LOOP.run(do_connect())) self._conn = AsyncConnection(LOOP.run(do_connect()))
@@ -482,17 +475,6 @@ class LanceDBConnection(DBConnection):
------- -------
A LanceTable object representing the table. A LanceTable object representing the table.
""" """
if index_cache_size is not None:
import warnings
warnings.warn(
"index_cache_size is deprecated. Use session-level cache "
"configuration instead. Create a Session with custom cache sizes "
"and pass it to lancedb.connect().",
DeprecationWarning,
stacklevel=2,
)
return LanceTable.open( return LanceTable.open(
self, self,
name, name,
@@ -838,9 +820,6 @@ class AsyncConnection(object):
See available options at See available options at
<https://lancedb.github.io/lancedb/guides/storage/> <https://lancedb.github.io/lancedb/guides/storage/>
index_cache_size: int, default 256 index_cache_size: int, default 256
**Deprecated**: Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to lancedb.connect().
Set the size of the index cache, specified as a number of entries Set the size of the index cache, specified as a number of entries
The exact meaning of an "entry" will depend on the type of index: The exact meaning of an "entry" will depend on the type of index:

View File

@@ -11,7 +11,7 @@ from .instructor import InstructorEmbeddingFunction
from .ollama import OllamaEmbeddings from .ollama import OllamaEmbeddings
from .open_clip import OpenClipEmbeddings from .open_clip import OpenClipEmbeddings
from .openai import OpenAIEmbeddings from .openai import OpenAIEmbeddings
from .registry import EmbeddingFunctionRegistry, get_registry, register from .registry import EmbeddingFunctionRegistry, get_registry
from .sentence_transformers import SentenceTransformerEmbeddings from .sentence_transformers import SentenceTransformerEmbeddings
from .gte import GteEmbeddings from .gte import GteEmbeddings
from .transformers import TransformersEmbeddingFunction, ColbertEmbeddings from .transformers import TransformersEmbeddingFunction, ColbertEmbeddings

View File

@@ -9,14 +9,11 @@ from huggingface_hub import snapshot_download
from pydantic import BaseModel from pydantic import BaseModel
from transformers import BertTokenizer from transformers import BertTokenizer
from .utils import create_import_stub
try: try:
import mlx.core as mx import mlx.core as mx
import mlx.nn as nn import mlx.nn as nn
except ImportError: except ImportError:
mx = create_import_stub("mlx.core", "mlx") raise ImportError("You need to install MLX to use this model use - pip install mlx")
nn = create_import_stub("mlx.nn", "mlx")
def average_pool(last_hidden_state: mx.array, attention_mask: mx.array) -> mx.array: def average_pool(last_hidden_state: mx.array, attention_mask: mx.array) -> mx.array:
@@ -75,7 +72,7 @@ class TransformerEncoder(nn.Module):
super().__init__() super().__init__()
self.layers = [ self.layers = [
TransformerEncoderLayer(dims, num_heads, mlp_dims) TransformerEncoderLayer(dims, num_heads, mlp_dims)
for _ in range(num_layers) for i in range(num_layers)
] ]
def __call__(self, x, mask): def __call__(self, x, mask):

View File

@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors # SPDX-FileCopyrightText: Copyright The LanceDB Authors
import json import json
from typing import Dict, Optional, Type from typing import Dict, Optional
from .base import EmbeddingFunction, EmbeddingFunctionConfig from .base import EmbeddingFunction, EmbeddingFunctionConfig
@@ -43,7 +43,7 @@ class EmbeddingFunctionRegistry:
self._functions = {} self._functions = {}
self._variables = {} self._variables = {}
def register(self, alias: Optional[str] = None): def register(self, alias: str = None):
""" """
This creates a decorator that can be used to register This creates a decorator that can be used to register
an EmbeddingFunction. an EmbeddingFunction.
@@ -75,7 +75,7 @@ class EmbeddingFunctionRegistry:
""" """
self._functions = {} self._functions = {}
def get(self, name: str) -> Type[EmbeddingFunction]: def get(self, name: str):
""" """
Fetch an embedding function class by name Fetch an embedding function class by name

View File

@@ -21,36 +21,6 @@ from ..dependencies import pandas as pd
from ..util import attempt_import_or_raise from ..util import attempt_import_or_raise
def create_import_stub(module_name: str, package_name: str = None):
"""
Create a stub module that allows class definition but fails when used.
This allows modules to be imported for doctest collection even when
optional dependencies are not available.
Parameters
----------
module_name : str
The name of the module to create a stub for
package_name : str, optional
The package name to suggest in the error message
Returns
-------
object
A stub object that can be used in place of the module
"""
class _ImportStub:
def __getattr__(self, name):
return _ImportStub # Return stub for chained access like nn.Module
def __call__(self, *args, **kwargs):
pkg = package_name or module_name
raise ImportError(f"You need to install {pkg} to use this functionality")
return _ImportStub()
# ruff: noqa: PERF203 # ruff: noqa: PERF203
def retry(tries=10, delay=1, max_delay=30, backoff=3, jitter=1): def retry(tries=10, delay=1, max_delay=30, backoff=3, jitter=1):
def wrapper(fn): def wrapper(fn):

View File

@@ -14,7 +14,7 @@ from typing import (
Literal, Literal,
Optional, Optional,
Tuple, Tuple,
TypeVar, Type,
Union, Union,
Any, Any,
) )
@@ -58,8 +58,6 @@ if TYPE_CHECKING:
else: else:
from typing_extensions import Self from typing_extensions import Self
T = TypeVar("T", bound="LanceModel")
# Pydantic validation function for vector queries # Pydantic validation function for vector queries
def ensure_vector_query( def ensure_vector_query(
@@ -748,8 +746,8 @@ class LanceQueryBuilder(ABC):
return self.to_arrow(timeout=timeout).to_pylist() return self.to_arrow(timeout=timeout).to_pylist()
def to_pydantic( def to_pydantic(
self, model: type[T], *, timeout: Optional[timedelta] = None self, model: Type[LanceModel], *, timeout: Optional[timedelta] = None
) -> list[T]: ) -> List[LanceModel]:
"""Return the table as a list of pydantic models. """Return the table as a list of pydantic models.
Parameters Parameters
@@ -908,11 +906,11 @@ class LanceQueryBuilder(ABC):
>>> plan = table.search(query).explain_plan(True) >>> plan = table.search(query).explain_plan(True)
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE >>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance] ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
GlobalLimitExec: skip=0, fetch=10 GlobalLimitExec: skip=0, fetch=10
FilterExec: _distance@2 IS NOT NULL FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false] SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2 KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ... LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters Parameters
---------- ----------
@@ -942,19 +940,19 @@ class LanceQueryBuilder(ABC):
>>> plan = table.search(query).analyze_plan() >>> plan = table.search(query).analyze_plan()
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE >>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
AnalyzeExec verbose=true, metrics=[] AnalyzeExec verbose=true, metrics=[]
TracedExec, metrics=[] ProjectionExec: expr=[...], metrics=[...]
ProjectionExec: expr=[...], metrics=[...] GlobalLimitExec: skip=0, fetch=10, metrics=[...]
GlobalLimitExec: skip=0, fetch=10, metrics=[...] FilterExec: _distance@2 IS NOT NULL,
FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=..., elapsed_compute=...]
metrics=[output_rows=..., elapsed_compute=...] SortExec: TopK(fetch=10), expr=[...],
SortExec: TopK(fetch=10), expr=[...], preserve_partitioning=[...],
preserve_partitioning=[...], metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...] KNNVectorDistance: metric=l2,
KNNVectorDistance: metric=l2, metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
metrics=[output_rows=..., elapsed_compute=..., output_batches=...] LanceScan: uri=..., projection=[vector], row_id=true,
LanceRead: uri=..., projection=[vector], ... row_addr=false, ordered=false,
metrics=[output_rows=..., elapsed_compute=..., metrics=[output_rows=..., elapsed_compute=...,
bytes_read=..., iops=..., requests=...] bytes_read=..., iops=..., requests=...]
Returns Returns
------- -------
@@ -2045,7 +2043,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
FilterExec: _distance@2 IS NOT NULL FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false] SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2 KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ... LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters Parameters
---------- ----------
@@ -2431,7 +2429,7 @@ class AsyncQueryBase(object):
FilterExec: _distance@2 IS NOT NULL FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false] SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2 KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ... LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters Parameters
---------- ----------
@@ -3056,7 +3054,7 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
FilterExec: _distance@2 IS NOT NULL FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false] SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2 KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ... LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
<BLANKLINE> <BLANKLINE>
FTS Search Plan: FTS Search Plan:
ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score] ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]

View File

@@ -102,9 +102,7 @@ if TYPE_CHECKING:
) )
def _into_pyarrow_reader( def _into_pyarrow_reader(data) -> pa.RecordBatchReader:
data, schema: Optional[pa.Schema] = None
) -> pa.RecordBatchReader:
from lancedb.dependencies import datasets from lancedb.dependencies import datasets
if _check_for_hugging_face(data): if _check_for_hugging_face(data):
@@ -125,12 +123,6 @@ def _into_pyarrow_reader(
raise ValueError("Cannot add a single dictionary to a table. Use a list.") raise ValueError("Cannot add a single dictionary to a table. Use a list.")
if isinstance(data, list): if isinstance(data, list):
# Handle empty list case
if not data:
if schema is None:
raise ValueError("Cannot create table from empty list without a schema")
return pa.Table.from_pylist(data, schema=schema).to_reader()
# convert to list of dict if data is a bunch of LanceModels # convert to list of dict if data is a bunch of LanceModels
if isinstance(data[0], LanceModel): if isinstance(data[0], LanceModel):
schema = data[0].__class__.to_arrow_schema() schema = data[0].__class__.to_arrow_schema()
@@ -173,9 +165,9 @@ def _into_pyarrow_reader(
else: else:
raise TypeError( raise TypeError(
f"Unknown data type {type(data)}. " f"Unknown data type {type(data)}. "
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, " "Please check "
"pyarrow Table/RecordBatch, or Pydantic models. " "https://lancedb.github.io/lancedb/python/python/ "
"See https://lancedb.github.io/lancedb/guides/tables/ for examples." "to see supported types."
) )
@@ -244,7 +236,7 @@ def _sanitize_data(
# 1. There might be embedding columns missing that will be added # 1. There might be embedding columns missing that will be added
# in the add_embeddings step. # in the add_embeddings step.
# 2. If `allow_subschemas` is True, there might be columns missing. # 2. If `allow_subschemas` is True, there might be columns missing.
reader = _into_pyarrow_reader(data, target_schema) reader = _into_pyarrow_reader(data)
reader = _append_vector_columns(reader, target_schema, metadata=metadata) reader = _append_vector_columns(reader, target_schema, metadata=metadata)
@@ -3673,14 +3665,9 @@ class AsyncTable:
) )
if query.distance_type is not None: if query.distance_type is not None:
async_query = async_query.distance_type(query.distance_type) async_query = async_query.distance_type(query.distance_type)
if query.minimum_nprobes is not None and query.maximum_nprobes is not None: if query.minimum_nprobes is not None:
# Set both to the minimum first to avoid min > max error.
async_query = async_query.nprobes(
query.minimum_nprobes
).maximum_nprobes(query.maximum_nprobes)
elif query.minimum_nprobes is not None:
async_query = async_query.minimum_nprobes(query.minimum_nprobes) async_query = async_query.minimum_nprobes(query.minimum_nprobes)
elif query.maximum_nprobes is not None: if query.maximum_nprobes is not None:
async_query = async_query.maximum_nprobes(query.maximum_nprobes) async_query = async_query.maximum_nprobes(query.maximum_nprobes)
if query.refine_factor is not None: if query.refine_factor is not None:
async_query = async_query.refine_factor(query.refine_factor) async_query = async_query.refine_factor(query.refine_factor)

View File

@@ -33,11 +33,8 @@ tantivy = pytest.importorskip("tantivy")
@pytest.fixture @pytest.fixture
def table(tmp_path) -> ldb.table.LanceTable: def table(tmp_path) -> ldb.table.LanceTable:
# Use local random state to avoid affecting other tests
rng = np.random.RandomState(42)
local_random = random.Random(42)
db = ldb.connect(tmp_path) db = ldb.connect(tmp_path)
vectors = [rng.randn(128) for _ in range(100)] vectors = [np.random.randn(128) for _ in range(100)]
text_nouns = ("puppy", "car") text_nouns = ("puppy", "car")
text2_nouns = ("rabbit", "girl", "monkey") text2_nouns = ("rabbit", "girl", "monkey")
@@ -47,10 +44,10 @@ def table(tmp_path) -> ldb.table.LanceTable:
text = [ text = [
" ".join( " ".join(
[ [
text_nouns[local_random.randrange(0, len(text_nouns))], text_nouns[random.randrange(0, len(text_nouns))],
verbs[local_random.randrange(0, 5)], verbs[random.randrange(0, 5)],
adv[local_random.randrange(0, 5)], adv[random.randrange(0, 5)],
adj[local_random.randrange(0, 5)], adj[random.randrange(0, 5)],
] ]
) )
for _ in range(100) for _ in range(100)
@@ -58,15 +55,15 @@ def table(tmp_path) -> ldb.table.LanceTable:
text2 = [ text2 = [
" ".join( " ".join(
[ [
text2_nouns[local_random.randrange(0, len(text2_nouns))], text2_nouns[random.randrange(0, len(text2_nouns))],
verbs[local_random.randrange(0, 5)], verbs[random.randrange(0, 5)],
adv[local_random.randrange(0, 5)], adv[random.randrange(0, 5)],
adj[local_random.randrange(0, 5)], adj[random.randrange(0, 5)],
] ]
) )
for _ in range(100) for _ in range(100)
] ]
count = [local_random.randint(1, 10000) for _ in range(100)] count = [random.randint(1, 10000) for _ in range(100)]
table = db.create_table( table = db.create_table(
"test", "test",
data=pd.DataFrame( data=pd.DataFrame(
@@ -85,11 +82,8 @@ def table(tmp_path) -> ldb.table.LanceTable:
@pytest.fixture @pytest.fixture
async def async_table(tmp_path) -> ldb.table.AsyncTable: async def async_table(tmp_path) -> ldb.table.AsyncTable:
# Use local random state to avoid affecting other tests
rng = np.random.RandomState(42)
local_random = random.Random(42)
db = await ldb.connect_async(tmp_path) db = await ldb.connect_async(tmp_path)
vectors = [rng.randn(128) for _ in range(100)] vectors = [np.random.randn(128) for _ in range(100)]
text_nouns = ("puppy", "car") text_nouns = ("puppy", "car")
text2_nouns = ("rabbit", "girl", "monkey") text2_nouns = ("rabbit", "girl", "monkey")
@@ -99,10 +93,10 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
text = [ text = [
" ".join( " ".join(
[ [
text_nouns[local_random.randrange(0, len(text_nouns))], text_nouns[random.randrange(0, len(text_nouns))],
verbs[local_random.randrange(0, 5)], verbs[random.randrange(0, 5)],
adv[local_random.randrange(0, 5)], adv[random.randrange(0, 5)],
adj[local_random.randrange(0, 5)], adj[random.randrange(0, 5)],
] ]
) )
for _ in range(100) for _ in range(100)
@@ -110,15 +104,15 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
text2 = [ text2 = [
" ".join( " ".join(
[ [
text2_nouns[local_random.randrange(0, len(text2_nouns))], text2_nouns[random.randrange(0, len(text2_nouns))],
verbs[local_random.randrange(0, 5)], verbs[random.randrange(0, 5)],
adv[local_random.randrange(0, 5)], adv[random.randrange(0, 5)],
adj[local_random.randrange(0, 5)], adj[random.randrange(0, 5)],
] ]
) )
for _ in range(100) for _ in range(100)
] ]
count = [local_random.randint(1, 10000) for _ in range(100)] count = [random.randint(1, 10000) for _ in range(100)]
table = await db.create_table( table = await db.create_table(
"test", "test",
data=pd.DataFrame( data=pd.DataFrame(

View File

@@ -166,7 +166,7 @@ async def test_explain_plan(table: AsyncTable):
assert "Vector Search Plan" in plan assert "Vector Search Plan" in plan
assert "KNNVectorDistance" in plan assert "KNNVectorDistance" in plan
assert "FTS Search Plan" in plan assert "FTS Search Plan" in plan
assert "LanceRead" in plan assert "LanceScan" in plan
@pytest.mark.asyncio @pytest.mark.asyncio

View File

@@ -445,45 +445,25 @@ def test_invalid_nprobes_sync(table):
with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"): with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"):
LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(0).to_list() LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(0).to_list()
with pytest.raises( with pytest.raises(
ValueError, ValueError, match="maximum_nprobes must be greater than minimum_nprobes"
match="maximum_nprobes must be greater than or equal to minimum_nprobes",
): ):
LanceVectorQueryBuilder(table, [0, 0], "vector").maximum_nprobes(5).to_list() LanceVectorQueryBuilder(table, [0, 0], "vector").maximum_nprobes(5).to_list()
with pytest.raises( with pytest.raises(
ValueError, ValueError, match="minimum_nprobes must be less or equal to maximum_nprobes"
match="minimum_nprobes must be less than or equal to maximum_nprobes",
): ):
LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(100).to_list() LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(100).to_list()
def test_nprobes_works_sync(table):
LanceVectorQueryBuilder(table, [0, 0], "vector").nprobes(30).to_list()
def test_nprobes_min_max_works_sync(table):
LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(2).maximum_nprobes(
4
).to_list()
def test_multiple_nprobes_calls_works_sync(table):
LanceVectorQueryBuilder(table, [0, 0], "vector").nprobes(30).maximum_nprobes(
20
).minimum_nprobes(20).to_list()
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_invalid_nprobes_async(table_async: AsyncTable): async def test_invalid_nprobes_async(table_async: AsyncTable):
with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"): with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"):
await table_async.vector_search([0, 0]).minimum_nprobes(0).to_list() await table_async.vector_search([0, 0]).minimum_nprobes(0).to_list()
with pytest.raises( with pytest.raises(
ValueError, ValueError, match="maximum_nprobes must be greater than minimum_nprobes"
match="maximum_nprobes must be greater than or equal to minimum_nprobes",
): ):
await table_async.vector_search([0, 0]).maximum_nprobes(5).to_list() await table_async.vector_search([0, 0]).maximum_nprobes(5).to_list()
with pytest.raises( with pytest.raises(
ValueError, ValueError, match="minimum_nprobes must be less or equal to maximum_nprobes"
match="minimum_nprobes must be less than or equal to maximum_nprobes",
): ):
await table_async.vector_search([0, 0]).minimum_nprobes(100).to_list() await table_async.vector_search([0, 0]).minimum_nprobes(100).to_list()
@@ -859,7 +839,7 @@ async def test_explain_plan_with_filters(table_async: AsyncTable):
table_async.query().nearest_to(pa.array([1, 2])).where("id = 1").explain_plan() table_async.query().nearest_to(pa.array([1, 2])).where("id = 1").explain_plan()
) )
assert "KNN" in plan_with_filter assert "KNN" in plan_with_filter
assert "LanceRead" in plan_with_filter assert "FilterExec" in plan_with_filter
# Test FTS query with filter # Test FTS query with filter
from lancedb.index import FTS from lancedb.index import FTS
@@ -870,8 +850,7 @@ async def test_explain_plan_with_filters(table_async: AsyncTable):
) )
plan_fts_filter = await query_fts_filter.where("id = 1").explain_plan() plan_fts_filter = await query_fts_filter.where("id = 1").explain_plan()
assert "MatchQuery: query=dog" in plan_fts_filter assert "MatchQuery: query=dog" in plan_fts_filter
assert "LanceRead" in plan_fts_filter assert "FilterExec: id@" in plan_fts_filter # Should show filter details
assert "full_filter=id = Int64(1)" in plan_fts_filter # Should show filter details
@pytest.mark.asyncio @pytest.mark.asyncio
@@ -1359,20 +1338,3 @@ async def test_query_timeout_async(tmp_path):
.nearest_to([0.0, 0.0]) .nearest_to([0.0, 0.0])
.to_list(timeout=timedelta(0)) .to_list(timeout=timedelta(0))
) )
def test_search_empty_table(mem_db):
"""Test searching on empty table should not crash
Regression test for issue #303:
https://github.com/lancedb/lancedb/issues/303
Searching on empty table produces scary error message
"""
schema = pa.schema(
[pa.field("vector", pa.list_(pa.float32(), 2)), pa.field("id", pa.int64())]
)
table = mem_db.create_table("test_empty_search", schema=schema)
# Search on empty table should return empty results, not crash
results = table.search([1.0, 2.0]).limit(5).to_list()
assert results == []

View File

@@ -1,38 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import lancedb
def test_session_cache_configuration(tmp_path):
"""Test Session cache configuration and basic functionality."""
# Create session with small cache limits for testing
index_cache_size = 1024 * 1024 # 1MB
metadata_cache_size = 512 * 1024 # 512KB
session = lancedb.Session(
index_cache_size_bytes=index_cache_size,
metadata_cache_size_bytes=metadata_cache_size,
)
# Record initial cache state
initial_cache_size = session.size_bytes
initial_cache_items = session.approx_num_items
# Test session works with database connection
db = lancedb.connect(tmp_path, session=session)
# Create and use a table to exercise the session
data = [{"id": i, "text": f"item {i}"} for i in range(100)]
table = db.create_table("test", data)
results = list(table.to_arrow().to_pylist())
assert len(results) == 100
# Verify cache usage increased after operations
final_cache_size = session.size_bytes
final_cache_items = session.approx_num_items
assert final_cache_size > initial_cache_size # Cache should have grown
assert final_cache_items >= initial_cache_items # Items should not decrease
assert initial_cache_size < index_cache_size + metadata_cache_size

View File

@@ -1804,45 +1804,3 @@ def test_stats(mem_db: DBConnection):
}, },
}, },
} }
def test_create_table_empty_list_with_schema(mem_db: DBConnection):
"""Test creating table with empty list data and schema
Regression test for IndexError: list index out of range
when calling create_table(name, data=[], schema=schema)
"""
schema = pa.schema(
[pa.field("vector", pa.list_(pa.float32(), 2)), pa.field("id", pa.int64())]
)
table = mem_db.create_table("test_empty_list", data=[], schema=schema)
assert table.count_rows() == 0
assert table.schema == schema
def test_create_table_empty_list_no_schema_error(mem_db: DBConnection):
"""Test that creating table with empty list and no schema raises error"""
with pytest.raises(
ValueError, match="Cannot create table from empty list without a schema"
):
mem_db.create_table("test_empty_no_schema", data=[])
def test_add_table_with_empty_embeddings(tmp_path):
"""Test exact scenario from issue #1968
Regression test for issue #1968:
https://github.com/lancedb/lancedb/issues/1968
"""
db = lancedb.connect(tmp_path)
class MySchema(LanceModel):
text: str
embedding: Vector(16)
table = db.create_table("test", schema=MySchema)
table.add(
[{"text": "bar", "embedding": [0.1] * 16}],
on_bad_vectors="drop",
)
assert table.count_rows() == 1

View File

@@ -179,7 +179,7 @@ impl Connection {
} }
#[pyfunction] #[pyfunction]
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))] #[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None))]
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
pub fn connect( pub fn connect(
py: Python, py: Python,
@@ -190,7 +190,6 @@ pub fn connect(
read_consistency_interval: Option<f64>, read_consistency_interval: Option<f64>,
client_config: Option<PyClientConfig>, client_config: Option<PyClientConfig>,
storage_options: Option<HashMap<String, String>>, storage_options: Option<HashMap<String, String>>,
session: Option<crate::session::Session>,
) -> PyResult<Bound<'_, PyAny>> { ) -> PyResult<Bound<'_, PyAny>> {
future_into_py(py, async move { future_into_py(py, async move {
let mut builder = lancedb::connect(&uri); let mut builder = lancedb::connect(&uri);
@@ -214,9 +213,6 @@ pub fn connect(
if let Some(client_config) = client_config { if let Some(client_config) = client_config {
builder = builder.client_config(client_config.into()); builder = builder.client_config(client_config.into());
} }
if let Some(session) = session {
builder = builder.session(session.inner.clone());
}
Ok(Connection::new(builder.execute().await.infer_error()?)) Ok(Connection::new(builder.execute().await.infer_error()?))
}) })
} }

View File

@@ -11,7 +11,6 @@ use pyo3::{
wrap_pyfunction, Bound, PyResult, Python, wrap_pyfunction, Bound, PyResult, Python,
}; };
use query::{FTSQuery, HybridQuery, Query, VectorQuery}; use query::{FTSQuery, HybridQuery, Query, VectorQuery};
use session::Session;
use table::{ use table::{
AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, MergeResult, AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, MergeResult,
Table, UpdateResult, Table, UpdateResult,
@@ -22,7 +21,6 @@ pub mod connection;
pub mod error; pub mod error;
pub mod index; pub mod index;
pub mod query; pub mod query;
pub mod session;
pub mod table; pub mod table;
pub mod util; pub mod util;
@@ -33,7 +31,6 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
.write_style("LANCEDB_LOG_STYLE"); .write_style("LANCEDB_LOG_STYLE");
env_logger::init_from_env(env); env_logger::init_from_env(env);
m.add_class::<Connection>()?; m.add_class::<Connection>()?;
m.add_class::<Session>()?;
m.add_class::<Table>()?; m.add_class::<Table>()?;
m.add_class::<IndexConfig>()?; m.add_class::<IndexConfig>()?;
m.add_class::<Query>()?; m.add_class::<Query>()?;

View File

@@ -1,107 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::sync::Arc;
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
use pyo3::{pyclass, pymethods, PyResult};
/// A session for managing caches and object stores across LanceDB operations.
///
/// Sessions allow you to configure cache sizes for index and metadata caches,
/// which can significantly impact memory use and performance. They can
/// also be re-used across multiple connections to share the same cache state.
#[pyclass]
#[derive(Clone)]
pub struct Session {
pub(crate) inner: Arc<LanceSession>,
}
impl Default for Session {
fn default() -> Self {
Self {
inner: Arc::new(LanceSession::default()),
}
}
}
#[pymethods]
impl Session {
/// Create a new session with custom cache sizes.
///
/// Parameters
/// ----------
/// index_cache_size_bytes : int, optional
/// The size of the index cache in bytes.
/// Index data is stored in memory in this cache to speed up queries.
/// Default: 6GB (6 * 1024 * 1024 * 1024 bytes)
/// metadata_cache_size_bytes : int, optional
/// The size of the metadata cache in bytes.
/// The metadata cache stores file metadata and schema information in memory.
/// This cache improves scan and write performance.
/// Default: 1GB (1024 * 1024 * 1024 bytes)
#[new]
#[pyo3(signature = (index_cache_size_bytes=None, metadata_cache_size_bytes=None))]
pub fn new(
index_cache_size_bytes: Option<usize>,
metadata_cache_size_bytes: Option<usize>,
) -> PyResult<Self> {
let index_cache_size = index_cache_size_bytes.unwrap_or(6 * 1024 * 1024 * 1024); // 6GB default
let metadata_cache_size = metadata_cache_size_bytes.unwrap_or(1024 * 1024 * 1024); // 1GB default
let session = LanceSession::new(
index_cache_size,
metadata_cache_size,
Arc::new(ObjectStoreRegistry::default()),
);
Ok(Self {
inner: Arc::new(session),
})
}
/// Create a session with default cache sizes.
///
/// This is equivalent to creating a session with 6GB index cache
/// and 1GB metadata cache.
///
/// Returns
/// -------
/// Session
/// A new Session with default cache sizes
#[staticmethod]
#[allow(clippy::should_implement_trait)]
pub fn default() -> Self {
Default::default()
}
/// Get the current size of the session caches in bytes.
///
/// Returns
/// -------
/// int
/// The total size of all caches in the session
#[getter]
pub fn size_bytes(&self) -> u64 {
self.inner.size_bytes()
}
/// Get the approximate number of items cached in the session.
///
/// Returns
/// -------
/// int
/// The number of cached items across all caches
#[getter]
pub fn approx_num_items(&self) -> usize {
self.inner.approx_num_items()
}
fn __repr__(&self) -> String {
format!(
"Session(size_bytes={}, approx_num_items={})",
self.size_bytes(),
self.approx_num_items()
)
}
}

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-node" name = "lancedb-node"
version = "0.21.2" version = "0.21.2-beta.1"
description = "Serverless, low-latency vector database for AI applications" description = "Serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true
edition.workspace = true edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb" name = "lancedb"
version = "0.21.2" version = "0.21.2-beta.1"
edition.workspace = true edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications" description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true

View File

@@ -678,8 +678,7 @@ impl Database for ListingDatabase {
let mut read_params = request.lance_read_params.unwrap_or_else(|| { let mut read_params = request.lance_read_params.unwrap_or_else(|| {
let mut default_params = ReadParams::default(); let mut default_params = ReadParams::default();
if let Some(index_cache_size) = request.index_cache_size { if let Some(index_cache_size) = request.index_cache_size {
#[allow(deprecated)] default_params.index_cache_size = index_cache_size as usize;
default_params.index_cache_size(index_cache_size as usize);
} }
default_params default_params
}); });

View File

@@ -290,7 +290,3 @@ impl Display for DistanceType {
/// Connect to a database /// Connect to a database
pub use connection::connect; pub use connection::connect;
/// Re-export Lance Session and ObjectStoreRegistry for custom session creation
pub use lance::session::Session;
pub use lance_io::object_store::ObjectStoreRegistry;

View File

@@ -958,8 +958,7 @@ impl VectorQuery {
if let Some(maximum_nprobes) = self.request.maximum_nprobes { if let Some(maximum_nprobes) = self.request.maximum_nprobes {
if minimum_nprobes > maximum_nprobes { if minimum_nprobes > maximum_nprobes {
return Err(Error::InvalidInput { return Err(Error::InvalidInput {
message: "minimum_nprobes must be less than or equal to maximum_nprobes" message: "minimum_nprobes must be less or equal to maximum_nprobes".to_string(),
.to_string(),
}); });
} }
} }
@@ -990,8 +989,7 @@ impl VectorQuery {
} }
if maximum_nprobes < self.request.minimum_nprobes { if maximum_nprobes < self.request.minimum_nprobes {
return Err(Error::InvalidInput { return Err(Error::InvalidInput {
message: "maximum_nprobes must be greater than or equal to minimum_nprobes" message: "maximum_nprobes must be greater than minimum_nprobes".to_string(),
.to_string(),
}); });
} }
} }

View File

@@ -2,7 +2,7 @@
// SPDX-FileCopyrightText: Copyright The LanceDB Authors // SPDX-FileCopyrightText: Copyright The LanceDB Authors
use http::HeaderName; use http::HeaderName;
use log::{debug, info}; use log::debug;
use reqwest::{ use reqwest::{
header::{HeaderMap, HeaderValue}, header::{HeaderMap, HeaderValue},
Body, Request, RequestBuilder, Response, Body, Request, RequestBuilder, Response,
@@ -324,7 +324,6 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
} }
for (key, value) in &config.extra_headers { for (key, value) in &config.extra_headers {
info!("header: {}={}", key, value);
let key_parsed = HeaderName::from_str(key).map_err(|_| Error::InvalidInput { let key_parsed = HeaderName::from_str(key).map_err(|_| Error::InvalidInput {
message: format!("non-ascii value for header '{}' provided", key), message: format!("non-ascii value for header '{}' provided", key),
})?; })?;

View File

@@ -85,14 +85,6 @@ impl ExecutionPlan for MetadataEraserExec {
vec![&self.input] vec![&self.input]
} }
fn maintains_input_order(&self) -> Vec<bool> {
vec![true; self.children().len()]
}
fn benefits_from_input_partitioning(&self) -> Vec<bool> {
vec![false; self.children().len()]
}
fn with_new_children( fn with_new_children(
self: Arc<Self>, self: Arc<Self>,
children: Vec<Arc<dyn ExecutionPlan>>, children: Vec<Arc<dyn ExecutionPlan>>,
@@ -494,8 +486,11 @@ pub mod tests {
TestFixture::check_plan( TestFixture::check_plan(
plan, plan,
"MetadataEraserExec "MetadataEraserExec
CoalesceBatchesExec:...
FilterExec: i@0 >= 5
RepartitionExec:...
ProjectionExec:... ProjectionExec:...
LanceRead:...", LanceScan:...",
) )
.await; .await;