Compare commits

..

1 Commits

Author SHA1 Message Date
Lance Release
adc8abc203 Bump version: 0.21.2-beta.0 → 0.21.2-beta.1 2025-07-22 15:40:41 +00:00
66 changed files with 281 additions and 1071 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.21.2"
current_version = "0.21.2-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -5,8 +5,8 @@ on:
tags-ignore:
# We don't publish pre-releases for Rust. Crates.io is just a source
# distribution, so we don't need to publish pre-releases.
- "v*-beta*"
- "*-v*" # for example, python-vX.Y.Z
- 'v*-beta*'
- '*-v*' # for example, python-vX.Y.Z
env:
# This env var is used by Swatinem/rust-cache@v2 for the cache
@@ -19,8 +19,6 @@ env:
jobs:
build:
runs-on: ubuntu-22.04
permissions:
id-token: write
timeout-minutes: 30
# Only runs on tags that match the make-release action
if: startsWith(github.ref, 'refs/tags/v')
@@ -33,8 +31,6 @@ jobs:
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- uses: rust-lang/crates-io-auth-action@v1
id: auth
- name: Publish the package
run: |
cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
cargo publish -p lancedb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}

View File

@@ -1,24 +0,0 @@
LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
remote (against LanceDB Cloud).
The core of LanceDB is written in Rust. There are bindings in Python, TypeScript, and Java.
Project layout:
* `rust/lancedb`: The LanceDB core Rust implementation.
* `python`: The Python bindings, using PyO3.
* `nodejs`: The TypeScript bindings, using napi-rs.
* `java`: The Java bindings.
(`rust/ffi` and `node/` are for a deprecated package. You can ignore them.)
Common commands:
* Check for compiler errors: `cargo check --features remote --tests --examples`
* Run tests: `cargo test --features remote --tests`
* Run specific test: `cargo test --features remote -p <package_name> --test <test_name>`
* Lint: `cargo clippy --features remote --tests --examples`
* Format: `cargo fmt --all`
Before committing changes, run formatting.

174
Cargo.lock generated
View File

@@ -1039,17 +1039,6 @@ dependencies = [
"tokio",
]
[[package]]
name = "backon"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "302eaff5357a264a2c42f127ecb8bac761cf99749fc3dc95677e2743991f99e7"
dependencies = [
"fastrand",
"gloo-timers",
"tokio",
]
[[package]]
name = "backtrace"
version = "0.3.75"
@@ -2488,7 +2477,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"const-oid",
"crypto-common",
"subtle",
]
@@ -2852,10 +2840,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [
"arrow-array",
"rand 0.8.5",
]
@@ -3269,18 +3256,6 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
[[package]]
name = "gloo-timers"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994"
dependencies = [
"futures-channel",
"futures-core",
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "group"
version = "0.12.1"
@@ -3817,17 +3792,6 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "io-uring"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
dependencies = [
"bitflags 2.9.1",
"cfg-if",
"libc",
]
[[package]]
name = "ipnet"
version = "2.11.0"
@@ -3966,8 +3930,8 @@ dependencies = [
[[package]]
name = "lance"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [
"arrow",
"arrow-arith",
@@ -4029,8 +3993,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4047,8 +4011,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4083,8 +4047,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [
"arrow",
"arrow-array",
@@ -4112,8 +4076,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [
"arrow",
"arrow-array",
@@ -4129,8 +4093,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [
"arrayref",
"arrow",
@@ -4169,8 +4133,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4204,8 +4168,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [
"arrow",
"arrow-array",
@@ -4239,6 +4203,7 @@ dependencies = [
"lance-linalg",
"lance-table",
"log",
"moka",
"num-traits",
"object_store",
"prost",
@@ -4258,8 +4223,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [
"arrow",
"arrow-arith",
@@ -4283,8 +4248,6 @@ dependencies = [
"lance-core",
"log",
"object_store",
"object_store_opendal",
"opendal",
"path_abs",
"pin-project",
"prost",
@@ -4299,8 +4262,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4323,8 +4286,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [
"arrow",
"arrow-array",
@@ -4362,8 +4325,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4374,7 +4337,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.21.2"
version = "0.21.2-beta.0"
dependencies = [
"arrow",
"arrow-array",
@@ -4461,7 +4424,7 @@ dependencies = [
[[package]]
name = "lancedb-node"
version = "0.21.2"
version = "0.21.2-beta.0"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4486,7 +4449,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.21.2"
version = "0.21.2-beta.0"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4506,7 +4469,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.24.2"
version = "0.24.2-beta.0"
dependencies = [
"arrow",
"env_logger",
@@ -5252,21 +5215,6 @@ dependencies = [
"web-time",
]
[[package]]
name = "object_store_opendal"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ce697ee723fdc3eaf6c457abf4059034be15167022b18b619993802cd1443d5"
dependencies = [
"async-trait",
"bytes",
"futures",
"object_store",
"opendal",
"pin-project",
"tokio",
]
[[package]]
name = "once_cell"
version = "1.21.3"
@@ -5307,33 +5255,6 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "opendal"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffb9838d0575c6dbaf3fcec7255af8d5771996d4af900bbb6fa9a314dec00a1a"
dependencies = [
"anyhow",
"backon",
"base64 0.22.1",
"bytes",
"chrono",
"futures",
"getrandom 0.2.16",
"http 1.3.1",
"http-body 1.0.1",
"log",
"md-5",
"percent-encoding",
"quick-xml",
"reqsign",
"reqwest",
"serde",
"serde_json",
"tokio",
"uuid",
]
[[package]]
name = "openssl-probe"
version = "0.1.6"
@@ -6539,33 +6460,6 @@ version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
[[package]]
name = "reqsign"
version = "0.16.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701"
dependencies = [
"anyhow",
"async-trait",
"base64 0.22.1",
"chrono",
"form_urlencoded",
"getrandom 0.2.16",
"hex",
"hmac",
"home",
"http 1.3.1",
"log",
"once_cell",
"percent-encoding",
"rand 0.8.5",
"reqwest",
"serde",
"serde_json",
"sha1",
"sha2",
]
[[package]]
name = "reqwest"
version = "0.12.20"
@@ -7838,18 +7732,16 @@ dependencies = [
[[package]]
name = "tokio"
version = "1.46.1"
version = "1.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17"
checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779"
dependencies = [
"backtrace",
"bytes",
"io-uring",
"libc",
"mio",
"pin-project-lite",
"signal-hook-registry",
"slab",
"socket2",
"tokio-macros",
"windows-sys 0.52.0",

View File

@@ -21,16 +21,16 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.32.1", "features" = [
lance = { "version" = "=0.31.2", "features" = [
"dynamodb",
], "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
], "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
# Note that this one does not include pyarrow
arrow = { version = "55.1", optional = false }
arrow-array = "55.1"

View File

@@ -1,84 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / Session
# Class: Session
A session for managing caches and object stores across LanceDB operations.
Sessions allow you to configure cache sizes for index and metadata caches,
which can significantly impact performance for large datasets.
## Constructors
### new Session()
```ts
new Session(indexCacheSizeBytes?, metadataCacheSizeBytes?): Session
```
Create a new session with custom cache sizes.
# Parameters
- `indexCacheSizeBytes`: The size of the index cache in bytes.
Defaults to 6GB if not specified.
- `metadataCacheSizeBytes`: The size of the metadata cache in bytes.
Defaults to 1GB if not specified.
#### Parameters
* **indexCacheSizeBytes?**: `null` \| `bigint`
* **metadataCacheSizeBytes?**: `null` \| `bigint`
#### Returns
[`Session`](Session.md)
## Methods
### approxNumItems()
```ts
approxNumItems(): number
```
Get the approximate number of items cached in the session.
#### Returns
`number`
***
### sizeBytes()
```ts
sizeBytes(): bigint
```
Get the current size of the session caches in bytes.
#### Returns
`bigint`
***
### default()
```ts
static default(): Session
```
Create a session with default cache sizes.
This is equivalent to creating a session with 6GB index cache
and 1GB metadata cache.
#### Returns
[`Session`](Session.md)

View File

@@ -6,13 +6,10 @@
# Function: connect()
## connect(uri, options, session)
## connect(uri, options)
```ts
function connect(
uri,
options?,
session?): Promise<Connection>
function connect(uri, options?): Promise<Connection>
```
Connect to a LanceDB instance at the given URI.
@@ -32,8 +29,6 @@ Accepted formats:
* **options?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
The options to use when connecting to the database
* **session?**: [`Session`](../classes/Session.md)
### Returns
`Promise`&lt;[`Connection`](../classes/Connection.md)&gt;
@@ -82,7 +77,7 @@ Accepted formats:
[ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.
### Examples
### Example
```ts
const conn = await connect({
@@ -90,11 +85,3 @@ const conn = await connect({
storageOptions: {timeout: "60s"}
});
```
```ts
const session = Session.default();
const conn = await connect({
uri: "/path/to/database",
session: session
});
```

View File

@@ -29,7 +29,6 @@
- [Query](classes/Query.md)
- [QueryBase](classes/QueryBase.md)
- [RecordBatchIterator](classes/RecordBatchIterator.md)
- [Session](classes/Session.md)
- [Table](classes/Table.md)
- [TagContents](classes/TagContents.md)
- [Tags](classes/Tags.md)

View File

@@ -70,17 +70,6 @@ Defaults to 'us-east-1'.
***
### session?
```ts
optional session: Session;
```
(For LanceDB OSS only): the session to use for this connection. Holds
shared caches and other session-specific state.
***
### storageOptions?
```ts

View File

@@ -8,7 +8,7 @@
## Properties
### ~~indexCacheSize?~~
### indexCacheSize?
```ts
optional indexCacheSize: number;
@@ -16,11 +16,6 @@ optional indexCacheSize: number;
Set the size of the index cache, specified as a number of entries
#### Deprecated
Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to the connect() function.
The exact meaning of an "entry" will depend on the type of index:
- IVF: there is one entry for each IVF partition
- BTREE: there is one entry for the entire index

View File

@@ -19,7 +19,7 @@ lancedb = { path = "../../../rust/lancedb" }
lance = { workspace = true }
arrow = { workspace = true, features = ["ffi"] }
arrow-schema.workspace = true
tokio = "1.46"
tokio = "1.23"
jni = "0.21.1"
snafu.workspace = true
lazy_static.workspace = true

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-final.0</version>
<version>0.21.2-beta.1</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-final.0</version>
<version>0.21.2-beta.1</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-final.0</version>
<version>0.21.2-beta.1</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

44
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.21.2",
"version": "0.21.2-beta.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.21.2",
"version": "0.21.2-beta.0",
"cpu": [
"x64",
"arm64"
@@ -52,11 +52,11 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.21.2",
"@lancedb/vectordb-darwin-x64": "0.21.2",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2",
"@lancedb/vectordb-linux-x64-gnu": "0.21.2",
"@lancedb/vectordb-win32-x64-msvc": "0.21.2"
"@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
"@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",
@@ -327,9 +327,9 @@
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.21.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.2.tgz",
"integrity": "sha512-NAQnIKLw9K33KMODNXBEW0qC8/safWzZtqbVC7j1GcE7PSk0Uc6x7w5nrH5gvleZggjaxY9jaRVTqmtg7PNmqw==",
"version": "0.21.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.2-beta.0.tgz",
"integrity": "sha512-RiYqpKuq9v8A4wFuHt1iPNFYjWJ1KgGFLJwQO4ajp9Hee84sDHq8mP0ATgMcc24hiaOUQ1lRRTULjGbHn4NIYw==",
"cpu": [
"arm64"
],
@@ -340,9 +340,9 @@
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.21.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.2.tgz",
"integrity": "sha512-PudbltlbRiXvBf/bkAaDPL8+RqcI4TG69u00rQHxwkhH7PgPYRTUjfzfaQfiDXZuLXuZHQq703RyoHOqzsHN0Q==",
"version": "0.21.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.2-beta.0.tgz",
"integrity": "sha512-togdP0YIjMYg/hBRMMxW434i5VB789JWU5o3hWrodbX8olEc0Txqw5Dg9CgIOldBIiCti6uTSQiTo6uldZon1w==",
"cpu": [
"x64"
],
@@ -353,9 +353,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.21.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.2.tgz",
"integrity": "sha512-3lJ8lootlwLmhqabCdg0DKftv0Ujep6NTWAoLWK/6VQe2IgHmu/ZPRNQkOSZ5tnYlmRyDiMDMB2tlAzo45sV8Q==",
"version": "0.21.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.2-beta.0.tgz",
"integrity": "sha512-ErS4IQDQVTYVATPeOj/dZXQR34eZQ5rAXm3vJdQi5K6X4zCDaIjOhpmnwzPBGT9W1idaBAoDJhtNfsFaJ6/PQQ==",
"cpu": [
"arm64"
],
@@ -366,9 +366,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.21.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.2.tgz",
"integrity": "sha512-5I2drMOIyRODlAHPsipQBTrRRgcOZ45N5GsuhqcKnz3Tg8GAdc1MQKyK3BrdJzKHLPdRtIyRJ6QTLB3wZvDsQQ==",
"version": "0.21.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.2-beta.0.tgz",
"integrity": "sha512-ycDpyBGbfxtnGGa/RQo5+So6dHALiem1pbYc/LDKKluUJpadtXtEwC61o6hZTcejoYjhEE8ET7vA3OCEJfMFaw==",
"cpu": [
"x64"
],
@@ -379,9 +379,9 @@
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.21.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.2.tgz",
"integrity": "sha512-gjpFukq0NTQSRpWPNIpq4XFtaudjSNBT6DMsagC61D2nx9ZLEdSAdU0wdkeluQwhoMvNnXEPdP9HxDSFUXk+Ww==",
"version": "0.21.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.2-beta.0.tgz",
"integrity": "sha512-IgVkAP/LiNIQD5P6n/9x3bgQOt5pGJarjtSF8r+ialD95QHmo6tcxrwTy/DlA+H1uI6B6h+sbN0c1KXTh1rYcg==",
"cpu": [
"x64"
],

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.21.2",
"version": "0.21.2-beta.1",
"description": " Serverless, low-latency vector database for AI applications",
"private": false,
"main": "dist/index.js",
@@ -89,10 +89,10 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.21.2",
"@lancedb/vectordb-darwin-arm64": "0.21.2",
"@lancedb/vectordb-linux-x64-gnu": "0.21.2",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2",
"@lancedb/vectordb-win32-x64-msvc": "0.21.2"
"@lancedb/vectordb-darwin-x64": "0.21.2-beta.1",
"@lancedb/vectordb-darwin-arm64": "0.21.2-beta.1",
"@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.1",
"@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.1"
}
}

View File

@@ -49,7 +49,7 @@ describe('LanceDB Mirrored Store Integration test', function () {
it('s3://...?mirroredStore=... param is processed correctly', async function () {
this.timeout(600000)
const dir = await fs.promises.mkdtemp(path.join(tmpdir(), 'lancedb-mirror-'))
const dir = tmpdir()
console.log(dir)
const conn = await lancedb.connect({ uri: `s3://lancedb-integtest?mirroredStore=${dir}`, storageOptions: { allowHttp: 'true' } })
const data = Array(200).fill({ vector: Array(128).fill(1.0), id: 0 })
@@ -63,93 +63,118 @@ describe('LanceDB Mirrored Store Integration test', function () {
const t = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite })
const mirroredPath = path.join(dir, `${tableName}.lance`)
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
if (err != null) throw err
// there should be three dirs
assert.equal(files.length, 3)
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
const files = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
// there should be three dirs
assert.equal(files.length, 3, 'files after table creation')
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.txn'))
})
const transactionFiles = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
assert.equal(transactionFiles.length, 1, 'transactionFiles after table creation')
assert.isTrue(transactionFiles[0].name.endsWith('.txn'))
fs.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.manifest'))
})
const versionFiles = await fs.promises.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true })
assert.equal(versionFiles.length, 1, 'versionFiles after table creation')
assert.isTrue(versionFiles[0].name.endsWith('.manifest'))
const dataFiles = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
assert.equal(dataFiles.length, 1, 'dataFiles after table creation')
assert.isTrue(dataFiles[0].name.endsWith('.lance'))
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.lance'))
})
})
// try create index and check if it's mirrored
await t.createIndex({ column: 'vector', type: 'ivf_pq' })
const filesAfterIndex = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
// there should be four dirs
assert.equal(filesAfterIndex.length, 4, 'filesAfterIndex')
assert.isTrue(filesAfterIndex[0].isDirectory())
assert.isTrue(filesAfterIndex[1].isDirectory())
assert.isTrue(filesAfterIndex[2].isDirectory())
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
if (err != null) throw err
// there should be four dirs
assert.equal(files.length, 4)
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
assert.isTrue(files[2].isDirectory())
// Two TXs now
const transactionFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
assert.equal(transactionFilesAfterIndex.length, 2, 'transactionFilesAfterIndex')
assert.isTrue(transactionFilesAfterIndex[0].name.endsWith('.txn'))
assert.isTrue(transactionFilesAfterIndex[1].name.endsWith('.txn'))
// Two TXs now
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 2)
assert.isTrue(files[0].name.endsWith('.txn'))
assert.isTrue(files[1].name.endsWith('.txn'))
})
const dataFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
assert.equal(dataFilesAfterIndex.length, 1, 'dataFilesAfterIndex')
assert.isTrue(dataFilesAfterIndex[0].name.endsWith('.lance'))
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.lance'))
})
const indicesFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true })
assert.equal(indicesFiles.length, 1, 'indicesFiles')
assert.isTrue(indicesFiles[0].isDirectory())
fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].isDirectory())
const indexFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFiles[0].name), { withFileTypes: true })
console.log(`DEBUG indexFiles in ${indicesFiles[0].name}:`, indexFiles.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`))
assert.equal(indexFiles.length, 2, 'indexFiles')
const fileNames = indexFiles.map(f => f.name).sort()
assert.isTrue(fileNames.includes('auxiliary.idx'), 'auxiliary.idx should be present')
assert.isTrue(fileNames.includes('index.idx'), 'index.idx should be present')
assert.isTrue(indexFiles.every(f => f.isFile()), 'all index files should be files')
fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].isFile())
assert.isTrue(files[0].name.endsWith('.idx'))
})
})
})
// try delete and check if it's mirrored
await t.delete('id = 0')
const filesAfterDelete = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
// there should be five dirs
assert.equal(filesAfterDelete.length, 5, 'filesAfterDelete')
assert.isTrue(filesAfterDelete[0].isDirectory())
assert.isTrue(filesAfterDelete[1].isDirectory())
assert.isTrue(filesAfterDelete[2].isDirectory())
assert.isTrue(filesAfterDelete[3].isDirectory())
assert.isTrue(filesAfterDelete[4].isDirectory())
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
if (err != null) throw err
// there should be five dirs
assert.equal(files.length, 5)
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
assert.isTrue(files[2].isDirectory())
assert.isTrue(files[3].isDirectory())
assert.isTrue(files[4].isDirectory())
// Three TXs now
const transactionFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
assert.equal(transactionFilesAfterDelete.length, 3, 'transactionFilesAfterDelete')
assert.isTrue(transactionFilesAfterDelete[0].name.endsWith('.txn'))
assert.isTrue(transactionFilesAfterDelete[1].name.endsWith('.txn'))
// Three TXs now
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 3)
assert.isTrue(files[0].name.endsWith('.txn'))
assert.isTrue(files[1].name.endsWith('.txn'))
})
const dataFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
assert.equal(dataFilesAfterDelete.length, 1, 'dataFilesAfterDelete')
assert.isTrue(dataFilesAfterDelete[0].name.endsWith('.lance'))
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.lance'))
})
const indicesFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true })
assert.equal(indicesFilesAfterDelete.length, 1, 'indicesFilesAfterDelete')
assert.isTrue(indicesFilesAfterDelete[0].isDirectory())
fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].isDirectory())
const indexFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFilesAfterDelete[0].name), { withFileTypes: true })
console.log(`DEBUG indexFilesAfterDelete in ${indicesFilesAfterDelete[0].name}:`, indexFilesAfterDelete.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`))
assert.equal(indexFilesAfterDelete.length, 2, 'indexFilesAfterDelete')
const fileNamesAfterDelete = indexFilesAfterDelete.map(f => f.name).sort()
assert.isTrue(fileNamesAfterDelete.includes('auxiliary.idx'), 'auxiliary.idx should be present after delete')
assert.isTrue(fileNamesAfterDelete.includes('index.idx'), 'index.idx should be present after delete')
assert.isTrue(indexFilesAfterDelete.every(f => f.isFile()), 'all index files should be files after delete')
fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
const deletionFiles = await fs.promises.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true })
assert.equal(deletionFiles.length, 1, 'deletionFiles')
assert.isTrue(deletionFiles[0].name.endsWith('.arrow'))
assert.equal(files.length, 1)
assert.isTrue(files[0].isFile())
assert.isTrue(files[0].name.endsWith('.idx'))
})
})
fs.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.arrow'))
})
})
})
})

View File

@@ -1,13 +0,0 @@
These are the TypeScript bindings of LanceDB.
The core Rust library is in the `../rust/lancedb` directory, the Rust binding
code is in the `src/` directory, and the TypeScript bindings are in
the `lancedb/` directory.
Whenever you change the Rust code, you will need to recompile: `npm run build`.
Common commands:
* Build: `npm run build`
* Lint: `npm run lint`
* Fix lints: `npm run lint-fix`
* Test: `npm test`
* Run single test file: `npm test __test__/arrow.test.ts`

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.21.2"
version = "0.21.2-beta.1"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -108,10 +108,7 @@ describe("remote connection", () => {
it("should pass on requested extra headers", async () => {
await withMockDatabase(
(req, res) => {
expect(req.headers["foo"]).toEqual("1");
expect(req.headers["bar"]).toEqual("2");
expect(req.headers["baz"]).toEqual("3");
expect(req.headers["x-log-attrs"]).toEqual("foo, bar, baz");
expect(req.headers["x-my-header"]).toEqual("my-value");
const body = JSON.stringify({ tables: [] });
res.writeHead(200, { "Content-Type": "application/json" }).end(body);
@@ -122,12 +119,9 @@ describe("remote connection", () => {
},
{
clientConfig: {
extraHeaders: {
"x-log-attrs": "foo, bar, baz",
foo: "1",
bar: "2",
baz: "3",
},
extraHeaders: {
"x-my-header": "my-value",
},
},
},
);

View File

@@ -1,46 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import * as tmp from "tmp";
import { Session, connect } from "../lancedb";
describe("Session", () => {
  // Each test gets its own scratch directory, removed again afterwards.
  let tmpDir: tmp.DirResult;
  beforeEach(() => {
    tmpDir = tmp.dirSync({ unsafeCleanup: true });
  });
  afterEach(() => tmpDir.removeCallback());

  it("should configure cache sizes and work with database operations", async () => {
    // Create session with small cache limits for testing
    const indexCacheSize = BigInt(1024 * 1024); // 1MB
    const metadataCacheSize = BigInt(512 * 1024); // 512KB
    const session = new Session(indexCacheSize, metadataCacheSize);

    // Record initial cache state
    const initialCacheSize = session.sizeBytes();
    const initialCacheItems = session.approxNumItems();

    // Test session works with database connection
    const db = await connect({ uri: tmpDir.name, session: session });

    // Create and use a table to exercise the session
    const data = Array.from({ length: 100 }, (_, i) => ({
      id: i,
      text: `item ${i}`,
    }));
    const table = await db.createTable("test", data);
    const results = await table.query().limit(5).toArray();
    expect(results).toHaveLength(5);

    // Verify cache usage increased after operations
    const finalCacheSize = session.sizeBytes();
    const finalCacheItems = session.approxNumItems();
    expect(finalCacheSize).toBeGreaterThan(initialCacheSize); // Cache should have grown
    expect(finalCacheItems).toBeGreaterThanOrEqual(initialCacheItems); // Items should not decrease
    // The limit must hold for the cache *after* it has been used; checking the
    // pre-operation size would pass regardless of how the session behaves.
    expect(finalCacheSize).toBeLessThan(indexCacheSize + metadataCacheSize); // Within limits
  });
});

View File

@@ -582,7 +582,7 @@ describe("When creating an index", () => {
"Invalid input, minimum_nprobes must be greater than 0",
);
expect(() => tbl.query().nearestTo(queryVec).maximumNprobes(5)).toThrow(
"Invalid input, maximum_nprobes must be greater than or equal to minimum_nprobes",
"Invalid input, maximum_nprobes must be greater than minimum_nprobes",
);
await tbl.dropIndex("vec_idx");

View File

@@ -85,9 +85,6 @@ export interface OpenTableOptions {
/**
* Set the size of the index cache, specified as a number of entries
*
* @deprecated Use session-level cache configuration instead.
* Create a Session with custom cache sizes and pass it to the connect() function.
*
* The exact meaning of an "entry" will depend on the type of index:
* - IVF: there is one entry for each IVF partition
* - BTREE: there is one entry for the entire index

View File

@@ -10,7 +10,6 @@ import {
import {
ConnectionOptions,
Connection as LanceDbConnection,
Session,
} from "./native.js";
export {
@@ -52,8 +51,6 @@ export {
OpenTableOptions,
} from "./connection";
export { Session } from "./native.js";
export {
ExecutableQuery,
Query,
@@ -134,7 +131,6 @@ export { IntoSql, packBits } from "./util";
export async function connect(
uri: string,
options?: Partial<ConnectionOptions>,
session?: Session,
): Promise<Connection>;
/**
* Connect to a LanceDB instance at the given URI.
@@ -153,43 +149,31 @@ export async function connect(
* storageOptions: {timeout: "60s"}
* });
* ```
*
* @example
* ```ts
* const session = Session.default();
* const conn = await connect({
* uri: "/path/to/database",
* session: session
* });
* ```
*/
export async function connect(
options: Partial<ConnectionOptions> & { uri: string },
): Promise<Connection>;
export async function connect(
uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }),
options?: Partial<ConnectionOptions>,
options: Partial<ConnectionOptions> = {},
): Promise<Connection> {
let uri: string | undefined;
let finalOptions: Partial<ConnectionOptions> = {};
if (typeof uriOrOptions !== "string") {
const { uri: uri_, ...opts } = uriOrOptions;
uri = uri_;
finalOptions = opts;
options = opts;
} else {
uri = uriOrOptions;
finalOptions = options || {};
}
if (!uri) {
throw new Error("uri is required");
}
finalOptions = (finalOptions as ConnectionOptions) ?? {};
(<ConnectionOptions>finalOptions).storageOptions = cleanseStorageOptions(
(<ConnectionOptions>finalOptions).storageOptions,
options = (options as ConnectionOptions) ?? {};
(<ConnectionOptions>options).storageOptions = cleanseStorageOptions(
(<ConnectionOptions>options).storageOptions,
);
const nativeConn = await LanceDbConnection.new(uri, finalOptions);
const nativeConn = await LanceDbConnection.new(uri, options);
return new LocalConnection(nativeConn);
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.21.2",
"version": "0.21.2-beta.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.21.2",
"version": "0.21.2-beta.1",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.21.2",
"version": "0.21.2-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.21.2",
"version": "0.21.2-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.21.2",
"version": "0.21.2-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.21.2",
"version": "0.21.2-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.21.2",
"version": "0.21.2-beta.1",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.21.2",
"version": "0.21.2-beta.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.21.2",
"version": "0.21.2-beta.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.21.2",
"version": "0.21.2-beta.0",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.21.2",
"version": "0.21.2-beta.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -74,10 +74,6 @@ impl Connection {
builder = builder.host_override(&host_override);
}
if let Some(session) = options.session {
builder = builder.session(session.inner.clone());
}
Ok(Self::inner_new(builder.execute().await.default_error()?))
}

View File

@@ -14,7 +14,6 @@ pub mod merge;
mod query;
pub mod remote;
mod rerankers;
mod session;
mod table;
mod util;
@@ -35,9 +34,6 @@ pub struct ConnectionOptions {
///
/// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
pub storage_options: Option<HashMap<String, String>>,
/// (For LanceDB OSS only): the session to use for this connection. Holds
/// shared caches and other session-specific state.
pub session: Option<session::Session>,
/// (For LanceDB cloud only): configuration for the remote HTTP client.
pub client_config: Option<remote::ClientConfig>,

View File

@@ -1,102 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::sync::Arc;
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
use napi::bindgen_prelude::*;
use napi_derive::*;
/// A session for managing caches and object stores across LanceDB operations.
///
/// Sessions allow you to configure cache sizes for index and metadata caches,
/// which can significantly impact memory use and performance. They can
/// also be re-used across multiple connections to share the same cache state.
#[napi]
#[derive(Clone)]
pub struct Session {
/// Reference-counted handle to the wrapped Lance session. Because the field is
/// an `Arc`, the derived `Clone` copies the handle rather than the session.
pub(crate) inner: Arc<LanceSession>,
}
impl std::fmt::Debug for Session {
    /// Renders the session as a `Session { .. }` struct showing the two values
    /// reported by the wrapped session: `size_bytes` and `approx_num_items`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let size_bytes = self.inner.size_bytes();
        let approx_num_items = self.inner.approx_num_items();

        let mut out = f.debug_struct("Session");
        out.field("size_bytes", &size_bytes);
        out.field("approx_num_items", &approx_num_items);
        out.finish()
    }
}
#[napi]
impl Session {
/// Create a new session with custom cache sizes.
///
/// # Parameters
///
/// - `index_cache_size_bytes`: The size of the index cache in bytes.
/// Index data is stored in memory in this cache to speed up queries.
/// Defaults to 6GB if not specified.
/// - `metadata_cache_size_bytes`: The size of the metadata cache in bytes.
/// The metadata cache stores file metadata and schema information in memory.
/// This cache improves scan and write performance.
/// Defaults to 1GB if not specified.
#[napi(constructor)]
pub fn new(
index_cache_size_bytes: Option<BigInt>,
metadata_cache_size_bytes: Option<BigInt>,
) -> napi::Result<Self> {
let index_cache_size = index_cache_size_bytes
.map(|size| size.get_u64().1 as usize)
.unwrap_or(6 * 1024 * 1024 * 1024); // 6GB default
let metadata_cache_size = metadata_cache_size_bytes
.map(|size| size.get_u64().1 as usize)
.unwrap_or(1024 * 1024 * 1024); // 1GB default
let session = LanceSession::new(
index_cache_size,
metadata_cache_size,
Arc::new(ObjectStoreRegistry::default()),
);
Ok(Self {
inner: Arc::new(session),
})
}
/// Create a session with default cache sizes.
///
/// This is equivalent to creating a session with 6GB index cache
/// and 1GB metadata cache.
#[napi(factory)]
pub fn default() -> Self {
Self {
inner: Arc::new(LanceSession::default()),
}
}
/// Get the current size of the session caches in bytes.
#[napi]
pub fn size_bytes(&self) -> BigInt {
BigInt::from(self.inner.size_bytes())
}
/// Get the approximate number of items cached in the session.
#[napi]
pub fn approx_num_items(&self) -> u32 {
self.inner.approx_num_items() as u32
}
}
// `FromNapiValue` is implemented by hand so that `Session` can be used with
// napi(object).
impl napi::bindgen_prelude::FromNapiValue for Session {
    /// Reads `napi_val` as a `Session` class instance and returns a clone of it.
    ///
    /// # Safety
    ///
    /// `env` and `napi_val` are passed unchanged to
    /// `ClassInstance::from_napi_value`, so the caller must uphold that
    /// function's requirements.
    unsafe fn from_napi_value(
        env: napi::sys::napi_env,
        napi_val: napi::sys::napi_value,
    ) -> napi::Result<Self> {
        napi::bindgen_prelude::ClassInstance::<Session>::from_napi_value(env, napi_val)
            .map(|instance| instance.clone())
    }
}

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.24.2"
current_version = "0.24.2-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,19 +0,0 @@
These are the Python bindings of LanceDB.
The core Rust library is in the `../rust/lancedb` directory, the Rust binding
code is in the `src/` directory, and the Python bindings are in the `lancedb/` directory.
Common commands:
* Build: `make develop`
* Format: `make format`
* Lint: `make check`
* Fix lints: `make fix`
* Test: `make test`
* Doc test: `make doctest`
Before committing changes, run lints and then formatting.
When you change the Rust code, you will need to recompile the Python bindings: `make develop`.
When you export new types from Rust to Python, you must manually update `python/lancedb/_lancedb.pyi`
with the corresponding type hints. You can run `pyright` to check for type errors in the Python code.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.24.2"
version = "0.24.2-beta.1"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -85,8 +85,8 @@ embeddings = [
"boto3>=1.28.57",
"awscli>=1.29.57",
"botocore>=1.31.57",
'ibm-watsonx-ai>=1.1.2; python_version >= "3.10"',
"ollama>=0.3.0",
"ibm-watsonx-ai>=1.1.2",
]
azure = ["adlfs>=2024.2.0"]

View File

@@ -18,7 +18,6 @@ from .remote import ClientConfig
from .remote.db import RemoteDBConnection
from .schema import vector
from .table import AsyncTable
from ._lancedb import Session
def connect(
@@ -31,7 +30,6 @@ def connect(
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
**kwargs: Any,
) -> DBConnection:
"""Connect to a LanceDB database.
@@ -66,12 +64,6 @@ def connect(
storage_options: dict, optional
Additional options for the storage backend. See available options at
<https://lancedb.github.io/lancedb/guides/storage/>
session: Session, optional
(For LanceDB OSS only)
A session to use for this connection. Sessions allow you to configure
cache sizes for index and metadata caches, which can significantly
impact memory use and performance. They can also be re-used across
multiple connections to share the same cache state.
Examples
--------
@@ -100,7 +92,7 @@ def connect(
if api_key is None:
api_key = os.environ.get("LANCEDB_API_KEY")
if api_key is None:
raise ValueError(f"api_key is required to connect to LanceDB cloud: {uri}")
raise ValueError(f"api_key is required to connected LanceDB cloud: {uri}")
if isinstance(request_thread_pool, int):
request_thread_pool = ThreadPoolExecutor(request_thread_pool)
return RemoteDBConnection(
@@ -121,7 +113,6 @@ def connect(
uri,
read_consistency_interval=read_consistency_interval,
storage_options=storage_options,
session=session,
)
@@ -134,7 +125,6 @@ async def connect_async(
read_consistency_interval: Optional[timedelta] = None,
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
) -> AsyncConnection:
"""Connect to a LanceDB database.
@@ -168,12 +158,6 @@ async def connect_async(
storage_options: dict, optional
Additional options for the storage backend. See available options at
<https://lancedb.github.io/lancedb/guides/storage/>
session: Session, optional
(For LanceDB OSS only)
A session to use for this connection. Sessions allow you to configure
cache sizes for index and metadata caches, which can significantly
impact memory use and performance. They can also be re-used across
multiple connections to share the same cache state.
Examples
--------
@@ -213,7 +197,6 @@ async def connect_async(
read_consistency_interval_secs,
client_config,
storage_options,
session,
)
)
@@ -229,7 +212,6 @@ __all__ = [
"DBConnection",
"LanceDBConnection",
"RemoteDBConnection",
"Session",
"__version__",
]

View File

@@ -6,19 +6,6 @@ import pyarrow as pa
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
from .remote import ClientConfig
class Session:
def __init__(
self,
index_cache_size_bytes: Optional[int] = None,
metadata_cache_size_bytes: Optional[int] = None,
): ...
@staticmethod
def default() -> "Session": ...
@property
def size_bytes(self) -> int: ...
@property
def approx_num_items(self) -> int: ...
class Connection(object):
uri: str
async def table_names(
@@ -102,7 +89,6 @@ async def connect(
read_consistency_interval: Optional[float],
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
storage_options: Optional[Dict[str, str]],
session: Optional[Session],
) -> Connection: ...
class RecordBatchStream:

View File

@@ -94,9 +94,9 @@ def data_to_reader(
else:
raise TypeError(
f"Unknown data type {type(data)}. "
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
"pyarrow Table/RecordBatch, or Pydantic models. "
"See https://lancedb.github.io/lancedb/guides/tables/ for examples."
"Please check "
"https://lancedb.github.io/lance/read_and_write.html "
"to see supported types."
)

View File

@@ -37,7 +37,6 @@ if TYPE_CHECKING:
from ._lancedb import Connection as LanceDbConnection
from .common import DATA, URI
from .embeddings import EmbeddingFunctionConfig
from ._lancedb import Session
class DBConnection(EnforceOverrides):
@@ -248,9 +247,6 @@ class DBConnection(EnforceOverrides):
name: str
The name of the table.
index_cache_size: int, default 256
**Deprecated**: Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to lancedb.connect().
Set the size of the index cache, specified as a number of entries
The exact meaning of an "entry" will depend on the type of index:
@@ -358,7 +354,6 @@ class LanceDBConnection(DBConnection):
*,
read_consistency_interval: Optional[timedelta] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
):
if not isinstance(uri, Path):
scheme = get_uri_scheme(uri)
@@ -372,7 +367,6 @@ class LanceDBConnection(DBConnection):
self._entered = False
self.read_consistency_interval = read_consistency_interval
self.storage_options = storage_options
self.session = session
if read_consistency_interval is not None:
read_consistency_interval_secs = read_consistency_interval.total_seconds()
@@ -388,7 +382,6 @@ class LanceDBConnection(DBConnection):
read_consistency_interval_secs,
None,
storage_options,
session,
)
self._conn = AsyncConnection(LOOP.run(do_connect()))
@@ -482,17 +475,6 @@ class LanceDBConnection(DBConnection):
-------
A LanceTable object representing the table.
"""
if index_cache_size is not None:
import warnings
warnings.warn(
"index_cache_size is deprecated. Use session-level cache "
"configuration instead. Create a Session with custom cache sizes "
"and pass it to lancedb.connect().",
DeprecationWarning,
stacklevel=2,
)
return LanceTable.open(
self,
name,
@@ -838,9 +820,6 @@ class AsyncConnection(object):
See available options at
<https://lancedb.github.io/lancedb/guides/storage/>
index_cache_size: int, default 256
**Deprecated**: Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to lancedb.connect().
Set the size of the index cache, specified as a number of entries
The exact meaning of an "entry" will depend on the type of index:

View File

@@ -11,7 +11,7 @@ from .instructor import InstructorEmbeddingFunction
from .ollama import OllamaEmbeddings
from .open_clip import OpenClipEmbeddings
from .openai import OpenAIEmbeddings
from .registry import EmbeddingFunctionRegistry, get_registry, register
from .registry import EmbeddingFunctionRegistry, get_registry
from .sentence_transformers import SentenceTransformerEmbeddings
from .gte import GteEmbeddings
from .transformers import TransformersEmbeddingFunction, ColbertEmbeddings

View File

@@ -9,14 +9,11 @@ from huggingface_hub import snapshot_download
from pydantic import BaseModel
from transformers import BertTokenizer
from .utils import create_import_stub
try:
import mlx.core as mx
import mlx.nn as nn
except ImportError:
mx = create_import_stub("mlx.core", "mlx")
nn = create_import_stub("mlx.nn", "mlx")
raise ImportError("You need to install MLX to use this model use - pip install mlx")
def average_pool(last_hidden_state: mx.array, attention_mask: mx.array) -> mx.array:
@@ -75,7 +72,7 @@ class TransformerEncoder(nn.Module):
super().__init__()
self.layers = [
TransformerEncoderLayer(dims, num_heads, mlp_dims)
for _ in range(num_layers)
for i in range(num_layers)
]
def __call__(self, x, mask):

View File

@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import json
from typing import Dict, Optional, Type
from typing import Dict, Optional
from .base import EmbeddingFunction, EmbeddingFunctionConfig
@@ -43,7 +43,7 @@ class EmbeddingFunctionRegistry:
self._functions = {}
self._variables = {}
def register(self, alias: Optional[str] = None):
def register(self, alias: str = None):
"""
This creates a decorator that can be used to register
an EmbeddingFunction.
@@ -75,7 +75,7 @@ class EmbeddingFunctionRegistry:
"""
self._functions = {}
def get(self, name: str) -> Type[EmbeddingFunction]:
def get(self, name: str):
"""
Fetch an embedding function class by name

View File

@@ -21,36 +21,6 @@ from ..dependencies import pandas as pd
from ..util import attempt_import_or_raise
def create_import_stub(module_name: str, package_name: str = None):
    """
    Build a placeholder for an optional module that could not be imported.

    Attribute access on the placeholder succeeds, so module-level code that
    only names things from the missing module (for example as a base class)
    can still be imported and collected by doctest. Calling the placeholder
    raises ``ImportError``.

    Parameters
    ----------
    module_name : str
        The name of the module the placeholder stands in for
    package_name : str, optional
        The package name to suggest in the error message; falls back to
        ``module_name`` when not given

    Returns
    -------
    object
        A stub object that can be used in place of the module
    """
    # Name shown in the install hint; fixed for the lifetime of the stub.
    pkg = package_name or module_name

    class _ImportStub:
        def __call__(self, *args, **kwargs):
            raise ImportError(f"You need to install {pkg} to use this functionality")

        def __getattr__(self, name):
            # Every attribute resolves to the stub class itself, which supports
            # chained access like nn.Module.
            return _ImportStub

    return _ImportStub()
# ruff: noqa: PERF203
def retry(tries=10, delay=1, max_delay=30, backoff=3, jitter=1):
def wrapper(fn):

View File

@@ -14,7 +14,7 @@ from typing import (
Literal,
Optional,
Tuple,
TypeVar,
Type,
Union,
Any,
)
@@ -58,8 +58,6 @@ if TYPE_CHECKING:
else:
from typing_extensions import Self
T = TypeVar("T", bound="LanceModel")
# Pydantic validation function for vector queries
def ensure_vector_query(
@@ -748,8 +746,8 @@ class LanceQueryBuilder(ABC):
return self.to_arrow(timeout=timeout).to_pylist()
def to_pydantic(
self, model: type[T], *, timeout: Optional[timedelta] = None
) -> list[T]:
self, model: Type[LanceModel], *, timeout: Optional[timedelta] = None
) -> List[LanceModel]:
"""Return the table as a list of pydantic models.
Parameters
@@ -908,11 +906,11 @@ class LanceQueryBuilder(ABC):
>>> plan = table.search(query).explain_plan(True)
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
GlobalLimitExec: skip=0, fetch=10
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ...
GlobalLimitExec: skip=0, fetch=10
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters
----------
@@ -942,19 +940,19 @@ class LanceQueryBuilder(ABC):
>>> plan = table.search(query).analyze_plan()
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
AnalyzeExec verbose=true, metrics=[]
TracedExec, metrics=[]
ProjectionExec: expr=[...], metrics=[...]
GlobalLimitExec: skip=0, fetch=10, metrics=[...]
FilterExec: _distance@2 IS NOT NULL,
metrics=[output_rows=..., elapsed_compute=...]
SortExec: TopK(fetch=10), expr=[...],
preserve_partitioning=[...],
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
KNNVectorDistance: metric=l2,
metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
LanceRead: uri=..., projection=[vector], ...
metrics=[output_rows=..., elapsed_compute=...,
bytes_read=..., iops=..., requests=...]
ProjectionExec: expr=[...], metrics=[...]
GlobalLimitExec: skip=0, fetch=10, metrics=[...]
FilterExec: _distance@2 IS NOT NULL,
metrics=[output_rows=..., elapsed_compute=...]
SortExec: TopK(fetch=10), expr=[...],
preserve_partitioning=[...],
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
KNNVectorDistance: metric=l2,
metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
LanceScan: uri=..., projection=[vector], row_id=true,
row_addr=false, ordered=false,
metrics=[output_rows=..., elapsed_compute=...,
bytes_read=..., iops=..., requests=...]
Returns
-------
@@ -2045,7 +2043,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ...
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters
----------
@@ -2431,7 +2429,7 @@ class AsyncQueryBase(object):
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ...
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters
----------
@@ -3056,7 +3054,7 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ...
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
<BLANKLINE>
FTS Search Plan:
ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]

View File

@@ -102,9 +102,7 @@ if TYPE_CHECKING:
)
def _into_pyarrow_reader(
data, schema: Optional[pa.Schema] = None
) -> pa.RecordBatchReader:
def _into_pyarrow_reader(data) -> pa.RecordBatchReader:
from lancedb.dependencies import datasets
if _check_for_hugging_face(data):
@@ -125,12 +123,6 @@ def _into_pyarrow_reader(
raise ValueError("Cannot add a single dictionary to a table. Use a list.")
if isinstance(data, list):
# Handle empty list case
if not data:
if schema is None:
raise ValueError("Cannot create table from empty list without a schema")
return pa.Table.from_pylist(data, schema=schema).to_reader()
# convert to list of dict if data is a bunch of LanceModels
if isinstance(data[0], LanceModel):
schema = data[0].__class__.to_arrow_schema()
@@ -173,9 +165,9 @@ def _into_pyarrow_reader(
else:
raise TypeError(
f"Unknown data type {type(data)}. "
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
"pyarrow Table/RecordBatch, or Pydantic models. "
"See https://lancedb.github.io/lancedb/guides/tables/ for examples."
"Please check "
"https://lancedb.github.io/lancedb/python/python/ "
"to see supported types."
)
@@ -244,7 +236,7 @@ def _sanitize_data(
# 1. There might be embedding columns missing that will be added
# in the add_embeddings step.
# 2. If `allow_subschemas` is True, there might be columns missing.
reader = _into_pyarrow_reader(data, target_schema)
reader = _into_pyarrow_reader(data)
reader = _append_vector_columns(reader, target_schema, metadata=metadata)
@@ -3673,14 +3665,9 @@ class AsyncTable:
)
if query.distance_type is not None:
async_query = async_query.distance_type(query.distance_type)
if query.minimum_nprobes is not None and query.maximum_nprobes is not None:
# Set both to the minimum first to avoid min > max error.
async_query = async_query.nprobes(
query.minimum_nprobes
).maximum_nprobes(query.maximum_nprobes)
elif query.minimum_nprobes is not None:
if query.minimum_nprobes is not None:
async_query = async_query.minimum_nprobes(query.minimum_nprobes)
elif query.maximum_nprobes is not None:
if query.maximum_nprobes is not None:
async_query = async_query.maximum_nprobes(query.maximum_nprobes)
if query.refine_factor is not None:
async_query = async_query.refine_factor(query.refine_factor)

View File

@@ -33,11 +33,8 @@ tantivy = pytest.importorskip("tantivy")
@pytest.fixture
def table(tmp_path) -> ldb.table.LanceTable:
# Use local random state to avoid affecting other tests
rng = np.random.RandomState(42)
local_random = random.Random(42)
db = ldb.connect(tmp_path)
vectors = [rng.randn(128) for _ in range(100)]
vectors = [np.random.randn(128) for _ in range(100)]
text_nouns = ("puppy", "car")
text2_nouns = ("rabbit", "girl", "monkey")
@@ -47,10 +44,10 @@ def table(tmp_path) -> ldb.table.LanceTable:
text = [
" ".join(
[
text_nouns[local_random.randrange(0, len(text_nouns))],
verbs[local_random.randrange(0, 5)],
adv[local_random.randrange(0, 5)],
adj[local_random.randrange(0, 5)],
text_nouns[random.randrange(0, len(text_nouns))],
verbs[random.randrange(0, 5)],
adv[random.randrange(0, 5)],
adj[random.randrange(0, 5)],
]
)
for _ in range(100)
@@ -58,15 +55,15 @@ def table(tmp_path) -> ldb.table.LanceTable:
text2 = [
" ".join(
[
text2_nouns[local_random.randrange(0, len(text2_nouns))],
verbs[local_random.randrange(0, 5)],
adv[local_random.randrange(0, 5)],
adj[local_random.randrange(0, 5)],
text2_nouns[random.randrange(0, len(text2_nouns))],
verbs[random.randrange(0, 5)],
adv[random.randrange(0, 5)],
adj[random.randrange(0, 5)],
]
)
for _ in range(100)
]
count = [local_random.randint(1, 10000) for _ in range(100)]
count = [random.randint(1, 10000) for _ in range(100)]
table = db.create_table(
"test",
data=pd.DataFrame(
@@ -85,11 +82,8 @@ def table(tmp_path) -> ldb.table.LanceTable:
@pytest.fixture
async def async_table(tmp_path) -> ldb.table.AsyncTable:
# Use local random state to avoid affecting other tests
rng = np.random.RandomState(42)
local_random = random.Random(42)
db = await ldb.connect_async(tmp_path)
vectors = [rng.randn(128) for _ in range(100)]
vectors = [np.random.randn(128) for _ in range(100)]
text_nouns = ("puppy", "car")
text2_nouns = ("rabbit", "girl", "monkey")
@@ -99,10 +93,10 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
text = [
" ".join(
[
text_nouns[local_random.randrange(0, len(text_nouns))],
verbs[local_random.randrange(0, 5)],
adv[local_random.randrange(0, 5)],
adj[local_random.randrange(0, 5)],
text_nouns[random.randrange(0, len(text_nouns))],
verbs[random.randrange(0, 5)],
adv[random.randrange(0, 5)],
adj[random.randrange(0, 5)],
]
)
for _ in range(100)
@@ -110,15 +104,15 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
text2 = [
" ".join(
[
text2_nouns[local_random.randrange(0, len(text2_nouns))],
verbs[local_random.randrange(0, 5)],
adv[local_random.randrange(0, 5)],
adj[local_random.randrange(0, 5)],
text2_nouns[random.randrange(0, len(text2_nouns))],
verbs[random.randrange(0, 5)],
adv[random.randrange(0, 5)],
adj[random.randrange(0, 5)],
]
)
for _ in range(100)
]
count = [local_random.randint(1, 10000) for _ in range(100)]
count = [random.randint(1, 10000) for _ in range(100)]
table = await db.create_table(
"test",
data=pd.DataFrame(

View File

@@ -166,7 +166,7 @@ async def test_explain_plan(table: AsyncTable):
assert "Vector Search Plan" in plan
assert "KNNVectorDistance" in plan
assert "FTS Search Plan" in plan
assert "LanceRead" in plan
assert "LanceScan" in plan
@pytest.mark.asyncio

View File

@@ -445,45 +445,25 @@ def test_invalid_nprobes_sync(table):
with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"):
LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(0).to_list()
with pytest.raises(
ValueError,
match="maximum_nprobes must be greater than or equal to minimum_nprobes",
ValueError, match="maximum_nprobes must be greater than minimum_nprobes"
):
LanceVectorQueryBuilder(table, [0, 0], "vector").maximum_nprobes(5).to_list()
with pytest.raises(
ValueError,
match="minimum_nprobes must be less than or equal to maximum_nprobes",
ValueError, match="minimum_nprobes must be less or equal to maximum_nprobes"
):
LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(100).to_list()
def test_nprobes_works_sync(table):
    """A plain ``nprobes`` setting is accepted by the sync vector query builder."""
    builder = LanceVectorQueryBuilder(table, [0, 0], "vector")
    # Running the query is the assertion: it must not raise.
    builder.nprobes(30).to_list()
def test_nprobes_min_max_works_sync(table):
    """Setting a minimum and then a larger maximum nprobes runs without error."""
    builder = LanceVectorQueryBuilder(table, [0, 0], "vector")
    builder.minimum_nprobes(2).maximum_nprobes(4).to_list()
def test_multiple_nprobes_calls_works_sync(table):
    """``nprobes`` followed by explicit maximum/minimum overrides still executes."""
    builder = LanceVectorQueryBuilder(table, [0, 0], "vector")
    # Same call order as before: nprobes, then maximum, then minimum.
    builder.nprobes(30).maximum_nprobes(20).minimum_nprobes(20).to_list()
@pytest.mark.asyncio
async def test_invalid_nprobes_async(table_async: AsyncTable):
with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"):
await table_async.vector_search([0, 0]).minimum_nprobes(0).to_list()
with pytest.raises(
ValueError,
match="maximum_nprobes must be greater than or equal to minimum_nprobes",
ValueError, match="maximum_nprobes must be greater than minimum_nprobes"
):
await table_async.vector_search([0, 0]).maximum_nprobes(5).to_list()
with pytest.raises(
ValueError,
match="minimum_nprobes must be less than or equal to maximum_nprobes",
ValueError, match="minimum_nprobes must be less or equal to maximum_nprobes"
):
await table_async.vector_search([0, 0]).minimum_nprobes(100).to_list()
@@ -859,7 +839,7 @@ async def test_explain_plan_with_filters(table_async: AsyncTable):
table_async.query().nearest_to(pa.array([1, 2])).where("id = 1").explain_plan()
)
assert "KNN" in plan_with_filter
assert "LanceRead" in plan_with_filter
assert "FilterExec" in plan_with_filter
# Test FTS query with filter
from lancedb.index import FTS
@@ -870,8 +850,7 @@ async def test_explain_plan_with_filters(table_async: AsyncTable):
)
plan_fts_filter = await query_fts_filter.where("id = 1").explain_plan()
assert "MatchQuery: query=dog" in plan_fts_filter
assert "LanceRead" in plan_fts_filter
assert "full_filter=id = Int64(1)" in plan_fts_filter # Should show filter details
assert "FilterExec: id@" in plan_fts_filter # Should show filter details
@pytest.mark.asyncio
@@ -1359,20 +1338,3 @@ async def test_query_timeout_async(tmp_path):
.nearest_to([0.0, 0.0])
.to_list(timeout=timedelta(0))
)
def test_search_empty_table(mem_db):
    """Vector search against a table with no rows yields an empty result list.

    Regression test for issue #303
    (https://github.com/lancedb/lancedb/issues/303), where searching an
    empty table produced a scary error message.
    """
    fields = [
        pa.field("vector", pa.list_(pa.float32(), 2)),
        pa.field("id", pa.int64()),
    ]
    empty_table = mem_db.create_table("test_empty_search", schema=pa.schema(fields))

    # No rows were inserted, so the search must simply come back empty.
    hits = empty_table.search([1.0, 2.0]).limit(5).to_list()
    assert hits == []

View File

@@ -1,38 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import lancedb
def test_session_cache_configuration(tmp_path):
    """Exercise a Session with small cache limits through a real connection."""
    # Deliberately small limits: 1MB index cache, 512KB metadata cache.
    index_cache_size = 1024 * 1024
    metadata_cache_size = 512 * 1024
    session = lancedb.Session(
        index_cache_size_bytes=index_cache_size,
        metadata_cache_size_bytes=metadata_cache_size,
    )

    # Snapshot the counters before anything uses the session.
    size_before = session.size_bytes
    items_before = session.approx_num_items

    # Route a connection through the session, then write and read back a table.
    db = lancedb.connect(tmp_path, session=session)
    rows = [{"id": i, "text": f"item {i}"} for i in range(100)]
    table = db.create_table("test", rows)
    fetched = table.to_arrow().to_pylist()
    assert len(fetched) == 100

    # Usage must have grown, and the item count must not have shrunk.
    size_after = session.size_bytes
    items_after = session.approx_num_items
    assert size_after > size_before
    assert items_after >= items_before
    assert size_before < index_cache_size + metadata_cache_size

View File

@@ -1804,45 +1804,3 @@ def test_stats(mem_db: DBConnection):
},
},
}
def test_create_table_empty_list_with_schema(mem_db: DBConnection):
    """``create_table`` accepts ``data=[]`` when an explicit schema is supplied.

    Regression test for ``IndexError: list index out of range`` raised by
    ``create_table(name, data=[], schema=schema)``.
    """
    expected_schema = pa.schema(
        [
            pa.field("vector", pa.list_(pa.float32(), 2)),
            pa.field("id", pa.int64()),
        ]
    )
    created = mem_db.create_table("test_empty_list", data=[], schema=expected_schema)
    assert created.count_rows() == 0
    assert created.schema == expected_schema
def test_create_table_empty_list_no_schema_error(mem_db: DBConnection):
    """An empty list with no schema is rejected with a descriptive ValueError."""
    expected_message = "Cannot create table from empty list without a schema"
    with pytest.raises(ValueError, match=expected_message):
        mem_db.create_table("test_empty_no_schema", data=[])
def test_add_table_with_empty_embeddings(tmp_path):
    """Adding one valid row with ``on_bad_vectors="drop"`` stores that row.

    Regression test for issue #1968
    (https://github.com/lancedb/lancedb/issues/1968).
    """

    class MySchema(LanceModel):
        text: str
        embedding: Vector(16)

    db = lancedb.connect(tmp_path)
    table = db.create_table("test", schema=MySchema)

    # The single row carries a full 16-element embedding.
    row = {"text": "bar", "embedding": [0.1] * 16}
    table.add([row], on_bad_vectors="drop")
    assert table.count_rows() == 1

View File

@@ -179,7 +179,7 @@ impl Connection {
}
#[pyfunction]
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None))]
#[allow(clippy::too_many_arguments)]
pub fn connect(
py: Python,
@@ -190,7 +190,6 @@ pub fn connect(
read_consistency_interval: Option<f64>,
client_config: Option<PyClientConfig>,
storage_options: Option<HashMap<String, String>>,
session: Option<crate::session::Session>,
) -> PyResult<Bound<'_, PyAny>> {
future_into_py(py, async move {
let mut builder = lancedb::connect(&uri);
@@ -214,9 +213,6 @@ pub fn connect(
if let Some(client_config) = client_config {
builder = builder.client_config(client_config.into());
}
if let Some(session) = session {
builder = builder.session(session.inner.clone());
}
Ok(Connection::new(builder.execute().await.infer_error()?))
})
}

View File

@@ -11,7 +11,6 @@ use pyo3::{
wrap_pyfunction, Bound, PyResult, Python,
};
use query::{FTSQuery, HybridQuery, Query, VectorQuery};
use session::Session;
use table::{
AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, MergeResult,
Table, UpdateResult,
@@ -22,7 +21,6 @@ pub mod connection;
pub mod error;
pub mod index;
pub mod query;
pub mod session;
pub mod table;
pub mod util;
@@ -33,7 +31,6 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
.write_style("LANCEDB_LOG_STYLE");
env_logger::init_from_env(env);
m.add_class::<Connection>()?;
m.add_class::<Session>()?;
m.add_class::<Table>()?;
m.add_class::<IndexConfig>()?;
m.add_class::<Query>()?;

View File

@@ -1,107 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::sync::Arc;
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
use pyo3::{pyclass, pymethods, PyResult};
/// A session for managing caches and object stores across LanceDB operations.
///
/// Sessions allow you to configure cache sizes for index and metadata caches,
/// which can significantly impact memory use and performance. They can
/// also be re-used across multiple connections to share the same cache state.
///
/// The wrapped Lance session is held behind an `Arc`, so cloning this value
/// yields another handle to the same underlying session.
#[pyclass]
#[derive(Clone)]
pub struct Session {
// Shared handle to the underlying Lance session; crate-visible so other
// binding code in this crate can reach it.
pub(crate) inner: Arc<LanceSession>,
}
impl Default for Session {
    /// Wraps a default-constructed Lance session in a fresh shared handle.
    fn default() -> Self {
        let inner = Arc::new(LanceSession::default());
        Self { inner }
    }
}
#[pymethods]
impl Session {
    /// Create a new session with custom cache sizes.
    ///
    /// Parameters
    /// ----------
    /// index_cache_size_bytes : int, optional
    ///     The size of the index cache in bytes.
    ///     Index data is kept in memory in this cache to speed up queries.
    ///     Default: 6GB (6 * 1024 * 1024 * 1024 bytes)
    /// metadata_cache_size_bytes : int, optional
    ///     The size of the metadata cache in bytes.
    ///     The metadata cache keeps file metadata and schema information in
    ///     memory, which improves scan and write performance.
    ///     Default: 1GB (1024 * 1024 * 1024 bytes)
    #[new]
    #[pyo3(signature = (index_cache_size_bytes=None, metadata_cache_size_bytes=None))]
    pub fn new(
        index_cache_size_bytes: Option<usize>,
        metadata_cache_size_bytes: Option<usize>,
    ) -> PyResult<Self> {
        const GIB: usize = 1024 * 1024 * 1024;

        // A fresh registry per session: object stores are not shared with
        // sessions created elsewhere.
        let registry = Arc::new(ObjectStoreRegistry::default());
        let inner = Arc::new(LanceSession::new(
            index_cache_size_bytes.unwrap_or(6 * GIB),
            metadata_cache_size_bytes.unwrap_or(GIB),
            registry,
        ));
        Ok(Self { inner })
    }

    /// Create a session with default cache sizes.
    ///
    /// Returns
    /// -------
    /// Session
    ///     A new Session with default cache sizes
    #[staticmethod]
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> Self {
        // Go through the `Default` trait explicitly; `Self::default()` would
        // resolve to this inherent method and recurse.
        // NOTE(review): earlier docs described this as equivalent to a 6GB
        // index cache and 1GB metadata cache; that depends on the defaults of
        // the wrapped Lance session, which are not visible here -- confirm.
        <Self as Default>::default()
    }

    /// Get the current size of the session caches in bytes.
    ///
    /// Returns
    /// -------
    /// int
    ///     The total size of all caches in the session
    #[getter]
    pub fn size_bytes(&self) -> u64 {
        self.inner.size_bytes()
    }

    /// Get the approximate number of items cached in the session.
    ///
    /// Returns
    /// -------
    /// int
    ///     The number of cached items across all caches
    #[getter]
    pub fn approx_num_items(&self) -> usize {
        self.inner.approx_num_items()
    }

    /// Python `repr()`: shows the current cache size and approximate item count.
    fn __repr__(&self) -> String {
        let bytes = self.size_bytes();
        let items = self.approx_num_items();
        format!(
            "Session(size_bytes={}, approx_num_items={})",
            bytes, items
        )
    }
}

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.21.2"
version = "0.21.2-beta.1"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.21.2"
version = "0.21.2-beta.1"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -678,8 +678,7 @@ impl Database for ListingDatabase {
let mut read_params = request.lance_read_params.unwrap_or_else(|| {
let mut default_params = ReadParams::default();
if let Some(index_cache_size) = request.index_cache_size {
#[allow(deprecated)]
default_params.index_cache_size(index_cache_size as usize);
default_params.index_cache_size = index_cache_size as usize;
}
default_params
});

View File

@@ -290,7 +290,3 @@ impl Display for DistanceType {
/// Connect to a database
pub use connection::connect;
/// Re-export Lance Session and ObjectStoreRegistry for custom session creation
pub use lance::session::Session;
pub use lance_io::object_store::ObjectStoreRegistry;

View File

@@ -958,8 +958,7 @@ impl VectorQuery {
if let Some(maximum_nprobes) = self.request.maximum_nprobes {
if minimum_nprobes > maximum_nprobes {
return Err(Error::InvalidInput {
message: "minimum_nprobes must be less than or equal to maximum_nprobes"
.to_string(),
message: "minimum_nprobes must be less or equal to maximum_nprobes".to_string(),
});
}
}
@@ -990,8 +989,7 @@ impl VectorQuery {
}
if maximum_nprobes < self.request.minimum_nprobes {
return Err(Error::InvalidInput {
message: "maximum_nprobes must be greater than or equal to minimum_nprobes"
.to_string(),
message: "maximum_nprobes must be greater than minimum_nprobes".to_string(),
});
}
}

View File

@@ -2,7 +2,7 @@
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use http::HeaderName;
use log::{debug, info};
use log::debug;
use reqwest::{
header::{HeaderMap, HeaderValue},
Body, Request, RequestBuilder, Response,
@@ -324,7 +324,6 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
}
for (key, value) in &config.extra_headers {
info!("header: {}={}", key, value);
let key_parsed = HeaderName::from_str(key).map_err(|_| Error::InvalidInput {
message: format!("non-ascii value for header '{}' provided", key),
})?;

View File

@@ -85,14 +85,6 @@ impl ExecutionPlan for MetadataEraserExec {
vec![&self.input]
}
/// Returns `true` for every child, declaring that this node keeps each
/// input's ordering.
fn maintains_input_order(&self) -> Vec<bool> {
    let child_count = self.children().len();
    vec![true; child_count]
}
/// Returns `false` for every child, declaring that this node gains nothing
/// from having its inputs repartitioned.
fn benefits_from_input_partitioning(&self) -> Vec<bool> {
    let child_count = self.children().len();
    vec![false; child_count]
}
fn with_new_children(
self: Arc<Self>,
children: Vec<Arc<dyn ExecutionPlan>>,
@@ -494,8 +486,11 @@ pub mod tests {
TestFixture::check_plan(
plan,
"MetadataEraserExec
CoalesceBatchesExec:...
FilterExec: i@0 >= 5
RepartitionExec:...
ProjectionExec:...
LanceRead:...",
LanceScan:...",
)
.await;