test to reproduce node extra headers issue

chore: update npm lockfile (#2563)
chore: upgrade lance to 0.32.1-beta.2 (#2562)
2025-12-23 05:19:58 +00:00 · 2025-08-04 10:03:04 -02:30 · 2025-07-30 18:28:06 -07:00 · 2025-07-30 14:31:04 -07:00 · 2025-07-30 09:23:25 -07:00 · 2025-07-29 19:26:30 -07:00
66 changed files with 1071 additions and 281 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.21.2-beta.0"
+current_version = "0.21.2"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/workflows/cargo-publish.yml
+++ b/.github/workflows/cargo-publish.yml
@@ -5,8 +5,8 @@ on:
    tags-ignore:
      # We don't publish pre-releases for Rust. Crates.io is just a source
      # distribution, so we don't need to publish pre-releases.
-      - 'v*-beta*'
-      - '*-v*' # for example, python-vX.Y.Z
+      - "v*-beta*"
+      - "*-v*" # for example, python-vX.Y.Z

 env:
  # This env var is used by Swatinem/rust-cache@v2 for the cache
@@ -19,6 +19,8 @@ env:
 jobs:
  build:
    runs-on: ubuntu-22.04
+    permissions:
+      id-token: write
    timeout-minutes: 30
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
@@ -31,6 +33,8 @@ jobs:
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
+      - uses: rust-lang/crates-io-auth-action@v1
+        id: auth
      - name: Publish the package
        run: |
-          cargo publish -p lancedb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -0,0 +1,24 @@
+LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
+It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
+remote (against LanceDB Cloud).
+
+The core of LanceDB is written in Rust. There are bindings in Python, TypeScript, and Java.
+
+Project layout:
+
+* `rust/lancedb`: The LanceDB core Rust implementation.
+* `python`: The Python bindings, using PyO3.
+* `nodejs`: The TypeScript bindings, using napi-rs.
+* `java`: The Java bindings.
+
+(`rust/ffi` and `node/` are for a deprecated package. You can ignore them.)
+
+Common commands:
+
+* Check for compiler errors: `cargo check --features remote --tests --examples`
+* Run tests: `cargo test --features remote --tests`
+* Run specific test: `cargo test --features remote -p <package_name> --test <test_name>`
+* Lint: `cargo clippy --features remote --tests --examples`
+* Format: `cargo fmt --all`
+
+Before committing changes, run formatting.
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1039,6 +1039,17 @@ dependencies = [
 "tokio",
 ]

+[[package]]
+name = "backon"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "302eaff5357a264a2c42f127ecb8bac761cf99749fc3dc95677e2743991f99e7"
+dependencies = [
+ "fastrand",
+ "gloo-timers",
+ "tokio",
+]
+
 [[package]]
 name = "backtrace"
 version = "0.3.75"
@@ -2477,6 +2488,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
 dependencies = [
 "block-buffer",
+ "const-oid",
 "crypto-common",
 "subtle",
 ]
@@ -2840,9 +2852,10 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"

 [[package]]
 name = "fsst"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
+ "arrow-array",
 "rand 0.8.5",
 ]

@@ -3256,6 +3269,18 @@ version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"

+[[package]]
+name = "gloo-timers"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "js-sys",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "group"
 version = "0.12.1"
@@ -3792,6 +3817,17 @@ dependencies = [
 "cfg-if",
 ]

+[[package]]
+name = "io-uring"
+version = "0.7.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
+dependencies = [
+ "bitflags 2.9.1",
+ "cfg-if",
+ "libc",
+]
+
 [[package]]
 name = "ipnet"
 version = "2.11.0"
@@ -3930,8 +3966,8 @@ dependencies = [

 [[package]]
 name = "lance"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow",
 "arrow-arith",
@@ -3993,8 +4029,8 @@ dependencies = [

 [[package]]
 name = "lance-arrow"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4011,8 +4047,8 @@ dependencies = [

 [[package]]
 name = "lance-core"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4047,8 +4083,8 @@ dependencies = [

 [[package]]
 name = "lance-datafusion"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4076,8 +4112,8 @@ dependencies = [

 [[package]]
 name = "lance-datagen"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4093,8 +4129,8 @@ dependencies = [

 [[package]]
 name = "lance-encoding"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrayref",
 "arrow",
@@ -4133,8 +4169,8 @@ dependencies = [

 [[package]]
 name = "lance-file"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow-arith",
 "arrow-array",
@@ -4168,8 +4204,8 @@ dependencies = [

 [[package]]
 name = "lance-index"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4203,7 +4239,6 @@ dependencies = [
 "lance-linalg",
 "lance-table",
 "log",
- "moka",
 "num-traits",
 "object_store",
 "prost",
@@ -4223,8 +4258,8 @@ dependencies = [

 [[package]]
 name = "lance-io"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow",
 "arrow-arith",
@@ -4248,6 +4283,8 @@ dependencies = [
 "lance-core",
 "log",
 "object_store",
+ "object_store_opendal",
+ "opendal",
 "path_abs",
 "pin-project",
 "prost",
@@ -4262,8 +4299,8 @@ dependencies = [

 [[package]]
 name = "lance-linalg"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4286,8 +4323,8 @@ dependencies = [

 [[package]]
 name = "lance-table"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4325,8 +4362,8 @@ dependencies = [

 [[package]]
 name = "lance-testing"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow-array",
 "arrow-schema",
@@ -4337,7 +4374,7 @@ dependencies = [

 [[package]]
 name = "lancedb"
-version = "0.21.2-beta.0"
+version = "0.21.2"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4424,7 +4461,7 @@ dependencies = [

 [[package]]
 name = "lancedb-node"
-version = "0.21.2-beta.0"
+version = "0.21.2"
 dependencies = [
 "arrow-array",
 "arrow-ipc",
@@ -4449,7 +4486,7 @@ dependencies = [

 [[package]]
 name = "lancedb-nodejs"
-version = "0.21.2-beta.0"
+version = "0.21.2"
 dependencies = [
 "arrow-array",
 "arrow-ipc",
@@ -4469,7 +4506,7 @@ dependencies = [

 [[package]]
 name = "lancedb-python"
-version = "0.24.2-beta.0"
+version = "0.24.2"
 dependencies = [
 "arrow",
 "env_logger",
@@ -5215,6 +5252,21 @@ dependencies = [
 "web-time",
 ]

+[[package]]
+name = "object_store_opendal"
+version = "0.54.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ce697ee723fdc3eaf6c457abf4059034be15167022b18b619993802cd1443d5"
+dependencies = [
+ "async-trait",
+ "bytes",
+ "futures",
+ "object_store",
+ "opendal",
+ "pin-project",
+ "tokio",
+]
+
 [[package]]
 name = "once_cell"
 version = "1.21.3"
@@ -5255,6 +5307,33 @@ dependencies = [
 "pkg-config",
 ]

+[[package]]
+name = "opendal"
+version = "0.54.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ffb9838d0575c6dbaf3fcec7255af8d5771996d4af900bbb6fa9a314dec00a1a"
+dependencies = [
+ "anyhow",
+ "backon",
+ "base64 0.22.1",
+ "bytes",
+ "chrono",
+ "futures",
+ "getrandom 0.2.16",
+ "http 1.3.1",
+ "http-body 1.0.1",
+ "log",
+ "md-5",
+ "percent-encoding",
+ "quick-xml",
+ "reqsign",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "tokio",
+ "uuid",
+]
+
 [[package]]
 name = "openssl-probe"
 version = "0.1.6"
@@ -6460,6 +6539,33 @@ version = "1.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"

+[[package]]
+name = "reqsign"
+version = "0.16.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701"
+dependencies = [
+ "anyhow",
+ "async-trait",
+ "base64 0.22.1",
+ "chrono",
+ "form_urlencoded",
+ "getrandom 0.2.16",
+ "hex",
+ "hmac",
+ "home",
+ "http 1.3.1",
+ "log",
+ "once_cell",
+ "percent-encoding",
+ "rand 0.8.5",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "sha1",
+ "sha2",
+]
+
 [[package]]
 name = "reqwest"
 version = "0.12.20"
@@ -7732,16 +7838,18 @@ dependencies = [

 [[package]]
 name = "tokio"
-version = "1.45.1"
+version = "1.46.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779"
+checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17"
 dependencies = [
 "backtrace",
 "bytes",
+ "io-uring",
 "libc",
 "mio",
 "pin-project-lite",
 "signal-hook-registry",
+ "slab",
 "socket2",
 "tokio-macros",
 "windows-sys 0.52.0",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,16 +21,16 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.31.2", "features" = [
+lance = { "version" = "=0.32.1", "features" = [
    "dynamodb",
-], "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-io = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-index = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-linalg = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-table = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-testing = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-datafusion = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-encoding = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
+], "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-io = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-index = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-linalg = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-table = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-testing = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-datafusion = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-encoding = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
 # Note that this one does not include pyarrow
 arrow = { version = "55.1", optional = false }
 arrow-array = "55.1"
--- a/docs/src/js/classes/Session.md
+++ b/docs/src/js/classes/Session.md
@@ -0,0 +1,84 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / Session
+
+# Class: Session
+
+A session for managing caches and object stores across LanceDB operations.
+
+Sessions allow you to configure cache sizes for index and metadata caches,
+which can significantly impact performance for large datasets.
+
+## Constructors
+
+### new Session()
+
+```ts
+new Session(indexCacheSizeBytes?, metadataCacheSizeBytes?): Session
+```
+
+Create a new session with custom cache sizes.
+
+Cache size arguments:
+
+- `indexCacheSizeBytes`: The size of the index cache in bytes.
+  Defaults to 6GB if not specified.
+- `metadataCacheSizeBytes`: The size of the metadata cache in bytes.
+  Defaults to 1GB if not specified.
+
+#### Parameters
+
+* **indexCacheSizeBytes?**: `null` \| `bigint`
+
+* **metadataCacheSizeBytes?**: `null` \| `bigint`
+
+#### Returns
+
+[`Session`](Session.md)
+
+## Methods
+
+### approxNumItems()
+
+```ts
+approxNumItems(): number
+```
+
+Get the approximate number of items cached in the session.
+
+#### Returns
+
+`number`
+
+***
+
+### sizeBytes()
+
+```ts
+sizeBytes(): bigint
+```
+
+Get the current size of the session caches in bytes.
+
+#### Returns
+
+`bigint`
+
+***
+
+### default()
+
+```ts
+static default(): Session
+```
+
+Create a session with default cache sizes.
+
+This is equivalent to creating a session with 6GB index cache
+and 1GB metadata cache.
+
+#### Returns
+
+[`Session`](Session.md)
--- a/docs/src/js/functions/connect.md
+++ b/docs/src/js/functions/connect.md
@@ -6,10 +6,13 @@

 # Function: connect()

-## connect(uri, options)
+## connect(uri, options, session)

 ```ts
-function connect(uri, options?): Promise<Connection>
+function connect(
+   uri,
+   options?,
+   session?): Promise<Connection>
 ```

 Connect to a LanceDB instance at the given URI.
@@ -29,6 +32,8 @@ Accepted formats:
 * **options?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
    The options to use when connecting to the database

+* **session?**: [`Session`](../classes/Session.md)
+
 ### Returns

 `Promise`&lt;[`Connection`](../classes/Connection.md)&gt;
@@ -77,7 +82,7 @@ Accepted formats:

 [ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.

-### Example
+### Examples

 ```ts
 const conn = await connect({
@@ -85,3 +90,11 @@ const conn = await connect({
  storageOptions: {timeout: "60s"}
 });
 ```
+
+```ts
+const session = Session.default();
+const conn = await connect({
+  uri: "/path/to/database",
+  session: session
+});
+```
--- a/docs/src/js/globals.md
+++ b/docs/src/js/globals.md
@@ -29,6 +29,7 @@
 - [Query](classes/Query.md)
 - [QueryBase](classes/QueryBase.md)
 - [RecordBatchIterator](classes/RecordBatchIterator.md)
+- [Session](classes/Session.md)
 - [Table](classes/Table.md)
 - [TagContents](classes/TagContents.md)
 - [Tags](classes/Tags.md)
--- a/docs/src/js/interfaces/ConnectionOptions.md
+++ b/docs/src/js/interfaces/ConnectionOptions.md
@@ -70,6 +70,17 @@ Defaults to 'us-east-1'.

 ***

+### session?
+
+```ts
+optional session: Session;
+```
+
+(For LanceDB OSS only): the session to use for this connection. Holds
+shared caches and other session-specific state.
+
+***
+
 ### storageOptions?

 ```ts
--- a/docs/src/js/interfaces/OpenTableOptions.md
+++ b/docs/src/js/interfaces/OpenTableOptions.md
@@ -8,7 +8,7 @@

 ## Properties

-### indexCacheSize?
+### ~~indexCacheSize?~~

 ```ts
 optional indexCacheSize: number;
@@ -16,6 +16,11 @@ optional indexCacheSize: number;

 Set the size of the index cache, specified as a number of entries

+#### Deprecated
+
+Use session-level cache configuration instead.
+Create a Session with custom cache sizes and pass it to the connect() function.
+
 The exact meaning of an "entry" will depend on the type of index:
 - IVF: there is one entry for each IVF partition
 - BTREE: there is one entry for the entire index
--- a/java/core/lancedb-jni/Cargo.toml
+++ b/java/core/lancedb-jni/Cargo.toml
@@ -19,7 +19,7 @@ lancedb = { path = "../../../rust/lancedb" }
 lance = { workspace = true }
 arrow = { workspace = true, features = ["ffi"] }
 arrow-schema.workspace = true
-tokio = "1.23"
+tokio = "1.46"
 jni = "0.21.1"
 snafu.workspace = true
 lazy_static.workspace = true
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.21.2-beta.0</version>
+        <version>0.21.2-final.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/lance-namespace/pom.xml
+++ b/java/lance-namespace/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.21.2-beta.0</version>
+        <version>0.21.2-final.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.21.2-beta.0</version>
+    <version>0.21.2-final.0</version>
    <packaging>pom</packaging>
    <name>${project.artifactId}</name>
    <description>LanceDB Java SDK Parent POM</description>
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "vectordb",
-  "version": "0.21.2-beta.0",
+  "version": "0.21.2",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "vectordb",
-      "version": "0.21.2-beta.0",
+      "version": "0.21.2",
      "cpu": [
        "x64",
        "arm64"
@@ -52,11 +52,11 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
-        "@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
-        "@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
-        "@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
+        "@lancedb/vectordb-darwin-arm64": "0.21.2",
+        "@lancedb/vectordb-darwin-x64": "0.21.2",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.21.2",
+        "@lancedb/vectordb-linux-x64-gnu": "0.21.2",
+        "@lancedb/vectordb-win32-x64-msvc": "0.21.2"
      },
      "peerDependencies": {
        "@apache-arrow/ts": "^14.0.2",
@@ -327,9 +327,9 @@
      }
    },
    "node_modules/@lancedb/vectordb-darwin-arm64": {
-      "version": "0.21.2-beta.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.2-beta.0.tgz",
-      "integrity": "sha512-RiYqpKuq9v8A4wFuHt1iPNFYjWJ1KgGFLJwQO4ajp9Hee84sDHq8mP0ATgMcc24hiaOUQ1lRRTULjGbHn4NIYw==",
+      "version": "0.21.2",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.2.tgz",
+      "integrity": "sha512-NAQnIKLw9K33KMODNXBEW0qC8/safWzZtqbVC7j1GcE7PSk0Uc6x7w5nrH5gvleZggjaxY9jaRVTqmtg7PNmqw==",
      "cpu": [
        "arm64"
      ],
@@ -340,9 +340,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-darwin-x64": {
-      "version": "0.21.2-beta.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.2-beta.0.tgz",
-      "integrity": "sha512-togdP0YIjMYg/hBRMMxW434i5VB789JWU5o3hWrodbX8olEc0Txqw5Dg9CgIOldBIiCti6uTSQiTo6uldZon1w==",
+      "version": "0.21.2",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.2.tgz",
+      "integrity": "sha512-PudbltlbRiXvBf/bkAaDPL8+RqcI4TG69u00rQHxwkhH7PgPYRTUjfzfaQfiDXZuLXuZHQq703RyoHOqzsHN0Q==",
      "cpu": [
        "x64"
      ],
@@ -353,9 +353,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
-      "version": "0.21.2-beta.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.2-beta.0.tgz",
-      "integrity": "sha512-ErS4IQDQVTYVATPeOj/dZXQR34eZQ5rAXm3vJdQi5K6X4zCDaIjOhpmnwzPBGT9W1idaBAoDJhtNfsFaJ6/PQQ==",
+      "version": "0.21.2",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.2.tgz",
+      "integrity": "sha512-3lJ8lootlwLmhqabCdg0DKftv0Ujep6NTWAoLWK/6VQe2IgHmu/ZPRNQkOSZ5tnYlmRyDiMDMB2tlAzo45sV8Q==",
      "cpu": [
        "arm64"
      ],
@@ -366,9 +366,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-x64-gnu": {
-      "version": "0.21.2-beta.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.2-beta.0.tgz",
-      "integrity": "sha512-ycDpyBGbfxtnGGa/RQo5+So6dHALiem1pbYc/LDKKluUJpadtXtEwC61o6hZTcejoYjhEE8ET7vA3OCEJfMFaw==",
+      "version": "0.21.2",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.2.tgz",
+      "integrity": "sha512-5I2drMOIyRODlAHPsipQBTrRRgcOZ45N5GsuhqcKnz3Tg8GAdc1MQKyK3BrdJzKHLPdRtIyRJ6QTLB3wZvDsQQ==",
      "cpu": [
        "x64"
      ],
@@ -379,9 +379,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-win32-x64-msvc": {
-      "version": "0.21.2-beta.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.2-beta.0.tgz",
-      "integrity": "sha512-IgVkAP/LiNIQD5P6n/9x3bgQOt5pGJarjtSF8r+ialD95QHmo6tcxrwTy/DlA+H1uI6B6h+sbN0c1KXTh1rYcg==",
+      "version": "0.21.2",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.2.tgz",
+      "integrity": "sha512-gjpFukq0NTQSRpWPNIpq4XFtaudjSNBT6DMsagC61D2nx9ZLEdSAdU0wdkeluQwhoMvNnXEPdP9HxDSFUXk+Ww==",
      "cpu": [
        "x64"
      ],
--- a/node/package.json
+++ b/node/package.json
@@ -1,6 +1,6 @@
 {
  "name": "vectordb",
-  "version": "0.21.2-beta.0",
+  "version": "0.21.2",
  "description": " Serverless, low-latency vector database for AI applications",
  "private": false,
  "main": "dist/index.js",
@@ -89,10 +89,10 @@
    }
  },
  "optionalDependencies": {
-    "@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
-    "@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
-    "@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
-    "@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
+    "@lancedb/vectordb-darwin-x64": "0.21.2",
+    "@lancedb/vectordb-darwin-arm64": "0.21.2",
+    "@lancedb/vectordb-linux-x64-gnu": "0.21.2",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.21.2",
+    "@lancedb/vectordb-win32-x64-msvc": "0.21.2"
  }
 }
--- a/node/src/integration_test/test.ts
+++ b/node/src/integration_test/test.ts
@@ -49,7 +49,7 @@ describe('LanceDB Mirrored Store Integration test', function () {
  it('s3://...?mirroredStore=... param is processed correctly', async function () {
    this.timeout(600000)

-    const dir = tmpdir()
+    const dir = await fs.promises.mkdtemp(path.join(tmpdir(), 'lancedb-mirror-'))
    console.log(dir)
    const conn = await lancedb.connect({ uri: `s3://lancedb-integtest?mirroredStore=${dir}`, storageOptions: { allowHttp: 'true' } })
    const data = Array(200).fill({ vector: Array(128).fill(1.0), id: 0 })
@@ -63,118 +63,93 @@ describe('LanceDB Mirrored Store Integration test', function () {
    const t = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite })

    const mirroredPath = path.join(dir, `${tableName}.lance`)
-    fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
-      if (err != null) throw err
-      // there should be three dirs
-      assert.equal(files.length, 3)
-      assert.isTrue(files[0].isDirectory())
-      assert.isTrue(files[1].isDirectory())

-      fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
-        if (err != null) throw err
-        assert.equal(files.length, 1)
-        assert.isTrue(files[0].name.endsWith('.txn'))
-      })
+    const files = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
+    // there should be three dirs
+    assert.equal(files.length, 3, 'files after table creation')
+    assert.isTrue(files[0].isDirectory())
+    assert.isTrue(files[1].isDirectory())

-      fs.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true }, (err, files) => {
-        if (err != null) throw err
-        assert.equal(files.length, 1)
-        assert.isTrue(files[0].name.endsWith('.manifest'))
-      })
+    const transactionFiles = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
+    assert.equal(transactionFiles.length, 1, 'transactionFiles after table creation')
+    assert.isTrue(transactionFiles[0].name.endsWith('.txn'))

-      fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
-        if (err != null) throw err
-        assert.equal(files.length, 1)
-        assert.isTrue(files[0].name.endsWith('.lance'))
-      })
-    })
+    const versionFiles = await fs.promises.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true })
+    assert.equal(versionFiles.length, 1, 'versionFiles after table creation')
+    assert.isTrue(versionFiles[0].name.endsWith('.manifest'))
+
+    const dataFiles = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
+    assert.equal(dataFiles.length, 1, 'dataFiles after table creation')
+    assert.isTrue(dataFiles[0].name.endsWith('.lance'))

    // try create index and check if it's mirrored
    await t.createIndex({ column: 'vector', type: 'ivf_pq' })

-    fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
-      if (err != null) throw err
-      // there should be four dirs
-      assert.equal(files.length, 4)
-      assert.isTrue(files[0].isDirectory())
-      assert.isTrue(files[1].isDirectory())
-      assert.isTrue(files[2].isDirectory())
+    const filesAfterIndex = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
+    // there should be four dirs
+    assert.equal(filesAfterIndex.length, 4, 'filesAfterIndex')
+    assert.isTrue(filesAfterIndex[0].isDirectory())
+    assert.isTrue(filesAfterIndex[1].isDirectory())
+    assert.isTrue(filesAfterIndex[2].isDirectory())

-      // Two TXs now
-      fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
-        if (err != null) throw err
-        assert.equal(files.length, 2)
-        assert.isTrue(files[0].name.endsWith('.txn'))
-        assert.isTrue(files[1].name.endsWith('.txn'))
-      })
+    // Two TXs now
+    const transactionFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
+    assert.equal(transactionFilesAfterIndex.length, 2, 'transactionFilesAfterIndex')
+    assert.isTrue(transactionFilesAfterIndex[0].name.endsWith('.txn'))
+    assert.isTrue(transactionFilesAfterIndex[1].name.endsWith('.txn'))

-      fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
-        if (err != null) throw err
-        assert.equal(files.length, 1)
-        assert.isTrue(files[0].name.endsWith('.lance'))
-      })
+    const dataFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
+    assert.equal(dataFilesAfterIndex.length, 1, 'dataFilesAfterIndex')
+    assert.isTrue(dataFilesAfterIndex[0].name.endsWith('.lance'))

-      fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => {
-        if (err != null) throw err
-        assert.equal(files.length, 1)
-        assert.isTrue(files[0].isDirectory())
+    const indicesFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true })
+    assert.equal(indicesFiles.length, 1, 'indicesFiles')
+    assert.isTrue(indicesFiles[0].isDirectory())

-        fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => {
-          if (err != null) throw err
-
-          assert.equal(files.length, 1)
-          assert.isTrue(files[0].isFile())
-          assert.isTrue(files[0].name.endsWith('.idx'))
-        })
-      })
-    })
+    const indexFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFiles[0].name), { withFileTypes: true })
+    console.log(`DEBUG indexFiles in ${indicesFiles[0].name}:`, indexFiles.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`))
+    assert.equal(indexFiles.length, 2, 'indexFiles')
+    const fileNames = indexFiles.map(f => f.name).sort()
+    assert.isTrue(fileNames.includes('auxiliary.idx'), 'auxiliary.idx should be present')
+    assert.isTrue(fileNames.includes('index.idx'), 'index.idx should be present')
+    assert.isTrue(indexFiles.every(f => f.isFile()), 'all index files should be files')

    // try delete and check if it's mirrored
    await t.delete('id = 0')

-    fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
-      if (err != null) throw err
-      // there should be five dirs
-      assert.equal(files.length, 5)
-      assert.isTrue(files[0].isDirectory())
-      assert.isTrue(files[1].isDirectory())
-      assert.isTrue(files[2].isDirectory())
-      assert.isTrue(files[3].isDirectory())
-      assert.isTrue(files[4].isDirectory())
+    const filesAfterDelete = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
+    // there should be five dirs
+    assert.equal(filesAfterDelete.length, 5, 'filesAfterDelete')
+    assert.isTrue(filesAfterDelete[0].isDirectory())
+    assert.isTrue(filesAfterDelete[1].isDirectory())
+    assert.isTrue(filesAfterDelete[2].isDirectory())
+    assert.isTrue(filesAfterDelete[3].isDirectory())
+    assert.isTrue(filesAfterDelete[4].isDirectory())

-      // Three TXs now
-      fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
-        if (err != null) throw err
-        assert.equal(files.length, 3)
-        assert.isTrue(files[0].name.endsWith('.txn'))
-        assert.isTrue(files[1].name.endsWith('.txn'))
-      })
+    // Three TXs now
+    const transactionFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
+    assert.equal(transactionFilesAfterDelete.length, 3, 'transactionFilesAfterDelete')
+    assert.isTrue(transactionFilesAfterDelete[0].name.endsWith('.txn'))
+    assert.isTrue(transactionFilesAfterDelete[1].name.endsWith('.txn'))

-      fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
-        if (err != null) throw err
-        assert.equal(files.length, 1)
-        assert.isTrue(files[0].name.endsWith('.lance'))
-      })
+    const dataFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
+    assert.equal(dataFilesAfterDelete.length, 1, 'dataFilesAfterDelete')
+    assert.isTrue(dataFilesAfterDelete[0].name.endsWith('.lance'))

-      fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => {
-        if (err != null) throw err
-        assert.equal(files.length, 1)
-        assert.isTrue(files[0].isDirectory())
+    const indicesFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true })
+    assert.equal(indicesFilesAfterDelete.length, 1, 'indicesFilesAfterDelete')
+    assert.isTrue(indicesFilesAfterDelete[0].isDirectory())

-        fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => {
-          if (err != null) throw err
+    const indexFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFilesAfterDelete[0].name), { withFileTypes: true })
+    console.log(`DEBUG indexFilesAfterDelete in ${indicesFilesAfterDelete[0].name}:`, indexFilesAfterDelete.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`))
+    assert.equal(indexFilesAfterDelete.length, 2, 'indexFilesAfterDelete')
+    const fileNamesAfterDelete = indexFilesAfterDelete.map(f => f.name).sort()
+    assert.isTrue(fileNamesAfterDelete.includes('auxiliary.idx'), 'auxiliary.idx should be present after delete')
+    assert.isTrue(fileNamesAfterDelete.includes('index.idx'), 'index.idx should be present after delete')
+    assert.isTrue(indexFilesAfterDelete.every(f => f.isFile()), 'all index files should be files after delete')

-          assert.equal(files.length, 1)
-          assert.isTrue(files[0].isFile())
-          assert.isTrue(files[0].name.endsWith('.idx'))
-        })
-      })
-
-      fs.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true }, (err, files) => {
-        if (err != null) throw err
-        assert.equal(files.length, 1)
-        assert.isTrue(files[0].name.endsWith('.arrow'))
-      })
-    })
+    const deletionFiles = await fs.promises.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true })
+    assert.equal(deletionFiles.length, 1, 'deletionFiles')
+    assert.isTrue(deletionFiles[0].name.endsWith('.arrow'))
  })
 })
--- a/nodejs/CLAUDE.md
+++ b/nodejs/CLAUDE.md
@@ -0,0 +1,13 @@
+These are the TypeScript bindings of LanceDB.
+The core Rust library is in the `../rust/lancedb` directory, the Rust binding
+code is in the `src/` directory, and the TypeScript bindings are in
+the `lancedb/` directory.
+
+Whenever you change the Rust code, you will need to recompile: `npm run build`.
+
+Common commands:
+* Build: `npm run build`
+* Lint: `npm run lint`
+* Fix lints: `npm run lint-fix`
+* Test: `npm test`
+* Run single test file: `npm test __test__/arrow.test.ts`
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.21.2-beta.0"
+version = "0.21.2"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/__test__/remote.test.ts
+++ b/nodejs/__test__/remote.test.ts
@@ -108,7 +108,10 @@ describe("remote connection", () => {
  it("should pass on requested extra headers", async () => {
    await withMockDatabase(
      (req, res) => {
-        expect(req.headers["x-my-header"]).toEqual("my-value");
+        expect(req.headers["foo"]).toEqual("1");
+        expect(req.headers["bar"]).toEqual("2");
+        expect(req.headers["baz"]).toEqual("3");
+        expect(req.headers["x-log-attrs"]).toEqual("foo, bar, baz");

        const body = JSON.stringify({ tables: [] });
        res.writeHead(200, { "Content-Type": "application/json" }).end(body);
@@ -119,9 +122,12 @@ describe("remote connection", () => {
      },
      {
        clientConfig: {
-          extraHeaders: {
-            "x-my-header": "my-value",
-          },
+            extraHeaders: {
+                "x-log-attrs": "foo, bar, baz",
+                foo: "1",
+                bar: "2",
+                baz: "3",
+            },
        },
      },
    );
--- a/nodejs/__test__/session.test.ts
+++ b/nodejs/__test__/session.test.ts
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+import * as tmp from "tmp";
+import { Session, connect } from "../lancedb";
+
+describe("Session", () => {
+  // Each test gets its own scratch directory, removed (with contents) afterwards.
+  let tmpDir: tmp.DirResult;
+  beforeEach(() => {
+    tmpDir = tmp.dirSync({ unsafeCleanup: true });
+  });
+  afterEach(() => tmpDir.removeCallback());
+
+  it("should configure cache sizes and work with database operations", async () => {
+    // Create session with small cache limits for testing
+    const indexCacheSize = BigInt(1024 * 1024); // 1MB
+    const metadataCacheSize = BigInt(512 * 1024); // 512KB
+
+    const session = new Session(indexCacheSize, metadataCacheSize);
+
+    // Record initial cache state so growth can be measured afterwards
+    const initialCacheSize = session.sizeBytes();
+    const initialCacheItems = session.approxNumItems();
+
+    // Test session works with database connection
+    const db = await connect({ uri: tmpDir.name, session: session });
+
+    // Create and use a table to exercise the session
+    const data = Array.from({ length: 100 }, (_, i) => ({
+      id: i,
+      text: `item ${i}`,
+    }));
+    const table = await db.createTable("test", data);
+    const results = await table.query().limit(5).toArray();
+
+    expect(results).toHaveLength(5);
+
+    // Verify cache usage increased after operations
+    const finalCacheSize = session.sizeBytes();
+    const finalCacheItems = session.approxNumItems();
+
+    expect(finalCacheSize).toBeGreaterThan(initialCacheSize); // Cache should have grown
+    expect(finalCacheItems).toBeGreaterThanOrEqual(initialCacheItems); // Items should not decrease
+    // The limit must be checked against the size *after* the operations;
+    // checking the initial size would pass regardless of cache behaviour.
+    expect(finalCacheSize).toBeLessThan(indexCacheSize + metadataCacheSize); // Within limits
+  });
+});
--- a/nodejs/__test__/table.test.ts
+++ b/nodejs/__test__/table.test.ts
@@ -582,7 +582,7 @@ describe("When creating an index", () => {
      "Invalid input, minimum_nprobes must be greater than 0",
    );
    expect(() => tbl.query().nearestTo(queryVec).maximumNprobes(5)).toThrow(
-      "Invalid input, maximum_nprobes must be greater than minimum_nprobes",
+      "Invalid input, maximum_nprobes must be greater than or equal to minimum_nprobes",
    );

    await tbl.dropIndex("vec_idx");
--- a/nodejs/lancedb/connection.ts
+++ b/nodejs/lancedb/connection.ts
@@ -85,6 +85,9 @@ export interface OpenTableOptions {
  /**
   * Set the size of the index cache, specified as a number of entries
   *
+   * @deprecated Use session-level cache configuration instead.
+   * Create a Session with custom cache sizes and pass it to the connect() function.
+   *
   * The exact meaning of an "entry" will depend on the type of index:
   * - IVF: there is one entry for each IVF partition
   * - BTREE: there is one entry for the entire index
--- a/nodejs/lancedb/index.ts
+++ b/nodejs/lancedb/index.ts
@@ -10,6 +10,7 @@ import {
 import {
  ConnectionOptions,
  Connection as LanceDbConnection,
+  Session,
 } from "./native.js";

 export {
@@ -51,6 +52,8 @@ export {
  OpenTableOptions,
 } from "./connection";

+export { Session } from "./native.js";
+
 export {
  ExecutableQuery,
  Query,
@@ -131,6 +134,7 @@ export { IntoSql, packBits } from "./util";
 export async function connect(
  uri: string,
  options?: Partial<ConnectionOptions>,
+  session?: Session,
 ): Promise<Connection>;
 /**
 * Connect to a LanceDB instance at the given URI.
@@ -149,31 +153,43 @@ export async function connect(
 *   storageOptions: {timeout: "60s"}
 * });
 * ```
+ *
+ * @example
+ * ```ts
+ * const session = Session.default();
+ * const conn = await connect({
+ *   uri: "/path/to/database",
+ *   session: session
+ * });
+ * ```
 */
 export async function connect(
  options: Partial<ConnectionOptions> & { uri: string },
 ): Promise<Connection>;
 export async function connect(
   uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }),
-  options: Partial<ConnectionOptions> = {},
+  options?: Partial<ConnectionOptions>,
+  session?: Session,
 ): Promise<Connection> {
   let uri: string | undefined;
+  let finalOptions: Partial<ConnectionOptions> = {};
+
   if (typeof uriOrOptions !== "string") {
     const { uri: uri_, ...opts } = uriOrOptions;
     uri = uri_;
-    options = opts;
+    finalOptions = opts;
   } else {
     uri = uriOrOptions;
+    // Shallow-copy so that writing `storageOptions` below never mutates the
+    // object the caller passed in.
+    finalOptions = { ...options };
+    // The `connect(uri, options, session)` overload passes the session
+    // positionally; forward it to the native layer instead of dropping it.
+    // An explicit positional session takes precedence over `options.session`.
+    if (session !== undefined) {
+      finalOptions.session = session;
+    }
   }

   if (!uri) {
     throw new Error("uri is required");
   }

-  options = (options as ConnectionOptions) ?? {};
-  (<ConnectionOptions>options).storageOptions = cleanseStorageOptions(
-    (<ConnectionOptions>options).storageOptions,
+  (<ConnectionOptions>finalOptions).storageOptions = cleanseStorageOptions(
+    (<ConnectionOptions>finalOptions).storageOptions,
   );
-  const nativeConn = await LanceDbConnection.new(uri, options);
+  const nativeConn = await LanceDbConnection.new(uri, finalOptions);
   return new LocalConnection(nativeConn);
 }
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.21.2-beta.0",
+	"version": "0.21.2",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-x64",
-	"version": "0.21.2-beta.0",
+	"version": "0.21.2",
 	"os": ["darwin"],
 	"cpu": ["x64"],
 	"main": "lancedb.darwin-x64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.21.2-beta.0",
+	"version": "0.21.2",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.21.2-beta.0",
+	"version": "0.21.2",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.21.2-beta.0",
+	"version": "0.21.2",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.21.2-beta.0",
+	"version": "0.21.2",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.21.2-beta.0",
+  "version": "0.21.2",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.21.2-beta.0",
+	"version": "0.21.2",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.21.2-beta.0",
+  "version": "0.21.2",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.21.2-beta.0",
+      "version": "0.21.2",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.21.2-beta.0",
+  "version": "0.21.2",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/nodejs/src/connection.rs
+++ b/nodejs/src/connection.rs
@@ -74,6 +74,10 @@ impl Connection {
            builder = builder.host_override(&host_override);
        }

+        if let Some(session) = options.session {
+            builder = builder.session(session.inner.clone());
+        }
+
        Ok(Self::inner_new(builder.execute().await.default_error()?))
    }

--- a/nodejs/src/lib.rs
+++ b/nodejs/src/lib.rs
@@ -14,6 +14,7 @@ pub mod merge;
 mod query;
 pub mod remote;
 mod rerankers;
+mod session;
 mod table;
 mod util;

@@ -34,6 +35,9 @@ pub struct ConnectionOptions {
    ///
    /// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
    pub storage_options: Option<HashMap<String, String>>,
+    /// (For LanceDB OSS only): the session to use for this connection. Holds
+    /// shared caches and other session-specific state.
+    pub session: Option<session::Session>,

    /// (For LanceDB cloud only): configuration for the remote HTTP client.
    pub client_config: Option<remote::ClientConfig>,
--- a/nodejs/src/session.rs
+++ b/nodejs/src/session.rs
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+use std::sync::Arc;
+
+use lancedb::{ObjectStoreRegistry, Session as LanceSession};
+use napi::bindgen_prelude::*;
+use napi_derive::*;
+
+/// A session for managing caches and object stores across LanceDB operations.
+///
+/// Sessions allow you to configure cache sizes for index and metadata caches,
+/// which can significantly impact memory use and performance. They can
+/// also be re-used across multiple connections to share the same cache state.
+#[napi]
+#[derive(Clone)]
+pub struct Session {
+    // Reference-counted handle to the underlying Lance session. Cloning a
+    // `Session` clones this `Arc`, so every clone refers to the same session.
+    pub(crate) inner: Arc<LanceSession>,
+}
+
+// Debug output reports the live cache statistics rather than the opaque
+// inner session handle.
+impl std::fmt::Debug for Session {
+    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let size_bytes = self.inner.size_bytes();
+        let approx_num_items = self.inner.approx_num_items();
+
+        let mut out = formatter.debug_struct("Session");
+        out.field("size_bytes", &size_bytes);
+        out.field("approx_num_items", &approx_num_items);
+        out.finish()
+    }
+}
+
+#[napi]
+impl Session {
+    /// Create a new session with custom cache sizes.
+    ///
+    /// # Parameters
+    ///
+    /// - `index_cache_size_bytes`: The size of the index cache in bytes.
+    ///   Index data is stored in memory in this cache to speed up queries.
+    ///   Defaults to 6GB if not specified.
+    /// - `metadata_cache_size_bytes`: The size of the metadata cache in bytes.
+    ///   The metadata cache stores file metadata and schema information in memory.
+    ///   This cache improves scan and write performance.
+    ///   Defaults to 1GB if not specified.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if either size is negative or is too large to be
+    /// represented as a byte count on this platform.
+    #[napi(constructor)]
+    pub fn new(
+        index_cache_size_bytes: Option<BigInt>,
+        metadata_cache_size_bytes: Option<BigInt>,
+    ) -> napi::Result<Self> {
+        // Converts an optional JS BigInt into a byte count, using `default`
+        // when the argument is absent and rejecting values that would
+        // otherwise be silently reinterpreted.
+        fn cache_size(size: Option<BigInt>, name: &str, default: usize) -> napi::Result<usize> {
+            let size = match size {
+                Some(size) => size,
+                None => return Ok(default),
+            };
+            // `get_u64` returns (sign bit, value, lossless). Taking only the
+            // value would turn a negative BigInt into its magnitude and a
+            // >64-bit BigInt into its low word.
+            let (negative, value, lossless) = size.get_u64();
+            if negative || !lossless {
+                return Err(napi::Error::from_reason(format!(
+                    "{} must be a non-negative integer that fits in 64 bits",
+                    name
+                )));
+            }
+            usize::try_from(value).map_err(|_| {
+                napi::Error::from_reason(format!("{} is too large for this platform", name))
+            })
+        }
+
+        let index_cache_size = cache_size(
+            index_cache_size_bytes,
+            "index_cache_size_bytes",
+            6 * 1024 * 1024 * 1024, // 6GB default
+        )?;
+
+        let metadata_cache_size = cache_size(
+            metadata_cache_size_bytes,
+            "metadata_cache_size_bytes",
+            1024 * 1024 * 1024, // 1GB default
+        )?;
+
+        let session = LanceSession::new(
+            index_cache_size,
+            metadata_cache_size,
+            Arc::new(ObjectStoreRegistry::default()),
+        );
+
+        Ok(Self {
+            inner: Arc::new(session),
+        })
+    }
+
+    /// Create a session with default cache sizes.
+    ///
+    /// This is equivalent to creating a session with 6GB index cache
+    /// and 1GB metadata cache.
+    #[napi(factory)]
+    pub fn default() -> Self {
+        Self {
+            inner: Arc::new(LanceSession::default()),
+        }
+    }
+
+    /// Get the current size of the session caches in bytes.
+    #[napi]
+    pub fn size_bytes(&self) -> BigInt {
+        BigInt::from(self.inner.size_bytes())
+    }
+
+    /// Get the approximate number of items cached in the session.
+    ///
+    /// The count saturates at `u32::MAX` instead of wrapping if the
+    /// underlying value does not fit in 32 bits.
+    #[napi]
+    pub fn approx_num_items(&self) -> u32 {
+        u32::try_from(self.inner.approx_num_items()).unwrap_or(u32::MAX)
+    }
+}
+
+// By-value conversion from a JS `Session` instance, so that `Session` can be
+// used as a field type inside napi(object) structs.
+impl FromNapiValue for Session {
+    unsafe fn from_napi_value(
+        env: napi::sys::napi_env,
+        napi_val: napi::sys::napi_value,
+    ) -> napi::Result<Self> {
+        // Borrow the JS-owned instance, then hand back an owned value that
+        // shares the same underlying session.
+        let instance = ClassInstance::<Session>::from_napi_value(env, napi_val)?;
+        Ok(Session {
+            inner: Arc::clone(&instance.inner),
+        })
+    }
+}
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.24.2-beta.1"
+current_version = "0.24.2"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/CLAUDE.md
+++ b/python/CLAUDE.md
@@ -0,0 +1,19 @@
+These are the Python bindings of LanceDB.
+The core Rust library is in the `../rust/lancedb` directory, the Rust binding
+code is in the `src/` directory, and the Python bindings are in the `lancedb/` directory.
+
+Common commands:
+
+* Build: `make develop`
+* Format: `make format`
+* Lint: `make check`
+* Fix lints: `make fix`
+* Test: `make test`
+* Doc test: `make doctest`
+
+Before committing changes, run lints and then formatting.
+
+When you change the Rust code, you will need to recompile the Python bindings: `make develop`.
+
+When you export new types from Rust to Python, you must manually update `python/lancedb/_lancedb.pyi`
+with the corresponding type hints. You can run `pyright` to check for type errors in the Python code.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.24.2-beta.1"
+version = "0.24.2"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -85,8 +85,8 @@ embeddings = [
    "boto3>=1.28.57",
    "awscli>=1.29.57",
    "botocore>=1.31.57",
+    'ibm-watsonx-ai>=1.1.2; python_version >= "3.10"',
    "ollama>=0.3.0",
-    "ibm-watsonx-ai>=1.1.2",
 ]
 azure = ["adlfs>=2024.2.0"]

--- a/python/python/lancedb/__init__.py
+++ b/python/python/lancedb/__init__.py
@@ -18,6 +18,7 @@ from .remote import ClientConfig
 from .remote.db import RemoteDBConnection
 from .schema import vector
 from .table import AsyncTable
+from ._lancedb import Session


 def connect(
@@ -30,6 +31,7 @@ def connect(
    request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
    client_config: Union[ClientConfig, Dict[str, Any], None] = None,
    storage_options: Optional[Dict[str, str]] = None,
+    session: Optional[Session] = None,
    **kwargs: Any,
 ) -> DBConnection:
    """Connect to a LanceDB database.
@@ -64,6 +66,12 @@ def connect(
    storage_options: dict, optional
        Additional options for the storage backend. See available options at
        <https://lancedb.github.io/lancedb/guides/storage/>
+    session: Session, optional
+        (For LanceDB OSS only)
+        A session to use for this connection. Sessions allow you to configure
+        cache sizes for index and metadata caches, which can significantly
+        impact memory use and performance. They can also be re-used across
+        multiple connections to share the same cache state.

    Examples
    --------
@@ -92,7 +100,7 @@ def connect(
        if api_key is None:
            api_key = os.environ.get("LANCEDB_API_KEY")
        if api_key is None:
-            raise ValueError(f"api_key is required to connected LanceDB cloud: {uri}")
+            raise ValueError(f"api_key is required to connect to LanceDB cloud: {uri}")
        if isinstance(request_thread_pool, int):
            request_thread_pool = ThreadPoolExecutor(request_thread_pool)
        return RemoteDBConnection(
@@ -113,6 +121,7 @@ def connect(
        uri,
        read_consistency_interval=read_consistency_interval,
        storage_options=storage_options,
+        session=session,
    )


@@ -125,6 +134,7 @@ async def connect_async(
    read_consistency_interval: Optional[timedelta] = None,
    client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
    storage_options: Optional[Dict[str, str]] = None,
+    session: Optional[Session] = None,
 ) -> AsyncConnection:
    """Connect to a LanceDB database.

@@ -158,6 +168,12 @@ async def connect_async(
    storage_options: dict, optional
        Additional options for the storage backend. See available options at
        <https://lancedb.github.io/lancedb/guides/storage/>
+    session: Session, optional
+        (For LanceDB OSS only)
+        A session to use for this connection. Sessions allow you to configure
+        cache sizes for index and metadata caches, which can significantly
+        impact memory use and performance. They can also be re-used across
+        multiple connections to share the same cache state.

    Examples
    --------
@@ -197,6 +213,7 @@ async def connect_async(
            read_consistency_interval_secs,
            client_config,
            storage_options,
+            session,
        )
    )

@@ -212,6 +229,7 @@ __all__ = [
    "DBConnection",
    "LanceDBConnection",
    "RemoteDBConnection",
+    "Session",
    "__version__",
 ]

--- a/python/python/lancedb/_lancedb.pyi
+++ b/python/python/lancedb/_lancedb.pyi
@@ -6,6 +6,19 @@ import pyarrow as pa
 from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
 from .remote import ClientConfig

+class Session:
+    """A session that can be passed to ``connect`` to configure the index and
+    metadata cache sizes and to share cache state across connections."""
+
+    # Both sizes are optional; omitting one leaves that cache at its default.
+    def __init__(
+        self,
+        index_cache_size_bytes: Optional[int] = None,
+        metadata_cache_size_bytes: Optional[int] = None,
+    ): ...
+    # Alternative constructor that takes no size arguments.
+    @staticmethod
+    def default() -> "Session": ...
+    # NOTE(review): the two members below are declared as properties; confirm
+    # the native binding exposes them as getters rather than methods.
+    @property
+    def size_bytes(self) -> int: ...
+    @property
+    def approx_num_items(self) -> int: ...
+
 class Connection(object):
    uri: str
    async def table_names(
@@ -89,6 +102,7 @@ async def connect(
    read_consistency_interval: Optional[float],
    client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
    storage_options: Optional[Dict[str, str]],
+    session: Optional[Session],
 ) -> Connection: ...

 class RecordBatchStream:
--- a/python/python/lancedb/common.py
+++ b/python/python/lancedb/common.py
@@ -94,9 +94,9 @@ def data_to_reader(
    else:
        raise TypeError(
            f"Unknown data type {type(data)}. "
-            "Please check "
-            "https://lancedb.github.io/lance/read_and_write.html "
-            "to see supported types."
+            "Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
+            "pyarrow Table/RecordBatch, or Pydantic models. "
+            "See https://lancedb.github.io/lancedb/guides/tables/ for examples."
        )


--- a/python/python/lancedb/db.py
+++ b/python/python/lancedb/db.py
@@ -37,6 +37,7 @@ if TYPE_CHECKING:
    from ._lancedb import Connection as LanceDbConnection
    from .common import DATA, URI
    from .embeddings import EmbeddingFunctionConfig
+    from ._lancedb import Session


 class DBConnection(EnforceOverrides):
@@ -247,6 +248,9 @@ class DBConnection(EnforceOverrides):
        name: str
            The name of the table.
        index_cache_size: int, default 256
+            **Deprecated**: Use session-level cache configuration instead.
+            Create a Session with custom cache sizes and pass it to lancedb.connect().
+
            Set the size of the index cache, specified as a number of entries

            The exact meaning of an "entry" will depend on the type of index:
@@ -354,6 +358,7 @@ class LanceDBConnection(DBConnection):
        *,
        read_consistency_interval: Optional[timedelta] = None,
        storage_options: Optional[Dict[str, str]] = None,
+        session: Optional[Session] = None,
    ):
        if not isinstance(uri, Path):
            scheme = get_uri_scheme(uri)
@@ -367,6 +372,7 @@ class LanceDBConnection(DBConnection):
        self._entered = False
        self.read_consistency_interval = read_consistency_interval
        self.storage_options = storage_options
+        self.session = session

        if read_consistency_interval is not None:
            read_consistency_interval_secs = read_consistency_interval.total_seconds()
@@ -382,6 +388,7 @@ class LanceDBConnection(DBConnection):
                read_consistency_interval_secs,
                None,
                storage_options,
+                session,
            )

        self._conn = AsyncConnection(LOOP.run(do_connect()))
@@ -475,6 +482,17 @@ class LanceDBConnection(DBConnection):
        -------
        A LanceTable object representing the table.
        """
+        if index_cache_size is not None:
+            import warnings
+
+            warnings.warn(
+                "index_cache_size is deprecated. Use session-level cache "
+                "configuration instead. Create a Session with custom cache sizes "
+                "and pass it to lancedb.connect().",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
        return LanceTable.open(
            self,
            name,
@@ -820,6 +838,9 @@ class AsyncConnection(object):
            See available options at
            <https://lancedb.github.io/lancedb/guides/storage/>
        index_cache_size: int, default 256
+            **Deprecated**: Use session-level cache configuration instead.
+            Create a Session with custom cache sizes and pass it to lancedb.connect().
+
            Set the size of the index cache, specified as a number of entries

            The exact meaning of an "entry" will depend on the type of index:
--- a/python/python/lancedb/embeddings/__init__.py
+++ b/python/python/lancedb/embeddings/__init__.py
@@ -11,7 +11,7 @@ from .instructor import InstructorEmbeddingFunction
 from .ollama import OllamaEmbeddings
 from .open_clip import OpenClipEmbeddings
 from .openai import OpenAIEmbeddings
-from .registry import EmbeddingFunctionRegistry, get_registry
+from .registry import EmbeddingFunctionRegistry, get_registry, register
 from .sentence_transformers import SentenceTransformerEmbeddings
 from .gte import GteEmbeddings
 from .transformers import TransformersEmbeddingFunction, ColbertEmbeddings
--- a/python/python/lancedb/embeddings/gte_mlx_model.py
+++ b/python/python/lancedb/embeddings/gte_mlx_model.py
@@ -9,11 +9,14 @@ from huggingface_hub import snapshot_download
 from pydantic import BaseModel
 from transformers import BertTokenizer

+from .utils import create_import_stub
+
 try:
    import mlx.core as mx
    import mlx.nn as nn
 except ImportError:
-    raise ImportError("You need to install MLX to use this model use - pip install mlx")
+    mx = create_import_stub("mlx.core", "mlx")
+    nn = create_import_stub("mlx.nn", "mlx")


 def average_pool(last_hidden_state: mx.array, attention_mask: mx.array) -> mx.array:
@@ -72,7 +75,7 @@ class TransformerEncoder(nn.Module):
        super().__init__()
        self.layers = [
            TransformerEncoderLayer(dims, num_heads, mlp_dims)
-            for i in range(num_layers)
+            for _ in range(num_layers)
        ]

    def __call__(self, x, mask):
--- a/python/python/lancedb/embeddings/registry.py
+++ b/python/python/lancedb/embeddings/registry.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import json
-from typing import Dict, Optional
+from typing import Dict, Optional, Type

 from .base import EmbeddingFunction, EmbeddingFunctionConfig

@@ -43,7 +43,7 @@ class EmbeddingFunctionRegistry:
        self._functions = {}
        self._variables = {}

-    def register(self, alias: str = None):
+    def register(self, alias: Optional[str] = None):
        """
        This creates a decorator that can be used to register
        an EmbeddingFunction.
@@ -75,7 +75,7 @@ class EmbeddingFunctionRegistry:
        """
        self._functions = {}

-    def get(self, name: str):
+    def get(self, name: str) -> Type[EmbeddingFunction]:
        """
        Fetch an embedding function class by name

--- a/python/python/lancedb/embeddings/utils.py
+++ b/python/python/lancedb/embeddings/utils.py
@@ -21,6 +21,36 @@ from ..dependencies import pandas as pd
 from ..util import attempt_import_or_raise


+def create_import_stub(module_name: str, package_name: str = None):
+    """
+    Create a stub module that allows class definition but fails when used.
+    This allows modules to be imported for doctest collection even when
+    optional dependencies are not available.
+
+    Attribute access on the stub (e.g. ``nn.Module``) yields a placeholder
+    class, so it can be used as a base class or in annotations. Calling the
+    stub, instantiating a placeholder (e.g. ``nn.Linear(...)``), or running a
+    subclass ``__init__`` that chains to ``super().__init__()`` raises
+    ``ImportError``.
+
+    Parameters
+    ----------
+    module_name : str
+        The name of the module to create a stub for
+    package_name : str, optional
+        The package name to suggest in the error message. Falls back to
+        ``module_name`` when not given.
+
+    Returns
+    -------
+    object
+        A stub object that can be used in place of the module
+    """
+    pkg = package_name or module_name
+    message = f"You need to install {pkg} to use this functionality"
+
+    class _ImportStub:
+        def __init__(self, *args, **kwargs):
+            # Instantiating a stubbed class is "use": fail here rather than
+            # handing back an object that breaks later with a vaguer error.
+            raise ImportError(message)
+
+        def __getattr__(self, name):
+            return _ImportStub  # Return stub for chained access like nn.Module
+
+        def __call__(self, *args, **kwargs):
+            raise ImportError(message)
+
+    # Bypass __init__ (which always raises) to build the module-level stub.
+    return object.__new__(_ImportStub)
+
+
 # ruff: noqa: PERF203
 def retry(tries=10, delay=1, max_delay=30, backoff=3, jitter=1):
    def wrapper(fn):
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -14,7 +14,7 @@ from typing import (
    Literal,
    Optional,
    Tuple,
-    Type,
+    TypeVar,
    Union,
    Any,
 )
@@ -58,6 +58,8 @@ if TYPE_CHECKING:
    else:
        from typing_extensions import Self

+T = TypeVar("T", bound="LanceModel")
+

 # Pydantic validation function for vector queries
 def ensure_vector_query(
@@ -746,8 +748,8 @@ class LanceQueryBuilder(ABC):
        return self.to_arrow(timeout=timeout).to_pylist()

    def to_pydantic(
-        self, model: Type[LanceModel], *, timeout: Optional[timedelta] = None
-    ) -> List[LanceModel]:
+        self, model: type[T], *, timeout: Optional[timedelta] = None
+    ) -> list[T]:
        """Return the table as a list of pydantic models.

        Parameters
@@ -906,11 +908,11 @@ class LanceQueryBuilder(ABC):
        >>> plan = table.search(query).explain_plan(True)
        >>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
-        GlobalLimitExec: skip=0, fetch=10
-          FilterExec: _distance@2 IS NOT NULL
-            SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
-              KNNVectorDistance: metric=l2
-                LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
+          GlobalLimitExec: skip=0, fetch=10
+            FilterExec: _distance@2 IS NOT NULL
+              SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
+                KNNVectorDistance: metric=l2
+                  LanceRead: uri=..., projection=[vector], ...

        Parameters
        ----------
@@ -940,19 +942,19 @@ class LanceQueryBuilder(ABC):
        >>> plan = table.search(query).analyze_plan()
        >>> print(plan)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        AnalyzeExec verbose=true, metrics=[]
-          ProjectionExec: expr=[...], metrics=[...]
-            GlobalLimitExec: skip=0, fetch=10, metrics=[...]
-              FilterExec: _distance@2 IS NOT NULL,
-              metrics=[output_rows=..., elapsed_compute=...]
-                SortExec: TopK(fetch=10), expr=[...],
-                preserve_partitioning=[...],
-                metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
-                  KNNVectorDistance: metric=l2,
-                  metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
-                    LanceScan: uri=..., projection=[vector], row_id=true,
-                    row_addr=false, ordered=false,
-                    metrics=[output_rows=..., elapsed_compute=...,
-                    bytes_read=..., iops=..., requests=...]
+          TracedExec, metrics=[]
+            ProjectionExec: expr=[...], metrics=[...]
+              GlobalLimitExec: skip=0, fetch=10, metrics=[...]
+                FilterExec: _distance@2 IS NOT NULL,
+                metrics=[output_rows=..., elapsed_compute=...]
+                  SortExec: TopK(fetch=10), expr=[...],
+                  preserve_partitioning=[...],
+                  metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
+                    KNNVectorDistance: metric=l2,
+                    metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
+                      LanceRead: uri=..., projection=[vector], ...
+                      metrics=[output_rows=..., elapsed_compute=...,
+                      bytes_read=..., iops=..., requests=...]

        Returns
        -------
@@ -2043,7 +2045,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
          FilterExec: _distance@2 IS NOT NULL
            SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
              KNNVectorDistance: metric=l2
-                LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
+                LanceRead: uri=..., projection=[vector], ...

        Parameters
        ----------
@@ -2429,7 +2431,7 @@ class AsyncQueryBase(object):
            FilterExec: _distance@2 IS NOT NULL
              SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
                KNNVectorDistance: metric=l2
-                  LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
+                  LanceRead: uri=..., projection=[vector], ...

        Parameters
        ----------
@@ -3054,7 +3056,7 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
                FilterExec: _distance@2 IS NOT NULL
                  SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
                    KNNVectorDistance: metric=l2
-                      LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
+                      LanceRead: uri=..., projection=[vector], ...
        <BLANKLINE>
        FTS Search Plan:
        ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -102,7 +102,9 @@ if TYPE_CHECKING:
    )


-def _into_pyarrow_reader(data) -> pa.RecordBatchReader:
+def _into_pyarrow_reader(
+    data, schema: Optional[pa.Schema] = None
+) -> pa.RecordBatchReader:
    from lancedb.dependencies import datasets

    if _check_for_hugging_face(data):
@@ -123,6 +125,12 @@ def _into_pyarrow_reader(data) -> pa.RecordBatchReader:
        raise ValueError("Cannot add a single dictionary to a table. Use a list.")

    if isinstance(data, list):
+        # Handle empty list case
+        if not data:
+            if schema is None:
+                raise ValueError("Cannot create table from empty list without a schema")
+            return pa.Table.from_pylist(data, schema=schema).to_reader()
+
        # convert to list of dict if data is a bunch of LanceModels
        if isinstance(data[0], LanceModel):
            schema = data[0].__class__.to_arrow_schema()
@@ -165,9 +173,9 @@ def _into_pyarrow_reader(data) -> pa.RecordBatchReader:
    else:
        raise TypeError(
            f"Unknown data type {type(data)}. "
-            "Please check "
-            "https://lancedb.github.io/lancedb/python/python/ "
-            "to see supported types."
+            "Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
+            "pyarrow Table/RecordBatch, or Pydantic models. "
+            "See https://lancedb.github.io/lancedb/guides/tables/ for examples."
        )


@@ -236,7 +244,7 @@ def _sanitize_data(
    # 1. There might be embedding columns missing that will be added
    #    in the add_embeddings step.
    # 2. If `allow_subschemas` is True, there might be columns missing.
-    reader = _into_pyarrow_reader(data)
+    reader = _into_pyarrow_reader(data, target_schema)

    reader = _append_vector_columns(reader, target_schema, metadata=metadata)

@@ -3665,9 +3673,14 @@ class AsyncTable:
            )
            if query.distance_type is not None:
                async_query = async_query.distance_type(query.distance_type)
-            if query.minimum_nprobes is not None:
+            if query.minimum_nprobes is not None and query.maximum_nprobes is not None:
+                # Set both to the minimum first to avoid min > max error.
+                async_query = async_query.nprobes(
+                    query.minimum_nprobes
+                ).maximum_nprobes(query.maximum_nprobes)
+            elif query.minimum_nprobes is not None:
                async_query = async_query.minimum_nprobes(query.minimum_nprobes)
-            if query.maximum_nprobes is not None:
+            elif query.maximum_nprobes is not None:
                async_query = async_query.maximum_nprobes(query.maximum_nprobes)
            if query.refine_factor is not None:
                async_query = async_query.refine_factor(query.refine_factor)
--- a/python/python/tests/test_fts.py
+++ b/python/python/tests/test_fts.py
@@ -33,8 +33,11 @@ tantivy = pytest.importorskip("tantivy")

@pytest.fixture
 def table(tmp_path) -> ldb.table.LanceTable:
+    # Use local random state to avoid affecting other tests
+    rng = np.random.RandomState(42)
+    local_random = random.Random(42)
    db = ldb.connect(tmp_path)
-    vectors = [np.random.randn(128) for _ in range(100)]
+    vectors = [rng.randn(128) for _ in range(100)]

    text_nouns = ("puppy", "car")
    text2_nouns = ("rabbit", "girl", "monkey")
@@ -44,10 +47,10 @@ def table(tmp_path) -> ldb.table.LanceTable:
    text = [
        " ".join(
            [
-                text_nouns[random.randrange(0, len(text_nouns))],
-                verbs[random.randrange(0, 5)],
-                adv[random.randrange(0, 5)],
-                adj[random.randrange(0, 5)],
+                text_nouns[local_random.randrange(0, len(text_nouns))],
+                verbs[local_random.randrange(0, 5)],
+                adv[local_random.randrange(0, 5)],
+                adj[local_random.randrange(0, 5)],
            ]
        )
        for _ in range(100)
@@ -55,15 +58,15 @@ def table(tmp_path) -> ldb.table.LanceTable:
    text2 = [
        " ".join(
            [
-                text2_nouns[random.randrange(0, len(text2_nouns))],
-                verbs[random.randrange(0, 5)],
-                adv[random.randrange(0, 5)],
-                adj[random.randrange(0, 5)],
+                text2_nouns[local_random.randrange(0, len(text2_nouns))],
+                verbs[local_random.randrange(0, 5)],
+                adv[local_random.randrange(0, 5)],
+                adj[local_random.randrange(0, 5)],
            ]
        )
        for _ in range(100)
    ]
-    count = [random.randint(1, 10000) for _ in range(100)]
+    count = [local_random.randint(1, 10000) for _ in range(100)]
    table = db.create_table(
        "test",
        data=pd.DataFrame(
@@ -82,8 +85,11 @@ def table(tmp_path) -> ldb.table.LanceTable:

@pytest.fixture
 async def async_table(tmp_path) -> ldb.table.AsyncTable:
+    # Use local random state to avoid affecting other tests
+    rng = np.random.RandomState(42)
+    local_random = random.Random(42)
    db = await ldb.connect_async(tmp_path)
-    vectors = [np.random.randn(128) for _ in range(100)]
+    vectors = [rng.randn(128) for _ in range(100)]

    text_nouns = ("puppy", "car")
    text2_nouns = ("rabbit", "girl", "monkey")
@@ -93,10 +99,10 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
    text = [
        " ".join(
            [
-                text_nouns[random.randrange(0, len(text_nouns))],
-                verbs[random.randrange(0, 5)],
-                adv[random.randrange(0, 5)],
-                adj[random.randrange(0, 5)],
+                text_nouns[local_random.randrange(0, len(text_nouns))],
+                verbs[local_random.randrange(0, 5)],
+                adv[local_random.randrange(0, 5)],
+                adj[local_random.randrange(0, 5)],
            ]
        )
        for _ in range(100)
@@ -104,15 +110,15 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
    text2 = [
        " ".join(
            [
-                text2_nouns[random.randrange(0, len(text2_nouns))],
-                verbs[random.randrange(0, 5)],
-                adv[random.randrange(0, 5)],
-                adj[random.randrange(0, 5)],
+                text2_nouns[local_random.randrange(0, len(text2_nouns))],
+                verbs[local_random.randrange(0, 5)],
+                adv[local_random.randrange(0, 5)],
+                adj[local_random.randrange(0, 5)],
            ]
        )
        for _ in range(100)
    ]
-    count = [random.randint(1, 10000) for _ in range(100)]
+    count = [local_random.randint(1, 10000) for _ in range(100)]
    table = await db.create_table(
        "test",
        data=pd.DataFrame(
--- a/python/python/tests/test_hybrid_query.py
+++ b/python/python/tests/test_hybrid_query.py
@@ -166,7 +166,7 @@ async def test_explain_plan(table: AsyncTable):
    assert "Vector Search Plan" in plan
    assert "KNNVectorDistance" in plan
    assert "FTS Search Plan" in plan
-    assert "LanceScan" in plan
+    assert "LanceRead" in plan


@pytest.mark.asyncio
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -445,25 +445,45 @@ def test_invalid_nprobes_sync(table):
    with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"):
        LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(0).to_list()
    with pytest.raises(
-        ValueError, match="maximum_nprobes must be greater than minimum_nprobes"
+        ValueError,
+        match="maximum_nprobes must be greater than or equal to minimum_nprobes",
    ):
        LanceVectorQueryBuilder(table, [0, 0], "vector").maximum_nprobes(5).to_list()
    with pytest.raises(
-        ValueError, match="minimum_nprobes must be less or equal to maximum_nprobes"
+        ValueError,
+        match="minimum_nprobes must be less than or equal to maximum_nprobes",
    ):
        LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(100).to_list()


+def test_nprobes_works_sync(table):
+    LanceVectorQueryBuilder(table, [0, 0], "vector").nprobes(30).to_list()
+
+
+def test_nprobes_min_max_works_sync(table):
+    LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(2).maximum_nprobes(
+        4
+    ).to_list()
+
+
+def test_multiple_nprobes_calls_works_sync(table):
+    LanceVectorQueryBuilder(table, [0, 0], "vector").nprobes(30).maximum_nprobes(
+        20
+    ).minimum_nprobes(20).to_list()
+
+
@pytest.mark.asyncio
 async def test_invalid_nprobes_async(table_async: AsyncTable):
    with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"):
        await table_async.vector_search([0, 0]).minimum_nprobes(0).to_list()
    with pytest.raises(
-        ValueError, match="maximum_nprobes must be greater than minimum_nprobes"
+        ValueError,
+        match="maximum_nprobes must be greater than or equal to minimum_nprobes",
    ):
        await table_async.vector_search([0, 0]).maximum_nprobes(5).to_list()
    with pytest.raises(
-        ValueError, match="minimum_nprobes must be less or equal to maximum_nprobes"
+        ValueError,
+        match="minimum_nprobes must be less than or equal to maximum_nprobes",
    ):
        await table_async.vector_search([0, 0]).minimum_nprobes(100).to_list()

@@ -839,7 +859,7 @@ async def test_explain_plan_with_filters(table_async: AsyncTable):
        table_async.query().nearest_to(pa.array([1, 2])).where("id = 1").explain_plan()
    )
    assert "KNN" in plan_with_filter
-    assert "FilterExec" in plan_with_filter
+    assert "LanceRead" in plan_with_filter

    # Test FTS query with filter
    from lancedb.index import FTS
@@ -850,7 +870,8 @@ async def test_explain_plan_with_filters(table_async: AsyncTable):
    )
    plan_fts_filter = await query_fts_filter.where("id = 1").explain_plan()
    assert "MatchQuery: query=dog" in plan_fts_filter
-    assert "FilterExec: id@" in plan_fts_filter  # Should show filter details
+    assert "LanceRead" in plan_fts_filter
+    assert "full_filter=id = Int64(1)" in plan_fts_filter  # Should show filter details


@pytest.mark.asyncio
@@ -1338,3 +1359,20 @@ async def test_query_timeout_async(tmp_path):
            .nearest_to([0.0, 0.0])
            .to_list(timeout=timedelta(0))
        )
+
+
+def test_search_empty_table(mem_db):
+    """Test searching on empty table should not crash
+
+    Regression test for issue #303:
+    https://github.com/lancedb/lancedb/issues/303
+    Searching on empty table produces scary error message
+    """
+    schema = pa.schema(
+        [pa.field("vector", pa.list_(pa.float32(), 2)), pa.field("id", pa.int64())]
+    )
+    table = mem_db.create_table("test_empty_search", schema=schema)
+
+    # Search on empty table should return empty results, not crash
+    results = table.search([1.0, 2.0]).limit(5).to_list()
+    assert results == []
--- a/python/python/tests/test_session.py
+++ b/python/python/tests/test_session.py
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+import lancedb
+
+
+def test_session_cache_configuration(tmp_path):
+    """Test Session cache configuration and basic functionality."""
+    # Create session with small cache limits for testing
+    index_cache_size = 1024 * 1024  # 1MB
+    metadata_cache_size = 512 * 1024  # 512KB
+
+    session = lancedb.Session(
+        index_cache_size_bytes=index_cache_size,
+        metadata_cache_size_bytes=metadata_cache_size,
+    )
+
+    # Record initial cache state
+    initial_cache_size = session.size_bytes
+    initial_cache_items = session.approx_num_items
+
+    # Test session works with database connection
+    db = lancedb.connect(tmp_path, session=session)
+
+    # Create and use a table to exercise the session
+    data = [{"id": i, "text": f"item {i}"} for i in range(100)]
+    table = db.create_table("test", data)
+    results = list(table.to_arrow().to_pylist())
+
+    assert len(results) == 100
+
+    # Verify cache usage increased after operations
+    final_cache_size = session.size_bytes
+    final_cache_items = session.approx_num_items
+
+    assert final_cache_size > initial_cache_size  # Cache should have grown
+    assert final_cache_items >= initial_cache_items  # Items should not decrease
+    assert final_cache_size < index_cache_size + metadata_cache_size  # Within limits
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -1804,3 +1804,45 @@ def test_stats(mem_db: DBConnection):
            },
        },
    }
+
+
+def test_create_table_empty_list_with_schema(mem_db: DBConnection):
+    """Test creating table with empty list data and schema
+
+    Regression test for IndexError: list index out of range
+    when calling create_table(name, data=[], schema=schema)
+    """
+    schema = pa.schema(
+        [pa.field("vector", pa.list_(pa.float32(), 2)), pa.field("id", pa.int64())]
+    )
+    table = mem_db.create_table("test_empty_list", data=[], schema=schema)
+    assert table.count_rows() == 0
+    assert table.schema == schema
+
+
+def test_create_table_empty_list_no_schema_error(mem_db: DBConnection):
+    """Test that creating table with empty list and no schema raises error"""
+    with pytest.raises(
+        ValueError, match="Cannot create table from empty list without a schema"
+    ):
+        mem_db.create_table("test_empty_no_schema", data=[])
+
+
+def test_add_table_with_empty_embeddings(tmp_path):
+    """Test exact scenario from issue #1968
+
+    Regression test for issue #1968:
+    https://github.com/lancedb/lancedb/issues/1968
+    """
+    db = lancedb.connect(tmp_path)
+
+    class MySchema(LanceModel):
+        text: str
+        embedding: Vector(16)
+
+    table = db.create_table("test", schema=MySchema)
+    table.add(
+        [{"text": "bar", "embedding": [0.1] * 16}],
+        on_bad_vectors="drop",
+    )
+    assert table.count_rows() == 1
--- a/python/src/connection.rs
+++ b/python/src/connection.rs
@@ -179,7 +179,7 @@ impl Connection {
 }

 #[pyfunction]
-#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None))]
+#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
 #[allow(clippy::too_many_arguments)]
 pub fn connect(
    py: Python,
@@ -190,6 +190,7 @@ pub fn connect(
    read_consistency_interval: Option<f64>,
    client_config: Option<PyClientConfig>,
    storage_options: Option<HashMap<String, String>>,
+    session: Option<crate::session::Session>,
 ) -> PyResult<Bound<'_, PyAny>> {
    future_into_py(py, async move {
        let mut builder = lancedb::connect(&uri);
@@ -213,6 +214,9 @@ pub fn connect(
        if let Some(client_config) = client_config {
            builder = builder.client_config(client_config.into());
        }
+        if let Some(session) = session {
+            builder = builder.session(session.inner); // owned here: move the Arc, no clone
+        }
        Ok(Connection::new(builder.execute().await.infer_error()?))
    })
 }
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -11,6 +11,7 @@ use pyo3::{
    wrap_pyfunction, Bound, PyResult, Python,
 };
 use query::{FTSQuery, HybridQuery, Query, VectorQuery};
+use session::Session;
 use table::{
    AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, MergeResult,
    Table, UpdateResult,
@@ -21,6 +22,7 @@ pub mod connection;
 pub mod error;
 pub mod index;
 pub mod query;
+pub mod session;
 pub mod table;
 pub mod util;

@@ -31,6 +33,7 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
        .write_style("LANCEDB_LOG_STYLE");
    env_logger::init_from_env(env);
    m.add_class::<Connection>()?;
+    m.add_class::<Session>()?;
    m.add_class::<Table>()?;
    m.add_class::<IndexConfig>()?;
    m.add_class::<Query>()?;
--- a/python/src/session.rs
+++ b/python/src/session.rs
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+use std::sync::Arc;
+
+use lancedb::{ObjectStoreRegistry, Session as LanceSession};
+use pyo3::{pyclass, pymethods, PyResult};
+
+/// A session for managing caches and object stores across LanceDB operations.
+///
+/// Sessions allow you to configure cache sizes for index and metadata caches,
+/// which can significantly impact memory use and performance. They can
+/// also be re-used across multiple connections to share the same cache state.
+#[pyclass]
+#[derive(Clone)]
+pub struct Session {
+    pub(crate) inner: Arc<LanceSession>,
+}
+
+impl Default for Session {
+    fn default() -> Self {
+        Self {
+            inner: Arc::new(LanceSession::default()),
+        }
+    }
+}
+
+#[pymethods]
+impl Session {
+    /// Create a new session with custom cache sizes.
+    ///
+    /// Parameters
+    /// ----------
+    /// index_cache_size_bytes : int, optional
+    ///     The size of the index cache in bytes.
+    ///     Index data is stored in memory in this cache to speed up queries.
+    ///     Default: 6GB (6 * 1024 * 1024 * 1024 bytes)
+    /// metadata_cache_size_bytes : int, optional
+    ///     The size of the metadata cache in bytes.
+    ///     The metadata cache stores file metadata and schema information in memory.
+    ///     This cache improves scan and write performance.
+    ///     Default: 1GB (1024 * 1024 * 1024 bytes)
+    #[new]
+    #[pyo3(signature = (index_cache_size_bytes=None, metadata_cache_size_bytes=None))]
+    pub fn new(
+        index_cache_size_bytes: Option<usize>,
+        metadata_cache_size_bytes: Option<usize>,
+    ) -> PyResult<Self> {
+        let index_cache_size = index_cache_size_bytes.unwrap_or(6 * 1024 * 1024 * 1024); // 6GB default
+        let metadata_cache_size = metadata_cache_size_bytes.unwrap_or(1024 * 1024 * 1024); // 1GB default
+
+        let session = LanceSession::new(
+            index_cache_size,
+            metadata_cache_size,
+            Arc::new(ObjectStoreRegistry::default()),
+        );
+
+        Ok(Self {
+            inner: Arc::new(session),
+        })
+    }
+
+    /// Create a session with default cache sizes.
+    ///
+    /// This is equivalent to creating a session with 6GB index cache
+    /// and 1GB metadata cache.
+    ///
+    /// Returns
+    /// -------
+    /// Session
+    ///     A new Session with default cache sizes
+    #[staticmethod]
+    #[allow(clippy::should_implement_trait)]
+    pub fn default() -> Self {
+        Default::default()
+    }
+
+    /// Get the current size of the session caches in bytes.
+    ///
+    /// Returns
+    /// -------
+    /// int
+    ///     The total size of all caches in the session
+    #[getter]
+    pub fn size_bytes(&self) -> u64 {
+        self.inner.size_bytes()
+    }
+
+    /// Get the approximate number of items cached in the session.
+    ///
+    /// Returns
+    /// -------
+    /// int
+    ///     The number of cached items across all caches
+    #[getter]
+    pub fn approx_num_items(&self) -> usize {
+        self.inner.approx_num_items()
+    }
+
+    fn __repr__(&self) -> String {
+        format!(
+            "Session(size_bytes={}, approx_num_items={})",
+            self.size_bytes(),
+            self.approx_num_items()
+        )
+    }
+}
--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-node"
-version = "0.21.2-beta.0"
+version = "0.21.2"
 description = "Serverless, low-latency vector database for AI applications"
 license.workspace = true
 edition.workspace = true
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.21.2-beta.0"
+version = "0.21.2"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
--- a/rust/lancedb/src/database/listing.rs
+++ b/rust/lancedb/src/database/listing.rs
@@ -678,7 +678,8 @@ impl Database for ListingDatabase {
        let mut read_params = request.lance_read_params.unwrap_or_else(|| {
            let mut default_params = ReadParams::default();
            if let Some(index_cache_size) = request.index_cache_size {
-                default_params.index_cache_size = index_cache_size as usize;
+                #[allow(deprecated)]
+                default_params.index_cache_size(index_cache_size as usize);
            }
            default_params
        });
--- a/rust/lancedb/src/lib.rs
+++ b/rust/lancedb/src/lib.rs
@@ -290,3 +290,7 @@ impl Display for DistanceType {

 /// Connect to a database
 pub use connection::connect;
+
+/// Re-export Lance Session and ObjectStoreRegistry for custom session creation
+pub use lance::session::Session;
+pub use lance_io::object_store::ObjectStoreRegistry;
--- a/rust/lancedb/src/query.rs
+++ b/rust/lancedb/src/query.rs
@@ -958,7 +958,8 @@ impl VectorQuery {
        if let Some(maximum_nprobes) = self.request.maximum_nprobes {
            if minimum_nprobes > maximum_nprobes {
                return Err(Error::InvalidInput {
-                    message: "minimum_nprobes must be less or equal to maximum_nprobes".to_string(),
+                    message: "minimum_nprobes must be less than or equal to maximum_nprobes"
+                        .to_string(),
                });
            }
        }
@@ -989,7 +990,8 @@ impl VectorQuery {
            }
            if maximum_nprobes < self.request.minimum_nprobes {
                return Err(Error::InvalidInput {
-                    message: "maximum_nprobes must be greater than minimum_nprobes".to_string(),
+                    message: "maximum_nprobes must be greater than or equal to minimum_nprobes"
+                        .to_string(),
                });
            }
        }
--- a/rust/lancedb/src/remote/client.rs
+++ b/rust/lancedb/src/remote/client.rs
@@ -2,7 +2,7 @@
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

 use http::HeaderName;
-use log::debug;
+use log::{debug, info};
 use reqwest::{
    header::{HeaderMap, HeaderValue},
    Body, Request, RequestBuilder, Response,
@@ -324,6 +324,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
        }

        for (key, value) in &config.extra_headers {
+            info!("extra header: {}", key); // value omitted: it may carry credentials
            let key_parsed = HeaderName::from_str(key).map_err(|_| Error::InvalidInput {
                message: format!("non-ascii value for header '{}' provided", key),
            })?;
--- a/rust/lancedb/src/table/datafusion.rs
+++ b/rust/lancedb/src/table/datafusion.rs
@@ -85,6 +85,14 @@ impl ExecutionPlan for MetadataEraserExec {
        vec![&self.input]
    }

+    fn maintains_input_order(&self) -> Vec<bool> {
+        vec![true; self.children().len()]
+    }
+
+    fn benefits_from_input_partitioning(&self) -> Vec<bool> {
+        vec![false; self.children().len()]
+    }
+
    fn with_new_children(
        self: Arc<Self>,
        children: Vec<Arc<dyn ExecutionPlan>>,
@@ -486,11 +494,8 @@ pub mod tests {
        TestFixture::check_plan(
            plan,
            "MetadataEraserExec
-             CoalesceBatchesExec:...
-             FilterExec: i@0 >= 5
-             RepartitionExec:...
             ProjectionExec:...
-             LanceScan:...",
+             LanceRead:...",
        )
        .await;
Author	SHA1	Message	Date
Ryan Green	e340599c1f	test to reproduce node extra headers issue	2025-08-04 10:03:04 -02:30
Wyatt Alt	c7afa724dd	chore: update npm lockfile (#2563 )	2025-07-30 18:28:06 -07:00
BubbleCal	c359cec504	chore: upgrade lance to 0.32.1-beta.2 (#2562 ) Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2025-07-30 14:31:04 -07:00
Mark McCaskey	fe76496a59	fix: `.nprobes` method in python bindings, improve error messages (#2556 ) `nprobes` with a value greater than 20 fails with the minimum error: ``` self = <lancedb.query.AsyncVectorQuery object at 0x10b749720>, minimum_nprobes = 30 def minimum_nprobes(self, minimum_nprobes: int) -> Self: """Set the minimum number of probes to use. See `nprobes` for more details. These partitions will be searched on every indexed vector query and will increase recall at the expense of latency. """ > self._inner.minimum_nprobes(minimum_nprobes) E ValueError: Invalid input, minimum_nprobes must be less than or equal to maximum_nprobes python/lancedb/query.py:2744: ValueError ``` Putting the max set before the min seems reasonable but it causes this reasonable case to fail: ``` def test_nprobes_min_max_works_sync(table): LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(2).maximum_nprobes(4).to_list() ``` with ``` self = <lancedb.query.AsyncVectorQuery object at 0x1203f1c90>, maximum_nprobes = 4 def maximum_nprobes(self, maximum_nprobes: int) -> Self: """Set the maximum number of probes to use. See `nprobes` for more details. If this value is greater than `minimum_nprobes` then the excess partitions will be searched only if we have not found enough results. This can be useful when there is a narrow filter to allow these queries to spend more time searching and avoid potential false negatives. If this value is 0 then no limit will be applied and all partitions could be searched if needed to satisfy the limit. """ > self._inner.maximum_nprobes(maximum_nprobes) E ValueError: Invalid input, maximum_nprobes must be greater than or equal to minimum_nprobes python/lancedb/query.py:2761: ValueError ```. The case I care about is where min == max, but this solution handles it even if they're not. If both min and max exist, we set both to the minimum and then set the max. 
This isn't 100% the same, as the minimum setter checks for 0 on the min and `.nprobes` does not do any sanity checking at all. But I figured this was the most reasonable and general solution without touching more of this code. While I was here, I noticed the error messages were a bit ambiguous, so I made them symmetric and clarified them.	2025-07-30 09:23:25 -07:00
Weston Pace	67ec1fe75c	feat: don't repartition for the sake of the metadata eraser (#2559 ) The `MetadataEraserExec` is super lightweight and doesn't really justify partitioning. I had a plan recently that was partitioning just for this node and that seems wasteful.	2025-07-29 19:26:30 -07:00
Lance Release	70d9b04ba5	Bump version: 0.21.2-beta.2 → 0.21.2	2025-07-25 20:32:41 +00:00
Lance Release	b0d4a79c35	Bump version: 0.21.2-beta.1 → 0.21.2-beta.2	2025-07-25 20:31:50 +00:00
Lance Release	f79295c697	Bump version: 0.24.2-beta.2 → 0.24.2	2025-07-25 20:31:15 +00:00
Lance Release	381fad9b65	Bump version: 0.24.2-beta.1 → 0.24.2-beta.2	2025-07-25 20:31:15 +00:00
Tristan Zajonc	055bf91d3e	fix: handle empty list with schema in table creation (#2548 ) ## Summary Fixes IndexError when creating tables with empty list data and a provided schema. Previously, `_into_pyarrow_reader()` would attempt to access `data[0]` on empty lists, causing an IndexError. Now properly handles empty lists by using the provided schema. Also adds regression tests for GitHub issues #1968 and #303 to prevent future regressions with empty table scenarios. ## Changes - Fix IndexError in `_into_pyarrow_reader()` for empty list + schema case - Add Optional[pa.Schema] parameter to handle empty data gracefully - Add `test_create_table_empty_list_with_schema` for the IndexError fix - Add `test_create_empty_then_add_data` for issue #1968 - Add `test_search_empty_table` for issue #303 ## Test plan - [x] All new regression tests pass - [x] Existing tests continue to pass - [x] Code formatted with `make format`	2025-07-25 10:23:43 +08:00
Will Jones	050f0086b8	feat: upgrade Lance to v0.32.0 (#2543 ) Changelog: https://github.com/lancedb/lance/releases/tag/v0.32.0 Fixes #2521	2025-07-24 19:22:53 -07:00
Tristan Zajonc	10fa23e0d6	fix(python): expose register function in embeddings module (#2544 ) ## Summary Fixes #2541 Problem: The `register` function was not accessible via `from lancedb.embeddings import register` as documented, causing ImportError for users trying to create custom embedding functions. Solution: Added `register` to the exports in `python/lancedb/embeddings/__init__.py` to match the documented API and follow the same pattern as other registry functions (`get_registry`, `EmbeddingFunctionRegistry`). Root Cause: The function existed in `lancedb.embeddings.registry` but wasn't exposed through the main embeddings module interface. ## Changes - Add `register` to imports in `/python/python/lancedb/embeddings/__init__.py` ## Test Plan - [x] Verified `from lancedb.embeddings import register` works as documented - [x] Confirmed existing embedding tests pass - [x] Checked that the fix follows existing patterns (same as `get_registry`) - [x] Validated linting and formatting passes ## References Fixes #2541	2025-07-24 15:30:06 -07:00
yihong	43d9fc28b0	fix: can not build on python3.9 for dev (#2477 ) This patch fixes the dev build failing on Python 3.9. The reason is that the minimum supported version for ibm-watsonx-ai is Python 3.10; more details can be checked on `pyoven`: https://pyoven.org/package/ibm-watsonx-ai/ It also fixes a tiny Markdown lint issue. --------- Signed-off-by: yihong0618 <zouzou0208@gmail.com>	2025-07-24 12:39:04 -07:00
aniaan	f45f0d0431	fix(python): correct type annotations in EmbeddingFunctionRegistry (#2478 ) - Fix register() method's alias parameter type from 'str = None' to 'Optional[str] = None' - Add return type annotation 'Type[EmbeddingFunction]' to get() method - Import Type from typing module for proper type hints	2025-07-24 12:31:49 -07:00
Tristan Zajonc	b9e3c36d82	fix: replace broken documentation URLs in error messages (#2533 ) Replaces broken 404 URL and unhelpful documentation links in type error messages with working URL and inline list of supported data types. Before: Points to https://lancedb.github.io/lance/read_and_write.html (404 error) After: Lists supported types inline and points to https://lancedb.github.io/lancedb/guides/tables/	2025-07-24 12:30:27 -07:00
Chen Chongchen	3cd7dd3375	fix: to_pydantic typing (#2517 ) Currently, to_pydantic always returns LanceModel. If type checking is enabled in my project, I have to use `cast(data, List[RealModelType])` to resolve the type error. This PR uses generics to solve this problem.	2025-07-24 12:30:15 -07:00
Tristan Zajonc	12d4ce4cfe	fix: resolve flaky Node.js integration test for mirrored store (#2539 ) ## Summary - Fixed flaky Node.js integration test for mirrored store functionality - Converted callback-based `fs.readdir()` to `fs.promises.readdir()` with proper async/await - Used unique temporary directories to prevent test isolation issues - Updated test expectations to match current IVF-PQ index file structure ## Problem The mirrored store integration test was experiencing random failures in CI with errors like: - `expected 2 to equal 1` at various assertion points - `done() called multiple times` ## Root Causes Identified 1. Race conditions: Mixing callback-based filesystem operations with async functions created timing issues where assertions ran before filesystem operations completed 2. Test isolation: Multiple tests shared the same temp directory (`tmpdir()`), causing one test to see files from another 3. Outdated expectations: IVF-PQ indexes now create 2 files (`auxiliary.idx` + `index.idx`) instead of 1, but the test expected only 1 ## Solution - Replace all `fs.readdir()` callbacks with `fs.promises.readdir()` and `await` - Use `fs.promises.mkdtemp()` to create unique temporary directories for each test run - Update index file count expectations from 1 to 2 files to match current Lance behavior - Add descriptive assertion labels for easier debugging ## Analysis The mirroring implementation in `MirroringObjectStore::put_opts` is synchronous - it awaits writes to both secondary (local) and primary (S3) stores before returning. The test failures were due to callback/async pattern mismatch and test isolation issues, not actual async mirroring behavior. ## Test plan - [x] Local tests are running without timing-based failures - [x] Integration tests with AWS credentials pass in CI This resolves the flaky failures including 'expected 2 to equal 1' assertions and 'done() called multiple times' errors seen in CI runs.	2025-07-24 12:07:05 -07:00
Will Jones	3d1f102087	feat: allow Python and Typescript users to create `Session`s (#2530 ) ## Summary - Exposes `Session` in Python and Typescript so users can set the `index_cache_size_bytes` and `metadata_cache_size_bytes` * The `Session` is attached to the `Connection`, and thus shared across all tables in that connection. - Adds deprecation warnings for table-level cache configuration 🤖 Generated with [Claude Code](https://claude.ai/code) --------- Co-authored-by: Claude <noreply@anthropic.com>	2025-07-24 12:06:29 -07:00
Tristan Zajonc	81afd8a42f	fix: use local random state in FTS test fixtures to prevent flaky failures (#2532 ) ## Summary Fixes intermittent CI failures in `test_search_fts[False]` where boolean FTS queries were returning fewer results than expected due to non-deterministic test data generation. ## Problem The test was using global `random` and `np.random` without seeding, causing the boolean query `MatchQuery("puppy", "text") & MatchQuery("runs", "text")` to sometimes return only 3 results instead of the expected 5, leading to `AssertionError: assert 3 == 5`. ## Solution - Replace global random calls with local `random.Random(42)` and `np.random.RandomState(42)` objects in test fixtures - Ensures deterministic test data while maintaining test isolation - No impact on other tests since random state is scoped to fixtures only ## Test Results - ✅ `test_search_fts[False]` now passes consistently - ✅ All other FTS tests continue to pass - ✅ No regression in other test suites (verified with `test_basic`) - ✅ Maintains existing test behavior and coverage	2025-07-24 11:30:02 -07:00
Tristan Zajonc	c2aa03615a	fix: correct grammar in LanceDB cloud connection error message (#2537 ) ## Summary Fixed a minor grammar error in the error message for missing API key when connecting to LanceDB cloud. ## Changes - Changed 'api_key is required to connected LanceDB cloud' to 'api_key is required to connect to LanceDB cloud' - Location: `python/python/lancedb/__init__.py:95` ## Test plan - Error message formatting is correct and grammatical - No functional changes to existing behavior	2025-07-24 09:56:06 -07:00
Tristan Zajonc	d2c6759e7f	fix: use import stubs to prevent MLX doctest collection failures (#2536 ) ## Summary - Add `create_import_stub()` helper to `embeddings/utils.py` for handling optional dependencies - Fix MLX doctest collection failures by using import stubs in `gte_mlx_model.py` - Module now imports successfully for doctest collection even when MLX is not installed ## Changes - New utility function: `create_import_stub()` creates placeholder objects that allow class inheritance but raise helpful errors when used - Updated MLX model: Uses import stubs instead of direct imports that fail immediately - Graceful degradation: Clear error messages when MLX functionality is accessed without MLX installed ## Test Results - ✅ `pytest --doctest-modules python/lancedb` now passes (with and without MLX installed) - ✅ All existing tests continue to pass - ✅ MLX functionality works normally when MLX is installed - ✅ Helpful error messages when MLX functionality is used without MLX installed Fixes #2538 --------- Co-authored-by: Will Jones <willjones127@gmail.com>	2025-07-23 16:25:33 -07:00
Weston Pace	94fb9f364a	feat: update lance version to 0.32.0-b2 (#2525 )	2025-07-23 12:23:10 -07:00
Will Jones	fbff244ed8	chore: add claude md files (#2531 ) Gives basic context to Claude about how to do common tasks in the repo.	2025-07-23 12:20:36 -07:00
Xuanwo	7e7466d224	ci: enable trust publishing for rust crates (#2529 )	2025-07-23 14:53:52 +08:00
Lance Release	cceaf27d79	Bump version: 0.21.2-beta.0 → 0.21.2-beta.1	2025-07-22 15:41:13 +00:00