Compare commits


1 Commit

Author | SHA1 | Message | Date
Lance Release | 85b3ff3a09 | Bump version: 0.23.0 → 0.23.1-beta.0 | 2025-12-17 03:30:40 +00:00
42 changed files with 310 additions and 1258 deletions

View File

@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.23.1"
+current_version = "0.23.1-beta.0"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.

Cargo.lock (generated)
View File

@@ -3141,9 +3141,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
 [[package]]
 name = "fsst"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ffdff7a2d68d22afc0657eddde3e946371ce7cfe730a3f78a5ed44ea5b1cb2e"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow-array",
  "rand 0.9.2",
@@ -4262,7 +4261,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5"
 dependencies = [
  "equivalent",
- "hashbrown 0.16.0",
+ "hashbrown 0.15.5",
  "serde",
  "serde_core",
 ]
@@ -4479,9 +4478,8 @@ dependencies = [
 [[package]]
 name = "lance"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8c439decbc304e180748e34bb6d3df729069a222e83e74e2185c38f107136e9"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow",
  "arrow-arith",
@@ -4546,9 +4544,8 @@ dependencies = [
 [[package]]
 name = "lance-arrow"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4ee5508b225456d3d56998eaeef0d8fbce5ea93856df47b12a94d2e74153210"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -4566,9 +4563,8 @@ dependencies = [
 [[package]]
 name = "lance-bitpacking"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d1c065fb3bd4a8cc4f78428443e990d4921aa08f707b676753db740e0b402a21"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrayref",
  "paste",
@@ -4577,9 +4573,8 @@ dependencies = [
 [[package]]
 name = "lance-core"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8856abad92e624b75cd57a04703f6441948a239463bdf973f2ac1924b0bcdbe"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -4615,9 +4610,8 @@ dependencies = [
 [[package]]
 name = "lance-datafusion"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c8835308044cef5467d7751be87fcbefc2db01c22370726a8704bd62991693f"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -4647,9 +4641,8 @@ dependencies = [
 [[package]]
 name = "lance-datagen"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "612de1e888bb36f6bf51196a6eb9574587fdf256b1759a4c50e643e00d5f96d0"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -4666,9 +4659,8 @@ dependencies = [
 [[package]]
 name = "lance-encoding"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b456b29b135d3c7192602e516ccade38b5483986e121895fa43cf1fdb38bf60"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -4705,9 +4697,8 @@ dependencies = [
 [[package]]
 name = "lance-file"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab1538d14d5bb3735b4222b3f5aff83cfa59cc6ef7cdd3dd9139e4c77193c80b"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -4739,9 +4730,8 @@ dependencies = [
 [[package]]
 name = "lance-geo"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a5a69a2f3b55703d9c240ad7c5ffa2c755db69e9cf8aa05efe274a212910472d"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "datafusion",
  "geo-types",
@@ -4752,9 +4742,8 @@ dependencies = [
 [[package]]
 name = "lance-index"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ea84613df6fa6b9168a1f056ba4f9cb73b90a1b452814c6fd4b3529bcdbfc78"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow",
  "arrow-arith",
@@ -4815,9 +4804,8 @@ dependencies = [
 [[package]]
 name = "lance-io"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b3fc4c1d941fceef40a0edbd664dbef108acfc5d559bb9e7f588d0c733cbc35"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow",
  "arrow-arith",
@@ -4857,9 +4845,8 @@ dependencies = [
 [[package]]
 name = "lance-linalg"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b62ffbc5ce367fbf700a69de3fe0612ee1a11191a64a632888610b6bacfa0f63"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -4875,9 +4862,8 @@ dependencies = [
 [[package]]
 name = "lance-namespace"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "791bbcd868ee758123a34e07d320a1fb99379432b5ecc0e78d6b4686e999b629"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow",
  "async-trait",
@@ -4889,9 +4875,8 @@ dependencies = [
 [[package]]
 name = "lance-namespace-impls"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ee713505576f6b1988a491f77c7ca8b0cf7090a393598e63c85079fa70a53ebf"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow",
  "arrow-ipc",
@@ -4933,9 +4918,8 @@ dependencies = [
 [[package]]
 name = "lance-table"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6fdb2d56bfa4d1511c765fa0cc00fdaa37e5d2d1cd2f57b3c6355d9072177052"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -4974,9 +4958,8 @@ dependencies = [
 [[package]]
 name = "lance-testing"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8ccb1a4a9284435c6a8c02c8c06e7e041bece0d7f722152159353cf55dc51e3"
+version = "1.0.1-beta.1"
+source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
 dependencies = [
  "arrow-array",
  "arrow-schema",
@@ -4987,7 +4970,7 @@ dependencies = [
 [[package]]
 name = "lancedb"
-version = "0.23.1"
+version = "0.23.0"
 dependencies = [
  "ahash",
  "anyhow",
@@ -5066,7 +5049,7 @@ dependencies = [
 [[package]]
 name = "lancedb-nodejs"
-version = "0.23.1"
+version = "0.23.0"
 dependencies = [
  "arrow-array",
  "arrow-ipc",
@@ -5086,7 +5069,7 @@ dependencies = [
 [[package]]
 name = "lancedb-python"
-version = "0.26.1"
+version = "0.26.0"
 dependencies = [
  "arrow",
  "async-trait",
@@ -6742,8 +6725,8 @@ version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
 dependencies = [
- "heck 0.5.0",
- "itertools 0.14.0",
+ "heck 0.4.1",
+ "itertools 0.12.1",
  "log",
  "multimap",
  "once_cell",
@@ -6763,7 +6746,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
 dependencies = [
  "anyhow",
- "itertools 0.14.0",
+ "itertools 0.12.1",
  "proc-macro2",
  "quote",
  "syn 2.0.106",
@@ -8093,7 +8076,7 @@ version = "0.8.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451"
 dependencies = [
- "heck 0.5.0",
+ "heck 0.4.1",
  "proc-macro2",
  "quote",
  "syn 2.0.106",

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=1.0.1", default-features = false }
-lance-core = "=1.0.1"
-lance-datagen = "=1.0.1"
-lance-file = "=1.0.1"
-lance-io = { "version" = "=1.0.1", default-features = false }
-lance-index = "=1.0.1"
-lance-linalg = "=1.0.1"
-lance-namespace = "=1.0.1"
-lance-namespace-impls = { "version" = "=1.0.1", default-features = false }
-lance-table = "=1.0.1"
-lance-testing = "=1.0.1"
-lance-datafusion = "=1.0.1"
-lance-encoding = "=1.0.1"
-lance-arrow = "=1.0.1"
+lance = { "version" = "=1.0.1-beta.1", default-features = false, "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-core = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-datagen = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-file = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-io = { "version" = "=1.0.1-beta.1", default-features = false, "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-index = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-linalg = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace-impls = { "version" = "=1.0.1-beta.1", default-features = false, "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-table = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-testing = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-datafusion = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-encoding = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-arrow = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
 ahash = "0.8"
 # Note that this one does not include pyarrow
 arrow = { version = "56.2", optional = false }

View File

@@ -16,7 +16,7 @@ check_command_exists() {
 }

 if [[ ! -e ./lancedb ]]; then
-  if [[ x${SOPHON_READ_TOKEN} != "x" ]]; then
+  if [[ -v SOPHON_READ_TOKEN ]]; then
    INPUT="lancedb-linux-x64"
    gh release \
      --repo lancedb/lancedb \

View File

@@ -11,7 +11,7 @@ watch:
 theme:
   name: "material"
   logo: assets/logo.png
-  favicon: assets/favicon.ico
+  favicon: assets/logo.png
   palette:
     # Palette toggle for light mode
     - scheme: lancedb
@@ -32,6 +32,8 @@ theme:
     - content.tooltips
     - toc.follow
     - navigation.top
+    - navigation.tabs
+    - navigation.tabs.sticky
    - navigation.footer
    - navigation.tracking
    - navigation.instant
@@ -113,13 +115,12 @@ markdown_extensions:
       emoji_index: !!python/name:material.extensions.emoji.twemoji
       emoji_generator: !!python/name:material.extensions.emoji.to_svg
   - markdown.extensions.toc:
-      toc_depth: 3
-      permalink: true
-      permalink_title: Anchor link to this section
+      baselevel: 1
+      permalink: ""

 nav:
-- Documentation:
-  - SDK Reference: index.md
+- API reference:
+  - Overview: index.md
   - Python: python/python.md
   - Javascript/TypeScript: js/globals.md
   - Java: java/java.md

Binary file not shown (before: image, 15 KiB)

View File

@@ -1,111 +0,0 @@
-# VoyageAI Embeddings : Multimodal
-
-VoyageAI embeddings can also be used to embed both text and image data, only some of the models support image data and you can check the list
-under [https://docs.voyageai.com/docs/multimodal-embeddings](https://docs.voyageai.com/docs/multimodal-embeddings)
-
-Supported multimodal models:
-
-- `voyage-multimodal-3` - 1024 dimensions (text + images)
-- `voyage-multimodal-3.5` - Flexible dimensions (256, 512, 1024 default, 2048). Supports text, images, and video.
-
-### Video Support (voyage-multimodal-3.5)
-
-The `voyage-multimodal-3.5` model supports video input through:
-
-- Video URLs (`.mp4`, `.webm`, `.mov`, `.avi`, `.mkv`, `.m4v`, `.gif`)
-- Video file paths
-
-Constraints: Max 20MB video size.
-
-Supported parameters (to be passed in `create` method) are:
-
-| Parameter | Type | Default Value | Description |
-|---|---|---|---|
-| `name` | `str` | `"voyage-multimodal-3"` | The model ID of the VoyageAI model to use |
-| `output_dimension` | `int` | `None` | Output dimension for voyage-multimodal-3.5. Valid: 256, 512, 1024, 2048 |
-
-Usage Example:
-
-```python
-import base64
-import os
-from io import BytesIO
-
-import requests
-import lancedb
-from lancedb.pydantic import LanceModel, Vector
-from lancedb.embeddings import get_registry
-import pandas as pd
-
-os.environ['VOYAGE_API_KEY'] = 'YOUR_VOYAGE_API_KEY'
-
-db = lancedb.connect(".lancedb")
-func = get_registry().get("voyageai").create(name="voyage-multimodal-3")
-
-
-def image_to_base64(image_bytes: bytes):
-    buffered = BytesIO(image_bytes)
-    img_str = base64.b64encode(buffered.getvalue())
-    return img_str.decode("utf-8")
-
-
-class Images(LanceModel):
-    label: str
-    image_uri: str = func.SourceField()  # image uri as the source
-    image_bytes: str = func.SourceField()  # image bytes base64 encoded as the source
-    vector: Vector(func.ndims()) = func.VectorField()  # vector column
-    vec_from_bytes: Vector(func.ndims()) = func.VectorField()  # Another vector column
-
-
-if "images" in db.table_names():
-    db.drop_table("images")
-table = db.create_table("images", schema=Images)
-labels = ["cat", "cat", "dog", "dog", "horse", "horse"]
-uris = [
-    "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
-    "http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
-    "http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
-    "http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
-    "http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
-    "http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
-]
-# get each uri as bytes
-images_bytes = [image_to_base64(requests.get(uri).content) for uri in uris]
-table.add(
-    pd.DataFrame({"label": labels, "image_uri": uris, "image_bytes": images_bytes})
-)
-```
-
-Now we can search using text from both the default vector column and the custom vector column
-
-```python
-# text search
-actual = table.search("man's best friend", "vec_from_bytes").limit(1).to_pydantic(Images)[0]
-print(actual.label)  # prints "dog"
-
-frombytes = (
-    table.search("man's best friend", vector_column_name="vec_from_bytes")
-    .limit(1)
-    .to_pydantic(Images)[0]
-)
-print(frombytes.label)
-```
-
-Because we're using a multi-modal embedding function, we can also search using images
-
-```python
-# image search
-query_image_uri = "http://farm1.staticflickr.com/200/467715466_ed4a31801f_z.jpg"
-image_bytes = requests.get(query_image_uri).content
-query_image = Image.open(BytesIO(image_bytes))
-actual = table.search(query_image, "vec_from_bytes").limit(1).to_pydantic(Images)[0]
-print(actual.label == "dog")
-
-# image search using a custom vector column
-other = (
-    table.search(query_image, vector_column_name="vec_from_bytes")
-    .limit(1)
-    .to_pydantic(Images)[0]
-)
-print(actual.label)
-```

View File

@@ -1,12 +1,8 @@
-# SDK Reference
+# API Reference

-This site contains the API reference for the client SDKs supported by [LanceDB](https://lancedb.com).
+This page contains the API reference for the SDKs supported by the LanceDB team.

 - [Python](python/python.md)
 - [JavaScript/TypeScript](js/globals.md)
 - [Java](java/java.md)
 - [Rust](https://docs.rs/lancedb/latest/lancedb/index.html)
-
-!!! info "LanceDB Documentation"
-    If you're looking for the full documentation of LanceDB, visit [docs.lancedb.com](https://docs.lancedb.com).

View File

@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
 <dependency>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-core</artifactId>
-    <version>0.23.1</version>
+    <version>0.23.1-beta.0</version>
 </dependency>
 ```

View File

@@ -85,26 +85,17 @@
 /* Header gradient (only header area) */
 .md-header {
-  background: linear-gradient(90deg, #e4d8f8 0%, #F0B7C1 45%, #E55A2B 100%);
+  background: linear-gradient(90deg, #3B2E58 0%, #F0B7C1 45%, #E55A2B 100%);
   box-shadow: inset 0 1px 0 rgba(255,255,255,0.08), 0 1px 0 rgba(0,0,0,0.08);
 }

-/* Improve brand title contrast on the lavender side */
-.md-header__title,
-.md-header__topic,
-.md-header__title .md-ellipsis,
-.md-header__topic .md-ellipsis {
-  color: #2b1b3a;
-  text-shadow: 0 1px 0 rgba(255, 255, 255, 0.25);
-}
-
 /* Same colors as header for tabs (that hold the text) */
 .md-tabs {
-  background: linear-gradient(90deg, #e4d8f8 0%, #F0B7C1 45%, #E55A2B 100%);
+  background: linear-gradient(90deg, #3B2E58 0%, #F0B7C1 45%, #E55A2B 100%);
 }

 /* Dark scheme variant */
 [data-md-color-scheme="slate"] .md-header,
 [data-md-color-scheme="slate"] .md-tabs {
-  background: linear-gradient(90deg, #e4d8f8 0%, #F0B7C1 45%, #E55A2B 100%);
+  background: linear-gradient(90deg, #3B2E58 0%, #F0B7C1 45%, #E55A2B 100%);
 }

View File

@@ -8,7 +8,7 @@
 <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.23.1-final.0</version>
+    <version>0.23.1-beta.0</version>
     <relativePath>../pom.xml</relativePath>
 </parent>

View File

@@ -6,7 +6,7 @@
 <groupId>com.lancedb</groupId>
 <artifactId>lancedb-parent</artifactId>
-<version>0.23.1-final.0</version>
+<version>0.23.1-beta.0</version>
 <packaging>pom</packaging>
 <name>${project.artifactId}</name>
 <description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.23.1"
+version = "0.23.1-beta.0"
 license.workspace = true
 description.workspace = true
 repository.workspace = true

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-arm64",
-  "version": "0.23.1",
+  "version": "0.23.1-beta.0",
   "os": ["darwin"],
   "cpu": ["arm64"],
   "main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-x64",
-  "version": "0.23.1",
+  "version": "0.23.1-beta.0",
   "os": ["darwin"],
   "cpu": ["x64"],
   "main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-gnu",
-  "version": "0.23.1",
+  "version": "0.23.1-beta.0",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-musl",
-  "version": "0.23.1",
+  "version": "0.23.1-beta.0",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-gnu",
-  "version": "0.23.1",
+  "version": "0.23.1-beta.0",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-musl",
-  "version": "0.23.1",
+  "version": "0.23.1-beta.0",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.23.1",
+  "version": "0.23.1-beta.0",
   "os": [
     "win32"
   ],

View File

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-x64-msvc",
-  "version": "0.23.1",
+  "version": "0.23.1-beta.0",
   "os": ["win32"],
   "cpu": ["x64"],
   "main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
 {
   "name": "@lancedb/lancedb",
-  "version": "0.23.1",
+  "version": "0.23.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@lancedb/lancedb",
-      "version": "0.23.1",
+      "version": "0.23.0",
       "cpu": [
         "x64",
         "arm64"

View File

@@ -11,7 +11,7 @@
     "ann"
   ],
   "private": false,
-  "version": "0.23.1",
+  "version": "0.23.1-beta.0",
   "main": "dist/index.js",
   "exports": {
     ".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.26.1"
+current_version = "0.26.1-beta.0"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.26.1"
+version = "0.26.1-beta.0"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true

View File

@@ -13,7 +13,6 @@ __version__ = importlib.metadata.version("lancedb")

 from ._lancedb import connect as lancedb_connect
 from .common import URI, sanitize_uri
-from urllib.parse import urlparse
 from .db import AsyncConnection, DBConnection, LanceDBConnection
 from .io import StorageOptionsProvider
 from .remote import ClientConfig
@@ -29,39 +28,6 @@ from .namespace import (
 )

-def _check_s3_bucket_with_dots(
-    uri: str, storage_options: Optional[Dict[str, str]]
-) -> None:
-    """
-    Check if an S3 URI has a bucket name containing dots and warn if no region
-    is specified. S3 buckets with dots cannot use virtual-hosted-style URLs,
-    which breaks automatic region detection.
-
-    See: https://github.com/lancedb/lancedb/issues/1898
-    """
-    if not isinstance(uri, str) or not uri.startswith("s3://"):
-        return
-
-    parsed = urlparse(uri)
-    bucket = parsed.netloc
-    if "." not in bucket:
-        return
-
-    # Check if region is provided in storage_options
-    region_keys = {"region", "aws_region"}
-    has_region = storage_options and any(k in storage_options for k in region_keys)
-    if not has_region:
-        raise ValueError(
-            f"S3 bucket name '{bucket}' contains dots, which prevents automatic "
-            f"region detection. Please specify the region explicitly via "
-            f"storage_options={{'region': '<your-region>'}} or "
-            f"storage_options={{'aws_region': '<your-region>'}}. "
-            f"See https://github.com/lancedb/lancedb/issues/1898 for details."
-        )
-

 def connect(
     uri: URI,
     *,
@@ -155,11 +121,9 @@ def connect(
             storage_options=storage_options,
             **kwargs,
         )
-    _check_s3_bucket_with_dots(str(uri), storage_options)
     if kwargs:
         raise ValueError(f"Unknown keyword arguments: {kwargs}")
-
     return LanceDBConnection(
         uri,
         read_consistency_interval=read_consistency_interval,
@@ -247,8 +211,6 @@ async def connect_async(
     if isinstance(client_config, dict):
         client_config = ClientConfig(**client_config)

-    _check_s3_bucket_with_dots(str(uri), storage_options)
-
     return AsyncConnection(
         await lancedb_connect(
             sanitize_uri(uri),
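
The removed `_check_s3_bucket_with_dots` guard only enforced a workaround that callers can still apply themselves: pass the region explicitly when the bucket name contains dots. A minimal sketch, with a hypothetical bucket and region:

```python
import lancedb

# Bucket names containing dots cannot use virtual-hosted-style URLs, which
# breaks automatic region detection, so supply the region up front.
db = lancedb.connect(
    "s3://my.bucket.name/path",  # hypothetical dotted bucket
    storage_options={"region": "us-east-1"},  # or {"aws_region": "us-east-1"}
)
```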

View File

@@ -210,8 +210,10 @@ class DBConnection(EnforceOverrides):
         page_token: str, optional
             The token to use for pagination. If not present, start from the beginning.
             Typically, this token is last table name from the previous page.
+            Only supported by LanceDb Cloud.
         limit: int, default 10
             The size of the page to return.
+            Only supported by LanceDb Cloud.

         Returns
         -------
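
The two added notes scope pagination to LanceDB Cloud; a short sketch of the loop the docstring implies, assuming a Cloud connection bound to `db`:

```python
# Walk all table names page by page; per the docstring, the token is
# typically the last table name of the previous page.
page = db.table_names(limit=10)
while page:
    for name in page:
        print(name)
    page = db.table_names(page_token=page[-1], limit=10)
```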

View File

@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import base64
 import os
-from typing import ClassVar, TYPE_CHECKING, List, Union, Any, Generator, Optional
+from typing import ClassVar, TYPE_CHECKING, List, Union, Any, Generator
 from pathlib import Path
 from urllib.parse import urlparse
@@ -45,29 +45,11 @@ def is_valid_url(text):
         return False

-VIDEO_EXTENSIONS = {".mp4", ".webm", ".mov", ".avi", ".mkv", ".m4v", ".gif"}
-
-
-def is_video_url(url: str) -> bool:
-    """Check if URL points to a video file based on extension."""
-    parsed = urlparse(url)
-    path = parsed.path.lower()
-    return any(path.endswith(ext) for ext in VIDEO_EXTENSIONS)
-
-
-def is_video_path(path: Path) -> bool:
-    """Check if file path is a video file based on extension."""
-    return path.suffix.lower() in VIDEO_EXTENSIONS
-
-
 def transform_input(input_data: Union[str, bytes, Path]):
     PIL = attempt_import_or_raise("PIL", "pillow")
     if isinstance(input_data, str):
         if is_valid_url(input_data):
-            if is_video_url(input_data):
-                content = {"type": "video_url", "video_url": input_data}
-            else:
-                content = {"type": "image_url", "image_url": input_data}
+            content = {"type": "image_url", "image_url": input_data}
         else:
             content = {"type": "text", "text": input_data}
     elif isinstance(input_data, PIL.Image.Image):
@@ -88,24 +70,14 @@ def transform_input(input_data: Union[str, bytes, Path]):
             "image_base64": "data:image/jpeg;base64," + img_str,
         }
     elif isinstance(input_data, Path):
-        if is_video_path(input_data):
-            # Read video file and encode as base64
-            with open(input_data, "rb") as f:
-                video_bytes = f.read()
-            video_str = base64.b64encode(video_bytes).decode("utf-8")
-            content = {
-                "type": "video_base64",
-                "video_base64": video_str,
-            }
-        else:
-            img = PIL.Image.open(input_data)
-            buffered = BytesIO()
-            img.save(buffered, format="JPEG")
-            img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
-            content = {
-                "type": "image_base64",
-                "image_base64": "data:image/jpeg;base64," + img_str,
-            }
+        img = PIL.Image.open(input_data)
+        buffered = BytesIO()
+        img.save(buffered, format="JPEG")
+        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+        content = {
+            "type": "image_base64",
+            "image_base64": "data:image/jpeg;base64," + img_str,
+        }
     else:
         raise ValueError("Each input should be either str, bytes, Path or Image.")
@@ -119,8 +91,6 @@ def sanitize_multimodal_input(inputs: Union[TEXT, IMAGES]) -> List[Any]:
     PIL = attempt_import_or_raise("PIL", "pillow")
     if isinstance(inputs, (str, bytes, Path, PIL.Image.Image)):
         inputs = [inputs]
-    elif isinstance(inputs, list):
-        pass  # Already a list, use as-is
     elif isinstance(inputs, pa.Array):
         inputs = inputs.to_pylist()
     elif isinstance(inputs, pa.ChunkedArray):
@@ -173,16 +143,11 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
             * voyage-3
             * voyage-3-lite
             * voyage-multimodal-3
-            * voyage-multimodal-3.5
             * voyage-finance-2
             * voyage-multilingual-2
             * voyage-law-2
             * voyage-code-2
-    output_dimension: int, optional
-        The output dimension for models that support flexible dimensions.
-        Currently only voyage-multimodal-3.5 supports this feature.
-        Valid options: 256, 512, 1024 (default), 2048.

     Examples
     --------
@@ -210,10 +175,7 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
     """

     name: str
-    output_dimension: Optional[int] = None
     client: ClassVar = None
-    _FLEXIBLE_DIM_MODELS: ClassVar[list] = ["voyage-multimodal-3.5"]
-    _VALID_DIMENSIONS: ClassVar[list] = [256, 512, 1024, 2048]

     text_embedding_models: list = [
         "voyage-3.5",
         "voyage-3.5-lite",
@@ -224,7 +186,7 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
         "voyage-law-2",
         "voyage-code-2",
     ]
-    multimodal_embedding_models: list = ["voyage-multimodal-3", "voyage-multimodal-3.5"]
+    multimodal_embedding_models: list = ["voyage-multimodal-3"]
     contextual_embedding_models: list = ["voyage-context-3"]

     def _is_multimodal_model(self, model_name: str):
@@ -236,17 +198,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
         return model_name in self.contextual_embedding_models or "context" in model_name

     def ndims(self):
-        # Handle flexible dimension models
-        if self.name in self._FLEXIBLE_DIM_MODELS:
-            if self.output_dimension is not None:
-                if self.output_dimension not in self._VALID_DIMENSIONS:
-                    raise ValueError(
-                        f"Invalid output_dimension {self.output_dimension} "
-                        f"for {self.name}. Valid options: {self._VALID_DIMENSIONS}"
-                    )
-                return self.output_dimension
-            return 1024  # default dimension
-
         if self.name == "voyage-3-lite":
             return 512
         elif self.name == "voyage-code-2":
@@ -260,17 +211,12 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
             "voyage-finance-2",
             "voyage-multilingual-2",
             "voyage-law-2",
-            "voyage-multimodal-3",
         ]:
             return 1024
         else:
             raise ValueError(f"Model {self.name} not supported")

-    def _get_multimodal_kwargs(self, **kwargs):
-        """Get kwargs for multimodal embed call, including output_dimension if set."""
-        if self.name in self._FLEXIBLE_DIM_MODELS and self.output_dimension is not None:
-            kwargs["output_dimension"] = self.output_dimension
-        return kwargs
-
     def compute_query_embeddings(
         self, query: Union[str, "PIL.Image.Image"], *args, **kwargs
     ) -> List[np.ndarray]:
@@ -288,7 +234,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
         """
         client = VoyageAIEmbeddingFunction._get_client()
         if self._is_multimodal_model(self.name):
-            kwargs = self._get_multimodal_kwargs(**kwargs)
             result = client.multimodal_embed(
                 inputs=[[query]], model=self.name, input_type="query", **kwargs
             )
@@ -330,7 +275,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
             )
         if has_images:
             # Use non-batched API for images
-            kwargs = self._get_multimodal_kwargs(**kwargs)
             result = client.multimodal_embed(
                 inputs=sanitized, model=self.name, input_type="document", **kwargs
             )
@@ -413,7 +357,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
             callable: A function that takes a batch of texts and returns embeddings.
         """
         if self._is_multimodal_model(self.name):
-            multimodal_kwargs = self._get_multimodal_kwargs(**kwargs)

             def embed_batch(batch: List[str]) -> List[np.array]:
                 batch_inputs = sanitize_multimodal_input(batch)
@@ -421,7 +364,7 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
                     inputs=batch_inputs,
                     model=self.name,
                     input_type=input_type,
-                    **multimodal_kwargs,
+                    **kwargs,
                 )
                 return result.embeddings
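
After this change only `voyage-multimodal-3` remains in `multimodal_embedding_models`. A minimal registry sketch for the surviving model, mirroring the deleted docs page (requires `VOYAGE_API_KEY`):

```python
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector

func = get_registry().get("voyageai").create(name="voyage-multimodal-3")

class Images(LanceModel):
    image_uri: str = func.SourceField()                # text or image URI source
    vector: Vector(func.ndims()) = func.VectorField()  # 1024 dims per ndims()
```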

View File

@@ -384,7 +384,6 @@ class RemoteDBConnection(DBConnection):
         on_bad_vectors: str = "error",
         fill_value: float = 0.0,
         mode: Optional[str] = None,
-        exist_ok: bool = False,
         embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
         *,
         namespace: Optional[List[str]] = None,
@@ -413,12 +412,6 @@ class RemoteDBConnection(DBConnection):
             - pyarrow.Schema
            - [LanceModel][lancedb.pydantic.LanceModel]
-        mode: str, default "create"
-            The mode to use when creating the table.
-            Can be either "create", "overwrite", or "exist_ok".
-        exist_ok: bool, default False
-            If exist_ok is True, and mode is None or "create", mode will be changed
-            to "exist_ok".
         on_bad_vectors: str, default "error"
             What to do if any of the vectors are not the same size or contains NaNs.
             One of "error", "drop", "fill".
@@ -490,11 +483,6 @@ class RemoteDBConnection(DBConnection):
             LanceTable(table4)
         """
-        if exist_ok:
-            if mode == "create":
-                mode = "exist_ok"
-            elif not mode:
-                mode = "exist_ok"
         if namespace is None:
             namespace = []
         validate_table_name(name)
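
With `exist_ok` gone from the remote signature, the `mode` string alone selects create behavior. A hedged sketch, assuming a `RemoteDBConnection` bound to `db` (whether the server still accepts a literal `mode="exist_ok"` is not shown by this diff):

```python
data = [{"id": 1, "vector": [0.0, 1.0]}]

# "create" fails if the table already exists; "overwrite" replaces it.
tbl = db.create_table("test", data, mode="overwrite")
```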

View File

@@ -18,17 +18,7 @@ from lancedb._lancedb import (
     UpdateResult,
 )
 from lancedb.embeddings.base import EmbeddingFunctionConfig
-from lancedb.index import (
-    FTS,
-    BTree,
-    Bitmap,
-    HnswSq,
-    IvfFlat,
-    IvfPq,
-    IvfRq,
-    IvfSq,
-    LabelList,
-)
+from lancedb.index import FTS, BTree, Bitmap, HnswSq, IvfFlat, IvfPq, IvfSq, LabelList
 from lancedb.remote.db import LOOP

 import pyarrow as pa
@@ -275,12 +265,6 @@ class RemoteTable(Table):
                 num_sub_vectors=num_sub_vectors,
                 num_bits=num_bits,
             )
-        elif index_type == "IVF_RQ":
-            config = IvfRq(
-                distance_type=metric,
-                num_partitions=num_partitions,
-                num_bits=num_bits,
-            )
         elif index_type == "IVF_SQ":
             config = IvfSq(distance_type=metric, num_partitions=num_partitions)
         elif index_type == "IVF_HNSW_PQ":
@@ -295,8 +279,7 @@ class RemoteTable(Table):
         else:
             raise ValueError(
                 f"Unknown vector index type: {index_type}. Valid options are"
-                " 'IVF_FLAT', 'IVF_PQ', 'IVF_RQ', 'IVF_SQ',"
-                " 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
+                " 'IVF_FLAT', 'IVF_SQ', 'IVF_PQ', 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
             )

         LOOP.run(
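
The updated error message enumerates the remote index types that remain after `IVF_RQ` was dropped. A sketch assuming a `RemoteTable` bound to `table` with a `vector` column (parameter names follow the surrounding code):

```python
table.create_index(
    metric="cosine",
    vector_column_name="vector",
    index_type="IVF_PQ",  # or IVF_FLAT, IVF_SQ, IVF_HNSW_PQ, IVF_HNSW_SQ
)
```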

View File

@@ -684,24 +684,6 @@ class Table(ABC):
         """
         raise NotImplementedError

-    def to_lance(self, **kwargs) -> lance.LanceDataset:
-        """Return the table as a lance.LanceDataset.
-
-        Returns
-        -------
-        lance.LanceDataset
-        """
-        raise NotImplementedError
-
-    def to_polars(self, **kwargs) -> "pl.DataFrame":
-        """Return the table as a polars.DataFrame.
-
-        Returns
-        -------
-        polars.DataFrame
-        """
-        raise NotImplementedError
-
     def create_index(
         self,
         metric="l2",

View File

@@ -613,133 +613,6 @@ def test_voyageai_multimodal_embedding_text_function():
     assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()

-@pytest.mark.slow
-@pytest.mark.skipif(
-    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
-)
-def test_voyageai_multimodal_35_embedding_function():
-    """Test voyage-multimodal-3.5 model with text input."""
-    voyageai = (
-        get_registry()
-        .get("voyageai")
-        .create(name="voyage-multimodal-3.5", max_retries=0)
-    )
-
-    class TextModel(LanceModel):
-        text: str = voyageai.SourceField()
-        vector: Vector(voyageai.ndims()) = voyageai.VectorField()
-
-    df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
-    db = lancedb.connect("~/lancedb")
-    tbl = db.create_table("test_multimodal_35", schema=TextModel, mode="overwrite")
-    tbl.add(df)
-    assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()
-    assert voyageai.ndims() == 1024
-
-
-@pytest.mark.slow
-@pytest.mark.skipif(
-    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
-)
-def test_voyageai_multimodal_35_flexible_dimensions():
-    """Test voyage-multimodal-3.5 model with custom output dimension."""
-    voyageai = (
-        get_registry()
-        .get("voyageai")
-        .create(name="voyage-multimodal-3.5", output_dimension=512, max_retries=0)
-    )
-
-    class TextModel(LanceModel):
-        text: str = voyageai.SourceField()
-        vector: Vector(voyageai.ndims()) = voyageai.VectorField()
-
-    assert voyageai.ndims() == 512
-
-    df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
-    db = lancedb.connect("~/lancedb")
-    tbl = db.create_table("test_multimodal_35_dim", schema=TextModel, mode="overwrite")
-    tbl.add(df)
-    assert len(tbl.to_pandas()["vector"][0]) == 512
-
-
-@pytest.mark.slow
-@pytest.mark.skipif(
-    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
-)
-def test_voyageai_multimodal_35_image_embedding():
-    """Test voyage-multimodal-3.5 model with image input."""
-    voyageai = (
-        get_registry()
-        .get("voyageai")
-        .create(name="voyage-multimodal-3.5", max_retries=0)
-    )
-
-    class Images(LanceModel):
-        label: str
-        image_uri: str = voyageai.SourceField()
-        vector: Vector(voyageai.ndims()) = voyageai.VectorField()
-
-    db = lancedb.connect("~/lancedb")
-    table = db.create_table(
-        "test_multimodal_35_images", schema=Images, mode="overwrite"
-    )
-    labels = ["cat", "dog"]
-    uris = [
-        "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
-        "http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
-    ]
-    table.add(pd.DataFrame({"label": labels, "image_uri": uris}))
-    assert len(table.to_pandas()["vector"][0]) == voyageai.ndims()
-    assert voyageai.ndims() == 1024
-
-
-@pytest.mark.slow
-@pytest.mark.skipif(
-    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
-)
-@pytest.mark.parametrize("dimension", [256, 512, 1024, 2048])
-def test_voyageai_multimodal_35_all_dimensions(dimension):
-    """Test voyage-multimodal-3.5 model with all valid output dimensions."""
-    voyageai = (
-        get_registry()
-        .get("voyageai")
-        .create(name="voyage-multimodal-3.5", output_dimension=dimension, max_retries=0)
-    )
-    assert voyageai.ndims() == dimension
-
-    class TextModel(LanceModel):
-        text: str = voyageai.SourceField()
-        vector: Vector(voyageai.ndims()) = voyageai.VectorField()
-
-    df = pd.DataFrame({"text": ["hello world"]})
-    db = lancedb.connect("~/lancedb")
-    tbl = db.create_table(
-        f"test_multimodal_35_dim_{dimension}", schema=TextModel, mode="overwrite"
-    )
-    tbl.add(df)
-    assert len(tbl.to_pandas()["vector"][0]) == dimension
-
-
-@pytest.mark.slow
-@pytest.mark.skipif(
-    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
-)
-def test_voyageai_multimodal_35_invalid_dimension():
-    """Test voyage-multimodal-3.5 model raises error for invalid output dimension."""
-    with pytest.raises(ValueError, match="Invalid output_dimension"):
-        voyageai = (
-            get_registry()
-            .get("voyageai")
-            .create(name="voyage-multimodal-3.5", output_dimension=999, max_retries=0)
-        )
-        # ndims() is where the validation happens
-        voyageai.ndims()
-
-
 @pytest.mark.slow
 @pytest.mark.skipif(
     importlib.util.find_spec("colpali_engine") is None,

View File

@@ -168,42 +168,6 @@ def test_table_len_sync():
         assert len(table) == 1

-def test_create_table_exist_ok():
-    def handler(request):
-        if request.path == "/v1/table/test/create/?mode=exist_ok":
-            request.send_response(200)
-            request.send_header("Content-Type", "application/json")
-            request.end_headers()
-            request.wfile.write(b"{}")
-        else:
-            request.send_response(404)
-            request.end_headers()
-
-    with mock_lancedb_connection(handler) as db:
-        table = db.create_table("test", [{"id": 1}], exist_ok=True)
-        assert table is not None
-
-    with mock_lancedb_connection(handler) as db:
-        table = db.create_table("test", [{"id": 1}], mode="create", exist_ok=True)
-        assert table is not None
-
-
-def test_create_table_exist_ok_with_mode_overwrite():
-    def handler(request):
-        if request.path == "/v1/table/test/create/?mode=overwrite":
-            request.send_response(200)
-            request.send_header("Content-Type", "application/json")
-            request.end_headers()
-            request.wfile.write(b"{}")
-        else:
-            request.send_response(404)
-            request.end_headers()
-
-    with mock_lancedb_connection(handler) as db:
-        table = db.create_table("test", [{"id": 1}], mode="overwrite", exist_ok=True)
-        assert table is not None
-
-
 @pytest.mark.asyncio
 async def test_http_error():
     request_id_holder = {"request_id": None}

View File

@@ -1,68 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-"""
-Tests for S3 bucket names containing dots.
-
-Related issue: https://github.com/lancedb/lancedb/issues/1898
-
-These tests validate the early error checking for S3 bucket names with dots.
-No actual S3 connection is made - validation happens before connection.
-"""
-
-import pytest
-
-import lancedb
-
-# Test URIs
-BUCKET_WITH_DOTS = "s3://my.bucket.name/path"
-BUCKET_WITH_DOTS_AND_REGION = ("s3://my.bucket.name", {"region": "us-east-1"})
-BUCKET_WITH_DOTS_AND_AWS_REGION = ("s3://my.bucket.name", {"aws_region": "us-east-1"})
-BUCKET_WITHOUT_DOTS = "s3://my-bucket/path"
-
-
-class TestS3BucketWithDotsSync:
-    """Tests for connect()."""
-
-    def test_bucket_with_dots_requires_region(self):
-        with pytest.raises(ValueError, match="contains dots"):
-            lancedb.connect(BUCKET_WITH_DOTS)
-
-    def test_bucket_with_dots_and_region_passes(self):
-        uri, opts = BUCKET_WITH_DOTS_AND_REGION
-        db = lancedb.connect(uri, storage_options=opts)
-        assert db is not None
-
-    def test_bucket_with_dots_and_aws_region_passes(self):
-        uri, opts = BUCKET_WITH_DOTS_AND_AWS_REGION
-        db = lancedb.connect(uri, storage_options=opts)
-        assert db is not None
-
-    def test_bucket_without_dots_passes(self):
-        db = lancedb.connect(BUCKET_WITHOUT_DOTS)
-        assert db is not None
-
-
-class TestS3BucketWithDotsAsync:
-    """Tests for connect_async()."""
-
-    @pytest.mark.asyncio
-    async def test_bucket_with_dots_requires_region(self):
-        with pytest.raises(ValueError, match="contains dots"):
-            await lancedb.connect_async(BUCKET_WITH_DOTS)
-
-    @pytest.mark.asyncio
-    async def test_bucket_with_dots_and_region_passes(self):
-        uri, opts = BUCKET_WITH_DOTS_AND_REGION
-        db = await lancedb.connect_async(uri, storage_options=opts)
-        assert db is not None
-
-    @pytest.mark.asyncio
-    async def test_bucket_with_dots_and_aws_region_passes(self):
-        uri, opts = BUCKET_WITH_DOTS_AND_AWS_REGION
-        db = await lancedb.connect_async(uri, storage_options=opts)
-        assert db is not None
-
-    @pytest.mark.asyncio
-    async def test_bucket_without_dots_passes(self):
-        db = await lancedb.connect_async(BUCKET_WITHOUT_DOTS)
-        assert db is not None

View File

@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.23.1"
+version = "0.23.1-beta.0"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true

View File

@@ -1325,27 +1325,25 @@ mod tests {
     #[tokio::test]
     async fn test_table_names() {
-        let tc = new_test_connection().await.unwrap();
-        let db = tc.connection;
-        let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
+        let tmp_dir = tempdir().unwrap();
         let mut names = Vec::with_capacity(100);
         for _ in 0..100 {
-            let name = uuid::Uuid::new_v4().to_string();
+            let mut name = uuid::Uuid::new_v4().to_string();
             names.push(name.clone());
-            db.create_empty_table(name, schema.clone())
-                .execute()
-                .await
-                .unwrap();
+            name.push_str(".lance");
+            create_dir_all(tmp_dir.path().join(&name)).unwrap();
         }
         names.sort();
-        let tables = db.table_names().limit(100).execute().await.unwrap();
+
+        let uri = tmp_dir.path().to_str().unwrap();
+        let db = connect(uri).execute().await.unwrap();
+        let tables = db.table_names().execute().await.unwrap();
         assert_eq!(tables, names);

         let tables = db
             .table_names()
             .start_after(&names[30])
-            .limit(100)
             .execute()
             .await
             .unwrap();

View File

@@ -7,6 +7,7 @@ use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use async_trait::async_trait; use async_trait::async_trait;
use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsProvider};
use lance_namespace::{ use lance_namespace::{
models::{ models::{
CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse, CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
@@ -18,13 +19,13 @@ use lance_namespace::{
}; };
use lance_namespace_impls::ConnectBuilder; use lance_namespace_impls::ConnectBuilder;
use crate::connection::ConnectRequest;
use crate::database::ReadConsistency; use crate::database::ReadConsistency;
use crate::error::{Error, Result}; use crate::error::{Error, Result};
use crate::table::NativeTable;
use super::{ use super::{
BaseTable, CloneTableRequest, CreateTableMode, CreateTableRequest as DbCreateTableRequest, listing::ListingDatabase, BaseTable, CloneTableRequest, CreateTableMode,
Database, OpenTableRequest, TableNamesRequest, CreateTableRequest as DbCreateTableRequest, Database, OpenTableRequest, TableNamesRequest,
}; };
/// A database implementation that uses lance-namespace for table management /// A database implementation that uses lance-namespace for table management
@@ -89,6 +90,51 @@ impl std::fmt::Display for LanceNamespaceDatabase {
} }
} }
impl LanceNamespaceDatabase {
/// Create a temporary listing database for the given location
///
/// Merges storage options with priority: connection < user < namespace
async fn create_listing_database(
&self,
location: &str,
table_id: Vec<String>,
user_storage_options: Option<&HashMap<String, String>>,
response_storage_options: Option<&HashMap<String, String>>,
) -> Result<ListingDatabase> {
// Merge storage options: connection < user < namespace
let mut merged_storage_options = self.storage_options.clone();
if let Some(opts) = user_storage_options {
merged_storage_options.extend(opts.clone());
}
if let Some(opts) = response_storage_options {
merged_storage_options.extend(opts.clone());
}
let request = ConnectRequest {
uri: location.to_string(),
#[cfg(feature = "remote")]
client_config: Default::default(),
options: merged_storage_options,
read_consistency_interval: self.read_consistency_interval,
session: self.session.clone(),
};
let mut listing_db = ListingDatabase::connect_with_options(&request).await?;
// Create storage options provider only if namespace returned storage options
// (not just user-provided options)
if response_storage_options.is_some() {
let provider = Arc::new(LanceNamespaceStorageOptionsProvider::new(
self.namespace.clone(),
table_id,
)) as Arc<dyn StorageOptionsProvider>;
listing_db.storage_options_provider = Some(provider);
}
Ok(listing_db)
}
}
#[async_trait] #[async_trait]
impl Database for LanceNamespaceDatabase { impl Database for LanceNamespaceDatabase {
fn uri(&self) -> &str { fn uri(&self) -> &str {
@@ -149,6 +195,14 @@ impl Database for LanceNamespaceDatabase {
} }
async fn create_table(&self, request: DbCreateTableRequest) -> Result<Arc<dyn BaseTable>> { async fn create_table(&self, request: DbCreateTableRequest) -> Result<Arc<dyn BaseTable>> {
// Extract user-provided storage options from request
let user_storage_options = request
.write_options
.lance_write_params
.as_ref()
.and_then(|lwp| lwp.store_params.as_ref())
.and_then(|sp| sp.storage_options.as_ref());
let mut table_id = request.namespace.clone(); let mut table_id = request.namespace.clone();
table_id.push(request.name.clone()); table_id.push(request.name.clone());
let describe_request = DescribeTableRequest { let describe_request = DescribeTableRequest {
@@ -181,20 +235,34 @@ impl Database for LanceNamespaceDatabase {
} }
} }
CreateTableMode::ExistOk(_) => { CreateTableMode::ExistOk(_) => {
if describe_result.is_ok() { if let Ok(response) = describe_result {
let native_table = NativeTable::open_from_namespace( let location = response.location.ok_or_else(|| Error::Runtime {
self.namespace.clone(), message: "Table location is missing from namespace response".to_string(),
&request.name, })?;
request.namespace.clone(),
None,
None,
self.read_consistency_interval,
self.server_side_query_enabled,
self.session.clone(),
)
.await?;
return Ok(Arc::new(native_table)); let listing_db = self
.create_listing_database(
&location,
table_id.clone(),
user_storage_options,
response.storage_options.as_ref(),
)
.await?;
let namespace_client = self
.server_side_query_enabled
.then(|| self.namespace.clone());
return listing_db
.open_table(OpenTableRequest {
name: request.name.clone(),
namespace: request.namespace.clone(),
index_cache_size: None,
lance_read_params: None,
location: Some(location),
namespace_client,
})
.await;
} }
} }
} }
@@ -226,37 +294,82 @@ impl Database for LanceNamespaceDatabase {
message: "Table location is missing from create_empty_table response".to_string(), message: "Table location is missing from create_empty_table response".to_string(),
})?; })?;
let native_table = NativeTable::create_from_namespace( let listing_db = self
self.namespace.clone(), .create_listing_database(
&location, &location,
&request.name, table_id.clone(),
request.namespace.clone(), user_storage_options,
request.data, create_empty_response.storage_options.as_ref(),
None, // write_store_wrapper not used for namespace connections )
request.write_options.lance_write_params, .await?;
self.read_consistency_interval,
self.server_side_query_enabled,
self.session.clone(),
)
.await?;
Ok(Arc::new(native_table)) let namespace_client = self
.server_side_query_enabled
.then(|| self.namespace.clone());
let create_request = DbCreateTableRequest {
name: request.name,
namespace: request.namespace,
data: request.data,
mode: request.mode,
write_options: request.write_options,
location: Some(location),
namespace_client,
};
listing_db.create_table(create_request).await
} }
    async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
        let native_table = NativeTable::open_from_namespace(
            self.namespace.clone(),
            &request.name,
            request.namespace.clone(),
            None, // write_store_wrapper not used for namespace connections
            request.lance_read_params,
            self.read_consistency_interval,
            self.server_side_query_enabled,
            self.session.clone(),
        )
        .await?;
        Ok(Arc::new(native_table))
        // Extract user-provided storage options from request
        let user_storage_options = request
            .lance_read_params
            .as_ref()
            .and_then(|lrp| lrp.store_options.as_ref())
            .and_then(|so| so.storage_options.as_ref());
        let mut table_id = request.namespace.clone();
        table_id.push(request.name.clone());
        let describe_request = DescribeTableRequest {
            id: Some(table_id.clone()),
            version: None,
        };
        let response = self
            .namespace
            .describe_table(describe_request)
            .await
            .map_err(|e| Error::Runtime {
                message: format!("Failed to describe table: {}", e),
            })?;
        let location = response.location.ok_or_else(|| Error::Runtime {
            message: "Table location is missing from namespace response".to_string(),
        })?;
        let listing_db = self
            .create_listing_database(
                &location,
                table_id.clone(),
                user_storage_options,
                response.storage_options.as_ref(),
            )
            .await?;
        let namespace_client = self
            .server_side_query_enabled
            .then(|| self.namespace.clone());
        let open_request = OpenTableRequest {
            name: request.name.clone(),
            namespace: request.namespace.clone(),
            index_cache_size: request.index_cache_size,
            lance_read_params: request.lance_read_params,
            location: Some(location),
            namespace_client,
        };
        listing_db.open_table(open_request).await
    }
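Both paths above build namespace_client with bool::then, which evaluates its closure and yields Some only when the flag is set. A self-contained illustration (names are illustrative):

fn main() {
    let enabled = true;
    let client = String::from("namespace-client");

    // bool::then runs the closure only when the flag is true, so the clone
    // is skipped entirely when queries execute locally.
    let namespace_client: Option<String> = enabled.then(|| client.clone());
    assert_eq!(namespace_client.as_deref(), Some("namespace-client"));

    let disabled = false;
    assert!(disabled.then(|| client.clone()).is_none());
}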
    async fn clone_table(&self, _request: CloneTableRequest) -> Result<Arc<dyn BaseTable>> {

View File

@@ -120,13 +120,8 @@ impl MemoryRegistry {
}
/// A record batch reader that has embeddings applied to it
///
/// This is a wrapper around another record batch reader that applies embedding functions
/// when reading from the record batch.
///
/// When multiple embedding functions are defined, they are computed in parallel using
/// scoped threads to improve performance. For a single embedding function, computation
/// is done inline without threading overhead.
/// This is a wrapper around another record batch reader that applies an embedding function
/// when reading from the record batch
pub struct WithEmbeddings<R: RecordBatchReader> {
    inner: R,
    embeddings: Vec<(EmbeddingDefinition, Arc<dyn EmbeddingFunction>)>,
@@ -240,48 +235,6 @@ impl<R: RecordBatchReader> WithEmbeddings<R> {
            column_definitions,
        })
    }
fn compute_embeddings_parallel(&self, batch: &RecordBatch) -> Result<Vec<Arc<dyn Array>>> {
if self.embeddings.len() == 1 {
let (fld, func) = &self.embeddings[0];
let src_column =
batch
.column_by_name(&fld.source_column)
.ok_or_else(|| Error::InvalidInput {
message: format!("Source column '{}' not found", fld.source_column),
})?;
return Ok(vec![func.compute_source_embeddings(src_column.clone())?]);
}
// Parallel path: multiple embeddings
std::thread::scope(|s| {
let handles: Vec<_> = self
.embeddings
.iter()
.map(|(fld, func)| {
let src_column = batch.column_by_name(&fld.source_column).ok_or_else(|| {
Error::InvalidInput {
message: format!("Source column '{}' not found", fld.source_column),
}
})?;
let handle =
s.spawn(move || func.compute_source_embeddings(src_column.clone()));
Ok(handle)
})
.collect::<Result<_>>()?;
handles
.into_iter()
.map(|h| {
h.join().map_err(|e| Error::Runtime {
message: format!("Thread panicked during embedding computation: {:?}", e),
})?
})
.collect()
})
}
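The removed helper relies on std::thread::scope, which lets worker threads borrow the batch without 'static bounds because the scope guarantees every spawned thread is joined before it returns. A stripped-down, runnable sketch of the same fan-out/join pattern over plain vectors (inputs invented):

use std::thread;

fn main() {
    let columns: Vec<Vec<f32>> = vec![vec![1.0, 2.0], vec![3.0, 4.0], vec![5.0, 6.0]];

    // Scoped threads may borrow `columns` because the scope joins every
    // spawned thread before the closure's result is returned.
    let sums: Vec<f32> = thread::scope(|s| {
        let handles: Vec<_> = columns
            .iter()
            .map(|col| s.spawn(move || col.iter().sum::<f32>()))
            .collect();
        handles.into_iter().map(|h| h.join().unwrap()).collect()
    });

    assert_eq!(sums, vec![3.0, 7.0, 11.0]);
}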
}
impl<R: RecordBatchReader> Iterator for MaybeEmbedded<R> {
@@ -309,19 +262,19 @@ impl<R: RecordBatchReader> Iterator for WithEmbeddings<R> {
    fn next(&mut self) -> Option<Self::Item> {
        let batch = self.inner.next()?;
        match batch {
            Ok(batch) => {
                let embeddings = match self.compute_embeddings_parallel(&batch) {
                    Ok(emb) => emb,
                    Err(e) => {
                        return Some(Err(arrow_schema::ArrowError::ComputeError(format!(
                            "Error computing embedding: {}",
                            e
                        ))))
                    }
                };
                let mut batch = batch;
                for ((fld, _), embedding) in self.embeddings.iter().zip(embeddings.iter()) {
            Ok(mut batch) => {
                // todo: parallelize this
                for (fld, func) in self.embeddings.iter() {
                    let src_column = batch.column_by_name(&fld.source_column).unwrap();
                    let embedding = match func.compute_source_embeddings(src_column.clone()) {
                        Ok(embedding) => embedding,
                        Err(e) => {
                            return Some(Err(arrow_schema::ArrowError::ComputeError(format!(
                                "Error computing embedding: {}",
                                e
                            ))))
                        }
                    };
                    let dst_field_name = fld
                        .dest_column
                        .clone()
@@ -333,7 +286,7 @@ impl<R: RecordBatchReader> Iterator for WithEmbeddings<R> {
                        embedding.nulls().is_some(),
                    );
                    match batch.try_with_column(dst_field.clone(), embedding.clone()) {
                    match batch.try_with_column(dst_field.clone(), embedding) {
                        Ok(b) => batch = b,
                        Err(e) => return Some(Err(e)),
                    };

View File

@@ -1088,17 +1088,6 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
body["num_partitions"] = serde_json::Value::Number(num_partitions.into()); body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
} }
} }
Index::IvfRq(index) => {
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_RQ".to_string());
body[METRIC_TYPE_KEY] =
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
if let Some(num_partitions) = index.num_partitions {
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
}
if let Some(num_bits) = index.num_bits {
body["num_bits"] = serde_json::Value::Number(num_bits.into());
}
}
            Index::BTree(_) => {
                body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
            }
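The removed IVF_RQ arm serialized index parameters into the JSON request body; a runnable miniature of that shape, with literal keys standing in for the INDEX_TYPE_KEY and METRIC_TYPE_KEY constants and invented values:

use serde_json::json;

fn main() {
    // Mirrors the removed arm: index type, metric, and optional knobs.
    let mut body = json!({});
    body["index_type"] = json!("IVF_RQ");
    body["metric_type"] = json!("l2");

    let num_partitions: Option<u64> = Some(256);
    let num_bits: Option<u64> = Some(8);
    if let Some(n) = num_partitions {
        body["num_partitions"] = json!(n);
    }
    if let Some(n) = num_bits {
        body["num_bits"] = json!(n);
    }
    println!("{}", body);
}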

View File

@@ -29,7 +29,7 @@ use lance::dataset::{
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
use lance::index::vector::utils::infer_vector_dim;
use lance::index::vector::VectorIndexParams;
use lance::io::{ObjectStoreParams, WrappingObjectStore};
use lance::io::WrappingObjectStore;
use lance_datafusion::exec::{analyze_plan as lance_analyze_plan, execute_plan};
use lance_datafusion::utils::StreamingWriteSource;
use lance_index::scalar::{BuiltinIndexType, ScalarIndexParams};
@@ -40,7 +40,6 @@ use lance_index::vector::pq::PQBuildParams;
use lance_index::vector::sq::builder::SQBuildParams;
use lance_index::DatasetIndexExt;
use lance_index::IndexType;
use lance_io::object_store::LanceNamespaceStorageOptionsProvider;
use lance_namespace::models::{
    QueryTableRequest as NsQueryTableRequest, QueryTableRequestFullTextQuery,
    QueryTableRequestVector, StringFtsQuery,
@@ -1612,105 +1611,6 @@ impl NativeTable {
        self
    }
/// Opens an existing Table using a namespace client.
///
/// This method uses `DatasetBuilder::from_namespace` to open the table, which
/// automatically fetches the table location and storage options from the namespace.
/// This eliminates the need to pre-fetch and merge storage options before opening.
///
/// # Arguments
///
/// * `namespace_client` - The namespace client to use for fetching table metadata
/// * `name` - The table name
/// * `namespace` - The namespace path (e.g., vec!["parent", "child"])
/// * `write_store_wrapper` - Optional wrapper for the object store on write path
/// * `params` - Optional read parameters
/// * `read_consistency_interval` - Optional interval for read consistency
/// * `server_side_query_enabled` - Whether to enable server-side query execution.
/// When true, the namespace_client will be stored and queries will be executed
/// on the namespace server. When false, the namespace is only used for opening
/// the table, and queries are executed locally.
/// * `session` - Optional session for object stores and caching
///
/// # Returns
///
/// * A [NativeTable] object.
#[allow(clippy::too_many_arguments)]
pub async fn open_from_namespace(
namespace_client: Arc<dyn LanceNamespace>,
name: &str,
namespace: Vec<String>,
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
params: Option<ReadParams>,
read_consistency_interval: Option<std::time::Duration>,
server_side_query_enabled: bool,
session: Option<Arc<lance::session::Session>>,
) -> Result<Self> {
let mut params = params.unwrap_or_default();
// Set the session in read params
if let Some(sess) = session {
params.session(sess);
}
// patch the params if we have a write store wrapper
let params = match write_store_wrapper.clone() {
Some(wrapper) => params.patch_with_store_wrapper(wrapper)?,
None => params,
};
// Build table_id from namespace + name
let mut table_id = namespace.clone();
table_id.push(name.to_string());
// Use DatasetBuilder::from_namespace which automatically fetches location
// and storage options from the namespace
let builder = DatasetBuilder::from_namespace(
namespace_client.clone(),
table_id,
false, // Don't ignore namespace storage options
)
.await
.map_err(|e| match e {
lance::Error::Namespace { source, .. } => Error::Runtime {
message: format!("Failed to get table info from namespace: {:?}", source),
},
source => Error::Lance { source },
})?;
let dataset = builder
.with_read_params(params)
.load()
.await
.map_err(|e| match e {
lance::Error::DatasetNotFound { .. } => Error::TableNotFound {
name: name.to_string(),
source: Box::new(e),
},
source => Error::Lance { source },
})?;
let uri = dataset.uri().to_string();
let dataset = DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval);
let id = Self::build_id(&namespace, name);
let stored_namespace_client = if server_side_query_enabled {
Some(namespace_client)
} else {
None
};
Ok(Self {
name: name.to_string(),
namespace,
id,
uri,
dataset,
read_consistency_interval,
namespace_client: stored_namespace_client,
})
}
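For reference, the call shape of the removed constructor, matching the signature documented above; the table name and namespace path are illustrative, and the snippet assumes the crate's NativeTable and Result plus a caller-supplied LanceNamespace client rather than being a standalone program:

// Sketch only: argument order and meanings per the removed method above.
async fn open_example(client: Arc<dyn LanceNamespace>) -> Result<NativeTable> {
    NativeTable::open_from_namespace(
        client,                                // namespace client
        "my_table",                            // table name
        vec!["parent".into(), "child".into()], // namespace path
        None,  // write_store_wrapper
        None,  // read params (defaults)
        None,  // read_consistency_interval
        false, // execute queries locally, not server-side
        None,  // session
    )
    .await
}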
    fn get_table_name(uri: &str) -> Result<String> {
        let path = Path::new(uri);
        let name = path
@@ -1822,102 +1722,6 @@ impl NativeTable {
            .await
    }
/// Creates a new Table using a namespace client for storage options.
///
/// This method sets up a `StorageOptionsProvider` from the namespace client,
/// enabling automatic credential refresh for cloud storage. The namespace
/// is used for:
/// 1. Setting up storage options provider for credential vending
/// 2. Optionally enabling server-side query execution
///
/// # Arguments
///
/// * `namespace_client` - The namespace client to use for storage options
/// * `uri` - The URI to the table (obtained from create_empty_table response)
/// * `name` - The table name
/// * `namespace` - The namespace path (e.g., vec!["parent", "child"])
/// * `batches` - RecordBatch to be saved in the database
/// * `write_store_wrapper` - Optional wrapper for the object store on write path
/// * `params` - Optional write parameters
/// * `read_consistency_interval` - Optional interval for read consistency
/// * `server_side_query_enabled` - Whether to enable server-side query execution
///
/// # Returns
///
/// * A [NativeTable] object.
#[allow(clippy::too_many_arguments)]
pub async fn create_from_namespace(
namespace_client: Arc<dyn LanceNamespace>,
uri: &str,
name: &str,
namespace: Vec<String>,
batches: impl StreamingWriteSource,
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
params: Option<WriteParams>,
read_consistency_interval: Option<std::time::Duration>,
server_side_query_enabled: bool,
session: Option<Arc<lance::session::Session>>,
) -> Result<Self> {
// Build table_id from namespace + name for the storage options provider
let mut table_id = namespace.clone();
table_id.push(name.to_string());
// Set up storage options provider from namespace
let storage_options_provider = Arc::new(LanceNamespaceStorageOptionsProvider::new(
namespace_client.clone(),
table_id,
));
// Start with provided params or defaults
let mut params = params.unwrap_or_default();
// Set the session in write params
if let Some(sess) = session {
params.session = Some(sess);
}
// Ensure store_params exists and set the storage options provider
let store_params = params
.store_params
.get_or_insert_with(ObjectStoreParams::default);
store_params.storage_options_provider = Some(storage_options_provider);
// Patch the params if we have a write store wrapper
let params = match write_store_wrapper.clone() {
Some(wrapper) => params.patch_with_store_wrapper(wrapper)?,
None => params,
};
let insert_builder = InsertBuilder::new(uri).with_params(&params);
let dataset = insert_builder
.execute_stream(batches)
.await
.map_err(|e| match e {
lance::Error::DatasetAlreadyExists { .. } => Error::TableAlreadyExists {
name: name.to_string(),
},
source => Error::Lance { source },
})?;
let id = Self::build_id(&namespace, name);
let stored_namespace_client = if server_side_query_enabled {
Some(namespace_client)
} else {
None
};
Ok(Self {
name: name.to_string(),
namespace,
id,
uri: uri.to_string(),
dataset: DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval),
read_consistency_interval,
namespace_client: stored_namespace_client,
})
}
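The params plumbing above uses Option::get_or_insert_with to lazily create default store params before attaching the provider; a standalone miniature of that idiom:

fn main() {
    let mut store_params: Option<Vec<String>> = None;

    // get_or_insert_with builds the default only when the Option is None,
    // then hands back a mutable reference either way.
    let params = store_params.get_or_insert_with(Vec::new);
    params.push("storage_options_provider".to_string());

    assert_eq!(store_params.unwrap().len(), 1);
}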
    async fn optimize_indices(&self, options: &OptimizeOptions) -> Result<()> {
        info!("LanceDB: optimizing indices: {:?}", options);
        self.dataset

View File

@@ -5,19 +5,16 @@
use regex::Regex;
use std::env;
use std::process::Stdio;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::{Child, ChildStdout, Command};
use tokio::sync::mpsc;
use std::io::{BufRead, BufReader};
use std::process::{Child, ChildStdout, Command, Stdio};
use crate::{connect, Connection};
use anyhow::{anyhow, bail, Result};
use anyhow::{bail, Result};
use tempfile::{tempdir, TempDir};
pub struct TestConnection {
    pub uri: String,
    pub connection: Connection,
    pub is_remote: bool,
    _temp_dir: Option<TempDir>,
    _process: Option<TestProcess>,
}
@@ -40,56 +37,6 @@ pub async fn new_test_connection() -> Result<TestConnection> {
    }
}
async fn spawn_stdout_reader(
mut stdout: BufReader<ChildStdout>,
port_sender: mpsc::Sender<anyhow::Result<String>>,
) -> tokio::task::JoinHandle<()> {
let print_stdout = env::var("PRINT_LANCEDB_TEST_CONNECTION_SCRIPT_OUTPUT").is_ok();
tokio::spawn(async move {
let mut line = String::new();
let re = Regex::new(r"Query node now listening on 0.0.0.0:(.*)").unwrap();
loop {
line.clear();
let result = stdout.read_line(&mut line).await;
if let Err(err) = result {
port_sender
.send(Err(anyhow!(
"error while reading from process output: {}",
err
)))
.await
.unwrap();
return;
} else if result.unwrap() == 0 {
port_sender
.send(Err(anyhow!(
" hit EOF before reading port from process output."
)))
.await
.unwrap();
return;
}
if re.is_match(&line) {
let caps = re.captures(&line).unwrap();
port_sender.send(Ok(caps[1].to_string())).await.unwrap();
break;
}
}
loop {
line.clear();
match stdout.read_line(&mut line).await {
Err(_) => return,
Ok(0) => return,
Ok(_size) => {
if print_stdout {
print!("{}", line);
}
}
}
}
})
}
async fn new_remote_connection(script_path: &str) -> Result<TestConnection> {
    let temp_dir = tempdir()?;
    let data_path = temp_dir.path().to_str().unwrap().to_string();
@@ -110,25 +57,38 @@ async fn new_remote_connection(script_path: &str) -> Result<TestConnection> {
        child: child_result.unwrap(),
    };
    let stdout = BufReader::new(process.child.stdout.take().unwrap());
    let (port_sender, mut port_receiver) = mpsc::channel(5);
    let _reader = spawn_stdout_reader(stdout, port_sender).await;
    let port = match port_receiver.recv().await {
        None => bail!("Unable to determine the port number used by the phalanx process we spawned, because the reader thread was closed too soon."),
        Some(Err(err)) => bail!("Unable to determine the port number used by the phalanx process we spawned, because of an error, {}", err),
        Some(Ok(port)) => port,
    };
    let port = read_process_port(stdout)?;
    let uri = "db://test";
    let host_override = format!("http://localhost:{}", port);
    let connection = create_new_connection(uri, &host_override).await?;
    Ok(TestConnection {
        uri: uri.to_string(),
        connection,
        is_remote: true,
        _temp_dir: Some(temp_dir),
        _process: Some(process),
    })
}
fn read_process_port(mut stdout: BufReader<ChildStdout>) -> Result<String> {
let mut line = String::new();
let re = Regex::new(r"Query node now listening on 0.0.0.0:(.*)").unwrap();
loop {
let result = stdout.read_line(&mut line);
if let Err(err) = result {
bail!(format!(
"read_process_port: error while reading from process output: {}",
err
));
} else if result.unwrap() == 0 {
bail!("read_process_port: hit EOF before reading port from process output.");
}
if re.is_match(&line) {
let caps = re.captures(&line).unwrap();
return Ok(caps[1].to_string());
}
}
}
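read_process_port pulls the port out of the startup log line with a regex capture group; a runnable miniature of that extraction (the sample line is invented):

use regex::Regex;

fn main() {
    let line = "Query node now listening on 0.0.0.0:50051\n";
    let re = Regex::new(r"Query node now listening on 0.0.0.0:(.*)").unwrap();

    // Capture group 1 holds everything after the colon; (.*) also grabs the
    // trailing newline, hence the trim.
    let caps = re.captures(line).unwrap();
    assert_eq!(caps[1].trim(), "50051");
}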
#[cfg(feature = "remote")] #[cfg(feature = "remote")]
async fn create_new_connection(uri: &str, host_override: &str) -> crate::error::Result<Connection> { async fn create_new_connection(uri: &str, host_override: &str) -> crate::error::Result<Connection> {
connect(uri) connect(uri)
@@ -154,7 +114,6 @@ async fn new_local_connection() -> Result<TestConnection> {
    Ok(TestConnection {
        uri: uri.to_string(),
        connection,
        is_remote: false,
        _temp_dir: Some(temp_dir),
        _process: None,
    })

View File

@@ -1,253 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::{
borrow::Cow,
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
},
time::Duration,
};
use arrow::buffer::NullBuffer;
use arrow_array::{
Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator, StringArray,
};
use arrow_schema::{DataType, Field, Schema};
use lancedb::{
embeddings::{EmbeddingDefinition, EmbeddingFunction, MaybeEmbedded, WithEmbeddings},
Error, Result,
};
#[derive(Debug)]
struct SlowMockEmbed {
name: String,
dim: usize,
delay_ms: u64,
call_count: Arc<AtomicUsize>,
}
impl SlowMockEmbed {
pub fn new(name: String, dim: usize, delay_ms: u64) -> Self {
Self {
name,
dim,
delay_ms,
call_count: Arc::new(AtomicUsize::new(0)),
}
}
pub fn get_call_count(&self) -> usize {
self.call_count.load(Ordering::SeqCst)
}
}
impl EmbeddingFunction for SlowMockEmbed {
fn name(&self) -> &str {
&self.name
}
fn source_type(&self) -> Result<Cow<'_, DataType>> {
Ok(Cow::Owned(DataType::Utf8))
}
fn dest_type(&self) -> Result<Cow<'_, DataType>> {
Ok(Cow::Owned(DataType::new_fixed_size_list(
DataType::Float32,
self.dim as _,
true,
)))
}
fn compute_source_embeddings(&self, source: Arc<dyn Array>) -> Result<Arc<dyn Array>> {
// Simulate slow embedding computation
std::thread::sleep(Duration::from_millis(self.delay_ms));
self.call_count.fetch_add(1, Ordering::SeqCst);
let len = source.len();
let inner = Arc::new(Float32Array::from(vec![Some(1.0); len * self.dim]));
let field = Field::new("item", inner.data_type().clone(), false);
let arr = FixedSizeListArray::new(
Arc::new(field),
self.dim as _,
inner,
Some(NullBuffer::new_valid(len)),
);
Ok(Arc::new(arr))
}
fn compute_query_embeddings(&self, _input: Arc<dyn Array>) -> Result<Arc<dyn Array>> {
unimplemented!()
}
}
fn create_test_batch() -> Result<RecordBatch> {
let schema = Arc::new(Schema::new(vec![Field::new("text", DataType::Utf8, false)]));
let text = StringArray::from(vec!["hello", "world"]);
RecordBatch::try_new(schema, vec![Arc::new(text)]).map_err(|e| Error::Runtime {
message: format!("Failed to create test batch: {}", e),
})
}
#[test]
fn test_single_embedding_fast_path() {
// Single embedding should execute without spawning threads
let batch = create_test_batch().unwrap();
let schema = batch.schema();
let embed = Arc::new(SlowMockEmbed::new("test".to_string(), 2, 10));
let embedding_def = EmbeddingDefinition::new("text", "test", Some("embedding"));
let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
let embeddings = vec![(embedding_def, embed.clone() as Arc<dyn EmbeddingFunction>)];
let mut with_embeddings = WithEmbeddings::new(reader, embeddings);
let result = with_embeddings.next().unwrap().unwrap();
assert!(result.column_by_name("embedding").is_some());
assert_eq!(embed.get_call_count(), 1);
}
#[test]
fn test_multiple_embeddings_parallel() {
// Multiple embeddings should execute in parallel
let batch = create_test_batch().unwrap();
let schema = batch.schema();
let embed1 = Arc::new(SlowMockEmbed::new("embed1".to_string(), 2, 100));
let embed2 = Arc::new(SlowMockEmbed::new("embed2".to_string(), 3, 100));
let embed3 = Arc::new(SlowMockEmbed::new("embed3".to_string(), 4, 100));
let def1 = EmbeddingDefinition::new("text", "embed1", Some("emb1"));
let def2 = EmbeddingDefinition::new("text", "embed2", Some("emb2"));
let def3 = EmbeddingDefinition::new("text", "embed3", Some("emb3"));
let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
let embeddings = vec![
(def1, embed1.clone() as Arc<dyn EmbeddingFunction>),
(def2, embed2.clone() as Arc<dyn EmbeddingFunction>),
(def3, embed3.clone() as Arc<dyn EmbeddingFunction>),
];
let mut with_embeddings = WithEmbeddings::new(reader, embeddings);
let result = with_embeddings.next().unwrap().unwrap();
// Verify all embedding columns are present
assert!(result.column_by_name("emb1").is_some());
assert!(result.column_by_name("emb2").is_some());
assert!(result.column_by_name("emb3").is_some());
// Verify all embeddings were computed
assert_eq!(embed1.get_call_count(), 1);
assert_eq!(embed2.get_call_count(), 1);
assert_eq!(embed3.get_call_count(), 1);
}
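Since SlowMockEmbed sleeps for delay_ms per call, wall-clock timing can also distinguish the parallel path from a sequential one. A sketch of such an assertion using the same helpers as the test above; the threshold is illustrative and could be flaky on heavily loaded machines:

#[test]
fn test_parallel_embeddings_timing() {
    use std::time::Instant;

    let batch = create_test_batch().unwrap();
    let schema = batch.schema();
    let embed1 = Arc::new(SlowMockEmbed::new("embed1".to_string(), 2, 100));
    let embed2 = Arc::new(SlowMockEmbed::new("embed2".to_string(), 3, 100));
    let def1 = EmbeddingDefinition::new("text", "embed1", Some("emb1"));
    let def2 = EmbeddingDefinition::new("text", "embed2", Some("emb2"));
    let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
    let embeddings = vec![
        (def1, embed1 as Arc<dyn EmbeddingFunction>),
        (def2, embed2 as Arc<dyn EmbeddingFunction>),
    ];
    let mut with_embeddings = WithEmbeddings::new(reader, embeddings);

    // Two 100 ms embeddings computed in parallel should finish well under
    // the ~200 ms a sequential pass would need.
    let start = Instant::now();
    let _ = with_embeddings.next().unwrap().unwrap();
    assert!(start.elapsed().as_millis() < 180);
}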
#[test]
fn test_embedding_column_order_preserved() {
// Verify that embedding columns are added in the same order as definitions
let batch = create_test_batch().unwrap();
let schema = batch.schema();
let embed1 = Arc::new(SlowMockEmbed::new("embed1".to_string(), 2, 10));
let embed2 = Arc::new(SlowMockEmbed::new("embed2".to_string(), 3, 10));
let embed3 = Arc::new(SlowMockEmbed::new("embed3".to_string(), 4, 10));
let def1 = EmbeddingDefinition::new("text", "embed1", Some("first"));
let def2 = EmbeddingDefinition::new("text", "embed2", Some("second"));
let def3 = EmbeddingDefinition::new("text", "embed3", Some("third"));
let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
let embeddings = vec![
(def1, embed1 as Arc<dyn EmbeddingFunction>),
(def2, embed2 as Arc<dyn EmbeddingFunction>),
(def3, embed3 as Arc<dyn EmbeddingFunction>),
];
let mut with_embeddings = WithEmbeddings::new(reader, embeddings);
let result = with_embeddings.next().unwrap().unwrap();
let result_schema = result.schema();
// Original column is first
assert_eq!(result_schema.field(0).name(), "text");
// Embedding columns follow in order
assert_eq!(result_schema.field(1).name(), "first");
assert_eq!(result_schema.field(2).name(), "second");
assert_eq!(result_schema.field(3).name(), "third");
}
#[test]
fn test_embedding_error_propagation() {
// Test that errors from embedding computation are properly propagated
#[derive(Debug)]
struct FailingEmbed {
name: String,
}
impl EmbeddingFunction for FailingEmbed {
fn name(&self) -> &str {
&self.name
}
fn source_type(&self) -> Result<Cow<'_, DataType>> {
Ok(Cow::Owned(DataType::Utf8))
}
fn dest_type(&self) -> Result<Cow<'_, DataType>> {
Ok(Cow::Owned(DataType::new_fixed_size_list(
DataType::Float32,
2,
true,
)))
}
fn compute_source_embeddings(&self, _source: Arc<dyn Array>) -> Result<Arc<dyn Array>> {
Err(Error::Runtime {
message: "Intentional failure".to_string(),
})
}
fn compute_query_embeddings(&self, _input: Arc<dyn Array>) -> Result<Arc<dyn Array>> {
unimplemented!()
}
}
let batch = create_test_batch().unwrap();
let schema = batch.schema();
let embed = Arc::new(FailingEmbed {
name: "failing".to_string(),
});
let def = EmbeddingDefinition::new("text", "failing", Some("emb"));
let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
let embeddings = vec![(def, embed as Arc<dyn EmbeddingFunction>)];
let mut with_embeddings = WithEmbeddings::new(reader, embeddings);
let result = with_embeddings.next().unwrap();
assert!(result.is_err());
let err_msg = format!("{}", result.err().unwrap());
assert!(err_msg.contains("Intentional failure"));
}
#[test]
fn test_maybe_embedded_with_no_embeddings() {
// Test that MaybeEmbedded::No variant works correctly
let batch = create_test_batch().unwrap();
let schema = batch.schema();
let reader = RecordBatchIterator::new(vec![Ok(batch.clone())], schema.clone());
let table_def = lancedb::table::TableDefinition {
schema: schema.clone(),
column_definitions: vec![lancedb::table::ColumnDefinition {
kind: lancedb::table::ColumnKind::Physical,
}],
};
let mut maybe_embedded = MaybeEmbedded::try_new(reader, table_def, None).unwrap();
let result = maybe_embedded.next().unwrap().unwrap();
assert_eq!(result.num_columns(), 1);
assert_eq!(result.column(0).as_ref(), batch.column(0).as_ref());
}