Bump version: 0.29.0-beta.0 → 0.29.0

Bump version: 0.28.0-beta.0 → 0.29.0-beta.0
ci: fix python version for latest release (#2989 )
2026-03-26 10:30:40 +00:00 · 2026-02-06 18:07:49 +00:00 · 2026-02-06 18:07:48 +00:00 · 2026-02-06 10:07:03 -08:00 · 2026-02-06 09:43:44 -08:00 · 2026-02-05 17:39:32 -08:00
103 changed files with 5546 additions and 1687 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.23.0-beta.2"
+current_version = "0.25.0-beta.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/workflows/build_linux_wheel/action.yml
+++ b/.github/workflows/build_linux_wheel/action.yml
@@ -3,7 +3,7 @@ name: build-linux-wheel
 description: "Build a manylinux wheel for lance"
 inputs:
  python-minor-version:
-    description: "8, 9, 10, 11, 12"
+    description: "10, 11, 12, 13"
    required: true
  args:
    description: "--release"
--- a/.github/workflows/build_mac_wheel/action.yml
+++ b/.github/workflows/build_mac_wheel/action.yml
@@ -3,7 +3,7 @@ name: build_wheel
 description: "Build a lance wheel"
 inputs:
  python-minor-version:
-    description: "8, 9, 10, 11"
+    description: "10, 11, 12, 13"
    required: true
  args:
    description: "--release"
--- a/.github/workflows/build_windows_wheel/action.yml
+++ b/.github/workflows/build_windows_wheel/action.yml
@@ -3,7 +3,7 @@ name: build_wheel
 description: "Build a lance wheel"
 inputs:
  python-minor-version:
-    description: "8, 9, 10, 11"
+    description: "10, 11, 12, 13, 14"
    required: true
  args:
    description: "--release"
--- a/.github/workflows/cargo-publish.yml
+++ b/.github/workflows/cargo-publish.yml
@@ -42,7 +42,7 @@ jobs:
    name: Report Workflow Failure
    runs-on: ubuntu-latest
    needs: [build]
-    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    if: always() && failure() && startsWith(github.ref, 'refs/tags/v')
    permissions:
      contents: read
      issues: write
--- a/.github/workflows/codex-update-lance-dependency.yml
+++ b/.github/workflows/codex-update-lance-dependency.yml
@@ -75,20 +75,28 @@ jobs:
          VERSION="${VERSION#v}"
          BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"

+          # Use "chore" for beta/rc versions, "feat" for stable releases
+          if [[ "${VERSION}" == *beta* ]] || [[ "${VERSION}" == *rc* ]]; then
+            COMMIT_TYPE="chore"
+          else
+            COMMIT_TYPE="feat"
+          fi
+
          cat <<EOF >/tmp/codex-prompt.txt
          You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.

          Follow these steps exactly:
-          1. Use script "ci/set_lance_version.py" to update Lance dependencies. The script already refreshes Cargo metadata, so allow it to finish even if it takes time.
-          2. Run "cargo clippy --workspace --tests --all-features -- -D warnings". If diagnostics appear, fix them yourself and rerun clippy until it exits cleanly. Do not skip any warnings.
-          3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
-          4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
-          5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
-          6. Stage all relevant files with "git add -A". Commit using the message "chore: update lance dependency to v${VERSION}".
-          7. Push the branch to origin. If the branch already exists, force-push your changes.
-          8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
-          9. Create a pull request targeting "main" with title "chore: update lance dependency to v${VERSION}". In the body, summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}).
-          10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
+          1. Use script "ci/set_lance_version.py" to update Lance Rust dependencies. The script already refreshes Cargo metadata, so allow it to finish even if it takes time.
+          2. Update the Java lance-core dependency version in "java/pom.xml": change the "<lance-core.version>...</lance-core.version>" property to "${VERSION}".
+          3. Run "cargo clippy --workspace --tests --all-features -- -D warnings". If diagnostics appear, fix them yourself and rerun clippy until it exits cleanly. Do not skip any warnings.
+          4. After clippy succeeds, run "cargo fmt --all" to format the workspace.
+          5. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
+          6. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
+          7. Stage all relevant files with "git add -A". Commit using the message "${COMMIT_TYPE}: update lance dependency to v${VERSION}".
+          8. Push the branch to origin. If the remote branch already exists, delete it first with "gh api -X DELETE repos/lancedb/lancedb/git/refs/heads/${BRANCH_NAME}" then push with "git push origin ${BRANCH_NAME}". Do NOT use "git push --force" or "git push -f".
+          9. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
+          10. Create a pull request targeting "main" with title "${COMMIT_TYPE}: update lance dependency to v${VERSION}". First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'EOF' > /tmp/pr-body.md). The body should summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). Then run "gh pr create --body-file /tmp/pr-body.md".
+          11. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.

          Constraints:
          - Use bash commands; avoid modifying GitHub workflow files other than through the scripted task above.
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -41,7 +41,7 @@ jobs:
          sudo apt install -y protobuf-compiler libssl-dev
          rustup update && rustup default
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
          python-version: "3.10"
          cache: "pip"
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -8,6 +8,7 @@ on:
    paths:
      - Cargo.toml
      - nodejs/**
+      - docs/src/js/**
      - .github/workflows/nodejs.yml
      - docker-compose.yml

--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -348,7 +348,6 @@ jobs:
        run: find npm
      - name: Publish
        env:
-          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
          DRY_RUN: ${{ !startsWith(github.ref, 'refs/tags/v') }}
        run: |
          ARGS="--access public"
@@ -363,7 +362,7 @@ jobs:
    name: Report Workflow Failure
    runs-on: ubuntu-latest
    needs: [build-lancedb, test-lancedb, publish]
-    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    if: always() && failure() && startsWith(github.ref, 'refs/tags/v')
    permissions:
      contents: read
      issues: write
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -44,12 +44,12 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v6
        with:
-          python-version: 3.8
+          python-version: "3.10"
      - uses: ./.github/workflows/build_linux_wheel
        with:
-          python-minor-version: 8
+          python-minor-version: 10
          args: "--release --strip ${{ matrix.config.extra_args }}"
          arm-build: ${{ matrix.config.platform == 'aarch64' }}
          manylinux: ${{ matrix.config.manylinux }}
@@ -74,12 +74,12 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v6
        with:
-          python-version: 3.12
+          python-version: "3.13"
      - uses: ./.github/workflows/build_mac_wheel
        with:
-          python-minor-version: 8
+          python-minor-version: 10
          args: "--release --strip --target ${{ matrix.config.target }} --features fp16kernels"
      - uses: ./.github/workflows/upload_wheel
        if: startsWith(github.ref, 'refs/tags/python-v')
@@ -95,12 +95,12 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v6
        with:
-          python-version: 3.12
+          python-version: "3.13"
      - uses: ./.github/workflows/build_windows_wheel
        with:
-          python-minor-version: 8
+          python-minor-version: 10
          args: "--release --strip"
          vcpkg_token: ${{ secrets.VCPKG_GITHUB_PACKAGES }}
      - uses: ./.github/workflows/upload_wheel
@@ -181,7 +181,7 @@ jobs:
    permissions:
      contents: read
      issues: write
-    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    if: always() && failure() && startsWith(github.ref, 'refs/tags/python-v')
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/create-failure-issue
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -36,9 +36,9 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
-          python-version: "3.12"
+          python-version: "3.13"
      - name: Install ruff
        run: |
          pip install ruff==0.9.9
@@ -61,9 +61,9 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
-          python-version: "3.12"
+          python-version: "3.13"
      - name: Install protobuf compiler
        run: |
          sudo apt update
@@ -90,9 +90,9 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
-          python-version: "3.12"
+          python-version: "3.13"
          cache: "pip"
      - name: Install protobuf
        run: |
@@ -110,7 +110,7 @@ jobs:
    timeout-minutes: 30
    strategy:
      matrix:
-        python-minor-version: ["9", "12"]
+        python-minor-version: ["10", "13"]
    runs-on: "ubuntu-24.04"
    defaults:
      run:
@@ -126,7 +126,7 @@ jobs:
          sudo apt update
          sudo apt install -y protobuf-compiler
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
          python-version: 3.${{ matrix.python-minor-version }}
      - uses: ./.github/workflows/build_linux_wheel
@@ -156,9 +156,9 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
-          python-version: "3.12"
+          python-version: "3.13"
      - uses: ./.github/workflows/build_mac_wheel
        with:
          args: --profile ci
@@ -185,9 +185,9 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
-          python-version: "3.12"
+          python-version: "3.13"
      - uses: ./.github/workflows/build_windows_wheel
        with:
          args: --profile ci
@@ -212,9 +212,9 @@ jobs:
          sudo apt update
          sudo apt install -y protobuf-compiler
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
-          python-version: 3.9
+          python-version: "3.10"
      - name: Install lancedb
        run: |
          pip install "pydantic<2"
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -48,6 +48,8 @@ jobs:
        run: cargo fmt --all -- --check
      - name: Run clippy
        run: cargo clippy --profile ci --workspace --tests --all-features -- -D warnings
+      - name: Run clippy (without remote feature)
+        run: cargo clippy --profile ci --workspace --tests -- -D warnings

  build-no-lock:
    runs-on: ubuntu-24.04
@@ -167,13 +169,13 @@ jobs:
      - name: Build
        run: |
          $env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
-          cargo build --profile ci --features remote --tests --locked --target ${{ matrix.target }}
+          cargo build --profile ci --features aws,remote --tests --locked --target ${{ matrix.target }}
      - name: Run tests
        # Can only run tests when target matches host
        if: ${{ matrix.target == 'x86_64-pc-windows-msvc' }}
        run: |
          $env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
-          cargo test --profile ci --features remote --locked
+          cargo test --profile ci --features aws,remote --locked

  msrv:
    # Check the minimum supported Rust version
@@ -181,7 +183,7 @@ jobs:
    runs-on: ubuntu-24.04
    strategy:
      matrix:
-        msrv: ["1.78.0"] # This should match up with rust-version in Cargo.toml
+        msrv: ["1.88.0"] # This should match up with rust-version in Cargo.toml
    env:
      # Need up-to-date compilers for kernels
      CC: clang-18
@@ -212,4 +214,6 @@ jobs:
          cargo update -p aws-sdk-sts --precise 1.51.0
          cargo update -p home --precise 0.5.9
      - name: cargo +${{ matrix.msrv }} check
+        env:
+          RUSTUP_TOOLCHAIN: ${{ matrix.msrv }}
        run: cargo check --profile ci --workspace --tests --benches --all-features
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,42 +12,43 @@ repository = "https://github.com/lancedb/lancedb"
 description = "Serverless, low-latency vector database for AI applications"
 keywords = ["lancedb", "lance", "database", "vector", "search"]
 categories = ["database-implementations"]
-rust-version = "1.78.0"
+rust-version = "1.88.0"

 [workspace.dependencies]
-lance = { "version" = "=1.0.0", default-features = false, "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
-lance-core = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
-lance-datagen = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
-lance-file = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
-lance-io = { "version" = "=1.0.0", default-features = false, "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
-lance-index = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
-lance-linalg = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace-impls = { "version" = "=1.0.0", default-features = false, "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
-lance-table = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
-lance-testing = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
-lance-datafusion = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
-lance-encoding = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
-lance-arrow = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
+lance = { "version" = "=2.0.0", default-features = false }
+lance-core = "=2.0.0"
+lance-datagen = "=2.0.0"
+lance-file = "=2.0.0"
+lance-io = { "version" = "=2.0.0", default-features = false }
+lance-index = "=2.0.0"
+lance-linalg = "=2.0.0"
+lance-namespace = "=2.0.0"
+lance-namespace-impls = { "version" = "=2.0.0", default-features = false }
+lance-table = "=2.0.0"
+lance-testing = "=2.0.0"
+lance-datafusion = "=2.0.0"
+lance-encoding = "=2.0.0"
+lance-arrow = "=2.0.0"
 ahash = "0.8"
 # Note that this one does not include pyarrow
-arrow = { version = "56.2", optional = false }
-arrow-array = "56.2"
-arrow-data = "56.2"
-arrow-ipc = "56.2"
-arrow-ord = "56.2"
-arrow-schema = "56.2"
-arrow-select = "56.2"
-arrow-cast = "56.2"
+arrow = { version = "57.2", optional = false }
+arrow-array = "57.2"
+arrow-data = "57.2"
+arrow-ipc = "57.2"
+arrow-ord = "57.2"
+arrow-schema = "57.2"
+arrow-select = "57.2"
+arrow-cast = "57.2"
 async-trait = "0"
-datafusion = { version = "50.1", default-features = false }
-datafusion-catalog = "50.1"
-datafusion-common = { version = "50.1", default-features = false }
-datafusion-execution = "50.1"
-datafusion-expr = "50.1"
-datafusion-physical-plan = "50.1"
+datafusion = { version = "51.0", default-features = false }
+datafusion-catalog = "51.0"
+datafusion-common = { version = "51.0", default-features = false }
+datafusion-execution = "51.0"
+datafusion-expr = "51.0"
+datafusion-physical-plan = "51.0"
+datafusion-physical-expr = "51.0"
 env_logger = "0.11"
-half = { "version" = "2.6.0", default-features = false, features = [
+half = { "version" = "2.7.1", default-features = false, features = [
    "num-traits",
 ] }
 futures = "0"
--- a/README.md
+++ b/README.md
@@ -66,7 +66,7 @@ Follow the [Quickstart](https://lancedb.com/docs/quickstart/) doc to set up Lanc
 | Python SDK | https://lancedb.github.io/lancedb/python/python/ |
 | Typescript SDK | https://lancedb.github.io/lancedb/js/globals/ |
 | Rust SDK | https://docs.rs/lancedb/latest/lancedb/index.html |
-| REST API | https://docs.lancedb.com/api-reference/introduction |
+| REST API | https://docs.lancedb.com/api-reference/rest |

 ## **Join Us and Contribute**

--- a/ci/run_with_test_connection.sh
+++ b/ci/run_with_test_connection.sh
@@ -16,7 +16,7 @@ check_command_exists() {
 }

 if [[ ! -e ./lancedb ]]; then
-    if [[ -v SOPHON_READ_TOKEN ]]; then
+    if [[ x${SOPHON_READ_TOKEN} != "x" ]]; then
        INPUT="lancedb-linux-x64"
        gh release \
            --repo lancedb/lancedb \
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -11,7 +11,7 @@ watch:
 theme:
  name: "material"
  logo: assets/logo.png
-  favicon: assets/logo.png
+  favicon: assets/favicon.ico
  palette:
    # Palette toggle for light mode
    - scheme: lancedb
@@ -32,8 +32,6 @@ theme:
    - content.tooltips
    - toc.follow
    - navigation.top
-    - navigation.tabs
-    - navigation.tabs.sticky
    - navigation.footer
    - navigation.tracking
    - navigation.instant
@@ -115,12 +113,13 @@ markdown_extensions:
      emoji_index: !!python/name:material.extensions.emoji.twemoji
      emoji_generator: !!python/name:material.extensions.emoji.to_svg
  - markdown.extensions.toc:
-      baselevel: 1
-      permalink: ""
+      toc_depth: 3
+      permalink: true
+      permalink_title: Anchor link to this section

 nav:
-  - API reference:
-      - Overview: index.md
+  - Documentation:
+      - SDK Reference: index.md
      - Python: python/python.md
      - Javascript/TypeScript: js/globals.md
      - Java: java/java.md
--- a/docs/src/assets/favicon.ico
+++ b/docs/src/assets/favicon.ico
--- a/docs/src/embeddings/available_embedding_models/multimodal_embedding_functions/voyageai_multimodal_embedding.md
+++ b/docs/src/embeddings/available_embedding_models/multimodal_embedding_functions/voyageai_multimodal_embedding.md
@@ -0,0 +1,111 @@
+# VoyageAI Embeddings : Multimodal
+
+VoyageAI embeddings can also be used to embed both text and image data. Only some of the models support image data; you can check the list
+at [https://docs.voyageai.com/docs/multimodal-embeddings](https://docs.voyageai.com/docs/multimodal-embeddings).
+
+Supported multimodal models:
+
+- `voyage-multimodal-3` - 1024 dimensions (text + images)
+- `voyage-multimodal-3.5` - Flexible dimensions (256, 512, 1024 default, 2048). Supports text, images, and video.
+
+### Video Support (voyage-multimodal-3.5)
+
+The `voyage-multimodal-3.5` model supports video input through:
+- Video URLs (`.mp4`, `.webm`, `.mov`, `.avi`, `.mkv`, `.m4v`, `.gif`)
+- Video file paths
+
+Constraints: Max 20MB video size.
+
+Supported parameters (to be passed in `create` method) are:
+
+| Parameter | Type | Default Value           | Description                               |
+|---|---|-------------------------|-------------------------------------------|
+| `name` | `str` | `"voyage-multimodal-3"` | The model ID of the VoyageAI model to use |
+| `output_dimension` | `int` | `None` | Output dimension for voyage-multimodal-3.5. Valid: 256, 512, 1024, 2048 |
+
+Usage Example:
+
+```python
+import base64
+import os
+from io import BytesIO
+
+import requests
+import lancedb
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.embeddings import get_registry
+import pandas as pd
+
+os.environ['VOYAGE_API_KEY'] = 'YOUR_VOYAGE_API_KEY'
+
+db = lancedb.connect(".lancedb")
+func = get_registry().get("voyageai").create(name="voyage-multimodal-3")
+
+
+def image_to_base64(image_bytes: bytes):
+    buffered = BytesIO(image_bytes)
+    img_str = base64.b64encode(buffered.getvalue())
+    return img_str.decode("utf-8")
+
+
+class Images(LanceModel):
+    label: str
+    image_uri: str = func.SourceField()  # image uri as the source
+    image_bytes: str = func.SourceField()  # image bytes base64 encoded as the source
+    vector: Vector(func.ndims()) = func.VectorField()  # vector column
+    vec_from_bytes: Vector(func.ndims()) = func.VectorField()  # Another vector column
+
+
+if "images" in db.table_names():
+    db.drop_table("images")
+table = db.create_table("images", schema=Images)
+labels = ["cat", "cat", "dog", "dog", "horse", "horse"]
+uris = [
+    "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
+    "http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
+    "http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
+    "http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
+    "http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
+    "http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
+]
+# get each uri as bytes
+images_bytes = [image_to_base64(requests.get(uri).content) for uri in uris]
+table.add(
+    pd.DataFrame({"label": labels, "image_uri": uris, "image_bytes": images_bytes})
+)
+```
+Now we can search using text from both the default vector column and the custom vector column:
+```python
+
+# text search
+actual = table.search("man's best friend", "vec_from_bytes").limit(1).to_pydantic(Images)[0]
+print(actual.label) # prints "dog"
+
+frombytes = (
+    table.search("man's best friend", vector_column_name="vec_from_bytes")
+    .limit(1)
+    .to_pydantic(Images)[0]
+)
+print(frombytes.label)
+
+```
+
+Because we're using a multi-modal embedding function, we can also search using images
+
+```python
+# image search (requires Pillow: `from PIL import Image`)
+query_image_uri = "http://farm1.staticflickr.com/200/467715466_ed4a31801f_z.jpg"
+image_bytes = requests.get(query_image_uri).content
+query_image = Image.open(BytesIO(image_bytes))
+actual = table.search(query_image, "vec_from_bytes").limit(1).to_pydantic(Images)[0]
+print(actual.label == "dog")
+
+# image search using a custom vector column
+other = (
+    table.search(query_image, vector_column_name="vec_from_bytes")
+    .limit(1)
+    .to_pydantic(Images)[0]
+)
+print(other.label)
+
+```
--- a/docs/src/embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
+++ b/docs/src/embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
@@ -0,0 +1,62 @@
+# VoyageAI Embeddings
+
+Voyage AI provides cutting-edge embedding models and rerankers.
+
+
+Using the VoyageAI API requires the `voyageai` package, which can be installed using `pip install voyageai`. Voyage AI embeddings are used to generate embeddings for text data. The embeddings can be used for various tasks like semantic search, clustering, and classification.
+You also need to set the `VOYAGE_API_KEY` environment variable to use the VoyageAI API.
+
+Supported models are:
+
+**Voyage-4 Series (Latest)**
+
+- voyage-4 (1024 dims, general-purpose and multilingual retrieval, 320K batch tokens)
+- voyage-4-lite (1024 dims, optimized for latency and cost, 1M batch tokens)
+- voyage-4-large (1024 dims, best retrieval quality, 120K batch tokens)
+
+**Voyage-3 Series**
+
+- voyage-3
+- voyage-3-lite
+
+**Domain-Specific Models**
+
+- voyage-finance-2
+- voyage-multilingual-2
+- voyage-law-2
+- voyage-code-2
+
+
+Supported parameters (to be passed in `create` method) are:
+
+| Parameter | Type | Default Value | Description |
+|---|---|--------|---------|
+| `name` | `str` | `None` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-4, voyage-4-lite, voyage-4-large, voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
+| `input_type` | `str` | `None` | Type of the input text. Defaults to None. Other options: query, document. |
+| `truncation` | `bool` | `True` | Whether to truncate the input texts to fit within the context length. |
+
+
+Usage Example:
+    
+```python
+    import lancedb
+    from lancedb.pydantic import LanceModel, Vector
+    from lancedb.embeddings import EmbeddingFunctionRegistry
+
+    voyageai = (EmbeddingFunctionRegistry
+        .get_instance()
+        .get("voyageai")
+        .create(name="voyage-3"))
+
+    class TextModel(LanceModel):
+        text: str = voyageai.SourceField()
+        vector: Vector(voyageai.ndims()) =  voyageai.VectorField()
+
+    data = [ { "text": "hello world" },
+            { "text": "goodbye world" }]
+
+    db = lancedb.connect("~/.lancedb")
+    tbl = db.create_table("test", schema=TextModel, mode="overwrite")
+
+    tbl.add(data)
+```
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -1,8 +1,12 @@
-# API Reference
+# SDK Reference

-This page contains the API reference for the SDKs supported by the LanceDB team.
+This site contains the API reference for the client SDKs supported by [LanceDB](https://lancedb.com).

 - [Python](python/python.md)
 - [JavaScript/TypeScript](js/globals.md)
 - [Java](java/java.md)
- [Rust](https://docs.rs/lancedb/latest/lancedb/index.html)
+- [Rust](https://docs.rs/lancedb/latest/lancedb/index.html)
+
+!!! info "LanceDB Documentation"
+
+    If you're looking for the full documentation of LanceDB, visit [docs.lancedb.com](https://docs.lancedb.com).
--- a/docs/src/java/java.md
+++ b/docs/src/java/java.md
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
 <dependency>
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-core</artifactId>
-    <version>0.23.0-beta.2</version>
+    <version>0.25.0-beta.0</version>
 </dependency>
 ```

--- a/docs/src/js/classes/Table.md
+++ b/docs/src/js/classes/Table.md
@@ -367,6 +367,27 @@ Use [Table.listIndices](Table.md#listindices) to find the names of the indices.

 ***

+### initialStorageOptions()
+
+```ts
+abstract initialStorageOptions(): Promise<undefined | null | Record<string, string>>
+```
+
+Get the initial storage options that were passed in when opening this table.
+
+For dynamically refreshed options (e.g., credential vending), use
+[Table.latestStorageOptions](Table.md#lateststorageoptions).
+
+Warning: This is an internal API and the return value is subject to change.
+
+#### Returns
+
+`Promise`&lt;`undefined` \| `null` \| `Record`&lt;`string`, `string`&gt;&gt;
+
+The storage options, or undefined if no storage options were configured.
+
+***
+
 ### isOpen()

 ```ts
@@ -381,6 +402,28 @@ Return true if the table has not been closed

 ***

+### latestStorageOptions()
+
+```ts
+abstract latestStorageOptions(): Promise<undefined | null | Record<string, string>>
+```
+
+Get the latest storage options, refreshing from provider if configured.
+
+This method is useful for credential vending scenarios where storage options
+may be refreshed dynamically. If no dynamic provider is configured, this
+returns the initial static options.
+
+Warning: This is an internal API and the return value is subject to change.
+
+#### Returns
+
+`Promise`&lt;`undefined` \| `null` \| `Record`&lt;`string`, `string`&gt;&gt;
+
+The storage options, or undefined if no storage options were configured.
+
+***
+
 ### listIndices()

 ```ts
@@ -705,8 +748,11 @@ Create a query that returns a subset of the rows in the table.

 #### Parameters

-* **rowIds**: `number`[]
+* **rowIds**: readonly (`number` \| `bigint`)[]
    The row ids of the rows to return.
+    Row ids returned by `withRowId()` are `bigint`, so `bigint[]` is supported.
+    For convenience / backwards compatibility, `number[]` is also accepted (for
+    small row ids that fit in a safe integer).

 #### Returns

--- a/docs/src/styles/extra.css
+++ b/docs/src/styles/extra.css
@@ -85,17 +85,26 @@

 /* Header gradient (only header area) */
 .md-header {
-  background: linear-gradient(90deg, #3B2E58 0%, #F0B7C1 45%, #E55A2B 100%);
+  background: linear-gradient(90deg, #e4d8f8 0%, #F0B7C1 45%, #E55A2B 100%);
  box-shadow: inset 0 1px 0 rgba(255,255,255,0.08), 0 1px 0 rgba(0,0,0,0.08);
 }

+/* Improve brand title contrast on the lavender side */
+.md-header__title,
+.md-header__topic,
+.md-header__title .md-ellipsis,
+.md-header__topic .md-ellipsis {
+  color: #2b1b3a;
+  text-shadow: 0 1px 0 rgba(255, 255, 255, 0.25);
+}
+
 /* Same colors as header for tabs (that hold the text) */
 .md-tabs {
-  background: linear-gradient(90deg, #3B2E58 0%, #F0B7C1 45%, #E55A2B 100%);
+  background: linear-gradient(90deg, #e4d8f8 0%, #F0B7C1 45%, #E55A2B 100%);
 }

 /* Dark scheme variant */
 [data-md-color-scheme="slate"] .md-header,
 [data-md-color-scheme="slate"] .md-tabs {
-  background: linear-gradient(90deg, #3B2E58 0%, #F0B7C1 45%, #E55A2B 100%);
+  background: linear-gradient(90deg, #e4d8f8 0%, #F0B7C1 45%, #E55A2B 100%);
 }
--- a/java/lancedb-core/pom.xml
+++ b/java/lancedb-core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
      <groupId>com.lancedb</groupId>
      <artifactId>lancedb-parent</artifactId>
-      <version>0.23.0-beta.2</version>
+      <version>0.25.0-beta.0</version>
      <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.23.0-beta.2</version>
+    <version>0.25.0-beta.0</version>
    <packaging>pom</packaging>
    <name>${project.artifactId}</name>
    <description>LanceDB Java SDK Parent POM</description>
@@ -28,7 +28,7 @@
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <arrow.version>15.0.0</arrow.version>
-        <lance-core.version>1.0.0-rc.2</lance-core.version>
+        <lance-core.version>2.0.0</lance-core.version>
        <spotless.skip>false</spotless.skip>
        <spotless.version>2.30.0</spotless.version>
        <spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
@@ -292,11 +292,12 @@
                    <plugin>
                        <groupId>org.sonatype.central</groupId>
                        <artifactId>central-publishing-maven-plugin</artifactId>
-                        <version>0.4.0</version>
+                        <version>0.8.0</version>
                        <extensions>true</extensions>
                        <configuration>
                            <publishingServerId>ossrh</publishingServerId>
                            <tokenAuth>true</tokenAuth>
+                            <autoPublish>true</autoPublish>
                        </configuration>
                    </plugin>
                    <plugin>
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.23.0-beta.2"
+version = "0.25.0-beta.0"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
@@ -36,6 +36,6 @@ aws-lc-rs = "=1.13.0"
 napi-build = "2.1"

 [features]
-default = ["remote", "lancedb/default"]
+default = ["remote", "lancedb/aws", "lancedb/gcs", "lancedb/azure", "lancedb/dynamodb", "lancedb/oss", "lancedb/huggingface"]
 fp16kernels = ["lancedb/fp16kernels"]
 remote = ["lancedb/remote"]
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -312,6 +312,66 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      expect(res.getChild("id")?.toJSON()).toEqual([2, 3]);
    });

+    it("should support takeRowIds with bigint array", async () => {
+      await table.add([{ id: 1 }, { id: 2 }, { id: 3 }]);
+      // Get actual row IDs using withRowId()
+      const allRows = await table.query().withRowId().toArray();
+      const rowIds = allRows.map((row) => row._rowid) as bigint[];
+
+      // Verify row IDs are bigint
+      expect(typeof rowIds[0]).toBe("bigint");
+
+      // Use takeRowIds with bigint array (the main use case from issue #2722)
+      const res = await table.takeRowIds([rowIds[0], rowIds[2]]).toArray();
+      expect(res.map((r) => r.id)).toEqual([1, 3]);
+    });
+
+    it("should support takeRowIds with number array for backwards compatibility", async () => {
+      await table.add([{ id: 1 }, { id: 2 }, { id: 3 }]);
+      // Small row IDs can be passed as numbers
+      const res = await table.takeRowIds([0, 2]).toArray();
+      expect(res.map((r) => r.id)).toEqual([1, 3]);
+    });
+
+    it("should support takeRowIds with mixed bigint and number array", async () => {
+      await table.add([{ id: 1 }, { id: 2 }, { id: 3 }]);
+      // Mixed array of bigint and number
+      const res = await table.takeRowIds([0n, 1, 2n]).toArray();
+      expect(res.map((r) => r.id)).toEqual([1, 2, 3]);
+    });
+
+    it("should throw for non-integer number in takeRowIds", () => {
+      expect(() => table.takeRowIds([1.5])).toThrow(
+        "Row id must be an integer (or bigint)",
+      );
+      expect(() => table.takeRowIds([0, 1.1, 2])).toThrow(
+        "Row id must be an integer (or bigint)",
+      );
+    });
+
+    it("should throw for negative number in takeRowIds", () => {
+      expect(() => table.takeRowIds([-1])).toThrow("Row id cannot be negative");
+      expect(() => table.takeRowIds([0, -5, 2])).toThrow(
+        "Row id cannot be negative",
+      );
+    });
+
+    it("should throw for unsafe large number in takeRowIds", () => {
+      // Number.MAX_SAFE_INTEGER + 1 is not safe
+      const unsafeNumber = Number.MAX_SAFE_INTEGER + 1;
+      expect(() => table.takeRowIds([unsafeNumber])).toThrow(
+        "Row id is too large for number; use bigint instead",
+      );
+    });
+
+    it("should reject negative bigint in takeRowIds", async () => {
+      await table.add([{ id: 1 }]);
+      // Negative bigint should be rejected by the Rust layer
+      expect(() => {
+        table.takeRowIds([-1n]);
+      }).toThrow("Row id cannot be negative");
+    });
+
    it("should return the table as an instance of an arrow table", async () => {
      const arrowTbl = await table.toArrow();
      expect(arrowTbl).toBeInstanceOf(ArrowTable);
@@ -1520,9 +1580,9 @@ describe("when optimizing a dataset", () => {

  it("delete unverified", async () => {
    const version = await table.version();
-    const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
-      version - 1
-    }.manifest`;
+    const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${String(
+      18446744073709551615n - (BigInt(version) - 1n),
+    ).padStart(20, "0")}.manifest`;
    fs.rmSync(versionFile);

    let stats = await table.optimize({ deleteUnverified: false });
--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -347,9 +347,13 @@ export abstract class Table {
  /**
   * Create a query that returns a subset of the rows in the table.
   * @param rowIds The row ids of the rows to return.
+   *
+   * Row ids returned by `withRowId()` are `bigint`, so `bigint[]` is supported.
+   * For convenience / backwards compatibility, `number[]` is also accepted (for
+   * small row ids that fit in a safe integer).
   * @returns A builder that can be used to parameterize the query.
   */
-  abstract takeRowIds(rowIds: number[]): TakeQuery;
+  abstract takeRowIds(rowIds: readonly (bigint | number)[]): TakeQuery;

  /**
   * Create a search query to find the nearest neighbors
@@ -538,6 +542,35 @@ export abstract class Table {
   *
   */
  abstract stats(): Promise<TableStatistics>;
+
+  /**
+   * Get the initial storage options that were passed in when opening this table.
+   *
+   * For dynamically refreshed options (e.g., credential vending), use
+   * {@link Table.latestStorageOptions}.
+   *
+   * Warning: This is an internal API and the return value is subject to change.
+   *
+   * @returns The storage options, or null/undefined if no storage options were configured.
+   */
+  abstract initialStorageOptions(): Promise<
+    Record<string, string> | null | undefined
+  >;
+
+  /**
+   * Get the latest storage options, refreshing from provider if configured.
+   *
+   * This method is useful for credential vending scenarios where storage options
+   * may be refreshed dynamically. If no dynamic provider is configured, this
+   * returns the initial static options.
+   *
+   * Warning: This is an internal API and the return value is subject to change.
+   *
+   * @returns The storage options, or null/undefined if no storage options were configured.
+   */
+  abstract latestStorageOptions(): Promise<
+    Record<string, string> | null | undefined
+  >;
 }

 export class LocalTable extends Table {
@@ -686,8 +719,24 @@ export class LocalTable extends Table {
    return new TakeQuery(this.inner.takeOffsets(offsets));
  }

-  takeRowIds(rowIds: number[]): TakeQuery {
-    return new TakeQuery(this.inner.takeRowIds(rowIds));
+  takeRowIds(rowIds: readonly (bigint | number)[]): TakeQuery {
+    const ids = rowIds.map((id) => {
+      if (typeof id === "bigint") {
+        return id;
+      }
+      if (!Number.isInteger(id)) {
+        throw new Error("Row id must be an integer (or bigint)");
+      }
+      if (id < 0) {
+        throw new Error("Row id cannot be negative");
+      }
+      if (!Number.isSafeInteger(id)) {
+        throw new Error("Row id is too large for number; use bigint instead");
+      }
+      return BigInt(id);
+    });
+
+    return new TakeQuery(this.inner.takeRowIds(ids));
  }

  query(): Query {
@@ -858,6 +907,18 @@ export class LocalTable extends Table {
    return await this.inner.stats();
  }

+  async initialStorageOptions(): Promise<
+    Record<string, string> | null | undefined
+  > {
+    return await this.inner.initialStorageOptions();
+  }
+
+  async latestStorageOptions(): Promise<
+    Record<string, string> | null | undefined
+  > {
+    return await this.inner.latestStorageOptions();
+  }
+
  mergeInsert(on: string | string[]): MergeInsertBuilder {
    on = Array.isArray(on) ? on : [on];
    return new MergeInsertBuilder(this.inner.mergeInsert(on), this.schema());
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.23.0-beta.2",
+	"version": "0.25.0-beta.0",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/darwin-x64/README.md
+++ b/nodejs/npm/darwin-x64/README.md
@@ -1,3 +0,0 @@
-# `@lancedb/lancedb-darwin-x64`
-
-This is the **x86_64-apple-darwin** binary for `@lancedb/lancedb`
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,12 +0,0 @@
-{
-	"name": "@lancedb/lancedb-darwin-x64",
-	"version": "0.23.0-beta.2",
-	"os": ["darwin"],
-	"cpu": ["x64"],
-	"main": "lancedb.darwin-x64.node",
-	"files": ["lancedb.darwin-x64.node"],
-	"license": "Apache-2.0",
-	"engines": {
-		"node": ">= 18"
-	}
-}
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.23.0-beta.2",
+	"version": "0.25.0-beta.0",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.23.0-beta.2",
+	"version": "0.25.0-beta.0",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.23.0-beta.2",
+	"version": "0.25.0-beta.0",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.23.0-beta.2",
+	"version": "0.25.0-beta.0",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.23.0-beta.2",
+  "version": "0.25.0-beta.0",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.23.0-beta.2",
+	"version": "0.25.0-beta.0",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.23.0-beta.1",
+  "version": "0.25.0-beta.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.23.0-beta.1",
+      "version": "0.25.0-beta.0",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.23.0-beta.2",
+  "version": "0.25.0-beta.0",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
@@ -25,7 +25,6 @@
    "triples": {
      "defaults": false,
      "additional": [
-        "x86_64-apple-darwin",
        "aarch64-apple-darwin",
        "x86_64-unknown-linux-gnu",
        "aarch64-unknown-linux-gnu",
--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -166,6 +166,19 @@ impl Table {
        Ok(stats.into())
    }

+    #[napi(catch_unwind)]
+    pub async fn initial_storage_options(&self) -> napi::Result<Option<HashMap<String, String>>> {
+        Ok(self.inner_ref()?.initial_storage_options().await)
+    }
+
+    #[napi(catch_unwind)]
+    pub async fn latest_storage_options(&self) -> napi::Result<Option<HashMap<String, String>>> {
+        self.inner_ref()?
+            .latest_storage_options()
+            .await
+            .default_error()
+    }
+
    #[napi(catch_unwind)]
    pub async fn update(
        &self,
@@ -208,18 +221,24 @@ impl Table {
    }

    #[napi(catch_unwind)]
-    pub fn take_row_ids(&self, row_ids: Vec<i64>) -> napi::Result<TakeQuery> {
+    pub fn take_row_ids(&self, row_ids: Vec<BigInt>) -> napi::Result<TakeQuery> {
        Ok(TakeQuery::new(
            self.inner_ref()?.take_row_ids(
                row_ids
                    .into_iter()
-                    .map(|o| {
-                        u64::try_from(o).map_err(|e| {
-                            napi::Error::from_reason(format!(
-                                "Failed to convert row id to u64: {}",
-                                e
+                    .map(|id| {
+                        let (negative, value, lossless) = id.get_u64();
+                        if negative {
+                            Err(napi::Error::from_reason(
+                                "Row id cannot be negative".to_string(),
                            ))
-                        })
+                        } else if !lossless {
+                            Err(napi::Error::from_reason(
+                                "Row id is too large to fit in u64".to_string(),
+                            ))
+                        } else {
+                            Ok(value)
+                        }
                    })
                    .collect::<Result<Vec<_>>>()?,
            ),
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.26.0"
+current_version = "0.29.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/CONTRIBUTING.md
+++ b/python/CONTRIBUTING.md
@@ -16,7 +16,7 @@ The Python package is a wrapper around the Rust library, `lancedb`. We use

 To set up your development environment, you will need to install the following:

-1. Python 3.9 or later
+1. Python 3.10 or later
 2. Cargo (Rust's package manager). Use [rustup](https://rustup.rs/) to install.
 3. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)

--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,28 +1,28 @@
 [package]
 name = "lancedb-python"
-version = "0.26.0"
+version = "0.29.0"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
 repository.workspace = true
 keywords.workspace = true
 categories.workspace = true
-rust-version = "1.75.0"
+rust-version = "1.88.0"

 [lib]
 name = "_lancedb"
 crate-type = ["cdylib"]

 [dependencies]
-arrow = { version = "56.2", features = ["pyarrow"] }
+arrow = { version = "57.2", features = ["pyarrow"] }
 async-trait = "0.1"
 lancedb = { path = "../rust/lancedb", default-features = false }
 lance-core.workspace = true
 lance-namespace.workspace = true
 lance-io.workspace = true
 env_logger.workspace = true
-pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
-pyo3-async-runtimes = { version = "0.25", features = [
+pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
+pyo3-async-runtimes = { version = "0.26", features = [
    "attributes",
    "tokio-runtime",
 ] }
@@ -32,12 +32,12 @@ snafu.workspace = true
 tokio = { version = "1.40", features = ["sync"] }

 [build-dependencies]
-pyo3-build-config = { version = "0.25", features = [
+pyo3-build-config = { version = "0.26", features = [
    "extension-module",
    "abi3-py39",
 ] }

 [features]
-default = ["remote", "lancedb/default"]
+default = ["remote",  "lancedb/aws", "lancedb/gcs", "lancedb/azure", "lancedb/dynamodb", "lancedb/oss", "lancedb/huggingface"]
 fp16kernels = ["lancedb/fp16kernels"]
 remote = ["lancedb/remote"]
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -16,7 +16,7 @@ description = "lancedb"
 authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
 license = { file = "LICENSE" }
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 keywords = [
    "data-format",
    "data-science",
@@ -33,10 +33,10 @@ classifiers = [
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
    "Topic :: Scientific/Engineering",
 ]

@@ -137,4 +137,4 @@ include = [
    "python/lancedb/_lancedb.pyi",
 ]
 exclude = ["python/tests/"]
-pythonVersion = "3.12"
+pythonVersion = "3.13"
--- a/python/python/lancedb/init.py
+++ b/python/python/lancedb/init.py
@@ -13,6 +13,7 @@ __version__ = importlib.metadata.version("lancedb")

 from ._lancedb import connect as lancedb_connect
 from .common import URI, sanitize_uri
+from urllib.parse import urlparse
 from .db import AsyncConnection, DBConnection, LanceDBConnection
 from .io import StorageOptionsProvider
 from .remote import ClientConfig
@@ -28,6 +29,39 @@ from .namespace import (
 )


+def _check_s3_bucket_with_dots(
+    uri: str, storage_options: Optional[Dict[str, str]]
+) -> None:
+    """
+    Raise ValueError if an S3 URI's bucket name contains dots and no region is
+    given in storage_options. Such buckets cannot use virtual-hosted-style URLs,
+    which breaks automatic region detection.
+
+    See: https://github.com/lancedb/lancedb/issues/1898
+    """
+    if not isinstance(uri, str) or not uri.startswith("s3://"):
+        return
+
+    parsed = urlparse(uri)
+    bucket = parsed.netloc
+
+    if "." not in bucket:
+        return
+
+    # Check if region is provided in storage_options
+    region_keys = {"region", "aws_region"}
+    has_region = storage_options and any(k in storage_options for k in region_keys)
+
+    if not has_region:
+        raise ValueError(
+            f"S3 bucket name '{bucket}' contains dots, which prevents automatic "
+            f"region detection. Please specify the region explicitly via "
+            f"storage_options={{'region': '<your-region>'}} or "
+            f"storage_options={{'aws_region': '<your-region>'}}. "
+            f"See https://github.com/lancedb/lancedb/issues/1898 for details."
+        )
+
+
 def connect(
    uri: URI,
    *,
@@ -121,9 +155,11 @@ def connect(
            storage_options=storage_options,
            **kwargs,
        )
+    _check_s3_bucket_with_dots(str(uri), storage_options)

    if kwargs:
        raise ValueError(f"Unknown keyword arguments: {kwargs}")
+
    return LanceDBConnection(
        uri,
        read_consistency_interval=read_consistency_interval,
@@ -211,6 +247,8 @@ async def connect_async(
    if isinstance(client_config, dict):
        client_config = ClientConfig(**client_config)

+    _check_s3_bucket_with_dots(str(uri), storage_options)
+
    return AsyncConnection(
        await lancedb_connect(
            sanitize_uri(uri),
--- a/python/python/lancedb/_lancedb.pyi
+++ b/python/python/lancedb/_lancedb.pyi
@@ -179,6 +179,9 @@ class Table:
        cleanup_since_ms: Optional[int] = None,
        delete_unverified: Optional[bool] = None,
    ) -> OptimizeStats: ...
+    async def uri(self) -> str: ...
+    async def initial_storage_options(self) -> Optional[Dict[str, str]]: ...
+    async def latest_storage_options(self) -> Optional[Dict[str, str]]: ...
    @property
    def tags(self) -> Tags: ...
    def query(self) -> Query: ...
--- a/python/python/lancedb/background_loop.py
+++ b/python/python/lancedb/background_loop.py
@@ -22,7 +22,12 @@ class BackgroundEventLoop:
        self.thread.start()

    def run(self, future):
-        return asyncio.run_coroutine_threadsafe(future, self.loop).result()
+        concurrent_future = asyncio.run_coroutine_threadsafe(future, self.loop)
+        try:
+            return concurrent_future.result()
+        except BaseException:
+            concurrent_future.cancel()
+            raise


 LOOP = BackgroundEventLoop()
--- a/python/python/lancedb/db.py
+++ b/python/python/lancedb/db.py
@@ -210,10 +210,8 @@ class DBConnection(EnforceOverrides):
        page_token: str, optional
            The token to use for pagination. If not present, start from the beginning.
            Typically, this token is last table name from the previous page.
-            Only supported by LanceDb Cloud.
        limit: int, default 10
            The size of the page to return.
-            Only supported by LanceDb Cloud.

        Returns
        -------
--- a/python/python/lancedb/embeddings/colpali.py
+++ b/python/python/lancedb/embeddings/colpali.py
@@ -275,7 +275,7 @@ class ColPaliEmbeddings(EmbeddingFunction):
        """
        Convert image inputs to PIL Images.
        """
-        PIL = attempt_import_or_raise("PIL", "pillow")
+        PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
        requests = attempt_import_or_raise("requests", "requests")
        images = self.sanitize_input(images)
        pil_images = []
@@ -285,12 +285,12 @@ class ColPaliEmbeddings(EmbeddingFunction):
                    if image.startswith(("http://", "https://")):
                        response = requests.get(image, timeout=10)
                        response.raise_for_status()
-                        pil_images.append(PIL.Image.open(io.BytesIO(response.content)))
+                        pil_images.append(PIL_Image.open(io.BytesIO(response.content)))
                    else:
-                        with PIL.Image.open(image) as im:
+                        with PIL_Image.open(image) as im:
                            pil_images.append(im.copy())
                elif isinstance(image, bytes):
-                    pil_images.append(PIL.Image.open(io.BytesIO(image)))
+                    pil_images.append(PIL_Image.open(io.BytesIO(image)))
                else:
                    # Assume it's a PIL Image; will raise if invalid
                    pil_images.append(image)
--- a/python/python/lancedb/embeddings/jinaai.py
+++ b/python/python/lancedb/embeddings/jinaai.py
@@ -77,8 +77,8 @@ class JinaEmbeddings(EmbeddingFunction):
            if isinstance(inputs, list):
                inputs = inputs
            else:
-                PIL = attempt_import_or_raise("PIL", "pillow")
-                if isinstance(inputs, PIL.Image.Image):
+                PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
+                if isinstance(inputs, PIL_Image.Image):
                    inputs = [inputs]
        return inputs

@@ -89,13 +89,13 @@ class JinaEmbeddings(EmbeddingFunction):
        elif isinstance(image, (str, Path)):
            parsed = urlparse.urlparse(image)
            # TODO handle drive letter on windows.
-            PIL = attempt_import_or_raise("PIL", "pillow")
+            PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
            if parsed.scheme == "file":
-                pil_image = PIL.Image.open(parsed.path)
+                pil_image = PIL_Image.open(parsed.path)
            elif parsed.scheme == "":
-                pil_image = PIL.Image.open(image if os.name == "nt" else parsed.path)
+                pil_image = PIL_Image.open(image if os.name == "nt" else parsed.path)
            elif parsed.scheme.startswith("http"):
-                pil_image = PIL.Image.open(io.BytesIO(url_retrieve(image)))
+                pil_image = PIL_Image.open(io.BytesIO(url_retrieve(image)))
            else:
                raise NotImplementedError("Only local and http(s) urls are supported")
            buffered = io.BytesIO()
@@ -103,9 +103,9 @@ class JinaEmbeddings(EmbeddingFunction):
            image_bytes = buffered.getvalue()
            image_dict = {"image": base64.b64encode(image_bytes).decode("utf-8")}
        else:
-            PIL = attempt_import_or_raise("PIL", "pillow")
+            PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")

-            if isinstance(image, PIL.Image.Image):
+            if isinstance(image, PIL_Image.Image):
                buffered = io.BytesIO()
                image.save(buffered, format="PNG")
                image_bytes = buffered.getvalue()
@@ -136,9 +136,9 @@ class JinaEmbeddings(EmbeddingFunction):
        elif isinstance(query, (Path, bytes)):
            return [self.generate_image_embedding(query)]
        else:
-            PIL = attempt_import_or_raise("PIL", "pillow")
+            PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")

-            if isinstance(query, PIL.Image.Image):
+            if isinstance(query, PIL_Image.Image):
                return [self.generate_image_embedding(query)]
            else:
                raise TypeError(
--- a/python/python/lancedb/embeddings/open_clip.py
+++ b/python/python/lancedb/embeddings/open_clip.py
@@ -71,8 +71,8 @@ class OpenClipEmbeddings(EmbeddingFunction):
        if isinstance(query, str):
            return [self.generate_text_embeddings(query)]
        else:
-            PIL = attempt_import_or_raise("PIL", "pillow")
-            if isinstance(query, PIL.Image.Image):
+            PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
+            if isinstance(query, PIL_Image.Image):
                return [self.generate_image_embedding(query)]
            else:
                raise TypeError("OpenClip supports str or PIL Image as query")
@@ -145,20 +145,20 @@ class OpenClipEmbeddings(EmbeddingFunction):
            return self._encode_and_normalize_image(image)

    def _to_pil(self, image: Union[str, bytes]):
-        PIL = attempt_import_or_raise("PIL", "pillow")
+        PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
        if isinstance(image, bytes):
-            return PIL.Image.open(io.BytesIO(image))
-        if isinstance(image, PIL.Image.Image):
+            return PIL_Image.open(io.BytesIO(image))
+        if isinstance(image, PIL_Image.Image):
            return image
        elif isinstance(image, str):
            parsed = urlparse.urlparse(image)
            # TODO handle drive letter on windows.
            if parsed.scheme == "file":
-                return PIL.Image.open(parsed.path)
+                return PIL_Image.open(parsed.path)
            elif parsed.scheme == "":
-                return PIL.Image.open(image if os.name == "nt" else parsed.path)
+                return PIL_Image.open(image if os.name == "nt" else parsed.path)
            elif parsed.scheme.startswith("http"):
-                return PIL.Image.open(io.BytesIO(url_retrieve(image)))
+                return PIL_Image.open(io.BytesIO(url_retrieve(image)))
            else:
                raise NotImplementedError("Only local and http(s) urls are supported")

--- a/python/python/lancedb/embeddings/siglip.py
+++ b/python/python/lancedb/embeddings/siglip.py
@@ -56,8 +56,8 @@ class SigLipEmbeddings(EmbeddingFunction):
        if isinstance(query, str):
            return [self.generate_text_embeddings(query)]
        else:
-            PIL = attempt_import_or_raise("PIL", "pillow")
-            if isinstance(query, PIL.Image.Image):
+            PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
+            if isinstance(query, PIL_Image.Image):
                return [self.generate_image_embedding(query)]
            else:
                raise TypeError("SigLIP supports str or PIL Image as query")
@@ -127,21 +127,21 @@ class SigLipEmbeddings(EmbeddingFunction):
            return image_features.cpu().detach().numpy().squeeze()

    def _to_pil(self, image: Union[str, bytes, "PIL.Image.Image"]):
-        PIL = attempt_import_or_raise("PIL", "pillow")
-        if isinstance(image, PIL.Image.Image):
+        PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
+        if isinstance(image, PIL_Image.Image):
            return image.convert("RGB") if image.mode != "RGB" else image
        elif isinstance(image, bytes):
-            return PIL.Image.open(io.BytesIO(image)).convert("RGB")
+            return PIL_Image.open(io.BytesIO(image)).convert("RGB")
        elif isinstance(image, str):
            parsed = urlparse.urlparse(image)
            if parsed.scheme == "file":
-                return PIL.Image.open(parsed.path).convert("RGB")
+                return PIL_Image.open(parsed.path).convert("RGB")
            elif parsed.scheme == "":
                path = image if os.name == "nt" else parsed.path
-                return PIL.Image.open(path).convert("RGB")
+                return PIL_Image.open(path).convert("RGB")
            elif parsed.scheme.startswith("http"):
                image_bytes = url_retrieve(image)
-                return PIL.Image.open(io.BytesIO(image_bytes)).convert("RGB")
+                return PIL_Image.open(io.BytesIO(image_bytes)).convert("RGB")
            else:
                raise NotImplementedError("Only local and http(s) urls are supported")
        else:
--- a/python/python/lancedb/embeddings/voyageai.py
+++ b/python/python/lancedb/embeddings/voyageai.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors
 import base64
 import os
-from typing import ClassVar, TYPE_CHECKING, List, Union, Any, Generator
+from typing import ClassVar, TYPE_CHECKING, List, Union, Any, Generator, Optional

 from pathlib import Path
 from urllib.parse import urlparse
@@ -21,6 +21,9 @@ if TYPE_CHECKING:

 # Token limits for different VoyageAI models
 VOYAGE_TOTAL_TOKEN_LIMITS = {
+    "voyage-4": 320_000,
+    "voyage-4-lite": 1_000_000,
+    "voyage-4-large": 120_000,
    "voyage-context-3": 32_000,
    "voyage-3.5-lite": 1_000_000,
    "voyage-3.5": 320_000,
@@ -45,14 +48,32 @@ def is_valid_url(text):
        return False


+VIDEO_EXTENSIONS = {".mp4", ".webm", ".mov", ".avi", ".mkv", ".m4v", ".gif"}
+
+
+def is_video_url(url: str) -> bool:
+    """Check if URL points to a video file based on extension."""
+    parsed = urlparse(url)
+    path = parsed.path.lower()
+    return any(path.endswith(ext) for ext in VIDEO_EXTENSIONS)
+
+
+def is_video_path(path: Path) -> bool:
+    """Check if file path is a video file based on extension."""
+    return path.suffix.lower() in VIDEO_EXTENSIONS
+
+
 def transform_input(input_data: Union[str, bytes, Path]):
-    PIL = attempt_import_or_raise("PIL", "pillow")
+    PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
    if isinstance(input_data, str):
        if is_valid_url(input_data):
-            content = {"type": "image_url", "image_url": input_data}
+            if is_video_url(input_data):
+                content = {"type": "video_url", "video_url": input_data}
+            else:
+                content = {"type": "image_url", "image_url": input_data}
        else:
            content = {"type": "text", "text": input_data}
-    elif isinstance(input_data, PIL.Image.Image):
+    elif isinstance(input_data, PIL_Image.Image):
        buffered = BytesIO()
        input_data.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
@@ -61,7 +82,7 @@ def transform_input(input_data: Union[str, bytes, Path]):
            "image_base64": "data:image/jpeg;base64," + img_str,
        }
    elif isinstance(input_data, bytes):
-        img = PIL.Image.open(BytesIO(input_data))
+        img = PIL_Image.open(BytesIO(input_data))
        buffered = BytesIO()
        img.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
@@ -70,14 +91,24 @@ def transform_input(input_data: Union[str, bytes, Path]):
            "image_base64": "data:image/jpeg;base64," + img_str,
        }
    elif isinstance(input_data, Path):
-        img = PIL.Image.open(input_data)
-        buffered = BytesIO()
-        img.save(buffered, format="JPEG")
-        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
-        content = {
-            "type": "image_base64",
-            "image_base64": "data:image/jpeg;base64," + img_str,
-        }
+        if is_video_path(input_data):
+            # Read video file and encode as base64
+            with open(input_data, "rb") as f:
+                video_bytes = f.read()
+            video_str = base64.b64encode(video_bytes).decode("utf-8")
+            content = {
+                "type": "video_base64",
+                "video_base64": video_str,
+            }
+        else:
+            img = PIL_Image.open(input_data)
+            buffered = BytesIO()
+            img.save(buffered, format="JPEG")
+            img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+            content = {
+                "type": "image_base64",
+                "image_base64": "data:image/jpeg;base64," + img_str,
+            }
    else:
        raise ValueError("Each input should be either str, bytes, Path or Image.")

@@ -88,9 +119,11 @@ def sanitize_multimodal_input(inputs: Union[TEXT, IMAGES]) -> List[Any]:
    """
    Sanitize the input to the embedding function.
    """
-    PIL = attempt_import_or_raise("PIL", "pillow")
-    if isinstance(inputs, (str, bytes, Path, PIL.Image.Image)):
+    PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
+    if isinstance(inputs, (str, bytes, Path, PIL_Image.Image)):
        inputs = [inputs]
+    elif isinstance(inputs, list):
+        pass  # Already a list, use as-is
    elif isinstance(inputs, pa.Array):
        inputs = inputs.to_pylist()
    elif isinstance(inputs, pa.ChunkedArray):
@@ -100,7 +133,7 @@ def sanitize_multimodal_input(inputs: Union[TEXT, IMAGES]) -> List[Any]:
            f"Input type {type(inputs)} not allowed with multimodal model."
        )

-    if not all(isinstance(x, (str, bytes, Path, PIL.Image.Image)) for x in inputs):
+    if not all(isinstance(x, (str, bytes, Path, PIL_Image.Image)) for x in inputs):
        raise ValueError("Each input should be either str, bytes, Path or Image.")

    return [transform_input(i) for i in inputs]
@@ -137,17 +170,25 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
    name: str
        The name of the model to use. List of acceptable models:

+            * voyage-4 (1024 dims, general-purpose and multilingual retrieval)
+            * voyage-4-lite (1024 dims, optimized for latency and cost)
+            * voyage-4-large (1024 dims, best retrieval quality)
            * voyage-context-3
            * voyage-3.5
            * voyage-3.5-lite
            * voyage-3
            * voyage-3-lite
            * voyage-multimodal-3
+            * voyage-multimodal-3.5
            * voyage-finance-2
            * voyage-multilingual-2
            * voyage-law-2
            * voyage-code-2

+    output_dimension: int, optional
+        The output dimension for models that support flexible dimensions.
+        Currently only voyage-multimodal-3.5 supports this feature.
+        Valid options: 256, 512, 1024 (default), 2048.

    Examples
    --------
@@ -175,8 +216,14 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
    """

    name: str
+    output_dimension: Optional[int] = None
    client: ClassVar = None
+    _FLEXIBLE_DIM_MODELS: ClassVar[list] = ["voyage-multimodal-3.5"]
+    _VALID_DIMENSIONS: ClassVar[list] = [256, 512, 1024, 2048]
    text_embedding_models: list = [
+        "voyage-4",
+        "voyage-4-lite",
+        "voyage-4-large",
        "voyage-3.5",
        "voyage-3.5-lite",
        "voyage-3",
@@ -186,7 +233,7 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
        "voyage-law-2",
        "voyage-code-2",
    ]
-    multimodal_embedding_models: list = ["voyage-multimodal-3"]
+    multimodal_embedding_models: list = ["voyage-multimodal-3", "voyage-multimodal-3.5"]
    contextual_embedding_models: list = ["voyage-context-3"]

    def _is_multimodal_model(self, model_name: str):
@@ -198,11 +245,25 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
        return model_name in self.contextual_embedding_models or "context" in model_name

    def ndims(self):
+        # Handle flexible dimension models
+        if self.name in self._FLEXIBLE_DIM_MODELS:
+            if self.output_dimension is not None:
+                if self.output_dimension not in self._VALID_DIMENSIONS:
+                    raise ValueError(
+                        f"Invalid output_dimension {self.output_dimension} "
+                        f"for {self.name}. Valid options: {self._VALID_DIMENSIONS}"
+                    )
+                return self.output_dimension
+            return 1024  # default dimension
+
        if self.name == "voyage-3-lite":
            return 512
        elif self.name == "voyage-code-2":
            return 1536
        elif self.name in [
+            "voyage-4",
+            "voyage-4-lite",
+            "voyage-4-large",
            "voyage-context-3",
            "voyage-3.5",
            "voyage-3.5-lite",
@@ -211,12 +272,17 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
            "voyage-finance-2",
            "voyage-multilingual-2",
            "voyage-law-2",
-            "voyage-multimodal-3",
        ]:
            return 1024
        else:
            raise ValueError(f"Model {self.name} not supported")

+    def _get_multimodal_kwargs(self, **kwargs):
+        """Get kwargs for multimodal embed call, including output_dimension if set."""
+        if self.name in self._FLEXIBLE_DIM_MODELS and self.output_dimension is not None:
+            kwargs["output_dimension"] = self.output_dimension
+        return kwargs
+
    def compute_query_embeddings(
        self, query: Union[str, "PIL.Image.Image"], *args, **kwargs
    ) -> List[np.ndarray]:
@@ -234,6 +300,7 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
        """
        client = VoyageAIEmbeddingFunction._get_client()
        if self._is_multimodal_model(self.name):
+            kwargs = self._get_multimodal_kwargs(**kwargs)
            result = client.multimodal_embed(
                inputs=[[query]], model=self.name, input_type="query", **kwargs
            )
@@ -275,6 +342,7 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
            )
            if has_images:
                # Use non-batched API for images
+                kwargs = self._get_multimodal_kwargs(**kwargs)
                result = client.multimodal_embed(
                    inputs=sanitized, model=self.name, input_type="document", **kwargs
                )
@@ -357,6 +425,7 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
            callable: A function that takes a batch of texts and returns embeddings.
        """
        if self._is_multimodal_model(self.name):
+            multimodal_kwargs = self._get_multimodal_kwargs(**kwargs)

            def embed_batch(batch: List[str]) -> List[np.array]:
                batch_inputs = sanitize_multimodal_input(batch)
@@ -364,7 +433,7 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
                    inputs=batch_inputs,
                    model=self.name,
                    input_type=input_type,
-                    **kwargs,
+                    **multimodal_kwargs,
                )
                return result.embeddings

--- a/python/python/lancedb/pydantic.py
+++ b/python/python/lancedb/pydantic.py
@@ -275,7 +275,7 @@ def _py_type_to_arrow_type(py_type: Type[Any], field: FieldInfo) -> pa.DataType:
        return pa.timestamp("us", tz=tz)
    elif getattr(py_type, "__origin__", None) in (list, tuple):
        child = py_type.__args__[0]
-        return pa.list_(_py_type_to_arrow_type(child, field))
+        return _pydantic_list_child_to_arrow(child, field)
    raise TypeError(
        f"Converting Pydantic type to Arrow Type: unsupported type {py_type}."
    )
@@ -298,12 +298,18 @@ else:


 def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
+    def _safe_issubclass(candidate: Any, base: type) -> bool:
+        try:
+            return issubclass(candidate, base)
+        except TypeError:
+            return False
+
    if inspect.isclass(tp):
-        if issubclass(tp, pydantic.BaseModel):
+        if _safe_issubclass(tp, pydantic.BaseModel):
            # Struct
            fields = _pydantic_model_to_fields(tp)
            return pa.struct(fields)
-        if issubclass(tp, FixedSizeListMixin):
+        if _safe_issubclass(tp, FixedSizeListMixin):
            if getattr(tp, "is_multi_vector", lambda: False)():
                return pa.list_(pa.list_(tp.value_arrow_type(), tp.dim()))
            # For regular Vector
@@ -311,45 +317,67 @@ def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
    return _py_type_to_arrow_type(tp, field)


+def _pydantic_list_child_to_arrow(child: Any, field: FieldInfo) -> pa.DataType:
+    unwrapped = _unwrap_optional_annotation(child)
+    if unwrapped is not None:
+        return pa.list_(
+            pa.field("item", _pydantic_type_to_arrow_type(unwrapped, field), True)
+        )
+    return pa.list_(_pydantic_type_to_arrow_type(child, field))
+
+
+def _unwrap_optional_annotation(annotation: Any) -> Any | None:
+    if isinstance(annotation, (_GenericAlias, GenericAlias)):
+        origin = annotation.__origin__
+        args = annotation.__args__
+        if origin == Union:
+            non_none = [arg for arg in args if arg is not type(None)]
+            if len(non_none) == 1 and len(non_none) != len(args):
+                return non_none[0]
+    elif sys.version_info >= (3, 10) and isinstance(annotation, types.UnionType):
+        args = annotation.__args__
+        non_none = [arg for arg in args if arg is not type(None)]
+        if len(non_none) == 1 and len(non_none) != len(args):
+            return non_none[0]
+    return None
+
+
 def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
    """Convert a Pydantic FieldInfo to Arrow DataType"""
+    unwrapped = _unwrap_optional_annotation(field.annotation)
+    if unwrapped is not None:
+        return _pydantic_type_to_arrow_type(unwrapped, field)
    if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
        origin = field.annotation.__origin__
        args = field.annotation.__args__

        if origin is list:
            child = args[0]
-            return pa.list_(_py_type_to_arrow_type(child, field))
-        elif origin == Union:
-            if len(args) == 2 and args[1] is type(None):
-                return _pydantic_type_to_arrow_type(args[0], field)
-    elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType):
-        args = field.annotation.__args__
-        if len(args) == 2:
-            for typ in args:
-                if typ is type(None):
-                    continue
-                return _py_type_to_arrow_type(typ, field)
+            return _pydantic_list_child_to_arrow(child, field)
    return _pydantic_type_to_arrow_type(field.annotation, field)


 def is_nullable(field: FieldInfo) -> bool:
    """Check if a Pydantic FieldInfo is nullable."""
+    if _unwrap_optional_annotation(field.annotation) is not None:
+        return True
    if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
        origin = field.annotation.__origin__
        args = field.annotation.__args__
        if origin == Union:
-            if len(args) == 2 and args[1] is type(None):
+            if any(typ is type(None) for typ in args):
                return True
    elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType):
        args = field.annotation.__args__
        for typ in args:
            if typ is type(None):
                return True
-    elif inspect.isclass(field.annotation) and issubclass(
-        field.annotation, FixedSizeListMixin
-    ):
-        return field.annotation.nullable()
+    elif inspect.isclass(field.annotation):
+        try:
+            if issubclass(field.annotation, FixedSizeListMixin):
+                return field.annotation.nullable()
+        except TypeError:
+            return False
    return False


--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -961,22 +961,27 @@ class LanceQueryBuilder(ABC):
        >>> query = [100, 100]
        >>> plan = table.search(query).analyze_plan()
        >>> print(plan)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-        AnalyzeExec verbose=true, metrics=[], cumulative_cpu=...
-          TracedExec, metrics=[], cumulative_cpu=...
-            ProjectionExec: expr=[...], metrics=[...], cumulative_cpu=...
-              GlobalLimitExec: skip=0, fetch=10, metrics=[...], cumulative_cpu=...
-                FilterExec: _distance@2 IS NOT NULL,
-                metrics=[output_rows=..., elapsed_compute=...], cumulative_cpu=...
-                  SortExec: TopK(fetch=10), expr=[...],
+        AnalyzeExec verbose=true, elapsed=..., metrics=...
+          TracedExec, elapsed=..., metrics=...
+            ProjectionExec: elapsed=..., expr=[...],
+            metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
+              GlobalLimitExec: elapsed=..., skip=0, fetch=10,
+              metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
+                FilterExec: elapsed=..., _distance@2 IS NOT NULL, metrics=[...]
+                  SortExec: elapsed=..., TopK(fetch=10), expr=[...],
                  preserve_partitioning=[...],
-                  metrics=[output_rows=..., elapsed_compute=..., row_replacements=...],
-                  cumulative_cpu=...
-                    KNNVectorDistance: metric=l2,
-                    metrics=[output_rows=..., elapsed_compute=..., output_batches=...],
-                    cumulative_cpu=...
-                      LanceRead: uri=..., projection=[vector], ...
-                      metrics=[output_rows=..., elapsed_compute=...,
-                      bytes_read=..., iops=..., requests=...], cumulative_cpu=...
+                  metrics=[output_rows=..., elapsed_compute=...,
+                  output_bytes=..., row_replacements=...]
+                    KNNVectorDistance: elapsed=..., metric=l2,
+                    metrics=[output_rows=..., elapsed_compute=...,
+                    output_bytes=..., output_batches=...]
+                      LanceRead: elapsed=..., uri=..., projection=[vector],
+                      num_fragments=..., range_before=None, range_after=None,
+                      row_id=true, row_addr=false,
+                      full_filter=--, refine_filter=--,
+                      metrics=[output_rows=..., elapsed_compute=..., output_bytes=...,
+                      fragments_scanned=..., ranges_scanned=1, rows_scanned=1,
+                      bytes_read=..., iops=..., requests=..., task_wait_time=...]

        Returns
        -------
@@ -1428,6 +1433,19 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
        self._bypass_vector_index = True
        return self

+    def fast_search(self) -> LanceVectorQueryBuilder:
+        """
+        Skip a flat search of unindexed data. This will improve
+        search performance but search results will not include unindexed data.
+
+        Returns
+        -------
+        LanceVectorQueryBuilder
+            The LanceVectorQueryBuilder object.
+        """
+        self._fast_search = True
+        return self
+

 class LanceFtsQueryBuilder(LanceQueryBuilder):
    """A builder for full text search for LanceDB."""
--- a/python/python/lancedb/remote/db.py
+++ b/python/python/lancedb/remote/db.py
@@ -384,6 +384,7 @@ class RemoteDBConnection(DBConnection):
        on_bad_vectors: str = "error",
        fill_value: float = 0.0,
        mode: Optional[str] = None,
+        exist_ok: bool = False,
        embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
        *,
        namespace: Optional[List[str]] = None,
@@ -412,6 +413,12 @@ class RemoteDBConnection(DBConnection):
            - pyarrow.Schema

            - [LanceModel][lancedb.pydantic.LanceModel]
+        mode: str, default "create"
+            The mode to use when creating the table.
+            One of "create", "overwrite", or "exist_ok".
+        exist_ok: bool, default False
+            If exist_ok is True, and mode is None or "create", mode will be changed
+            to "exist_ok".
        on_bad_vectors: str, default "error"
            What to do if any of the vectors are not the same size or contains NaNs.
            One of "error", "drop", "fill".
@@ -483,6 +490,11 @@ class RemoteDBConnection(DBConnection):
        LanceTable(table4)

        """
+        if exist_ok:
+            if mode == "create":
+                mode = "exist_ok"
+            elif not mode:
+                mode = "exist_ok"
        if namespace is None:
            namespace = []
        validate_table_name(name)
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -18,7 +18,17 @@ from lancedb._lancedb import (
    UpdateResult,
 )
 from lancedb.embeddings.base import EmbeddingFunctionConfig
-from lancedb.index import FTS, BTree, Bitmap, HnswSq, IvfFlat, IvfPq, IvfSq, LabelList
+from lancedb.index import (
+    FTS,
+    BTree,
+    Bitmap,
+    HnswSq,
+    IvfFlat,
+    IvfPq,
+    IvfRq,
+    IvfSq,
+    LabelList,
+)
 from lancedb.remote.db import LOOP
 import pyarrow as pa

@@ -265,6 +275,12 @@ class RemoteTable(Table):
                num_sub_vectors=num_sub_vectors,
                num_bits=num_bits,
            )
+        elif index_type == "IVF_RQ":
+            config = IvfRq(
+                distance_type=metric,
+                num_partitions=num_partitions,
+                num_bits=num_bits,
+            )
        elif index_type == "IVF_SQ":
            config = IvfSq(distance_type=metric, num_partitions=num_partitions)
        elif index_type == "IVF_HNSW_PQ":
@@ -279,7 +295,8 @@ class RemoteTable(Table):
        else:
            raise ValueError(
                f"Unknown vector index type: {index_type}. Valid options are"
-                " 'IVF_FLAT', 'IVF_SQ', 'IVF_PQ', 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
+                " 'IVF_FLAT', 'IVF_PQ', 'IVF_RQ', 'IVF_SQ',"
+                " 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
            )

        LOOP.run(
@@ -638,6 +655,14 @@ class RemoteTable(Table):
    def stats(self):
        return LOOP.run(self._table.stats())

+    @property
+    def uri(self) -> str:
+        """The table URI (storage location).
+
+        For remote tables, this fetches the location from the server via describe.
+        """
+        return LOOP.run(self._table.uri())
+
    def take_offsets(self, offsets: list[int]) -> LanceTakeQueryBuilder:
        return LanceTakeQueryBuilder(self._table.take_offsets(offsets))

--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -684,6 +684,24 @@ class Table(ABC):
        """
        raise NotImplementedError

+    def to_lance(self, **kwargs) -> lance.LanceDataset:
+        """Return the table as a lance.LanceDataset.
+
+        Returns
+        -------
+        lance.LanceDataset
+        """
+        raise NotImplementedError
+
+    def to_polars(self, **kwargs) -> "pl.DataFrame":
+        """Return the table as a polars.DataFrame.
+
+        Returns
+        -------
+        polars.DataFrame
+        """
+        raise NotImplementedError
+
    def create_index(
        self,
        metric="l2",
@@ -2200,6 +2218,41 @@ class LanceTable(Table):
    def stats(self) -> TableStatistics:
        return LOOP.run(self._table.stats())

+    @property
+    def uri(self) -> str:
+        return LOOP.run(self._table.uri())
+
+    def initial_storage_options(self) -> Optional[Dict[str, str]]:
+        """Get the initial storage options that were passed in when opening this table.
+
+        For dynamically refreshed options (e.g., credential vending), use
+        :meth:`latest_storage_options`.
+
+        Warning: This is an internal API and the return value is subject to change.
+
+        Returns
+        -------
+        Optional[Dict[str, str]]
+            The storage options, or None if no storage options were configured.
+        """
+        return LOOP.run(self._table.initial_storage_options())
+
+    def latest_storage_options(self) -> Optional[Dict[str, str]]:
+        """Get the latest storage options, refreshing from provider if configured.
+
+        This method is useful for credential vending scenarios where storage options
+        may be refreshed dynamically. If no dynamic provider is configured, this
+        returns the initial static options.
+
+        Warning: This is an internal API and the return value is subject to change.
+
+        Returns
+        -------
+        Optional[Dict[str, str]]
+            The storage options, or None if no storage options were configured.
+        """
+        return LOOP.run(self._table.latest_storage_options())
+
    def create_scalar_index(
        self,
        column: str,
@@ -3588,6 +3641,51 @@ class AsyncTable:
        """
        return await self._inner.stats()

+    async def uri(self) -> str:
+        """
+        Get the table URI (storage location).
+
+        For remote tables, this fetches the location from the server via describe.
+        For local tables, this returns the dataset URI.
+
+        Returns
+        -------
+        str
+            The full storage location of the table (e.g., S3/GCS path).
+        """
+        return await self._inner.uri()
+
+    async def initial_storage_options(self) -> Optional[Dict[str, str]]:
+        """Get the initial storage options that were passed in when opening this table.
+
+        For dynamically refreshed options (e.g., credential vending), use
+        :meth:`latest_storage_options`.
+
+        Warning: This is an internal API and the return value is subject to change.
+
+        Returns
+        -------
+        Optional[Dict[str, str]]
+            The storage options, or None if no storage options were configured.
+        """
+        return await self._inner.initial_storage_options()
+
+    async def latest_storage_options(self) -> Optional[Dict[str, str]]:
+        """Get the latest storage options, refreshing from provider if configured.
+
+        This method is useful for credential vending scenarios where storage options
+        may be refreshed dynamically. If no dynamic provider is configured, this
+        returns the initial static options.
+
+        Warning: This is an internal API and the return value is subject to change.
+
+        Returns
+        -------
+        Optional[Dict[str, str]]
+            The storage options, or None if no storage options were configured.
+        """
+        return await self._inner.latest_storage_options()
+
    async def add(
        self,
        data: DATA,
--- a/python/python/tests/conftest.py
+++ b/python/python/tests/conftest.py
@@ -2,12 +2,27 @@
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

 from datetime import timedelta
+
 from lancedb.db import AsyncConnection, DBConnection
 import lancedb
 import pytest
 import pytest_asyncio


+def pandas_string_type():
+    """Return the PyArrow string type that pandas uses for string columns.
+
+    pandas 3.0+ uses large_string for string columns; pandas 2.x uses string.
+    """
+    import pandas as pd
+    import pyarrow as pa
+
+    version = tuple(int(x) for x in pd.__version__.split(".")[:2])
+    if version >= (3, 0):
+        return pa.large_utf8()
+    return pa.utf8()
+
+
 # Use an in-memory database for most tests.
@pytest.fixture
 def mem_db() -> DBConnection:
--- a/python/python/tests/test_db.py
+++ b/python/python/tests/test_db.py
@@ -268,6 +268,8 @@ async def test_create_table_from_iterator_async(mem_db_async: lancedb.AsyncConne


 def test_create_exist_ok(tmp_db: lancedb.DBConnection):
+    from conftest import pandas_string_type
+
    data = pd.DataFrame(
        {
            "vector": [[3.1, 4.1], [5.9, 26.5]],
@@ -286,10 +288,11 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
    assert tbl.schema == tbl2.schema
    assert len(tbl) == len(tbl2)

+    # pandas 3.0+ uses large_string; pandas 2.x uses string
    schema = pa.schema(
        [
            pa.field("vector", pa.list_(pa.float32(), list_size=2)),
-            pa.field("item", pa.utf8()),
+            pa.field("item", pandas_string_type()),
            pa.field("price", pa.float64()),
        ]
    )
@@ -299,7 +302,7 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
    bad_schema = pa.schema(
        [
            pa.field("vector", pa.list_(pa.float32(), list_size=2)),
-            pa.field("item", pa.utf8()),
+            pa.field("item", pandas_string_type()),
            pa.field("price", pa.float64()),
            pa.field("extra", pa.float32()),
        ]
@@ -365,6 +368,8 @@ async def test_create_mode_async(tmp_db_async: lancedb.AsyncConnection):

@pytest.mark.asyncio
 async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
+    from conftest import pandas_string_type
+
    data = pd.DataFrame(
        {
            "vector": [[3.1, 4.1], [5.9, 26.5]],
@@ -382,10 +387,11 @@ async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
    assert tbl.name == tbl2.name
    assert await tbl.schema() == await tbl2.schema()

+    # pandas 3.0+ uses large_string; pandas 2.x uses string
    schema = pa.schema(
        [
            pa.field("vector", pa.list_(pa.float32(), list_size=2)),
-            pa.field("item", pa.utf8()),
+            pa.field("item", pandas_string_type()),
            pa.field("price", pa.float64()),
        ]
    )
@@ -595,6 +601,8 @@ def test_open_table_sync(tmp_db: lancedb.DBConnection):

@pytest.mark.asyncio
 async def test_open_table(tmp_path):
+    from conftest import pandas_string_type
+
    db = await lancedb.connect_async(tmp_path)
    data = pd.DataFrame(
        {
@@ -614,10 +622,11 @@ async def test_open_table(tmp_path):
        )
        is not None
    )
+    # pandas 3.0+ uses large_string; pandas 2.x uses string
    assert await tbl.schema() == pa.schema(
        {
            "vector": pa.list_(pa.float32(), list_size=2),
-            "item": pa.utf8(),
+            "item": pandas_string_type(),
            "price": pa.float64(),
        }
    )
--- a/python/python/tests/test_embeddings_slow.py
+++ b/python/python/tests/test_embeddings_slow.py
@@ -517,19 +517,36 @@ def test_ollama_embedding(tmp_path):
@pytest.mark.skipif(
    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
 )
-def test_voyageai_embedding_function():
-    voyageai = get_registry().get("voyageai").create(name="voyage-3", max_retries=0)
+@pytest.mark.parametrize(
+    "model_name,expected_dims",
+    [
+        ("voyage-3", 1024),
+        ("voyage-4", 1024),
+        ("voyage-4-lite", 1024),
+        ("voyage-4-large", 1024),
+    ],
+)
+def test_voyageai_embedding_function(model_name, expected_dims, tmp_path):
+    """Integration test for VoyageAI text embedding models with real API calls."""
+    voyageai = get_registry().get("voyageai").create(name=model_name, max_retries=0)

    class TextModel(LanceModel):
        text: str = voyageai.SourceField()
        vector: Vector(voyageai.ndims()) = voyageai.VectorField()

    df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
-    db = lancedb.connect("~/lancedb")
+    db = lancedb.connect(tmp_path)
    tbl = db.create_table("test", schema=TextModel, mode="overwrite")

    tbl.add(df)
    assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()
+    assert voyageai.ndims() == expected_dims, (
+        f"{model_name} should have {expected_dims} dimensions"
+    )
+
+    # Test search functionality
+    result = tbl.search("hello").limit(1).to_pandas()
+    assert result["text"][0] == "hello world"


@pytest.mark.slow
@@ -613,6 +630,133 @@ def test_voyageai_multimodal_embedding_text_function():
    assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()


+@pytest.mark.slow
+@pytest.mark.skipif(
+    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
+)
+def test_voyageai_multimodal_35_embedding_function():
+    """Test voyage-multimodal-3.5 model with text input."""
+    voyageai = (
+        get_registry()
+        .get("voyageai")
+        .create(name="voyage-multimodal-3.5", max_retries=0)
+    )
+
+    class TextModel(LanceModel):
+        text: str = voyageai.SourceField()
+        vector: Vector(voyageai.ndims()) = voyageai.VectorField()
+
+    df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
+    db = lancedb.connect("~/lancedb")
+    tbl = db.create_table("test_multimodal_35", schema=TextModel, mode="overwrite")
+
+    tbl.add(df)
+    assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()
+    assert voyageai.ndims() == 1024
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(
+    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
+)
+def test_voyageai_multimodal_35_flexible_dimensions():
+    """Test voyage-multimodal-3.5 model with custom output dimension."""
+    voyageai = (
+        get_registry()
+        .get("voyageai")
+        .create(name="voyage-multimodal-3.5", output_dimension=512, max_retries=0)
+    )
+
+    class TextModel(LanceModel):
+        text: str = voyageai.SourceField()
+        vector: Vector(voyageai.ndims()) = voyageai.VectorField()
+
+    assert voyageai.ndims() == 512
+
+    df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
+    db = lancedb.connect("~/lancedb")
+    tbl = db.create_table("test_multimodal_35_dim", schema=TextModel, mode="overwrite")
+
+    tbl.add(df)
+    assert len(tbl.to_pandas()["vector"][0]) == 512
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(
+    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
+)
+def test_voyageai_multimodal_35_image_embedding():
+    """Test voyage-multimodal-3.5 model with image input."""
+    voyageai = (
+        get_registry()
+        .get("voyageai")
+        .create(name="voyage-multimodal-3.5", max_retries=0)
+    )
+
+    class Images(LanceModel):
+        label: str
+        image_uri: str = voyageai.SourceField()
+        vector: Vector(voyageai.ndims()) = voyageai.VectorField()
+
+    db = lancedb.connect("~/lancedb")
+    table = db.create_table(
+        "test_multimodal_35_images", schema=Images, mode="overwrite"
+    )
+    labels = ["cat", "dog"]
+    uris = [
+        "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
+        "http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
+    ]
+    table.add(pd.DataFrame({"label": labels, "image_uri": uris}))
+    assert len(table.to_pandas()["vector"][0]) == voyageai.ndims()
+    assert voyageai.ndims() == 1024
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(
+    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
+)
+@pytest.mark.parametrize("dimension", [256, 512, 1024, 2048])
+def test_voyageai_multimodal_35_all_dimensions(dimension):
+    """Test voyage-multimodal-3.5 model with all valid output dimensions."""
+    voyageai = (
+        get_registry()
+        .get("voyageai")
+        .create(name="voyage-multimodal-3.5", output_dimension=dimension, max_retries=0)
+    )
+
+    assert voyageai.ndims() == dimension
+
+    class TextModel(LanceModel):
+        text: str = voyageai.SourceField()
+        vector: Vector(voyageai.ndims()) = voyageai.VectorField()
+
+    df = pd.DataFrame({"text": ["hello world"]})
+    db = lancedb.connect("~/lancedb")
+    tbl = db.create_table(
+        f"test_multimodal_35_dim_{dimension}", schema=TextModel, mode="overwrite"
+    )
+
+    tbl.add(df)
+    assert len(tbl.to_pandas()["vector"][0]) == dimension
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(
+    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
+)
+def test_voyageai_multimodal_35_invalid_dimension():
+    """Test voyage-multimodal-3.5 model raises error for invalid output dimension."""
+    with pytest.raises(ValueError, match="Invalid output_dimension"):
+        voyageai = (
+            get_registry()
+            .get("voyageai")
+            .create(name="voyage-multimodal-3.5", output_dimension=999, max_retries=0)
+        )
+        # ndims() is where the validation happens
+        voyageai.ndims()
+
+
@pytest.mark.slow
@pytest.mark.skipif(
    importlib.util.find_spec("colpali_engine") is None,
--- a/python/python/tests/test_namespace_integration.py
+++ b/python/python/tests/test_namespace_integration.py
@@ -26,6 +26,8 @@ import pytest
 from lance_namespace import (
    CreateEmptyTableRequest,
    CreateEmptyTableResponse,
+    DeclareTableRequest,
+    DeclareTableResponse,
    DescribeTableRequest,
    DescribeTableResponse,
    LanceNamespace,
@@ -160,6 +162,19 @@ class TrackingNamespace(LanceNamespace):

        return modified

+    def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
+        """Track declare_table calls and inject rotating credentials."""
+        with self.lock:
+            self.create_call_count += 1
+            count = self.create_call_count
+
+        response = self.inner.declare_table(request)
+        response.storage_options = self._modify_storage_options(
+            response.storage_options, count
+        )
+
+        return response
+
    def create_empty_table(
        self, request: CreateEmptyTableRequest
    ) -> CreateEmptyTableResponse:
--- a/python/python/tests/test_permutation.py
+++ b/python/python/tests/test_permutation.py
@@ -438,11 +438,15 @@ def test_filter_with_splits(mem_db):
    row_count = permutation_tbl.count_rows()
    assert row_count == 67

-    data = permutation_tbl.search(None).to_arrow().to_pydict()
+    # Verify the permutation table only contains row_id and split_id
+    assert set(permutation_tbl.schema.names) == {"row_id", "split_id"}
+
+    row_ids = permutation_tbl.search(None).to_arrow().to_pydict()["row_id"]
+    data = tbl.take_row_ids(row_ids).to_arrow().to_pydict()
    categories = data["category"]

    # All categories should be A or B
-    assert all(cat in ["A", "B"] for cat in categories)
+    assert all(cat in ("A", "B") for cat in categories)


 def test_filter_with_shuffle(mem_db):
--- a/python/python/tests/test_pydantic.py
+++ b/python/python/tests/test_pydantic.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import json
-import sys
 from datetime import date, datetime
 from typing import List, Optional, Tuple

@@ -20,10 +19,6 @@ from pydantic import BaseModel
 from pydantic import Field


-@pytest.mark.skipif(
-    sys.version_info < (3, 9),
-    reason="using native type alias requires python3.9 or higher",
-)
 def test_pydantic_to_arrow():
    class StructModel(pydantic.BaseModel):
        a: str
@@ -83,10 +78,6 @@ def test_pydantic_to_arrow():
    assert schema == expect_schema


-@pytest.mark.skipif(
-    sys.version_info < (3, 10),
-    reason="using | type syntax requires python3.10 or higher",
-)
 def test_optional_types_py310():
    class TestModel(pydantic.BaseModel):
        a: str | None
@@ -105,10 +96,233 @@ def test_optional_types_py310():
    assert schema == expect_schema


-@pytest.mark.skipif(
-    sys.version_info > (3, 8),
-    reason="using native type alias requires python3.9 or higher",
-)
+def test_optional_structs():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        split: SplitInfo | None = None
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "split",
+                pa.struct(
+                    [
+                        pa.field("start_frame", pa.int64(), False),
+                        pa.field("end_frame", pa.int64(), False),
+                    ]
+                ),
+                True,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
+def test_optional_struct_list_py310():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        splits: list[SplitInfo] | None = None
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "splits",
+                pa.list_(
+                    pa.struct(
+                        [
+                            pa.field("start_frame", pa.int64(), False),
+                            pa.field("end_frame", pa.int64(), False),
+                        ]
+                    )
+                ),
+                True,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
+def test_nested_struct_list():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        splits: list[SplitInfo]
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "splits",
+                pa.list_(
+                    pa.struct(
+                        [
+                            pa.field("start_frame", pa.int64(), False),
+                            pa.field("end_frame", pa.int64(), False),
+                        ]
+                    )
+                ),
+                False,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
+def test_nested_struct_list_optional():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        splits: Optional[list[SplitInfo]] = None
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "splits",
+                pa.list_(
+                    pa.struct(
+                        [
+                            pa.field("start_frame", pa.int64(), False),
+                            pa.field("end_frame", pa.int64(), False),
+                        ]
+                    )
+                ),
+                True,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
+def test_nested_struct_list_optional_items():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        splits: list[Optional[SplitInfo]]
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "splits",
+                pa.list_(
+                    pa.field(
+                        "item",
+                        pa.struct(
+                            [
+                                pa.field("start_frame", pa.int64(), False),
+                                pa.field("end_frame", pa.int64(), False),
+                            ]
+                        ),
+                        True,
+                    )
+                ),
+                False,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
+def test_nested_struct_list_optional_container_and_items():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        splits: Optional[list[Optional[SplitInfo]]] = None
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "splits",
+                pa.list_(
+                    pa.field(
+                        "item",
+                        pa.struct(
+                            [
+                                pa.field("start_frame", pa.int64(), False),
+                                pa.field("end_frame", pa.int64(), False),
+                            ]
+                        ),
+                        True,
+                    )
+                ),
+                True,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
+def test_nested_struct_list_optional_items_pep604():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        splits: list[SplitInfo | None]
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "splits",
+                pa.list_(
+                    pa.field(
+                        "item",
+                        pa.struct(
+                            [
+                                pa.field("start_frame", pa.int64(), False),
+                                pa.field("end_frame", pa.int64(), False),
+                            ]
+                        ),
+                        True,
+                    )
+                ),
+                False,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
 def test_pydantic_to_arrow_py38():
    class StructModel(pydantic.BaseModel):
        a: str
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -1499,3 +1499,30 @@ def test_search_empty_table(mem_db):
    # Search on empty table should return empty results, not crash
    results = table.search([1.0, 2.0]).limit(5).to_list()
    assert results == []
+
+
+def test_fast_search(tmp_path):
+    db = lancedb.connect(tmp_path)
+
+    # 256 random 32-dimensional vectors, stored as a fixed-size-list column
+    vectors = pa.FixedShapeTensorArray.from_numpy_ndarray(
+        np.random.rand(256, 32)
+    ).storage
+
+    table = db.create_table("test", pa.table({"vector": vectors}))
+
+    # Pass index parameters directly rather than through config=IvfPq(...)
+    table.create_index(vector_column_name="vector", num_partitions=1, num_sub_vectors=1)
+
+    # Add rows after indexing so the table holds data the index does not cover
+    table.add(pa.table({"vector": vectors}))
+
+    q = [1.0] * 32
+
+    # 1. Normal search: plan includes "LanceScan" to cover the unindexed rows
+    plan = table.search(q).explain_plan(True)
+    assert "LanceScan" in plan
+
+    # 2. Fast search: unindexed rows are skipped, so no "LanceScan" in the plan
+    plan = table.search(q).fast_search().explain_plan(True)
+    assert "LanceScan" not in plan
--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -8,7 +8,7 @@ import http.server
 import json
 import threading
 import time
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, patch
 import uuid
 from packaging.version import Version

@@ -168,6 +168,42 @@ def test_table_len_sync():
        assert len(table) == 1


+def test_create_table_exist_ok():
+    def handler(request):
+        if request.path == "/v1/table/test/create/?mode=exist_ok":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            request.wfile.write(b"{}")
+        else:
+            request.send_response(404)
+            request.end_headers()
+
+    with mock_lancedb_connection(handler) as db:
+        table = db.create_table("test", [{"id": 1}], exist_ok=True)
+        assert table is not None
+
+    with mock_lancedb_connection(handler) as db:
+        table = db.create_table("test", [{"id": 1}], mode="create", exist_ok=True)
+        assert table is not None
+
+
+def test_create_table_exist_ok_with_mode_overwrite():
+    def handler(request):
+        if request.path == "/v1/table/test/create/?mode=overwrite":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            request.wfile.write(b"{}")
+        else:
+            request.send_response(404)
+            request.end_headers()
+
+    with mock_lancedb_connection(handler) as db:
+        table = db.create_table("test", [{"id": 1}], mode="overwrite", exist_ok=True)
+        assert table is not None
+
+
@pytest.mark.asyncio
 async def test_http_error():
    request_id_holder = {"request_id": None}
@@ -565,7 +601,6 @@ def test_head():
 def test_query_sync_minimal():
    def handler(body):
        assert body == {
-            "distance_type": "l2",
            "k": 10,
            "prefilter": True,
            "refine_factor": None,
@@ -649,7 +684,6 @@ def test_query_sync_maximal():
 def test_query_sync_nprobes():
    def handler(body):
        assert body == {
-            "distance_type": "l2",
            "k": 10,
            "prefilter": True,
            "fast_search": True,
@@ -679,7 +713,6 @@ def test_query_sync_nprobes():
 def test_query_sync_no_max_nprobes():
    def handler(body):
        assert body == {
-            "distance_type": "l2",
            "k": 10,
            "prefilter": True,
            "fast_search": True,
@@ -802,7 +835,6 @@ def test_query_sync_hybrid():
        else:
            # Vector query
            assert body == {
-                "distance_type": "l2",
                "k": 42,
                "prefilter": True,
                "refine_factor": None,
@@ -1167,3 +1199,22 @@ async def test_header_provider_overrides_static_headers():
        extra_headers={"X-API-Key": "static-key", "X-Extra": "extra-value"},
    ) as db:
        await db.table_names()
+
+
+@pytest.mark.parametrize("exception", [KeyboardInterrupt, SystemExit, GeneratorExit])
+def test_background_loop_cancellation(exception):
+    """Test that BackgroundEventLoop.run() cancels the future on interrupt."""
+    from lancedb.background_loop import BackgroundEventLoop
+
+    mock_future = MagicMock()
+    mock_future.result.side_effect = exception()
+
+    with (
+        patch.object(BackgroundEventLoop, "__init__", return_value=None),
+        patch("asyncio.run_coroutine_threadsafe", return_value=mock_future),
+    ):
+        loop = BackgroundEventLoop()
+        loop.loop = MagicMock()
+        with pytest.raises(exception):
+            loop.run(None)
+        mock_future.cancel.assert_called_once()
--- a/python/python/tests/test_s3_bucket_dots.py
+++ b/python/python/tests/test_s3_bucket_dots.py
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+"""
+Tests for S3 bucket names containing dots.
+
+Related issue: https://github.com/lancedb/lancedb/issues/1898
+
+These tests validate the early error checking for S3 bucket names with dots.
+No actual S3 connection is made - validation happens before connection.
+"""
+
+import pytest
+import lancedb
+
+# Test URIs
+BUCKET_WITH_DOTS = "s3://my.bucket.name/path"
+BUCKET_WITH_DOTS_AND_REGION = ("s3://my.bucket.name", {"region": "us-east-1"})
+BUCKET_WITH_DOTS_AND_AWS_REGION = ("s3://my.bucket.name", {"aws_region": "us-east-1"})
+BUCKET_WITHOUT_DOTS = "s3://my-bucket/path"
+
+
+class TestS3BucketWithDotsSync:
+    """Tests for connect()."""
+
+    def test_bucket_with_dots_requires_region(self):
+        with pytest.raises(ValueError, match="contains dots"):
+            lancedb.connect(BUCKET_WITH_DOTS)
+
+    def test_bucket_with_dots_and_region_passes(self):
+        uri, opts = BUCKET_WITH_DOTS_AND_REGION
+        db = lancedb.connect(uri, storage_options=opts)
+        assert db is not None
+
+    def test_bucket_with_dots_and_aws_region_passes(self):
+        uri, opts = BUCKET_WITH_DOTS_AND_AWS_REGION
+        db = lancedb.connect(uri, storage_options=opts)
+        assert db is not None
+
+    def test_bucket_without_dots_passes(self):
+        db = lancedb.connect(BUCKET_WITHOUT_DOTS)
+        assert db is not None
+
+
+class TestS3BucketWithDotsAsync:
+    """Tests for connect_async()."""
+
+    @pytest.mark.asyncio
+    async def test_bucket_with_dots_requires_region(self):
+        with pytest.raises(ValueError, match="contains dots"):
+            await lancedb.connect_async(BUCKET_WITH_DOTS)
+
+    @pytest.mark.asyncio
+    async def test_bucket_with_dots_and_region_passes(self):
+        uri, opts = BUCKET_WITH_DOTS_AND_REGION
+        db = await lancedb.connect_async(uri, storage_options=opts)
+        assert db is not None
+
+    @pytest.mark.asyncio
+    async def test_bucket_with_dots_and_aws_region_passes(self):
+        uri, opts = BUCKET_WITH_DOTS_AND_AWS_REGION
+        db = await lancedb.connect_async(uri, storage_options=opts)
+        assert db is not None
+
+    @pytest.mark.asyncio
+    async def test_bucket_without_dots_passes(self):
+        db = await lancedb.connect_async(BUCKET_WITHOUT_DOTS)
+        assert db is not None
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -1880,8 +1880,13 @@ async def test_optimize_delete_unverified(tmp_db_async: AsyncConnection, tmp_pat
        ],
    )
    version = await table.version()
-    path = tmp_path / "test.lance" / "_versions" / f"{version - 1}.manifest"
+    assert version == 2
+
+    # By removing a manifest file, we make the data files we just inserted unverified
+    version_name = 18446744073709551615 - (version - 1)
+    path = tmp_path / "test.lance" / "_versions" / f"{version_name:020}.manifest"
    os.remove(path)
+
    stats = await table.optimize(delete_unverified=False)
    assert stats.prune.old_versions_removed == 0
    stats = await table.optimize(
@@ -1967,3 +1972,9 @@ def test_add_table_with_empty_embeddings(tmp_path):
        on_bad_vectors="drop",
    )
    assert table.count_rows() == 1
+
+
+def test_table_uri(tmp_path):
+    db = lancedb.connect(tmp_path)
+    table = db.create_table("my_table", data=[{"x": 0}])
+    assert table.uri == str(tmp_path / "my_table.lance")
--- a/python/python/tests/test_util.py
+++ b/python/python/tests/test_util.py
@@ -528,12 +528,19 @@ def test_sanitize_data(
        else:
            expected_schema = schema
    else:
+        from conftest import pandas_string_type
+
+        # polars uses large_string, pandas 3.0+ uses large_string, others use string
+        if isinstance(data, pl.DataFrame):
+            text_type = pa.large_utf8()
+        elif isinstance(data, pd.DataFrame):
+            text_type = pandas_string_type()
+        else:
+            text_type = pa.string()
        expected_schema = pa.schema(
            {
                "id": pa.int64(),
-                "text": pa.large_utf8()
-                if isinstance(data, pl.DataFrame)
-                else pa.string(),
+                "text": text_type,
                "vector": pa.list_(pa.float32(), 10),
            }
        )
--- a/python/python/tests/test_voyageai_embeddings.py
+++ b/python/python/tests/test_voyageai_embeddings.py
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+"""Unit tests for VoyageAI embedding function.
+
+These tests verify model registration and configuration without requiring API calls.
+"""
+
+import pytest
+from unittest.mock import MagicMock, patch
+
+from lancedb.embeddings import get_registry
+
+
+@pytest.fixture(autouse=True)
+def reset_voyageai_client():
+    """Reset VoyageAI client before and after each test to avoid state pollution."""
+    from lancedb.embeddings.voyageai import VoyageAIEmbeddingFunction
+
+    VoyageAIEmbeddingFunction.client = None
+    yield
+    VoyageAIEmbeddingFunction.client = None
+
+
+class TestVoyageAIModelRegistration:
+    """Tests for VoyageAI model registration and configuration."""
+
+    @pytest.fixture
+    def mock_voyageai_client(self):
+        """Mock VoyageAI client to avoid API calls."""
+        with patch.dict("os.environ", {"VOYAGE_API_KEY": "test-key"}):
+            with patch("lancedb.embeddings.voyageai.attempt_import_or_raise") as mock:
+                mock_client = MagicMock()
+                mock_voyageai = MagicMock()
+                mock_voyageai.Client.return_value = mock_client
+                mock.return_value = mock_voyageai
+                yield mock_client
+
+    def test_voyageai_registered(self):
+        """Test that VoyageAI is registered in the embedding function registry."""
+        registry = get_registry()
+        assert registry.get("voyageai") is not None
+
+    @pytest.mark.parametrize(
+        "model_name,expected_dims",
+        [
+            # Voyage-4 series (all 1024 dims)
+            ("voyage-4", 1024),
+            ("voyage-4-lite", 1024),
+            ("voyage-4-large", 1024),
+            # Voyage-3 series
+            ("voyage-3", 1024),
+            ("voyage-3-lite", 512),
+            # Domain-specific models
+            ("voyage-finance-2", 1024),
+            ("voyage-multilingual-2", 1024),
+            ("voyage-law-2", 1024),
+            ("voyage-code-2", 1536),
+            # Multimodal
+            ("voyage-multimodal-3", 1024),
+        ],
+    )
+    def test_model_dimensions(self, model_name, expected_dims, mock_voyageai_client):
+        """Test that each model returns the correct dimensions."""
+        registry = get_registry()
+        func = registry.get("voyageai").create(name=model_name)
+        assert func.ndims() == expected_dims, (
+            f"Model {model_name} should have {expected_dims} dimensions"
+        )
+
+    def test_unsupported_model_raises_error(self, mock_voyageai_client):
+        """Test that unsupported models raise ValueError."""
+        registry = get_registry()
+        func = registry.get("voyageai").create(name="unsupported-model")
+        with pytest.raises(ValueError, match="not supported"):
+            func.ndims()
+
+    @pytest.mark.parametrize(
+        "model_name",
+        [
+            "voyage-4",
+            "voyage-4-lite",
+            "voyage-4-large",
+        ],
+    )
+    def test_voyage4_models_are_text_models(self, model_name, mock_voyageai_client):
+        """Test that voyage-4 models are classified as text models (not multimodal)."""
+        registry = get_registry()
+        func = registry.get("voyageai").create(name=model_name)
+        assert not func._is_multimodal_model(model_name), (
+            f"{model_name} should be a text model, not multimodal"
+        )
+
+    def test_voyage4_models_in_text_embedding_list(self, mock_voyageai_client):
+        """Test that voyage-4 models are in the text_embedding_models list."""
+        registry = get_registry()
+        func = registry.get("voyageai").create(name="voyage-4")
+        assert "voyage-4" in func.text_embedding_models
+        assert "voyage-4-lite" in func.text_embedding_models
+        assert "voyage-4-large" in func.text_embedding_models
+
+    def test_voyage4_models_not_in_multimodal_list(self, mock_voyageai_client):
+        """Test that voyage-4 models are NOT in the multimodal_embedding_models list."""
+        registry = get_registry()
+        func = registry.get("voyageai").create(name="voyage-4")
+        assert "voyage-4" not in func.multimodal_embedding_models
+        assert "voyage-4-lite" not in func.multimodal_embedding_models
+        assert "voyage-4-large" not in func.multimodal_embedding_models
--- a/python/src/arrow.rs
+++ b/python/src/arrow.rs
@@ -10,8 +10,7 @@ use arrow::{
 use futures::stream::StreamExt;
 use lancedb::arrow::SendableRecordBatchStream;
 use pyo3::{
-    exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult,
-    Python,
+    exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python,
 };
 use pyo3_async_runtimes::tokio::future_into_py;

@@ -36,8 +35,11 @@ impl RecordBatchStream {
 #[pymethods]
 impl RecordBatchStream {
    #[getter]
-    pub fn schema(&self, py: Python) -> PyResult<PyObject> {
-        (*self.schema).clone().into_pyarrow(py)
+    pub fn schema(&self, py: Python) -> PyResult<Py<PyAny>> {
+        (*self.schema)
+            .clone()
+            .into_pyarrow(py)
+            .map(|obj| obj.unbind())
    }

    pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> {
@@ -53,7 +55,12 @@ impl RecordBatchStream {
                .next()
                .await
                .ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
-            Python::with_gil(|py| inner_next.infer_error()?.to_pyarrow(py))
+            Python::attach(|py| {
+                inner_next
+                    .infer_error()?
+                    .to_pyarrow(py)
+                    .map(|obj| obj.unbind())
+            })
        })
    }
 }
--- a/python/src/connection.rs
+++ b/python/src/connection.rs
@@ -12,7 +12,7 @@ use pyo3::{
    exceptions::{PyRuntimeError, PyValueError},
    pyclass, pyfunction, pymethods,
    types::{PyDict, PyDictMethods},
-    Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
+    Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
 };
 use pyo3_async_runtimes::tokio::future_into_py;

@@ -114,7 +114,7 @@ impl Connection {
        data: Bound<'_, PyAny>,
        namespace: Vec<String>,
        storage_options: Option<HashMap<String, String>>,
-        storage_options_provider: Option<PyObject>,
+        storage_options_provider: Option<Py<PyAny>>,
        location: Option<String>,
    ) -> PyResult<Bound<'a, PyAny>> {
        let inner = self_.get_inner()?.clone();
@@ -152,7 +152,7 @@ impl Connection {
        schema: Bound<'_, PyAny>,
        namespace: Vec<String>,
        storage_options: Option<HashMap<String, String>>,
-        storage_options_provider: Option<PyObject>,
+        storage_options_provider: Option<Py<PyAny>>,
        location: Option<String>,
    ) -> PyResult<Bound<'a, PyAny>> {
        let inner = self_.get_inner()?.clone();
@@ -187,7 +187,7 @@ impl Connection {
        name: String,
        namespace: Vec<String>,
        storage_options: Option<HashMap<String, String>>,
-        storage_options_provider: Option<PyObject>,
+        storage_options_provider: Option<Py<PyAny>>,
        index_cache_size: Option<u32>,
        location: Option<String>,
    ) -> PyResult<Bound<'_, PyAny>> {
@@ -304,9 +304,10 @@ impl Connection {
                },
                page_token,
                limit: limit.map(|l| l as i32),
+                ..Default::default()
            };
            let response = inner.list_namespaces(request).await.infer_error()?;
-            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
+            Python::attach(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("namespaces", response.namespaces)?;
                dict.set_item("page_token", response.page_token)?;
@@ -325,11 +326,12 @@ impl Connection {
        let inner = self_.get_inner()?.clone();
        let py = self_.py();
        future_into_py(py, async move {
-            use lance_namespace::models::{create_namespace_request, CreateNamespaceRequest};
-            let mode_enum = mode.and_then(|m| match m.to_lowercase().as_str() {
-                "create" => Some(create_namespace_request::Mode::Create),
-                "exist_ok" => Some(create_namespace_request::Mode::ExistOk),
-                "overwrite" => Some(create_namespace_request::Mode::Overwrite),
+            use lance_namespace::models::CreateNamespaceRequest;
+            // The request carries the mode as a string; unrecognized values become None.
+            let mode_str = mode.and_then(|m| match m.to_lowercase().as_str() {
+                "create" => Some("Create".to_string()),
+                "exist_ok" => Some("ExistOk".to_string()),
+                "overwrite" => Some("Overwrite".to_string()),
                _ => None,
            });
            let request = CreateNamespaceRequest {
@@ -338,11 +340,12 @@ impl Connection {
                } else {
                    Some(namespace)
                },
-                mode: mode_enum,
+                mode: mode_str,
                properties,
+                ..Default::default()
            };
            let response = inner.create_namespace(request).await.infer_error()?;
-            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
+            Python::attach(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("properties", response.properties)?;
                Ok(dict.unbind())
@@ -360,15 +363,16 @@ impl Connection {
        let inner = self_.get_inner()?.clone();
        let py = self_.py();
        future_into_py(py, async move {
-            use lance_namespace::models::{drop_namespace_request, DropNamespaceRequest};
-            let mode_enum = mode.and_then(|m| match m.to_uppercase().as_str() {
-                "SKIP" => Some(drop_namespace_request::Mode::Skip),
-                "FAIL" => Some(drop_namespace_request::Mode::Fail),
+            use lance_namespace::models::DropNamespaceRequest;
+            // The request carries mode and behavior as strings; unrecognized values become None.
+            let mode_str = mode.and_then(|m| match m.to_uppercase().as_str() {
+                "SKIP" => Some("Skip".to_string()),
+                "FAIL" => Some("Fail".to_string()),
                _ => None,
            });
-            let behavior_enum = behavior.and_then(|b| match b.to_uppercase().as_str() {
-                "RESTRICT" => Some(drop_namespace_request::Behavior::Restrict),
-                "CASCADE" => Some(drop_namespace_request::Behavior::Cascade),
+            let behavior_str = behavior.and_then(|b| match b.to_uppercase().as_str() {
+                "RESTRICT" => Some("Restrict".to_string()),
+                "CASCADE" => Some("Cascade".to_string()),
                _ => None,
            });
            let request = DropNamespaceRequest {
@@ -377,11 +381,12 @@ impl Connection {
                } else {
                    Some(namespace)
                },
-                mode: mode_enum,
-                behavior: behavior_enum,
+                mode: mode_str,
+                behavior: behavior_str,
+                ..Default::default()
            };
            let response = inner.drop_namespace(request).await.infer_error()?;
-            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
+            Python::attach(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("properties", response.properties)?;
                dict.set_item("transaction_id", response.transaction_id)?;
@@ -405,9 +410,10 @@ impl Connection {
                } else {
                    Some(namespace)
                },
+                ..Default::default()
            };
            let response = inner.describe_namespace(request).await.infer_error()?;
-            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
+            Python::attach(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("properties", response.properties)?;
                Ok(dict.unbind())
@@ -434,9 +440,10 @@ impl Connection {
                },
                page_token,
                limit: limit.map(|l| l as i32),
+                ..Default::default()
            };
            let response = inner.list_tables(request).await.infer_error()?;
-            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
+            Python::attach(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("tables", response.tables)?;
                dict.set_item("page_token", response.page_token)?;
--- a/python/src/error.rs
+++ b/python/src/error.rs
@@ -40,7 +40,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
                    request_id,
                    source,
                    status_code,
-                } => Python::with_gil(|py| {
+                } => Python::attach(|py| {
                    let message = err.to_string();
                    let http_err_cls = py
                        .import(intern!(py, "lancedb.remote.errors"))?
@@ -75,7 +75,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
                    max_read_failures,
                    source,
                    status_code,
-                } => Python::with_gil(|py| {
+                } => Python::attach(|py| {
                    let cause_err = http_from_rust_error(
                        py,
                        source.as_ref(),
--- a/python/src/header.rs
+++ b/python/src/header.rs
@@ -12,7 +12,7 @@ pub struct PyHeaderProvider {

 impl Clone for PyHeaderProvider {
    fn clone(&self) -> Self {
-        Python::with_gil(|py| Self {
+        Python::attach(|py| Self {
            provider: self.provider.clone_ref(py),
        })
    }
@@ -25,7 +25,7 @@ impl PyHeaderProvider {

    /// Get headers from the Python provider (internal implementation)
    fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
-        Python::with_gil(|py| {
+        Python::attach(|py| {
            // Call the get_headers method
            let result = self.provider.call_method0(py, "get_headers");

--- a/python/src/permutation.rs
+++ b/python/src/permutation.rs
@@ -281,7 +281,7 @@ impl PyPermutationReader {
        let reader = slf.reader.clone();
        future_into_py(slf.py(), async move {
            let schema = reader.output_schema(selection).await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
+            Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
        })
    }

--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -453,7 +453,7 @@ impl Query {
        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.output_schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
+            Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
        })
    }

@@ -532,7 +532,7 @@ impl TakeQuery {
        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.output_schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
+            Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
        })
    }

@@ -627,7 +627,7 @@ impl FTSQuery {
        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.output_schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
+            Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
        })
    }

@@ -806,7 +806,7 @@ impl VectorQuery {
        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.output_schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
+            Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
        })
    }

--- a/python/src/storage_options.rs
+++ b/python/src/storage_options.rs
@@ -17,20 +17,20 @@ use pyo3::types::PyDict;
 /// Internal wrapper around a Python object implementing StorageOptionsProvider
 pub struct PyStorageOptionsProvider {
    /// The Python object implementing fetch_storage_options()
-    inner: PyObject,
+    inner: Py<PyAny>,
 }

 impl Clone for PyStorageOptionsProvider {
    fn clone(&self) -> Self {
-        Python::with_gil(|py| Self {
+        Python::attach(|py| Self {
            inner: self.inner.clone_ref(py),
        })
    }
 }

 impl PyStorageOptionsProvider {
-    pub fn new(obj: PyObject) -> PyResult<Self> {
-        Python::with_gil(|py| {
+    pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
+        Python::attach(|py| {
            // Verify the object has a fetch_storage_options method
            if !obj.bind(py).hasattr("fetch_storage_options")? {
                return Err(pyo3::exceptions::PyTypeError::new_err(
@@ -60,7 +60,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
        let py_provider = self.py_provider.clone();

        tokio::task::spawn_blocking(move || {
-            Python::with_gil(|py| {
+            Python::attach(|py| {
                // Call the Python fetch_storage_options method
                let result = py_provider
                    .inner
@@ -119,7 +119,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
    }

    fn provider_id(&self) -> String {
-        Python::with_gil(|py| {
+        Python::attach(|py| {
            // Call provider_id() method on the Python object
            let obj = self.py_provider.inner.bind(py);
            obj.call_method0("provider_id")
@@ -143,7 +143,7 @@ impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
 /// This is the main entry point for converting Python StorageOptionsProvider objects
 /// to Rust trait objects that can be used by the Lance ecosystem.
 pub fn py_object_to_storage_options_provider(
-    py_obj: PyObject,
+    py_obj: Py<PyAny>,
 ) -> PyResult<Arc<dyn StorageOptionsProvider>> {
    let py_provider = PyStorageOptionsProvider::new(py_obj)?;
    Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider)))
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -287,7 +287,7 @@ impl Table {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
+            Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
        })
    }

@@ -437,7 +437,7 @@ impl Table {
        future_into_py(self_.py(), async move {
            let stats = inner.index_stats(&index_name).await.infer_error()?;
            if let Some(stats) = stats {
-                Python::with_gil(|py| {
+                Python::attach(|py| {
                    let dict = PyDict::new(py);
                    dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
                    dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
@@ -467,7 +467,7 @@ impl Table {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
            let stats = inner.stats().await.infer_error()?;
-            Python::with_gil(|py| {
+            Python::attach(|py| {
                let dict = PyDict::new(py);
                dict.set_item("total_bytes", stats.total_bytes)?;
                dict.set_item("num_rows", stats.num_rows)?;
@@ -497,6 +497,25 @@ impl Table {
        })
    }

+    /// Returns an awaitable that resolves to the result of the inner table's `uri()`.
+    ///
+    /// Fails up front if `inner_ref()` returns an error; an error from the async
+    /// call is converted with `infer_error()`.
+    pub fn uri(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
+        let table = self_.inner_ref()?.clone();
+        let py = self_.py();
+        future_into_py(py, async move {
+            let uri = table.uri().await;
+            uri.infer_error()
+        })
+    }
+
+    /// Returns an awaitable that resolves to whatever the inner table's
+    /// `initial_storage_options()` produces.
+    ///
+    /// Fails up front if `inner_ref()` returns an error; the async call itself is
+    /// wrapped in `Ok` unconditionally.
+    pub fn initial_storage_options(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
+        let table = self_.inner_ref()?.clone();
+        let py = self_.py();
+        future_into_py(py, async move {
+            let options = table.initial_storage_options().await;
+            Ok(options)
+        })
+    }
+
+    /// Returns an awaitable that resolves to the result of the inner table's
+    /// `latest_storage_options()`.
+    ///
+    /// Fails up front if `inner_ref()` returns an error; an error from the async
+    /// call is converted with `infer_error()`.
+    pub fn latest_storage_options(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
+        let table = self_.inner_ref()?.clone();
+        let py = self_.py();
+        future_into_py(py, async move {
+            let options = table.latest_storage_options().await;
+            options.infer_error()
+        })
+    }
+
    pub fn __repr__(&self) -> String {
        match &self.inner {
            None => format!("ClosedTable({})", self.name),
@@ -516,7 +535,7 @@ impl Table {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
            let versions = inner.list_versions().await.infer_error()?;
-            let versions_as_dict = Python::with_gil(|py| {
+            let versions_as_dict = Python::attach(|py| {
                versions
                    .iter()
                    .map(|v| {
@@ -867,7 +886,7 @@ impl Tags {
            let tags = inner.tags().await.infer_error()?;
            let res = tags.list().await.infer_error()?;

-            Python::with_gil(|py| {
+            Python::attach(|py| {
                let py_dict = PyDict::new(py);
                for (key, contents) in res {
                    let value_dict = PyDict::new(py);
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.23.0-beta.2"
+version = "0.25.0-beta.0"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -25,6 +25,7 @@ datafusion-catalog.workspace = true
 datafusion-common.workspace = true
 datafusion-execution.workspace = true
 datafusion-expr.workspace = true
+datafusion-physical-expr.workspace = true
 datafusion-physical-plan.workspace = true
 datafusion.workspace = true
 object_store = { workspace = true }
@@ -104,13 +105,18 @@ test-log = "0.2"


 [features]
-default = ["aws", "gcs", "azure", "dynamodb", "oss"]
+default = []
 aws = ["lance/aws", "lance-io/aws", "lance-namespace-impls/dir-aws"]
 oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"]
 gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"]
 azure = ["lance/azure", "lance-io/azure", "lance-namespace-impls/dir-azure"]
+huggingface = [
+    "lance/huggingface",
+    "lance-io/huggingface",
+    "lance-namespace-impls/dir-huggingface",
+]
 dynamodb = ["lance/dynamodb", "aws"]
-remote = ["dep:reqwest", "dep:http", "lance-namespace-impls/rest"]
+remote = ["dep:reqwest", "dep:http", "lance-namespace-impls/rest", "lance-namespace-impls/rest-adapter"]
 fp16kernels = ["lance-linalg/fp16kernels"]
 s3-test = []
 bedrock = ["dep:aws-sdk-bedrockruntime"]
@@ -148,3 +154,6 @@ name = "ivf_pq"
 [[example]]
 name = "hybrid_search"
 required-features = ["sentence-transformers"]
+
+[package.metadata.docs.rs]
+all-features = true
--- a/rust/lancedb/src/connection.rs
+++ b/rust/lancedb/src/connection.rs
@@ -36,10 +36,42 @@ use crate::remote::{
 };
 use crate::table::{TableDefinition, WriteOptions};
 use crate::Table;
+use lance::io::ObjectStoreParams;
 pub use lance_encoding::version::LanceFileVersion;
 #[cfg(feature = "remote")]
 use lance_io::object_store::StorageOptions;
-use lance_io::object_store::StorageOptionsProvider;
+use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
+
+/// Overlay `pairs` onto the storage options currently attached to `store_params`.
+///
+/// The existing options (or an empty map when there are none) are copied, every
+/// pair is inserted on top of them, and the accessor on `store_params` is
+/// replaced with one built from the merged map. If the previous accessor exposed
+/// a provider, that provider is carried over to the new accessor.
+fn merge_storage_options(
+    store_params: &mut ObjectStoreParams,
+    pairs: impl IntoIterator<Item = (String, String)>,
+) {
+    let mut merged = store_params.storage_options().cloned().unwrap_or_default();
+    // Later pairs win over earlier ones and over the pre-existing entries.
+    merged.extend(pairs);
+
+    let existing_provider = store_params
+        .storage_options_accessor
+        .as_ref()
+        .and_then(|current| current.provider().cloned());
+    let rebuilt = match existing_provider {
+        Some(provider) => StorageOptionsAccessor::with_initial_and_provider(merged, provider),
+        None => StorageOptionsAccessor::with_static_options(merged),
+    };
+    store_params.storage_options_accessor = Some(Arc::new(rebuilt));
+}
+
+/// Attach `provider` to `store_params`, keeping any storage options already set.
+///
+/// When `store_params` currently reports storage options, they are copied and
+/// passed to the new accessor as its initial options; otherwise the accessor is
+/// built from the provider alone. The previous accessor is replaced.
+fn set_storage_options_provider(
+    store_params: &mut ObjectStoreParams,
+    provider: Arc<dyn StorageOptionsProvider>,
+) {
+    let initial = store_params.storage_options().cloned();
+    let rebuilt = if let Some(initial) = initial {
+        StorageOptionsAccessor::with_initial_and_provider(initial, provider)
+    } else {
+        StorageOptionsAccessor::with_provider(provider)
+    };
+    store_params.storage_options_accessor = Some(Arc::new(rebuilt));
+}

 /// A builder for configuring a [`Connection::table_names`] operation
 pub struct TableNamesBuilder {
@@ -219,8 +251,36 @@ impl CreateTableBuilder<false> {
    /// Execute the create table operation
    pub async fn execute(self) -> Result<Table> {
        let parent = self.parent.clone();
-        let table = parent.create_table(self.request).await?;
-        Ok(Table::new(table, parent))
+        let embedding_registry = self.embedding_registry.clone();
+        let request = self.into_request()?;
+        Ok(Table::new_with_embedding_registry(
+            parent.create_table(request).await?,
+            parent,
+            embedding_registry,
+        ))
+    }
+
+    /// Consume the builder and produce the [`CreateTableRequest`] to execute.
+    ///
+    /// With no embeddings registered, the stored request is returned unchanged.
+    /// Otherwise a zero-row batch with the requested schema is wrapped in
+    /// `WithEmbeddings`, and the table definition it reports replaces the one in
+    /// the request.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error returned by `WithEmbeddings::table_definition`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the request data is not `CreateTableData::Empty`, which this
+    /// builder specialisation treats as an invariant violation.
+    fn into_request(self) -> Result<CreateTableRequest> {
+        if self.embeddings.is_empty() {
+            return Ok(self.request);
+        }
+
+        let CreateTableData::Empty(table_def) = self.request.data else {
+            unreachable!("CreateTableBuilder<false> should always have Empty data")
+        };
+
+        let schema = table_def.schema.clone();
+        let empty_batch = arrow_array::RecordBatch::new_empty(schema.clone());
+
+        // One zero-row batch is enough to drive the reader; no boxing or
+        // intermediate collect is needed to build the iterator.
+        let reader =
+            arrow_array::RecordBatchIterator::new(vec![Ok(empty_batch)].into_iter(), schema);
+        let with_embeddings = WithEmbeddings::new(reader, self.embeddings);
+        let table_definition = with_embeddings.table_definition()?;
+
+        Ok(CreateTableRequest {
+            data: CreateTableData::Empty(table_definition),
+            ..self.request
+        })
+    }
 }

@@ -246,16 +306,14 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
    ///
    /// See available options at <https://lancedb.com/docs/storage/>
    pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
-        let store_options = self
+        let store_params = self
            .request
            .write_options
            .lance_write_params
            .get_or_insert(Default::default())
            .store_params
-            .get_or_insert(Default::default())
-            .storage_options
            .get_or_insert(Default::default());
-        store_options.insert(key.into(), value.into());
+        merge_storage_options(store_params, [(key.into(), value.into())]);
        self
    }

@@ -269,19 +327,17 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
        mut self,
        pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
    ) -> Self {
-        let store_options = self
+        let store_params = self
            .request
            .write_options
            .lance_write_params
            .get_or_insert(Default::default())
            .store_params
-            .get_or_insert(Default::default())
-            .storage_options
            .get_or_insert(Default::default());
-
-        for (key, value) in pairs {
-            store_options.insert(key.into(), value.into());
-        }
+        let updates = pairs
+            .into_iter()
+            .map(|(key, value)| (key.into(), value.into()));
+        merge_storage_options(store_params, updates);
        self
    }

@@ -318,23 +374,21 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
    /// This has no effect in LanceDB Cloud.
    #[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
    pub fn enable_v2_manifest_paths(mut self, use_v2_manifest_paths: bool) -> Self {
-        let storage_options = self
+        let store_params = self
            .request
            .write_options
            .lance_write_params
            .get_or_insert_with(Default::default)
            .store_params
-            .get_or_insert_with(Default::default)
-            .storage_options
            .get_or_insert_with(Default::default);
-
-        storage_options.insert(
-            OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(),
-            if use_v2_manifest_paths {
-                "true".to_string()
-            } else {
-                "false".to_string()
-            },
+        let value = if use_v2_manifest_paths {
+            "true".to_string()
+        } else {
+            "false".to_string()
+        };
+        merge_storage_options(
+            store_params,
+            [(OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(), value)],
        );
        self
    }
@@ -344,19 +398,19 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
    /// The default is `LanceFileVersion::Stable`.
    #[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
    pub fn data_storage_version(mut self, data_storage_version: LanceFileVersion) -> Self {
-        let storage_options = self
+        let store_params = self
            .request
            .write_options
            .lance_write_params
            .get_or_insert_with(Default::default)
            .store_params
-            .get_or_insert_with(Default::default)
-            .storage_options
            .get_or_insert_with(Default::default);
-
-        storage_options.insert(
-            OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
-            data_storage_version.to_string(),
+        merge_storage_options(
+            store_params,
+            [(
+                OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
+                data_storage_version.to_string(),
+            )],
        );
        self
    }
@@ -381,13 +435,14 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
    /// This allows tables to automatically refresh cloud storage credentials
    /// when they expire, enabling long-running operations on remote storage.
    pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
-        self.request
+        let store_params = self
+            .request
            .write_options
            .lance_write_params
            .get_or_insert(Default::default())
            .store_params
-            .get_or_insert(Default::default())
-            .storage_options_provider = Some(provider);
+            .get_or_insert(Default::default());
+        set_storage_options_provider(store_params, provider);
        self
    }
 }
@@ -450,15 +505,13 @@ impl OpenTableBuilder {
    ///
    /// See available options at <https://lancedb.com/docs/storage/>
    pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
-        let storage_options = self
+        let store_params = self
            .request
            .lance_read_params
            .get_or_insert(Default::default())
            .store_options
-            .get_or_insert(Default::default())
-            .storage_options
            .get_or_insert(Default::default());
-        storage_options.insert(key.into(), value.into());
+        merge_storage_options(store_params, [(key.into(), value.into())]);
        self
    }

@@ -472,18 +525,16 @@ impl OpenTableBuilder {
        mut self,
        pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
    ) -> Self {
-        let storage_options = self
+        let store_params = self
            .request
            .lance_read_params
            .get_or_insert(Default::default())
            .store_options
-            .get_or_insert(Default::default())
-            .storage_options
            .get_or_insert(Default::default());
-
-        for (key, value) in pairs {
-            storage_options.insert(key.into(), value.into());
-        }
+        let updates = pairs
+            .into_iter()
+            .map(|(key, value)| (key.into(), value.into()));
+        merge_storage_options(store_params, updates);
        self
    }

@@ -507,12 +558,13 @@ impl OpenTableBuilder {
    /// This allows tables to automatically refresh cloud storage credentials
    /// when they expire, enabling long-running operations on remote storage.
    pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
-        self.request
+        let store_params = self
+            .request
            .lance_read_params
            .get_or_insert(Default::default())
            .store_options
-            .get_or_insert(Default::default())
-            .storage_options_provider = Some(provider);
+            .get_or_insert(Default::default());
+        set_storage_options_provider(store_params, provider);
        self
    }

@@ -804,6 +856,14 @@ impl Connection {
        self.internal.describe_namespace(request).await
    }

+    /// Get the equivalent namespace client in the database of this connection.
+    ///
+    /// This delegates to the connection's underlying database. Which namespace
+    /// is returned depends on the database kind:
+    ///
+    /// - For LanceNamespaceDatabase, it is the underlying LanceNamespace.
+    /// - For ListingDatabase, it is the equivalent DirectoryNamespace.
+    /// - For RemoteDatabase, it is the equivalent RestNamespace.
+    pub async fn namespace_client(&self) -> Result<Arc<dyn lance_namespace::LanceNamespace>> {
+        self.internal.namespace_client().await
+    }
+
    /// List tables with pagination support
    pub async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
        self.internal.list_tables(request).await
@@ -860,6 +920,10 @@ pub struct ConnectBuilder {
    embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
 }

+/// `(environment variable, option key)` pairs used to default remote connection
+/// options from the process environment.
+///
+/// `ConnectBuilder::execute_remote` passes this table to
+/// `ConnectBuilder::apply_env_defaults` before parsing the remote options.
+#[cfg(feature = "remote")]
+const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 1] =
+    [("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name")];
+
 impl ConnectBuilder {
    /// Create a new [`ConnectOptions`] with the given database URI.
    pub fn new(uri: &str) -> Self {
@@ -1043,11 +1107,27 @@ impl ConnectBuilder {
        self
    }

+    /// Fill in options that were not set explicitly, using environment variables.
+    ///
+    /// Each element of `env_var_to_remote_storage_option` is an
+    /// `(environment variable, option key)` pair. When the variable can be read
+    /// (it is set and holds valid Unicode) and `options` has no entry for the
+    /// option key, the variable's value is inserted under that key. Entries that
+    /// already exist in `options` are never overwritten.
+    #[cfg(feature = "remote")]
+    fn apply_env_defaults(
+        env_var_to_remote_storage_option: &[(&str, &str)],
+        options: &mut HashMap<String, String>,
+    ) {
+        for (env_key, opt_key) in env_var_to_remote_storage_option {
+            if let Ok(env_value) = std::env::var(env_key) {
+                // The entry API keeps an explicit value and avoids a second lookup.
+                options.entry((*opt_key).to_string()).or_insert(env_value);
+            }
+        }
+    }
+
    #[cfg(feature = "remote")]
    fn execute_remote(self) -> Result<Connection> {
        use crate::remote::db::RemoteDatabaseOptions;

-        let options = RemoteDatabaseOptions::parse_from_map(&self.request.options)?;
+        let mut merged_options = self.request.options.clone();
+        Self::apply_env_defaults(&ENV_VARS_TO_STORAGE_OPTS, &mut merged_options);
+        let options = RemoteDatabaseOptions::parse_from_map(&merged_options)?;

        let region = options.region.ok_or_else(|| Error::InvalidInput {
            message: "A region is required when connecting to LanceDb Cloud".to_string(),
@@ -1269,8 +1349,6 @@ mod test_utils {

 #[cfg(test)]
 mod tests {
-    use std::fs::create_dir_all;
-
    use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
    use crate::query::QueryBase;
    use crate::query::{ExecutableQuery, QueryExecutionOptions};
@@ -1294,6 +1372,23 @@ mod tests {
        assert_eq!(tc.connection.uri(), tc.uri);
    }

+    // Checks that `apply_env_defaults` fills a missing option from the environment
+    // and leaves an explicitly supplied option untouched.
+    #[cfg(feature = "remote")]
+    #[test]
+    fn test_apply_env_defaults() {
+        let env_key = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_KEY";
+        let env_val = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_VAL";
+        let opts_key = "test_apply_env_defaults_environment_variable_opts_key";
+        std::env::set_var(env_key, env_val);
+
+        // Missing option: the value comes from the environment variable.
+        let mut options = HashMap::new();
+        ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
+        assert_eq!(Some(&env_val.to_string()), options.get(opts_key));
+
+        // Explicit option: the environment variable must not override it.
+        options.insert(opts_key.to_string(), "EXPLICIT-VALUE".to_string());
+        ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
+        assert_eq!(Some(&"EXPLICIT-VALUE".to_string()), options.get(opts_key));
+
+        // The environment is process-wide and shared with other tests in this
+        // binary, so do not leave the variable behind.
+        std::env::remove_var(env_key);
+    }
+
    #[cfg(not(windows))]
    #[tokio::test]
    async fn test_connect_relative() {
@@ -1317,25 +1412,27 @@ mod tests {

    #[tokio::test]
    async fn test_table_names() {
-        let tmp_dir = tempdir().unwrap();
+        let tc = new_test_connection().await.unwrap();
+        let db = tc.connection;
+        let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
        let mut names = Vec::with_capacity(100);
        for _ in 0..100 {
-            let mut name = uuid::Uuid::new_v4().to_string();
+            let name = uuid::Uuid::new_v4().to_string();
            names.push(name.clone());
-            name.push_str(".lance");
-            create_dir_all(tmp_dir.path().join(&name)).unwrap();
+            db.create_empty_table(name, schema.clone())
+                .execute()
+                .await
+                .unwrap();
        }
        names.sort();
-
-        let uri = tmp_dir.path().to_str().unwrap();
-        let db = connect(uri).execute().await.unwrap();
-        let tables = db.table_names().execute().await.unwrap();
+        let tables = db.table_names().limit(100).execute().await.unwrap();

        assert_eq!(tables, names);

        let tables = db
            .table_names()
            .start_after(&names[30])
+            .limit(100)
            .execute()
            .await
            .unwrap();
@@ -1516,18 +1613,27 @@ mod tests {

    #[tokio::test]
    async fn drop_table() {
-        let tmp_dir = tempdir().unwrap();
+        let tc = new_test_connection().await.unwrap();
+        let db = tc.connection;

-        let uri = tmp_dir.path().to_str().unwrap();
-        let db = connect(uri).execute().await.unwrap();
+        if tc.is_remote {
+            // All the typical endpoints such as s3:///, file-object-store:///, etc. treat drop_table
+            // as idempotent.
+            assert!(db.drop_table("invalid_table", &[]).await.is_ok());
+        } else {
+            // The behavior of drop_table when using a file:/// endpoint differs from all other
+            // object providers, in that it returns an error when deleting a non-existent table.
+            assert!(matches!(
+                db.drop_table("invalid_table", &[]).await,
+                Err(crate::Error::TableNotFound { .. }),
+            ));
+        }

-        // drop non-exist table
-        assert!(matches!(
-            db.drop_table("invalid_table", &[]).await,
-            Err(crate::Error::TableNotFound { .. }),
-        ));
-
-        create_dir_all(tmp_dir.path().join("table1.lance")).unwrap();
+        let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
+        db.create_empty_table("table1", schema.clone())
+            .execute()
+            .await
+            .unwrap();
        db.drop_table("table1", &[]).await.unwrap();

        let tables = db.table_names().execute().await.unwrap();
@@ -1614,4 +1720,128 @@ mod tests {
        let cloned_count = cloned_table.count_rows(None).await.unwrap();
        assert_eq!(source_count, cloned_count);
    }
+
+    // Creates an empty table with an embedding definition attached, then checks
+    // that the embedding column is part of the table schema and is present in
+    // the rows returned after a later `add`.
+    #[tokio::test]
+    async fn test_create_empty_table_with_embeddings() {
+        use crate::embeddings::{EmbeddingDefinition, EmbeddingFunction};
+        use arrow_array::{
+            Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator, StringArray,
+        };
+        use std::borrow::Cow;
+
+        // Test double: turns every source value into a `dim`-wide vector of 1.0.
+        #[derive(Debug, Clone)]
+        struct MockEmbedding {
+            dim: usize,
+        }
+
+        impl EmbeddingFunction for MockEmbedding {
+            fn name(&self) -> &str {
+                "test_embedding"
+            }
+
+            fn source_type(&self) -> Result<Cow<'_, DataType>> {
+                Ok(Cow::Owned(DataType::Utf8))
+            }
+
+            fn dest_type(&self) -> Result<Cow<'_, DataType>> {
+                Ok(Cow::Owned(DataType::new_fixed_size_list(
+                    DataType::Float32,
+                    self.dim as i32,
+                    true,
+                )))
+            }
+
+            fn compute_source_embeddings(&self, source: Arc<dyn Array>) -> Result<Arc<dyn Array>> {
+                // One fixed-size list of `dim` floats per input row.
+                let len = source.len();
+                let values = vec![1.0f32; len * self.dim];
+                let values = Arc::new(Float32Array::from(values));
+                let field = Arc::new(Field::new("item", DataType::Float32, true));
+                Ok(Arc::new(FixedSizeListArray::new(
+                    field,
+                    self.dim as i32,
+                    values,
+                    None,
+                )))
+            }
+
+            // Query embeddings are not exercised by this test.
+            fn compute_query_embeddings(&self, _input: Arc<dyn Array>) -> Result<Arc<dyn Array>> {
+                unimplemented!()
+            }
+        }
+
+        let tmp_dir = tempdir().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();
+        let db = connect(uri).execute().await.unwrap();
+
+        // Register the mock under the name the embedding definition refers to.
+        let embed_func = Arc::new(MockEmbedding { dim: 128 });
+        db.embedding_registry()
+            .register("test_embedding", embed_func.clone())
+            .unwrap();
+
+        let schema = Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, true)]));
+        let ed = EmbeddingDefinition {
+            source_column: "name".to_owned(),
+            dest_column: Some("name_embedding".to_owned()),
+            embedding_name: "test_embedding".to_owned(),
+        };
+
+        // Create the table with a schema that only has the source column.
+        let table = db
+            .create_empty_table("test", schema)
+            .mode(CreateTableMode::Overwrite)
+            .add_embedding(ed)
+            .unwrap()
+            .execute()
+            .await
+            .unwrap();
+
+        // The stored schema must contain both the source and the embedding column.
+        let table_schema = table.schema().await.unwrap();
+        assert!(table_schema.column_with_name("name").is_some());
+        assert!(table_schema.column_with_name("name_embedding").is_some());
+
+        let embedding_field = table_schema.field_with_name("name_embedding").unwrap();
+        assert_eq!(
+            embedding_field.data_type(),
+            &DataType::new_fixed_size_list(DataType::Float32, 128, true)
+        );
+
+        // Add rows that carry only the source column.
+        let input_schema = Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, true)]));
+        let input_batch = RecordBatch::try_new(
+            input_schema.clone(),
+            vec![Arc::new(StringArray::from(vec![
+                Some("Alice"),
+                Some("Bob"),
+                Some("Charlie"),
+            ]))],
+        )
+        .unwrap();
+
+        let input_reader = Box::new(RecordBatchIterator::new(
+            vec![Ok(input_batch)].into_iter(),
+            input_schema,
+        ));
+
+        table.add(input_reader).execute().await.unwrap();
+
+        let results = table
+            .query()
+            .execute()
+            .await
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+
+        // The query result must include an embedding value for each added row.
+        assert_eq!(results.len(), 1);
+        let batch = &results[0];
+        assert_eq!(batch.num_rows(), 3);
+        assert!(batch.column_by_name("name_embedding").is_some());
+
+        let embedding_col = batch
+            .column_by_name("name_embedding")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<FixedSizeListArray>()
+            .unwrap();
+        assert_eq!(embedding_col.len(), 3);
+    }
 }
--- a/rust/lancedb/src/database.rs
+++ b/rust/lancedb/src/database.rs
@@ -296,4 +296,10 @@ pub trait Database:
    /// Drop all tables in the database
    async fn drop_all_tables(&self, namespace: &[String]) -> Result<()>;
    fn as_any(&self) -> &dyn std::any::Any;
+
+    /// Get the equivalent namespace client of this database.
+    ///
+    /// - For LanceNamespaceDatabase, it is the underlying LanceNamespace.
+    /// - For ListingDatabase, it is the equivalent DirectoryNamespace.
+    /// - For RemoteDatabase, it is the equivalent RestNamespace.
+    async fn namespace_client(&self) -> Result<Arc<dyn LanceNamespace>>;
 }
--- a/rust/lancedb/src/database/listing.rs
+++ b/rust/lancedb/src/database/listing.rs
@@ -12,7 +12,7 @@ use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode};
 use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
 use lance_datafusion::utils::StreamingWriteSource;
 use lance_encoding::version::LanceFileVersion;
-use lance_io::object_store::StorageOptionsProvider;
+use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
 use lance_table::io::commit::commit_handler_from_url;
 use object_store::local::LocalFileSystem;
 use snafu::ResultExt;
@@ -356,7 +356,13 @@ impl ListingDatabase {
                    .clone()
                    .unwrap_or_else(|| Arc::new(lance::session::Session::default()));
                let os_params = ObjectStoreParams {
-                    storage_options: Some(options.storage_options.clone()),
+                    storage_options_accessor: if options.storage_options.is_empty() {
+                        None
+                    } else {
+                        Some(Arc::new(StorageOptionsAccessor::with_static_options(
+                            options.storage_options.clone(),
+                        )))
+                    },
                    ..Default::default()
                };
                let (object_store, base_path) = ObjectStore::from_uri_and_params(
@@ -463,9 +469,20 @@ impl ListingDatabase {
        validate_table_name(name)?;

        let mut uri = self.uri.clone();
-        // If the URI does not end with a slash, add one
-        if !uri.ends_with('/') {
-            uri.push('/');
+        // If the URI does not end with a path separator, add one
+        // Use forward slash for URIs (http://, s3://, gs://, file://, etc.)
+        // Use platform-specific separator for local paths without scheme
+        let has_scheme = uri.contains("://");
+        let ends_with_separator = uri.ends_with('/') || uri.ends_with('\\');
+
+        if !ends_with_separator {
+            if has_scheme {
+                // URIs always use forward slash
+                uri.push('/');
+            } else {
+                // Local path without scheme - use platform separator
+                uri.push(std::path::MAIN_SEPARATOR);
+            }
        }
        // Append the table name with the lance file extension
        uri.push_str(&format!("{}.{}", name, LANCE_FILE_EXTENSION));
@@ -481,7 +498,13 @@ impl ListingDatabase {

    async fn drop_tables(&self, names: Vec<String>) -> Result<()> {
        let object_store_params = ObjectStoreParams {
-            storage_options: Some(self.storage_options.clone()),
+            storage_options_accessor: if self.storage_options.is_empty() {
+                None
+            } else {
+                Some(Arc::new(StorageOptionsAccessor::with_static_options(
+                    self.storage_options.clone(),
+                )))
+            },
            ..Default::default()
        };
        let mut uri = self.uri.clone();
@@ -530,7 +553,7 @@ impl ListingDatabase {
            .lance_write_params
            .as_ref()
            .and_then(|p| p.store_params.as_ref())
-            .and_then(|sp| sp.storage_options.as_ref());
+            .and_then(|sp| sp.storage_options());

        let storage_version_override = storage_options
            .and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION))
@@ -581,21 +604,20 @@ impl ListingDatabase {
        // will cause a new connection to be created, and that connection will
        // be dropped from the cache when python GCs the table object, which
        // confounds reuse across tables.
-        if !self.storage_options.is_empty() {
-            let storage_options = write_params
+        if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
+            let store_params = write_params
                .store_params
-                .get_or_insert_with(Default::default)
-                .storage_options
                .get_or_insert_with(Default::default);
-            self.inherit_storage_options(storage_options);
-        }
-
-        // Set storage options provider if available
-        if self.storage_options_provider.is_some() {
-            write_params
-                .store_params
-                .get_or_insert_with(Default::default)
-                .storage_options_provider = self.storage_options_provider.clone();
+            let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
+            if !self.storage_options.is_empty() {
+                self.inherit_storage_options(&mut storage_options);
+            }
+            let accessor = if let Some(ref provider) = self.storage_options_provider {
+                StorageOptionsAccessor::with_initial_and_provider(storage_options, provider.clone())
+            } else {
+                StorageOptionsAccessor::with_static_options(storage_options)
+            };
+            store_params.storage_options_accessor = Some(Arc::new(accessor));
        }

        write_params.data_storage_version = self
@@ -881,7 +903,13 @@ impl Database for ListingDatabase {
        validate_table_name(&request.target_table_name)?;

        let storage_params = ObjectStoreParams {
-            storage_options: Some(self.storage_options.clone()),
+            storage_options_accessor: if self.storage_options.is_empty() {
+                None
+            } else {
+                Some(Arc::new(StorageOptionsAccessor::with_static_options(
+                    self.storage_options.clone(),
+                )))
+            },
            ..Default::default()
        };
        let read_params = ReadParams {
@@ -945,25 +973,28 @@ impl Database for ListingDatabase {
        // will cause a new connection to be created, and that connection will
        // be dropped from the cache when python GCs the table object, which
        // confounds reuse across tables.
-        if !self.storage_options.is_empty() {
-            let storage_options = request
+        if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
+            let store_params = request
                .lance_read_params
                .get_or_insert_with(Default::default)
                .store_options
-                .get_or_insert_with(Default::default)
-                .storage_options
                .get_or_insert_with(Default::default);
-            self.inherit_storage_options(storage_options);
-        }
-
-        // Set storage options provider if available
-        if self.storage_options_provider.is_some() {
-            request
-                .lance_read_params
-                .get_or_insert_with(Default::default)
-                .store_options
-                .get_or_insert_with(Default::default)
-                .storage_options_provider = self.storage_options_provider.clone();
+            let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
+            if !self.storage_options.is_empty() {
+                self.inherit_storage_options(&mut storage_options);
+            }
+            // Preserve request-level provider if no connection-level provider exists
+            let request_provider = store_params
+                .storage_options_accessor
+                .as_ref()
+                .and_then(|a| a.provider().cloned());
+            let provider = self.storage_options_provider.clone().or(request_provider);
+            let accessor = if let Some(provider) = provider {
+                StorageOptionsAccessor::with_initial_and_provider(storage_options, provider)
+            } else {
+                StorageOptionsAccessor::with_static_options(storage_options)
+            };
+            store_params.storage_options_accessor = Some(Arc::new(accessor));
        }

        // Some ReadParams are exposed in the OpenTableBuilder, but we also
@@ -1043,6 +1074,24 @@ impl Database for ListingDatabase {
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
+
+    async fn namespace_client(&self) -> Result<Arc<dyn lance_namespace::LanceNamespace>> {
+        // Expose this listing database as a DirectoryNamespace rooted at the same URI
+        let mut builder = lance_namespace_impls::DirectoryNamespaceBuilder::new(&self.uri);
+
+        // Forward the connection's storage options only when some were configured
+        if !self.storage_options.is_empty() {
+            builder = builder.storage_options(self.storage_options.clone());
+        }
+
+        // Hand the builder this database's session instead of letting it use its own
+        builder = builder.session(self.session.clone());
+
+        let namespace = builder.build().await.map_err(|e| Error::Runtime {
+            message: format!("Failed to create namespace client: {}", e),
+        })?;
+        Ok(Arc::new(namespace) as Arc<dyn lance_namespace::LanceNamespace>)
+    }
 }

 #[cfg(test)]
@@ -1053,6 +1102,7 @@ mod tests {
    use crate::table::{Table, TableDefinition};
    use arrow_array::{Int32Array, RecordBatch, StringArray};
    use arrow_schema::{DataType, Field, Schema};
+    use std::path::PathBuf;
    use tempfile::tempdir;

    async fn setup_database() -> (tempfile::TempDir, ListingDatabase) {
@@ -1851,7 +1901,9 @@ mod tests {
        let write_options = WriteOptions {
            lance_write_params: Some(lance::dataset::WriteParams {
                store_params: Some(lance::io::ObjectStoreParams {
-                    storage_options: Some(storage_options),
+                    storage_options_accessor: Some(Arc::new(
+                        StorageOptionsAccessor::with_static_options(storage_options),
+                    )),
                    ..Default::default()
                }),
                ..Default::default()
@@ -1925,7 +1977,9 @@ mod tests {
        let write_options = WriteOptions {
            lance_write_params: Some(lance::dataset::WriteParams {
                store_params: Some(lance::io::ObjectStoreParams {
-                    storage_options: Some(storage_options),
+                    storage_options_accessor: Some(Arc::new(
+                        StorageOptionsAccessor::with_static_options(storage_options),
+                    )),
                    ..Default::default()
                }),
                ..Default::default()
@@ -2027,4 +2081,76 @@ mod tests {
        let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
        assert_eq!(db_options.new_table_config.enable_stable_row_ids, None);
    }
+
+    #[tokio::test]
+    async fn test_table_uri() {
+        let (_tempdir, db) = setup_database().await;
+
+        // Build the expectation through PathBuf so the platform separator is used
+        let expected_path = PathBuf::from(&db.uri).join("test.lance");
+        let expected = expected_path.to_str().unwrap();
+
+        // Unwrap the Result directly so a failure panics with the underlying error
+        let uri = db.table_uri("test").unwrap();
+        assert_eq!(uri, expected);
+    }
+
+    #[tokio::test]
+    async fn test_namespace_client() {
+        let (_tempdir, db) = setup_database().await;
+
+        // Create some tables first
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("name", DataType::Utf8, false),
+        ]));
+
+        db.create_table(CreateTableRequest {
+            name: "table1".to_string(),
+            namespace: vec![],
+            data: CreateTableData::Empty(TableDefinition::new_from_schema(schema.clone())),
+            mode: CreateTableMode::Create,
+            write_options: Default::default(),
+            location: None,
+            namespace_client: None,
+        })
+        .await
+        .unwrap();
+
+        db.create_table(CreateTableRequest {
+            name: "table2".to_string(),
+            namespace: vec![],
+            data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
+            mode: CreateTableMode::Create,
+            write_options: Default::default(),
+            location: None,
+            namespace_client: None,
+        })
+        .await
+        .unwrap();
+
+        // Get the namespace client; expect() keeps the error in the panic message
+        let namespace_client = db
+            .namespace_client()
+            .await
+            .expect("failed to create namespace client");
+
+        // Verify the namespace client can list the tables we created
+        // Use empty vec for root namespace
+        let list_response = namespace_client
+            .list_tables(lance_namespace::models::ListTablesRequest {
+                id: Some(vec![]),
+                ..Default::default()
+            })
+            .await
+            .expect("list_tables failed");
+
+        let mut tables = list_response.tables;
+        tables.sort();
+        assert_eq!(
+            tables,
+            vec!["table1".to_string(), "table2".to_string()],
+            "unexpected tables listed by the namespace client"
+        );
+    }
 }
--- a/rust/lancedb/src/database/namespace.rs
+++ b/rust/lancedb/src/database/namespace.rs
@@ -7,25 +7,25 @@ use std::collections::HashMap;
 use std::sync::Arc;

 use async_trait::async_trait;
-use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsProvider};
 use lance_namespace::{
    models::{
        CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
-        DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest,
-        DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest,
-        ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
+        DeclareTableRequest, DescribeNamespaceRequest, DescribeNamespaceResponse,
+        DescribeTableRequest, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest,
+        ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
    },
    LanceNamespace,
 };
 use lance_namespace_impls::ConnectBuilder;
+use log::warn;

-use crate::connection::ConnectRequest;
 use crate::database::ReadConsistency;
 use crate::error::{Error, Result};
+use crate::table::NativeTable;

 use super::{
-    listing::ListingDatabase, BaseTable, CloneTableRequest, CreateTableMode,
-    CreateTableRequest as DbCreateTableRequest, Database, OpenTableRequest, TableNamesRequest,
+    BaseTable, CloneTableRequest, CreateTableMode, CreateTableRequest as DbCreateTableRequest,
+    Database, OpenTableRequest, TableNamesRequest,
 };

 /// A database implementation that uses lance-namespace for table management
@@ -90,51 +90,6 @@ impl std::fmt::Display for LanceNamespaceDatabase {
    }
 }

-impl LanceNamespaceDatabase {
-    /// Create a temporary listing database for the given location
-    ///
-    /// Merges storage options with priority: connection < user < namespace
-    async fn create_listing_database(
-        &self,
-        location: &str,
-        table_id: Vec<String>,
-        user_storage_options: Option<&HashMap<String, String>>,
-        response_storage_options: Option<&HashMap<String, String>>,
-    ) -> Result<ListingDatabase> {
-        // Merge storage options: connection < user < namespace
-        let mut merged_storage_options = self.storage_options.clone();
-        if let Some(opts) = user_storage_options {
-            merged_storage_options.extend(opts.clone());
-        }
-        if let Some(opts) = response_storage_options {
-            merged_storage_options.extend(opts.clone());
-        }
-
-        let request = ConnectRequest {
-            uri: location.to_string(),
-            #[cfg(feature = "remote")]
-            client_config: Default::default(),
-            options: merged_storage_options,
-            read_consistency_interval: self.read_consistency_interval,
-            session: self.session.clone(),
-        };
-
-        let mut listing_db = ListingDatabase::connect_with_options(&request).await?;
-
-        // Create storage options provider only if namespace returned storage options
-        // (not just user-provided options)
-        if response_storage_options.is_some() {
-            let provider = Arc::new(LanceNamespaceStorageOptionsProvider::new(
-                self.namespace.clone(),
-                table_id,
-            )) as Arc<dyn StorageOptionsProvider>;
-            listing_db.storage_options_provider = Some(provider);
-        }
-
-        Ok(listing_db)
-    }
-}
-
 #[async_trait]
 impl Database for LanceNamespaceDatabase {
    fn uri(&self) -> &str {
@@ -183,6 +138,7 @@ impl Database for LanceNamespaceDatabase {
            id: Some(request.namespace),
            page_token: request.start_after,
            limit: request.limit.map(|l| l as i32),
+            ..Default::default()
        };

        let response = self.namespace.list_tables(ns_request).await?;
@@ -195,19 +151,11 @@ impl Database for LanceNamespaceDatabase {
    }

    async fn create_table(&self, request: DbCreateTableRequest) -> Result<Arc<dyn BaseTable>> {
-        // Extract user-provided storage options from request
-        let user_storage_options = request
-            .write_options
-            .lance_write_params
-            .as_ref()
-            .and_then(|lwp| lwp.store_params.as_ref())
-            .and_then(|sp| sp.storage_options.as_ref());
-
        let mut table_id = request.namespace.clone();
        table_id.push(request.name.clone());
        let describe_request = DescribeTableRequest {
            id: Some(table_id.clone()),
-            version: None,
+            ..Default::default()
        };

        let describe_result = self.namespace.describe_table(describe_request).await;
@@ -225,6 +173,7 @@ impl Database for LanceNamespaceDatabase {
                    // Drop the existing table - must succeed
                    let drop_request = DropTableRequest {
                        id: Some(table_id.clone()),
+                        ..Default::default()
                    };
                    self.namespace
                        .drop_table(drop_request)
@@ -235,34 +184,20 @@ impl Database for LanceNamespaceDatabase {
                }
            }
            CreateTableMode::ExistOk(_) => {
-                if let Ok(response) = describe_result {
-                    let location = response.location.ok_or_else(|| Error::Runtime {
-                        message: "Table location is missing from namespace response".to_string(),
-                    })?;
+                if describe_result.is_ok() {
+                    let native_table = NativeTable::open_from_namespace(
+                        self.namespace.clone(),
+                        &request.name,
+                        request.namespace.clone(),
+                        None,
+                        None,
+                        self.read_consistency_interval,
+                        self.server_side_query_enabled,
+                        self.session.clone(),
+                    )
+                    .await?;

-                    let listing_db = self
-                        .create_listing_database(
-                            &location,
-                            table_id.clone(),
-                            user_storage_options,
-                            response.storage_options.as_ref(),
-                        )
-                        .await?;
-
-                    let namespace_client = self
-                        .server_side_query_enabled
-                        .then(|| self.namespace.clone());
-
-                    return listing_db
-                        .open_table(OpenTableRequest {
-                            name: request.name.clone(),
-                            namespace: request.namespace.clone(),
-                            index_cache_size: None,
-                            lance_read_params: None,
-                            location: Some(location),
-                            namespace_client,
-                        })
-                        .await;
+                    return Ok(Arc::new(native_table));
                }
            }
        }
@@ -270,106 +205,85 @@ impl Database for LanceNamespaceDatabase {
        let mut table_id = request.namespace.clone();
        table_id.push(request.name.clone());

-        let create_empty_request = CreateEmptyTableRequest {
+        // Try declare_table first, falling back to create_empty_table for backwards
+        // compatibility with older namespace clients that don't support declare_table
+        let declare_request = DeclareTableRequest {
            id: Some(table_id.clone()),
-            location: None,
-            properties: if self.storage_options.is_empty() {
-                None
-            } else {
-                Some(self.storage_options.clone())
-            },
+            ..Default::default()
        };

-        let create_empty_response = self
-            .namespace
-            .create_empty_table(create_empty_request)
-            .await
-            .map_err(|e| Error::Runtime {
-                message: format!("Failed to create empty table: {}", e),
-            })?;
+        let location = match self.namespace.declare_table(declare_request).await {
+            Ok(response) => response.location.ok_or_else(|| Error::Runtime {
+                message: "Table location is missing from declare_table response".to_string(),
+            })?,
+            Err(e) => {
+                // Check if the error is "not supported" and try create_empty_table as fallback
+                let err_str = e.to_string().to_lowercase();
+                if err_str.contains("not supported") || err_str.contains("not implemented") {
+                    warn!(
+                        "declare_table is not supported by the namespace client, \
+                        falling back to deprecated create_empty_table. \
+                        create_empty_table is deprecated and will be removed in Lance 3.0.0. \
+                        Please upgrade your namespace client to support declare_table."
+                    );
+                    #[allow(deprecated)]
+                    let create_empty_request = CreateEmptyTableRequest {
+                        id: Some(table_id.clone()),
+                        ..Default::default()
+                    };

-        let location = create_empty_response
-            .location
-            .ok_or_else(|| Error::Runtime {
-                message: "Table location is missing from create_empty_table response".to_string(),
-            })?;
+                    #[allow(deprecated)]
+                    let create_response = self
+                        .namespace
+                        .create_empty_table(create_empty_request)
+                        .await
+                        .map_err(|e| Error::Runtime {
+                            message: format!("Failed to create empty table: {}", e),
+                        })?;

-        let listing_db = self
-            .create_listing_database(
-                &location,
-                table_id.clone(),
-                user_storage_options,
-                create_empty_response.storage_options.as_ref(),
-            )
-            .await?;
-
-        let namespace_client = self
-            .server_side_query_enabled
-            .then(|| self.namespace.clone());
-
-        let create_request = DbCreateTableRequest {
-            name: request.name,
-            namespace: request.namespace,
-            data: request.data,
-            mode: request.mode,
-            write_options: request.write_options,
-            location: Some(location),
-            namespace_client,
+                    create_response.location.ok_or_else(|| Error::Runtime {
+                        message: "Table location is missing from create_empty_table response"
+                            .to_string(),
+                    })?
+                } else {
+                    return Err(Error::Runtime {
+                        message: format!("Failed to declare table: {}", e),
+                    });
+                }
+            }
        };

-        listing_db.create_table(create_request).await
+        let native_table = NativeTable::create_from_namespace(
+            self.namespace.clone(),
+            &location,
+            &request.name,
+            request.namespace.clone(),
+            request.data,
+            None, // write_store_wrapper not used for namespace connections
+            request.write_options.lance_write_params,
+            self.read_consistency_interval,
+            self.server_side_query_enabled,
+            self.session.clone(),
+        )
+        .await?;
+
+        Ok(Arc::new(native_table))
    }

    async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
-        // Extract user-provided storage options from request
-        let user_storage_options = request
-            .lance_read_params
-            .as_ref()
-            .and_then(|lrp| lrp.store_options.as_ref())
-            .and_then(|so| so.storage_options.as_ref());
+        let native_table = NativeTable::open_from_namespace(
+            self.namespace.clone(),
+            &request.name,
+            request.namespace.clone(),
+            None, // write_store_wrapper not used for namespace connections
+            request.lance_read_params,
+            self.read_consistency_interval,
+            self.server_side_query_enabled,
+            self.session.clone(),
+        )
+        .await?;

-        let mut table_id = request.namespace.clone();
-        table_id.push(request.name.clone());
-
-        let describe_request = DescribeTableRequest {
-            id: Some(table_id.clone()),
-            version: None,
-        };
-        let response = self
-            .namespace
-            .describe_table(describe_request)
-            .await
-            .map_err(|e| Error::Runtime {
-                message: format!("Failed to describe table: {}", e),
-            })?;
-
-        let location = response.location.ok_or_else(|| Error::Runtime {
-            message: "Table location is missing from namespace response".to_string(),
-        })?;
-
-        let listing_db = self
-            .create_listing_database(
-                &location,
-                table_id.clone(),
-                user_storage_options,
-                response.storage_options.as_ref(),
-            )
-            .await?;
-
-        let namespace_client = self
-            .server_side_query_enabled
-            .then(|| self.namespace.clone());
-
-        let open_request = OpenTableRequest {
-            name: request.name.clone(),
-            namespace: request.namespace.clone(),
-            index_cache_size: request.index_cache_size,
-            lance_read_params: request.lance_read_params,
-            location: Some(location),
-            namespace_client,
-        };
-
-        listing_db.open_table(open_request).await
+        Ok(Arc::new(native_table))
    }

    async fn clone_table(&self, _request: CloneTableRequest) -> Result<Arc<dyn BaseTable>> {
@@ -394,7 +308,10 @@ impl Database for LanceNamespaceDatabase {
        let mut table_id = namespace.to_vec();
        table_id.push(name.to_string());

-        let drop_request = DropTableRequest { id: Some(table_id) };
+        let drop_request = DropTableRequest {
+            id: Some(table_id),
+            ..Default::default()
+        };
        self.namespace
            .drop_table(drop_request)
            .await
@@ -425,6 +342,10 @@ impl Database for LanceNamespaceDatabase {
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
+
+    async fn namespace_client(&self) -> Result<Arc<dyn LanceNamespace>> {
+        Ok(self.namespace.clone())
+    }
 }

 #[cfg(test)]
@@ -545,8 +466,7 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
+            ..Default::default()
        })
        .await
        .expect("Failed to create namespace");
@@ -606,8 +526,7 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
+            ..Default::default()
        })
        .await
        .expect("Failed to create namespace");
@@ -670,8 +589,7 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
+            ..Default::default()
        })
        .await
        .expect("Failed to create namespace");
@@ -754,8 +672,7 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
+            ..Default::default()
        })
        .await
        .expect("Failed to create namespace");
@@ -810,8 +727,7 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
+            ..Default::default()
        })
        .await
        .expect("Failed to create namespace");
@@ -891,8 +807,7 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
+            ..Default::default()
        })
        .await
        .expect("Failed to create namespace");
@@ -925,8 +840,7 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
+            ..Default::default()
        })
        .await
        .expect("Failed to create namespace");
--- a/rust/lancedb/src/dataloader/permutation/builder.rs
+++ b/rust/lancedb/src/dataloader/permutation/builder.rs
@@ -19,7 +19,7 @@ use crate::{
        split::{SplitStrategy, Splitter, SPLIT_ID_COLUMN},
        util::{rename_column, TemporaryDirectory},
    },
-    query::{ExecutableQuery, QueryBase},
+    query::{ExecutableQuery, QueryBase, Select},
    Error, Result, Table,
 };

@@ -27,6 +27,8 @@ pub const SRC_ROW_ID_COL: &str = "row_id";

 pub const SPLIT_NAMES_CONFIG_KEY: &str = "split_names";

+pub const DEFAULT_MEMORY_LIMIT: usize = 100 * 1024 * 1024;
+
 /// Where to store the permutation table
 #[derive(Debug, Clone, Default)]
 enum PermutationDestination {
@@ -167,10 +169,20 @@ impl PermutationBuilder {
        &self,
        data: SendableRecordBatchStream,
    ) -> Result<SendableRecordBatchStream> {
+        let memory_limit = std::env::var("LANCEDB_PERM_BUILDER_MEMORY_LIMIT")
+            .unwrap_or_else(|_| DEFAULT_MEMORY_LIMIT.to_string())
+            .parse::<usize>()
+            .unwrap_or_else(|_| {
+                log::error!(
+                    "Failed to parse LANCEDB_PERM_BUILDER_MEMORY_LIMIT, using default: {}",
+                    DEFAULT_MEMORY_LIMIT
+                );
+                DEFAULT_MEMORY_LIMIT
+            });
        let ctx = SessionContext::new_with_config_rt(
            SessionConfig::default(),
            RuntimeEnvBuilder::new()
-                .with_memory_limit(100 * 1024 * 1024, 1.0)
+                .with_memory_limit(memory_limit, 1.0)
                .with_disk_manager_builder(
                    DiskManagerBuilder::default()
                        .with_mode(self.config.temp_dir.to_disk_manager_mode()),
@@ -232,7 +244,7 @@ impl PermutationBuilder {
    /// Builds the permutation table and stores it in the given database.
    pub async fn build(self) -> Result<Table> {
        // First pass, apply filter and load row ids
-        let mut rows = self.base_table.query().with_row_id();
+        let mut rows = self.base_table.query().select(Select::columns(&[ROW_ID]));

        if let Some(filter) = &self.config.filter {
            rows = rows.only_if(filter);
@@ -321,6 +333,47 @@ mod tests {

    use super::*;

+    /// A built permutation must carry only `row_id` and `split_id`, never base-table columns.
+    #[tokio::test]
+    async fn test_permutation_table_only_stores_row_id_and_split_id() {
+        let scratch = tempfile::tempdir().unwrap();
+        let db_path = scratch.path().to_str().unwrap();
+        let conn = connect(db_path).execute().await.unwrap();
+
+        let source_stream = lance_datagen::gen_batch()
+            .col("col_a", lance_datagen::array::step::<Int32Type>())
+            .col("col_b", lance_datagen::array::step::<Int32Type>())
+            .into_ldb_stream(RowCount::from(100), BatchCount::from(10));
+        let base_table = conn
+            .create_table_streaming("base_tbl", source_stream)
+            .execute()
+            .await
+            .unwrap();
+
+        // Filter on a data column while building an even two-way sequential split.
+        let halves = SplitStrategy::Sequential {
+            sizes: SplitSizes::Percentages(vec![0.5, 0.5]),
+        };
+        let permutation_table = PermutationBuilder::new(base_table.clone())
+            .with_split_strategy(halves, None)
+            .with_filter("col_a > 57".to_string())
+            .build()
+            .await
+            .unwrap();
+
+        let schema = permutation_table.schema().await.unwrap();
+        let mut field_names = Vec::with_capacity(schema.fields().len());
+        for field in schema.fields().iter() {
+            field_names.push(field.name().as_str());
+        }
+        assert_eq!(
+            field_names,
+            vec!["row_id", "split_id"],
+            "Permutation table should only contain row_id and split_id columns, but found: {:?}",
+            field_names,
+        );
+    }
+
    #[tokio::test]
    async fn test_permutation_builder() {
        let temp_dir = tempfile::tempdir().unwrap();
@@ -352,8 +405,6 @@ mod tests {
            .await
            .unwrap();

-        println!("permutation_table: {:?}", permutation_table);
-
        // Potentially brittle seed-dependent values below
        assert_eq!(permutation_table.count_rows(None).await.unwrap(), 330);
        assert_eq!(
--- a/rust/lancedb/src/dataloader/permutation/shuffle.rs
+++ b/rust/lancedb/src/dataloader/permutation/shuffle.rs
@@ -171,7 +171,7 @@ impl Shuffler {
            // This is kind of an annoying limitation but if we allow runt clumps from batches then
            // clumps will get unaligned and we will mess up the clumps when we do the in-memory
            // shuffle step.  If this is a problem we can probably figure out a better way to do this.
-            if !is_last && batch.num_rows() as u64 % clump_size != 0 {
+            if !is_last && !(batch.num_rows() as u64).is_multiple_of(clump_size) {
                return Err(Error::Runtime {
                    message: format!(
                        "Expected batch size ({}) to be divisible by clump size ({})",
--- a/rust/lancedb/src/dataloader/permutation/split.rs
+++ b/rust/lancedb/src/dataloader/permutation/split.rs
@@ -1,12 +1,9 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use std::{
-    iter,
-    sync::{
-        atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
-        Arc,
-    },
+use std::sync::{
+    atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
+    Arc,
 };

 use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array};
@@ -15,6 +12,8 @@ use datafusion_common::hash_utils::create_hashes;
 use futures::{StreamExt, TryStreamExt};
 use lance_arrow::SchemaExt;

+use lance_core::ROW_ID;
+
 use crate::{
    arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
    dataloader::{
@@ -158,7 +157,7 @@ impl Splitter {
                remaining_in_split
            };

-            split_ids.extend(iter::repeat(split_id as u64).take(rows_to_add as usize));
+            split_ids.extend(std::iter::repeat_n(split_id as u64, rows_to_add as usize));
            if done {
                // Quit early if we've run out of splits
                break;
@@ -363,11 +362,15 @@ impl Splitter {

    pub fn project(&self, query: Query) -> Query {
        match &self.strategy {
-            SplitStrategy::Calculated { calculation } => query.select(Select::Dynamic(vec![(
-                SPLIT_ID_COLUMN.to_string(),
-                calculation.clone(),
-            )])),
-            SplitStrategy::Hash { columns, .. } => query.select(Select::Columns(columns.clone())),
+            SplitStrategy::Calculated { calculation } => query.select(Select::Dynamic(vec![
+                (SPLIT_ID_COLUMN.to_string(), calculation.clone()),
+                (ROW_ID.to_string(), ROW_ID.to_string()),
+            ])),
+            SplitStrategy::Hash { columns, .. } => {
+                let mut cols = columns.clone();
+                cols.push(ROW_ID.to_string());
+                query.select(Select::Columns(cols))
+            }
            _ => query,
        }
    }
@@ -662,7 +665,7 @@ mod tests {
        assert_eq!(split_batch.num_rows(), total_split_sizes as usize);
        let mut expected = Vec::with_capacity(total_split_sizes as usize);
        for (i, size) in expected_split_sizes.iter().enumerate() {
-            expected.extend(iter::repeat(i as u64).take(*size as usize));
+            expected.extend(std::iter::repeat_n(i as u64, *size as usize));
        }
        let expected = Arc::new(UInt64Array::from(expected)) as Arc<dyn Array>;

--- a/rust/lancedb/src/embeddings.rs
+++ b/rust/lancedb/src/embeddings.rs
@@ -120,8 +120,13 @@ impl MemoryRegistry {
 }

 /// A record batch reader that has embeddings applied to it
-/// This is a wrapper around another record batch reader that applies an embedding function
-/// when reading from the record batch
+///
+/// This is a wrapper around another record batch reader that applies embedding functions
+/// when reading from the record batch.
+///
+/// When multiple embedding functions are defined, they are computed in parallel using
+/// scoped threads to improve performance. For a single embedding function, computation
+/// is done inline without threading overhead.
 pub struct WithEmbeddings<R: RecordBatchReader> {
    inner: R,
    embeddings: Vec<(EmbeddingDefinition, Arc<dyn EmbeddingFunction>)>,
@@ -235,6 +240,48 @@ impl<R: RecordBatchReader> WithEmbeddings<R> {
            column_definitions,
        })
    }
+
+    /// Computes one embedding per configured function, in definition order.
+    ///
+    /// Source columns are resolved up front so a missing column fails before any work
+    /// starts; one embedding runs inline, several run concurrently on scoped threads.
+    fn compute_embeddings_parallel(&self, batch: &RecordBatch) -> Result<Vec<Arc<dyn Array>>> {
+        let jobs = self
+            .embeddings
+            .iter()
+            .map(|(fld, func)| {
+                let src_column = batch.column_by_name(&fld.source_column).ok_or_else(|| {
+                    Error::InvalidInput {
+                        message: format!("Source column '{}' not found", fld.source_column),
+                    }
+                })?;
+                Ok((func, src_column.clone()))
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        if let [(func, src_column)] = jobs.as_slice() {
+            return Ok(vec![func.compute_source_embeddings(src_column.clone())?]);
+        }
+
+        // Parallel path: spawn every job first, then join in order so results line up.
+        std::thread::scope(|s| {
+            let handles: Vec<_> = jobs
+                .into_iter()
+                .map(|(func, src_column)| {
+                    s.spawn(move || func.compute_source_embeddings(src_column))
+                })
+                .collect();
+
+            handles
+                .into_iter()
+                .map(|h| {
+                    h.join().map_err(|e| Error::Runtime {
+                        message: format!("Thread panicked during embedding computation: {:?}", e),
+                    })?
+                })
+                .collect()
+        })
+    }
 }

 impl<R: RecordBatchReader> Iterator for MaybeEmbedded<R> {
@@ -262,19 +309,19 @@ impl<R: RecordBatchReader> Iterator for WithEmbeddings<R> {
    fn next(&mut self) -> Option<Self::Item> {
        let batch = self.inner.next()?;
        match batch {
-            Ok(mut batch) => {
-                // todo: parallelize this
-                for (fld, func) in self.embeddings.iter() {
-                    let src_column = batch.column_by_name(&fld.source_column).unwrap();
-                    let embedding = match func.compute_source_embeddings(src_column.clone()) {
-                        Ok(embedding) => embedding,
-                        Err(e) => {
-                            return Some(Err(arrow_schema::ArrowError::ComputeError(format!(
-                                "Error computing embedding: {}",
-                                e
-                            ))))
-                        }
-                    };
+            Ok(batch) => {
+                let embeddings = match self.compute_embeddings_parallel(&batch) {
+                    Ok(emb) => emb,
+                    Err(e) => {
+                        return Some(Err(arrow_schema::ArrowError::ComputeError(format!(
+                            "Error computing embedding: {}",
+                            e
+                        ))))
+                    }
+                };
+
+                let mut batch = batch;
+                for ((fld, _), embedding) in self.embeddings.iter().zip(embeddings.iter()) {
                    let dst_field_name = fld
                        .dest_column
                        .clone()
@@ -286,7 +333,7 @@ impl<R: RecordBatchReader> Iterator for WithEmbeddings<R> {
                        embedding.nulls().is_some(),
                    );

-                    match batch.try_with_column(dst_field.clone(), embedding) {
+                    match batch.try_with_column(dst_field.clone(), embedding.clone()) {
                        Ok(b) => batch = b,
                        Err(e) => return Some(Err(e)),
                    };
--- a/rust/lancedb/src/index/vector.rs
+++ b/rust/lancedb/src/index/vector.rs
@@ -297,10 +297,10 @@ impl IvfPqIndexBuilder {
 }

 pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
-    if dim % 16 == 0 {
+    if dim.is_multiple_of(16) {
        // Should be more aggressive than this default.
        dim / 16
-    } else if dim % 8 == 0 {
+    } else if dim.is_multiple_of(8) {
        dim / 8
    } else {
        log::warn!(
--- a/rust/lancedb/src/lib.rs
+++ b/rust/lancedb/src/lib.rs
@@ -25,13 +25,14 @@
 //!
 //! ## Crate Features
 //!
-//! ### Experimental Features
-//!
-//! These features are not enabled by default.  They are experimental or in-development features that
-//! are not yet ready to be released.
-//!
-//! - `remote` - Enable remote client to connect to LanceDB cloud.  This is not yet fully implemented
-//!   and should not be enabled.
+//! - `aws` - Enable AWS S3 object store support.
+//! - `dynamodb` - Enable DynamoDB manifest store support.
+//! - `azure` - Enable Azure Blob Storage object store support.
+//! - `gcs` - Enable Google Cloud Storage object store support.
+//! - `oss` - Enable Alibaba Cloud OSS object store support.
+//! - `remote` - Enable remote client to connect to LanceDB cloud.
+//! - `huggingface` - Enable HuggingFace Hub integration for loading datasets from the Hub.
+//! - `fp16kernels` - Enable FP16 kernels for faster vector search on CPU.
 //!
 //! ### Quick Start
 //!
@@ -50,17 +51,15 @@
 //! - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud object store
 //! - `db://dbname` - Lance Cloud
 //!
-//! You can also use [`ConnectOptions`] to configure the connection to the database.
+//! You can also use [`ConnectBuilder`] to configure the connection to the database.
 //!
 //! ```rust
-//! use object_store::aws::AwsCredential;
 //! # tokio::runtime::Runtime::new().unwrap().block_on(async {
 //! let db = lancedb::connect("data/sample-lancedb")
-//!     .aws_creds(AwsCredential {
-//!         key_id: "some_key".to_string(),
-//!         secret_key: "some_secret".to_string(),
-//!         token: None,
-//!     })
+//!     .storage_options([
+//!         ("aws_access_key_id", "some_key"),
+//!         ("aws_secret_access_key", "some_secret"),
+//!     ])
 //!     .execute()
 //!     .await
 //!     .unwrap();
--- a/rust/lancedb/src/remote/client.rs
+++ b/rust/lancedb/src/remote/client.rs
@@ -232,6 +232,38 @@ impl HttpSend for Sender {
    }
 }

+/// Parsed components from a database URL (db://...)
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ParsedDbUrl {
+    /// Host component of the URL, used as the database name.
+    pub db_name: String,
+    /// URL path with leading slashes stripped; `None` when nothing remains.
+    pub db_prefix: Option<String>,
+}
+
+/// Parse a database URL and extract the database name and optional prefix.
+///
+/// Expected format: `db://db_name` or `db://db_name/prefix`
+///
+/// # Errors
+///
+/// Returns [`Error::InvalidInput`] if `db_url` is not a valid URL or has no host.
+pub fn parse_db_url(db_url: &str) -> Result<ParsedDbUrl> {
+    let parsed_url = url::Url::parse(db_url).map_err(|err| Error::InvalidInput {
+        message: format!("db_url is not a valid URL. '{db_url}'. Error: {err}"),
+    })?;
+    debug_assert_eq!(parsed_url.scheme(), "db");
+    let db_name = parsed_url.host_str().ok_or_else(|| Error::InvalidInput {
+        message: format!("Invalid database URL (missing host) '{}'", db_url),
+    })?;
+    let prefix = parsed_url.path().trim_start_matches('/');
+    let db_prefix = (!prefix.is_empty()).then(|| prefix.to_string());
+    Ok(ParsedDbUrl {
+        db_name: db_name.to_string(),
+        db_prefix,
+    })
+}
+
 impl RestfulLanceDbClient<Sender> {
    fn get_timeout(passed: Option<Duration>, env_var: &str) -> Result<Option<Duration>> {
        if let Some(passed) = passed {
@@ -250,32 +282,12 @@ impl RestfulLanceDbClient<Sender> {
    }

    pub fn try_new(
-        db_url: &str,
-        api_key: &str,
+        parsed_url: &ParsedDbUrl,
        region: &str,
        host_override: Option<String>,
+        default_headers: HeaderMap,
        client_config: ClientConfig,
-        options: &RemoteOptions,
    ) -> Result<Self> {
-        let parsed_url = url::Url::parse(db_url).map_err(|err| Error::InvalidInput {
-            message: format!("db_url is not a valid URL. '{db_url}'. Error: {err}"),
-        })?;
-        debug_assert_eq!(parsed_url.scheme(), "db");
-        if !parsed_url.has_host() {
-            return Err(Error::InvalidInput {
-                message: format!("Invalid database URL (missing host) '{}'", db_url),
-            });
-        }
-        let db_name = parsed_url.host_str().unwrap();
-        let db_prefix = {
-            let prefix = parsed_url.path().trim_start_matches('/');
-            if prefix.is_empty() {
-                None
-            } else {
-                Some(prefix)
-            }
-        };
-
        // Get the timeouts
        let timeout =
            Self::get_timeout(client_config.timeout_config.timeout, "LANCE_CLIENT_TIMEOUT")?;
@@ -348,15 +360,7 @@ impl RestfulLanceDbClient<Sender> {
        }

        let client = client_builder
-            .default_headers(Self::default_headers(
-                api_key,
-                region,
-                db_name,
-                host_override.is_some(),
-                options,
-                db_prefix,
-                &client_config,
-            )?)
+            .default_headers(default_headers)
            .user_agent(client_config.user_agent)
            .build()
            .map_err(|err| Error::Other {
@@ -366,7 +370,7 @@ impl RestfulLanceDbClient<Sender> {

        let host = match host_override {
            Some(host_override) => host_override,
-            None => format!("https://{}.{}.api.lancedb.com", db_name, region),
+            None => format!("https://{}.{}.api.lancedb.com", parsed_url.db_name, region),
        };
        debug!("Created client for host: {}", host);
        let retry_config = client_config.retry_config.clone().try_into()?;
@@ -389,7 +393,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
        &self.host
    }

-    fn default_headers(
+    pub fn default_headers(
        api_key: &str,
        region: &str,
        db_name: &str,
--- a/rust/lancedb/src/remote/db.rs
+++ b/rust/lancedb/src/remote/db.rs
@@ -189,6 +189,10 @@ pub struct RemoteDatabase<S: HttpSend = Sender> {
    client: RestfulLanceDbClient<S>,
    table_cache: Cache<String, Arc<RemoteTable<S>>>,
    uri: String,
+    /// Headers to pass to the namespace client for authentication
+    namespace_headers: HashMap<String, String>,
+    /// TLS configuration for mTLS support
+    tls_config: Option<super::client::TlsConfig>,
 }

 impl RemoteDatabase {
@@ -200,13 +204,32 @@ impl RemoteDatabase {
        client_config: ClientConfig,
        options: RemoteOptions,
    ) -> Result<Self> {
-        let client = RestfulLanceDbClient::try_new(
-            uri,
+        let parsed = super::client::parse_db_url(uri)?;
+        let header_map = RestfulLanceDbClient::<Sender>::default_headers(
            api_key,
            region,
-            host_override,
-            client_config,
+            &parsed.db_name,
+            host_override.is_some(),
            &options,
+            parsed.db_prefix.as_deref(),
+            &client_config,
+        )?;
+
+        let namespace_headers: HashMap<String, String> = header_map
+            .iter()
+            .filter_map(|(k, v)| {
+                v.to_str()
+                    .ok()
+                    .map(|val| (k.as_str().to_string(), val.to_string()))
+            })
+            .collect();
+
+        let client = RestfulLanceDbClient::try_new(
+            &parsed,
+            region,
+            host_override,
+            header_map,
+            client_config.clone(),
        )?;

        let table_cache = Cache::builder()
@@ -218,6 +241,8 @@ impl RemoteDatabase {
            client,
            table_cache,
            uri: uri.to_owned(),
+            namespace_headers,
+            tls_config: client_config.tls_config,
        })
    }
 }
@@ -240,6 +265,8 @@ mod test_utils {
                client,
                table_cache: Cache::new(0),
                uri: "http://localhost".to_string(),
+                namespace_headers: HashMap::new(),
+                tls_config: None,
            }
        }

@@ -248,11 +275,13 @@ mod test_utils {
            F: Fn(reqwest::Request) -> http::Response<T> + Send + Sync + 'static,
            T: Into<reqwest::Body>,
        {
-            let client = client_with_handler_and_config(handler, config);
+            let client = client_with_handler_and_config(handler, config.clone());
            Self {
                client,
                table_cache: Cache::new(0),
                uri: "http://localhost".to_string(),
+                namespace_headers: config.extra_headers.clone(),
+                tls_config: config.tls_config.clone(),
            }
        }
    }
@@ -716,7 +745,8 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
        let namespace_id = build_namespace_identifier(namespace_parts, &self.client.id_delimiter);
        let req = self
            .client
-            .get(&format!("/v1/namespace/{}/describe", namespace_id));
+            .post(&format!("/v1/namespace/{}/describe", namespace_id))
+            .json(&DescribeNamespaceRequest::default());

        let (request_id, resp) = self.client.send(req).await?;
        let resp = self.client.check_response(&request_id, resp).await?;
@@ -727,6 +757,31 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
+
+    /// Builds a REST namespace client for this database's host, reusing the stored
+    /// `namespace_headers` and, when configured, the mTLS settings.
+    async fn namespace_client(&self) -> Result<Arc<dyn lance_namespace::LanceNamespace>> {
+        // TODO: support header provider
+        let mut rest = lance_namespace_impls::RestNamespaceBuilder::new(self.client.host())
+            .delimiter(&self.client.id_delimiter)
+            .headers(self.namespace_headers.clone());
+
+        // Forward only the TLS options that were actually configured.
+        if let Some(tls) = self.tls_config.as_ref() {
+            if let Some(cert) = tls.cert_file.as_ref() {
+                rest = rest.cert_file(cert);
+            }
+            if let Some(key) = tls.key_file.as_ref() {
+                rest = rest.key_file(key);
+            }
+            if let Some(ca) = tls.ssl_ca_cert.as_ref() {
+                rest = rest.ssl_ca_cert(ca);
+            }
+            rest = rest.assert_hostname(tls.assert_hostname);
+        }
+
+        Ok(Arc::new(rest.build()) as Arc<dyn lance_namespace::LanceNamespace>)
+    }
 }

 /// RemoteOptions contains a subset of StorageOptions that are compatible with Remote LanceDB connections
@@ -1518,4 +1573,260 @@ mod tests {
            panic!("Expected HTTP error");
        }
    }
+
+    /// A mock-backed connection should hand out a namespace client without error.
+    #[tokio::test]
+    async fn test_namespace_client() {
+        const EMPTY_TABLES: &str = r#"{"tables": []}"#;
+        let conn = Connection::new_with_handler(|_| {
+            let ok = http::Response::builder().status(200);
+            ok.body(EMPTY_TABLES).unwrap()
+        });
+
+        // Get the namespace client from the connection's internal database
+        let namespace_client = conn.namespace_client().await;
+        assert!(namespace_client.is_ok());
+    }
+
+    /// Building a namespace client must also succeed when mTLS options are configured.
+    #[tokio::test]
+    async fn test_namespace_client_with_tls_config() {
+        use crate::remote::client::TlsConfig;
+
+        const EMPTY_TABLES: &str = r#"{"tables": []}"#;
+
+        // Populate every TLS option on the client configuration.
+        let client_config = ClientConfig {
+            tls_config: Some(TlsConfig {
+                cert_file: Some("/path/to/cert.pem".to_string()),
+                key_file: Some("/path/to/key.pem".to_string()),
+                ssl_ca_cert: Some("/path/to/ca.pem".to_string()),
+                assert_hostname: true,
+            }),
+            ..Default::default()
+        };
+
+        let conn = Connection::new_with_handler_and_config(
+            |_| {
+                let ok = http::Response::builder().status(200);
+                ok.body(EMPTY_TABLES).unwrap()
+            },
+            client_config,
+        );
+
+        // Get the namespace client - it should be created with the TLS config
+        let namespace_client = conn.namespace_client().await;
+        assert!(namespace_client.is_ok());
+    }
+
+    /// Building a namespace client must also succeed when extra headers are configured.
+    #[tokio::test]
+    async fn test_namespace_client_with_headers() {
+        const EMPTY_TABLES: &str = r#"{"tables": []}"#;
+
+        let extra_headers =
+            HashMap::from([("X-Custom-Header".to_string(), "custom-value".to_string())]);
+        let client_config = ClientConfig {
+            extra_headers,
+            ..Default::default()
+        };
+
+        let conn = Connection::new_with_handler_and_config(
+            |_| {
+                let ok = http::Response::builder().status(200);
+                ok.body(EMPTY_TABLES).unwrap()
+            },
+            client_config,
+        );
+
+        // Get the namespace client - it should be created with the extra headers
+        let namespace_client = conn.namespace_client().await;
+        assert!(namespace_client.is_ok());
+    }
+
+    /// Integration tests using RestAdapter to run RemoteDatabase against a real namespace server
+    mod rest_adapter_integration {
+        use super::*;
+        use lance_namespace::models::ListTablesRequest;
+        use lance_namespace_impls::{DirectoryNamespaceBuilder, RestAdapter, RestAdapterConfig};
+        use std::sync::Arc;
+        use tempfile::TempDir;
+
+        /// Owns a REST adapter server (and its backing temp directory) for one test.
+        struct RestServerFixture {
+            /// Held so the temp directory is not dropped while the fixture is alive.
+            _temp_dir: TempDir,
+            server_handle: lance_namespace_impls::RestAdapterHandle,
+            /// Base URL (`http://127.0.0.1:<port>`) built from the server's reported port.
+            server_url: String,
+        }
+
+        impl RestServerFixture {
+            /// Starts a REST adapter over a fresh `DirectoryNamespace` on an OS-assigned port.
+            async fn new() -> Self {
+                let temp_dir = TempDir::new().unwrap();
+                let root = temp_dir.path().to_str().unwrap().to_string();
+
+                // Namespace backend rooted in the temp directory.
+                let directory_ns = DirectoryNamespaceBuilder::new(&root)
+                    .build()
+                    .await
+                    .unwrap();
+
+                // Port 0 lets the OS pick a free port; the real one is read back below.
+                let config = RestAdapterConfig {
+                    port: 0,
+                    ..Default::default()
+                };
+
+                let server = RestAdapter::new(Arc::new(directory_ns), config);
+                let server_handle = server.start().await.unwrap();
+                let server_url = format!("http://127.0.0.1:{}", server_handle.port());
+
+                Self {
+                    _temp_dir: temp_dir,
+                    server_handle,
+                    server_url,
+                }
+            }
+        }
+
+        impl Drop for RestServerFixture {
+            /// Calls `shutdown` on the server handle when the fixture is dropped.
+            fn drop(&mut self) {
+                self.server_handle.shutdown();
+            }
+        }
+
+        /// Round-trips namespace, table creation, listing and opening through the REST adapter.
+        #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+        async fn test_remote_database_with_rest_adapter() {
+            use lance_namespace::models::CreateNamespaceRequest;
+
+            let fixture = RestServerFixture::new().await;
+
+            // `db://dummy` is a placeholder; `host_override` points the client at the fixture.
+            let conn = ConnectBuilder::new("db://dummy")
+                .api_key("test-api-key")
+                .region("us-east-1")
+                .host_override(&fixture.server_url)
+                .execute()
+                .await
+                .unwrap();
+
+            // Tables below live in a child namespace, so create it first.
+            let namespace = vec!["test_ns".to_string()];
+            let create_ns = CreateNamespaceRequest {
+                id: Some(namespace.clone()),
+                ..Default::default()
+            };
+            conn.create_namespace(create_ns)
+                .await
+                .expect("Failed to create namespace");
+
+            let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
+            let batch = RecordBatch::try_new(
+                schema.clone(),
+                vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
+            )
+            .unwrap();
+            let reader = RecordBatchIterator::new([Ok(batch.clone())], schema.clone());
+
+            let created = conn
+                .create_table("test_table", reader)
+                .namespace(namespace.clone())
+                .execute()
+                .await;
+            assert!(
+                created.is_ok(),
+                "Failed to create table: {:?}",
+                created.err()
+            );
+
+            // The connection itself must list the new table...
+            let listed = conn
+                .list_tables(ListTablesRequest {
+                    id: Some(namespace.clone()),
+                    ..Default::default()
+                })
+                .await
+                .expect("Failed to list tables");
+            assert_eq!(listed.tables, vec!["test_table"]);
+
+            // ...and so must the namespace client obtained from it.
+            let namespace_client = conn.namespace_client().await.unwrap();
+            let listed = namespace_client
+                .list_tables(ListTablesRequest {
+                    id: Some(namespace.clone()),
+                    ..Default::default()
+                })
+                .await
+                .unwrap();
+            assert_eq!(listed.tables, vec!["test_table"]);
+
+            // Finally, open the table from the child namespace.
+            let opened = conn
+                .open_table("test_table")
+                .namespace(namespace.clone())
+                .execute()
+                .await;
+            assert!(opened.is_ok(), "Failed to open table: {:?}", opened.err());
+            assert_eq!(opened.unwrap().name(), "test_table");
+        }
+
+        /// Creates three tables in one child namespace and checks that all are listed.
+        #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+        async fn test_remote_database_with_multiple_tables() {
+            use lance_namespace::models::CreateNamespaceRequest;
+
+            let fixture = RestServerFixture::new().await;
+
+            // `db://dummy` is a placeholder; `host_override` points the client at the fixture.
+            let conn = ConnectBuilder::new("db://dummy")
+                .api_key("test-api-key")
+                .region("us-east-1")
+                .host_override(&fixture.server_url)
+                .execute()
+                .await
+                .unwrap();
+
+            // Tables below live in a child namespace, so create it first.
+            let namespace = vec!["multi_table_ns".to_string()];
+            let create_ns = CreateNamespaceRequest {
+                id: Some(namespace.clone()),
+                ..Default::default()
+            };
+            conn.create_namespace(create_ns)
+                .await
+                .expect("Failed to create namespace");
+
+            // Each table holds a single row whose `id` is the loop index.
+            let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
+
+            for i in 1..=3 {
+                let ids = Int32Array::from(vec![i]);
+                let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(ids)]).unwrap();
+                let reader = RecordBatchIterator::new([Ok(batch.clone())], schema.clone());
+
+                conn.create_table(format!("table{}", i), reader)
+                    .namespace(namespace.clone())
+                    .execute()
+                    .await
+                    .unwrap_or_else(|e| panic!("Failed to create table{}: {:?}", i, e));
+            }
+
+            // All three names must come back; their order is not asserted.
+            let list_response = conn
+                .list_tables(ListTablesRequest {
+                    id: Some(namespace.clone()),
+                    ..Default::default()
+                })
+                .await
+                .unwrap();
+            assert_eq!(list_response.tables.len(), 3);
+            assert!(list_response.tables.contains(&"table1".to_string()));
+            assert!(list_response.tables.contains(&"table2".to_string()));
+            assert!(list_response.tables.contains(&"table3".to_string()));
+        }
+    }
 }
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

+pub mod insert;
+
 use crate::index::Index;
 use crate::index::IndexStatistics;
 use crate::query::{QueryFilter, QueryRequest, Select, VectorQueryRequest};
@@ -204,6 +206,7 @@ pub struct RemoteTable<S: HttpSend = Sender> {
    server_version: ServerVersion,

    version: RwLock<Option<u64>>,
+    location: RwLock<Option<String>>,
 }

 impl<S: HttpSend> RemoteTable<S> {
@@ -221,6 +224,7 @@ impl<S: HttpSend> RemoteTable<S> {
            identifier,
            server_version,
            version: RwLock::new(None),
+            location: RwLock::new(None),
        }
    }

@@ -466,7 +470,9 @@ impl<S: HttpSend> RemoteTable<S> {
        self.apply_query_params(&mut body, &query.base)?;

        // Apply general parameters, before we dispatch based on number of query vectors.
-        body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
+        if let Some(distance_type) = query.distance_type {
+            body["distance_type"] = serde_json::json!(distance_type);
+        }
        // In 0.23.1 we migrated from `nprobes` to `minimum_nprobes` and `maximum_nprobes`.
        // Old client / new server: since minimum_nprobes is missing, fallback to nprobes
        // New client / old server: old server will only see nprobes, make sure to set both
@@ -639,6 +645,7 @@ impl<S: HttpSend> RemoteTable<S> {
 struct TableDescription {
    version: u64,
    schema: JsonSchema,
+    location: Option<String>,
 }

 impl<S: HttpSend> std::fmt::Display for RemoteTable<S> {
@@ -667,6 +674,7 @@ mod test_utils {
                identifier: name,
                server_version: version.map(ServerVersion).unwrap_or_default(),
                version: RwLock::new(None),
+                location: RwLock::new(None),
            }
        }
    }
@@ -1088,6 +1096,17 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
                    body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
                }
            }
+            Index::IvfRq(index) => {
+                body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_RQ".to_string());
+                body[METRIC_TYPE_KEY] =
+                    serde_json::Value::String(index.distance_type.to_string().to_lowercase());
+                if let Some(num_partitions) = index.num_partitions {
+                    body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
+                }
+                if let Some(num_bits) = index.num_bits {
+                    body["num_bits"] = serde_json::Value::Number(num_bits.into());
+                }
+            }
            Index::BTree(_) => {
                body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
            }
@@ -1450,14 +1469,42 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
            message: "table_definition is not supported on LanceDB cloud.".into(),
        })
    }
-    fn dataset_uri(&self) -> &str {
-        "NOT_SUPPORTED"
+    async fn uri(&self) -> Result<String> {
+        // Check if we already have the location cached
+        {
+            let location = self.location.read().await;
+            if let Some(ref loc) = *location {
+                return Ok(loc.clone());
+            }
+        }
+
+        // Fetch from server via describe
+        let description = self.describe().await?;
+        let location = description.location.ok_or_else(|| Error::NotSupported {
+            message: "Table URI not supported by the server".into(),
+        })?;
+
+        // Cache the location for future use
+        {
+            let mut cached_location = self.location.write().await;
+            *cached_location = Some(location.clone());
+        }
+
+        Ok(location)
    }

    async fn storage_options(&self) -> Option<HashMap<String, String>> {
        None
    }

+    async fn initial_storage_options(&self) -> Option<HashMap<String, String>> {
+        None
+    }
+
+    async fn latest_storage_options(&self) -> Result<Option<HashMap<String, String>>> {
+        Ok(None)
+    }
+
    async fn stats(&self) -> Result<TableStatistics> {
        let request = self
            .client
@@ -1473,6 +1520,21 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
        })?;
        Ok(stats)
    }
+
+    async fn create_insert_exec(
+        &self,
+        input: Arc<dyn ExecutionPlan>,
+        write_params: lance::dataset::WriteParams,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        let overwrite = matches!(write_params.mode, lance::dataset::WriteMode::Overwrite);
+        Ok(Arc::new(insert::RemoteInsertExec::new(
+            self.name.clone(),
+            self.identifier.clone(),
+            self.client.clone(),
+            input,
+            overwrite,
+        )))
+    }
 }

 #[derive(Serialize)]
@@ -2195,7 +2257,6 @@ mod tests {
            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
            let mut expected_body = serde_json::json!({
                "prefilter": true,
-                "distance_type": "l2",
                "nprobes": 20,
                "minimum_nprobes": 20,
                "maximum_nprobes": 20,
@@ -3321,4 +3382,69 @@ mod tests {
        let result = table.drop_columns(&["old_col1", "old_col2"]).await.unwrap();
        assert_eq!(result.version, 5);
    }
+
+    #[tokio::test]
+    async fn test_uri() {
+        let table = Table::new_with_handler("my_table", |request| {
+            assert_eq!(request.method(), "POST");
+            assert_eq!(request.url().path(), "/v1/table/my_table/describe/");
+
+            http::Response::builder()
+                .status(200)
+                .body(r#"{"version": 1, "schema": {"fields": []}, "location": "s3://bucket/path/to/table"}"#)
+                .unwrap()
+        });
+
+        let uri = table.uri().await.unwrap();
+        assert_eq!(uri, "s3://bucket/path/to/table");
+    }
+
+    #[tokio::test]
+    async fn test_uri_missing_location() {
+        let table = Table::new_with_handler("my_table", |request| {
+            assert_eq!(request.method(), "POST");
+            assert_eq!(request.url().path(), "/v1/table/my_table/describe/");
+
+            // Server returns response without location field
+            http::Response::builder()
+                .status(200)
+                .body(r#"{"version": 1, "schema": {"fields": []}}"#)
+                .unwrap()
+        });
+
+        let result = table.uri().await;
+        assert!(result.is_err());
+        assert!(matches!(&result, Err(Error::NotSupported { .. })));
+    }
+
+    #[tokio::test]
+    async fn test_uri_caching() {
+        use std::sync::atomic::{AtomicUsize, Ordering};
+        use std::sync::Arc;
+
+        let call_count = Arc::new(AtomicUsize::new(0));
+        let call_count_clone = call_count.clone();
+
+        let table = Table::new_with_handler("my_table", move |request| {
+            assert_eq!(request.url().path(), "/v1/table/my_table/describe/");
+            call_count_clone.fetch_add(1, Ordering::SeqCst);
+
+            http::Response::builder()
+                .status(200)
+                .body(
+                    r#"{"version": 1, "schema": {"fields": []}, "location": "gs://bucket/table"}"#,
+                )
+                .unwrap()
+        });
+
+        // First call should fetch from server
+        let uri1 = table.uri().await.unwrap();
+        assert_eq!(uri1, "gs://bucket/table");
+        assert_eq!(call_count.load(Ordering::SeqCst), 1);
+
+        // Second call should use cached value
+        let uri2 = table.uri().await.unwrap();
+        assert_eq!(uri2, "gs://bucket/table");
+        assert_eq!(call_count.load(Ordering::SeqCst), 1); // Still 1, no new call
+    }
 }
--- a/rust/lancedb/src/remote/table/insert.rs
+++ b/rust/lancedb/src/remote/table/insert.rs
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+//! DataFusion ExecutionPlan for inserting data into remote LanceDB tables.
+
+use std::any::Any;
+use std::sync::{Arc, Mutex};
+
+use arrow_array::{ArrayRef, RecordBatch, UInt64Array};
+use arrow_ipc::CompressionType;
+use arrow_schema::ArrowError;
+use datafusion_common::{DataFusionError, Result as DataFusionResult};
+use datafusion_execution::{SendableRecordBatchStream, TaskContext};
+use datafusion_physical_expr::EquivalenceProperties;
+use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
+use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties};
+use futures::StreamExt;
+use http::header::CONTENT_TYPE;
+
+use crate::remote::client::{HttpSend, RestfulLanceDbClient, Sender};
+use crate::remote::table::RemoteTable;
+use crate::remote::ARROW_STREAM_CONTENT_TYPE;
+use crate::table::datafusion::insert::COUNT_SCHEMA;
+use crate::table::AddResult;
+use crate::Error;
+
+/// ExecutionPlan for inserting data into a remote LanceDB table.
+///
+/// This plan:
+/// 1. Requires single partition (no parallel remote inserts yet)
+/// 2. Streams data as Arrow IPC to `/v1/table/{id}/insert/` endpoint
+/// 3. Stores AddResult for retrieval after execution
+#[derive(Debug)]
+pub struct RemoteInsertExec<S: HttpSend = Sender> {
+    table_name: String,
+    identifier: String,
+    client: RestfulLanceDbClient<S>,
+    input: Arc<dyn ExecutionPlan>,
+    overwrite: bool,
+    properties: PlanProperties,
+    add_result: Arc<Mutex<Option<AddResult>>>,
+}
+
+impl<S: HttpSend + 'static> RemoteInsertExec<S> {
+    /// Create a new RemoteInsertExec.
+    pub fn new(
+        table_name: String,
+        identifier: String,
+        client: RestfulLanceDbClient<S>,
+        input: Arc<dyn ExecutionPlan>,
+        overwrite: bool,
+    ) -> Self {
+        let schema = COUNT_SCHEMA.clone();
+        let properties = PlanProperties::new(
+            EquivalenceProperties::new(schema),
+            datafusion_physical_plan::Partitioning::UnknownPartitioning(1),
+            datafusion_physical_plan::execution_plan::EmissionType::Final,
+            datafusion_physical_plan::execution_plan::Boundedness::Bounded,
+        );
+
+        Self {
+            table_name,
+            identifier,
+            client,
+            input,
+            overwrite,
+            properties,
+            add_result: Arc::new(Mutex::new(None)),
+        }
+    }
+
+    /// Get the add result after execution.
+    // TODO: this will be used when we wire this up to Table::add().
+    #[allow(dead_code)]
+    pub fn add_result(&self) -> Option<AddResult> {
+        self.add_result.lock().unwrap().clone()
+    }
+
+    fn stream_as_body(data: SendableRecordBatchStream) -> DataFusionResult<reqwest::Body> {
+        let options = arrow_ipc::writer::IpcWriteOptions::default()
+            .try_with_compression(Some(CompressionType::LZ4_FRAME))?;
+        let writer = arrow_ipc::writer::StreamWriter::try_new_with_options(
+            Vec::new(),
+            &data.schema(),
+            options,
+        )?;
+
+        let stream = futures::stream::try_unfold((data, writer), move |(mut data, mut writer)| {
+            async move {
+                match data.next().await {
+                    Some(Ok(batch)) => {
+                        writer.write(&batch)?;
+                        let buffer = std::mem::take(writer.get_mut());
+                        Ok(Some((buffer, (data, writer))))
+                    }
+                    Some(Err(e)) => Err(e),
+                    None => {
+                        if let Err(ArrowError::IpcError(_msg)) = writer.finish() {
+                            // finish() errors if the writer was already finished: end of stream.
+                            return Ok(None);
+                        };
+                        let buffer = std::mem::take(writer.get_mut());
+                        Ok(Some((buffer, (data, writer))))
+                    }
+                }
+            }
+        });
+
+        Ok(reqwest::Body::wrap_stream(stream))
+    }
+}
+
+impl<S: HttpSend + 'static> DisplayAs for RemoteInsertExec<S> {
+    fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match t {
+            DisplayFormatType::Default | DisplayFormatType::Verbose => {
+                write!(
+                    f,
+                    "RemoteInsertExec: table={}, overwrite={}",
+                    self.table_name, self.overwrite
+                )
+            }
+            DisplayFormatType::TreeRender => {
+                write!(f, "RemoteInsertExec")
+            }
+        }
+    }
+}
+
+impl<S: HttpSend + 'static> ExecutionPlan for RemoteInsertExec<S> {
+    fn name(&self) -> &str {
+        Self::static_name()
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![&self.input]
+    }
+
+    fn maintains_input_order(&self) -> Vec<bool> {
+        vec![false]
+    }
+
+    fn required_input_distribution(&self) -> Vec<datafusion_physical_plan::Distribution> {
+        // Until we have a separate commit endpoint, we need to do all inserts in a single partition
+        vec![datafusion_physical_plan::Distribution::SinglePartition]
+    }
+
+    fn benefits_from_input_partitioning(&self) -> Vec<bool> {
+        vec![false]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
+        if children.len() != 1 {
+            return Err(DataFusionError::Internal(
+                "RemoteInsertExec requires exactly one child".to_string(),
+            ));
+        }
+        Ok(Arc::new(Self::new(
+            self.table_name.clone(),
+            self.identifier.clone(),
+            self.client.clone(),
+            children[0].clone(),
+            self.overwrite,
+        )))
+    }
+
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> DataFusionResult<SendableRecordBatchStream> {
+        if partition != 0 {
+            return Err(DataFusionError::Internal(
+                "RemoteInsertExec only supports single partition execution".to_string(),
+            ));
+        }
+
+        let input_stream = self.input.execute(0, context)?;
+        let client = self.client.clone();
+        let identifier = self.identifier.clone();
+        let overwrite = self.overwrite;
+        let add_result = self.add_result.clone();
+        let table_name = self.table_name.clone();
+
+        let stream = futures::stream::once(async move {
+            let mut request = client
+                .post(&format!("/v1/table/{}/insert/", identifier))
+                .header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
+
+            if overwrite {
+                request = request.query(&[("mode", "overwrite")]);
+            }
+
+            let body = Self::stream_as_body(input_stream)?;
+            let request = request.body(body);
+
+            let (request_id, response) = client
+                .send(request)
+                .await
+                .map_err(|e| DataFusionError::External(Box::new(e)))?;
+
+            let response =
+                RemoteTable::<Sender>::handle_table_not_found(&table_name, response, &request_id)
+                    .await
+                    .map_err(|e| DataFusionError::External(Box::new(e)))?;
+
+            let response = client
+                .check_response(&request_id, response)
+                .await
+                .map_err(|e| DataFusionError::External(Box::new(e)))?;
+
+            let body_text = response.text().await.map_err(|e| {
+                DataFusionError::External(Box::new(Error::Http {
+                    source: Box::new(e),
+                    request_id: request_id.clone(),
+                    status_code: None,
+                }))
+            })?;
+
+            let parsed_result = if body_text.trim().is_empty() {
+                // Backward compatible with old servers
+                AddResult { version: 0 }
+            } else {
+                serde_json::from_str(&body_text).map_err(|e| {
+                    DataFusionError::External(Box::new(Error::Http {
+                        source: format!("Failed to parse add response: {}", e).into(),
+                        request_id: request_id.clone(),
+                        status_code: None,
+                    }))
+                })?
+            };
+
+            {
+                let mut res_lock = add_result.lock().map_err(|_| {
+                    DataFusionError::Execution("Failed to acquire lock for add_result".to_string())
+                })?;
+                *res_lock = Some(parsed_result);
+            }
+
+            // Return a single batch with count 0; add_result records only the commit version.
+            let count_array: ArrayRef = Arc::new(UInt64Array::from(vec![0u64]));
+            let batch = RecordBatch::try_new(COUNT_SCHEMA.clone(), vec![count_array])?;
+            Ok::<_, DataFusionError>(batch)
+        });
+
+        Ok(Box::pin(RecordBatchStreamAdapter::new(
+            COUNT_SCHEMA.clone(),
+            stream,
+        )))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow_array::record_batch;
+    use arrow_schema::{DataType, Field, Schema as ArrowSchema};
+    use datafusion::prelude::SessionContext;
+    use datafusion_catalog::MemTable;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+    use std::sync::Arc;
+
+    use crate::remote::ARROW_STREAM_CONTENT_TYPE;
+    use crate::table::datafusion::BaseTableAdapter;
+    use crate::Table;
+
+    fn schema_json() -> &'static str {
+        r#"{"fields": [{"name": "id", "type": {"type": "int32"}, "nullable": true}]}"#
+    }
+
+    #[tokio::test]
+    async fn test_remote_insert_exec_execute_empty() {
+        let request_count = Arc::new(AtomicUsize::new(0));
+        let request_count_clone = request_count.clone();
+
+        let table = Table::new_with_handler("my_table", move |request| {
+            let path = request.url().path();
+
+            if path == "/v1/table/my_table/describe/" {
+                // Return schema for BaseTableAdapter::try_new
+                return http::Response::builder()
+                    .status(200)
+                    .body(format!(r#"{{"version": 1, "schema": {}}}"#, schema_json()))
+                    .unwrap();
+            }
+
+            if path == "/v1/table/my_table/insert/" {
+                assert_eq!(request.method(), "POST");
+                assert_eq!(
+                    request.headers().get("Content-Type").unwrap(),
+                    ARROW_STREAM_CONTENT_TYPE
+                );
+                request_count_clone.fetch_add(1, Ordering::SeqCst);
+
+                return http::Response::builder()
+                    .status(200)
+                    .body(r#"{"version": 2}"#.to_string())
+                    .unwrap();
+            }
+
+            panic!("Unexpected request path: {}", path);
+        });
+
+        let schema = Arc::new(ArrowSchema::new(vec![Field::new(
+            "id",
+            DataType::Int32,
+            true,
+        )]));
+
+        // Create empty MemTable (no batches)
+        let source_table = MemTable::try_new(schema, vec![vec![]]).unwrap();
+
+        let ctx = SessionContext::new();
+
+        // Register the remote table as insert target
+        let provider = BaseTableAdapter::try_new(table.base_table().clone())
+            .await
+            .unwrap();
+        ctx.register_table("my_table", Arc::new(provider)).unwrap();
+
+        // Register empty source
+        ctx.register_table("empty_source", Arc::new(source_table))
+            .unwrap();
+
+        // Execute the INSERT
+        ctx.sql("INSERT INTO my_table SELECT * FROM empty_source")
+            .await
+            .unwrap()
+            .collect()
+            .await
+            .unwrap();
+
+        // Verify: should have made exactly one HTTP request even with empty input
+        assert_eq!(request_count.load(Ordering::SeqCst), 1);
+    }
+
+    #[tokio::test]
+    async fn test_remote_insert_exec_multi_partition() {
+        let request_count = Arc::new(AtomicUsize::new(0));
+        let request_count_clone = request_count.clone();
+
+        let table = Table::new_with_handler("my_table", move |request| {
+            let path = request.url().path();
+
+            if path == "/v1/table/my_table/describe/" {
+                // Return schema for BaseTableAdapter::try_new
+                return http::Response::builder()
+                    .status(200)
+                    .body(format!(r#"{{"version": 1, "schema": {}}}"#, schema_json()))
+                    .unwrap();
+            }
+
+            if path == "/v1/table/my_table/insert/" {
+                assert_eq!(request.method(), "POST");
+                assert_eq!(
+                    request.headers().get("Content-Type").unwrap(),
+                    ARROW_STREAM_CONTENT_TYPE
+                );
+                request_count_clone.fetch_add(1, Ordering::SeqCst);
+
+                return http::Response::builder()
+                    .status(200)
+                    .body(r#"{"version": 2}"#.to_string())
+                    .unwrap();
+            }
+
+            panic!("Unexpected request path: {}", path);
+        });
+
+        let schema = Arc::new(ArrowSchema::new(vec![Field::new(
+            "id",
+            DataType::Int32,
+            true,
+        )]));
+
+        // Create MemTable with multiple partitions and multiple batches
+        let source_table = MemTable::try_new(
+            schema,
+            vec![
+                // Partition 0
+                vec![
+                    record_batch!(("id", Int32, [1, 2])).unwrap(),
+                    record_batch!(("id", Int32, [3, 4])).unwrap(),
+                ],
+                // Partition 1
+                vec![record_batch!(("id", Int32, [5, 6, 7])).unwrap()],
+                // Partition 2
+                vec![record_batch!(("id", Int32, [8])).unwrap()],
+            ],
+        )
+        .unwrap();
+
+        let ctx = SessionContext::new();
+
+        // Register the remote table as insert target
+        let provider = BaseTableAdapter::try_new(table.base_table().clone())
+            .await
+            .unwrap();
+        ctx.register_table("my_table", Arc::new(provider)).unwrap();
+
+        // Register multi-partition source
+        ctx.register_table("multi_partition_source", Arc::new(source_table))
+            .unwrap();
+
+        // Get the physical plan and verify it coalesces the input into a single partition
+        let df = ctx
+            .sql("INSERT INTO my_table SELECT * FROM multi_partition_source")
+            .await
+            .unwrap();
+        let plan = df.clone().create_physical_plan().await.unwrap();
+        let plan_str = datafusion::physical_plan::displayable(plan.as_ref())
+            .indent(true)
+            .to_string();
+
+        // The plan should include a CoalescePartitionsExec to merge partitions
+        assert!(
+            plan_str.contains("CoalescePartitionsExec"),
+            "Expected CoalescePartitionsExec in plan:\n{}",
+            plan_str
+        );
+
+        // Execute the INSERT
+        df.collect().await.unwrap();
+
+        // Verify: should have made exactly one HTTP request despite multiple input partitions
+        assert_eq!(request_count.load(Ordering::SeqCst), 1);
+    }
+}
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -23,13 +23,11 @@ pub use lance::dataset::ColumnAlteration;
 pub use lance::dataset::NewColumnTransform;
 pub use lance::dataset::ReadParams;
 pub use lance::dataset::Version;
-use lance::dataset::{
-    InsertBuilder, UpdateBuilder as LanceUpdateBuilder, WhenMatched, WriteMode, WriteParams,
-};
+use lance::dataset::{InsertBuilder, WhenMatched, WriteMode, WriteParams};
 use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
 use lance::index::vector::utils::infer_vector_dim;
 use lance::index::vector::VectorIndexParams;
-use lance::io::WrappingObjectStore;
+use lance::io::{ObjectStoreParams, WrappingObjectStore};
 use lance_datafusion::exec::{analyze_plan as lance_analyze_plan, execute_plan};
 use lance_datafusion::utils::StreamingWriteSource;
 use lance_index::scalar::{BuiltinIndexType, ScalarIndexParams};
@@ -40,9 +38,10 @@ use lance_index::vector::pq::PQBuildParams;
 use lance_index::vector::sq::builder::SQBuildParams;
 use lance_index::DatasetIndexExt;
 use lance_index::IndexType;
+use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsAccessor};
 use lance_namespace::models::{
-    QueryTableRequest as NsQueryTableRequest, QueryTableRequestFullTextQuery,
-    QueryTableRequestVector, StringFtsQuery,
+    QueryTableRequest as NsQueryTableRequest, QueryTableRequestColumns,
+    QueryTableRequestFullTextQuery, QueryTableRequestVector, StringFtsQuery,
 };
 use lance_namespace::LanceNamespace;
 use lance_table::format::Manifest;
@@ -78,10 +77,13 @@ use self::merge::MergeInsertBuilder;

 pub mod datafusion;
 pub(crate) mod dataset;
+pub mod delete;
 pub mod merge;
+pub mod update;

 use crate::index::waiter::wait_for_index;
 pub use chrono::Duration;
+pub use delete::DeleteResult;
 use futures::future::{join_all, Either};
 pub use lance::dataset::optimize::CompactionOptions;
 pub use lance::dataset::refs::{TagContents, Tags as LanceTags};
@@ -90,6 +92,7 @@ use lance::dataset::statistics::DatasetStatisticsExt;
 use lance_index::frag_reuse::FRAG_REUSE_INDEX_NAME;
 pub use lance_index::optimize::OptimizeOptions;
 use serde_with::skip_serializing_none;
+pub use update::{UpdateBuilder, UpdateResult};

 /// Defines the type of column
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -326,72 +329,6 @@ impl<T: IntoArrow> AddDataBuilder<T> {
    }
 }

-/// A builder for configuring an [`Table::update`] operation
-#[derive(Debug, Clone)]
-pub struct UpdateBuilder {
-    parent: Arc<dyn BaseTable>,
-    pub(crate) filter: Option<String>,
-    pub(crate) columns: Vec<(String, String)>,
-}
-
-impl UpdateBuilder {
-    fn new(parent: Arc<dyn BaseTable>) -> Self {
-        Self {
-            parent,
-            filter: None,
-            columns: Vec::new(),
-        }
-    }
-
-    /// Limits the update operation to rows matching the given filter
-    ///
-    /// If a row does not match the filter then it will be left unchanged.
-    pub fn only_if(mut self, filter: impl Into<String>) -> Self {
-        self.filter = Some(filter.into());
-        self
-    }
-
-    /// Specifies a column to update
-    ///
-    /// This method may be called multiple times to update multiple columns
-    ///
-    /// The `update_expr` should be an SQL expression explaining how to calculate
-    /// the new value for the column.  The expression will be evaluated against the
-    /// previous row's value.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// # use lancedb::Table;
-    /// # async fn doctest_helper(tbl: Table) {
-    ///   let mut operation = tbl.update();
-    ///   // Increments the `bird_count` value by 1
-    ///   operation = operation.column("bird_count", "bird_count + 1");
-    ///   operation.execute().await.unwrap();
-    /// # }
-    /// ```
-    pub fn column(
-        mut self,
-        column_name: impl Into<String>,
-        update_expr: impl Into<String>,
-    ) -> Self {
-        self.columns.push((column_name.into(), update_expr.into()));
-        self
-    }
-
-    /// Executes the update operation.
-    /// Returns the update result
-    pub async fn execute(self) -> Result<UpdateResult> {
-        if self.columns.is_empty() {
-            Err(Error::InvalidInput {
-                message: "at least one column must be specified in an update operation".to_string(),
-            })
-        } else {
-            self.parent.clone().update(self).await
-        }
-    }
-}
-
 /// Filters that can be used to limit the rows returned by a query
 pub enum Filter {
    /// A SQL filter string
@@ -425,17 +362,6 @@ pub trait Tags: Send + Sync {
    async fn update(&mut self, tag: &str, version: u64) -> Result<()>;
 }

-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
-pub struct UpdateResult {
-    #[serde(default)]
-    pub rows_updated: u64,
-    // The commit version associated with the operation.
-    // A version of `0` indicates compatibility with legacy servers that do not return
-    /// a commit version.
-    #[serde(default)]
-    pub version: u64,
-}
-
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
 pub struct AddResult {
    // The commit version associated with the operation.
@@ -445,15 +371,6 @@ pub struct AddResult {
    pub version: u64,
 }

-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
-pub struct DeleteResult {
-    // The commit version associated with the operation.
-    // A version of `0` indicates compatibility with legacy servers that do not return
-    /// a commit version.
-    #[serde(default)]
-    pub version: u64,
-}
-
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
 pub struct MergeResult {
    // The commit version associated with the operation.
@@ -607,10 +524,20 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
    async fn list_versions(&self) -> Result<Vec<Version>>;
    /// Get the table definition.
    async fn table_definition(&self) -> Result<TableDefinition>;
-    /// Get the table URI
-    fn dataset_uri(&self) -> &str;
+    /// Get the table URI (storage location)
+    async fn uri(&self) -> Result<String>;
    /// Get the storage options used when opening this table, if any.
+    #[deprecated(since = "0.25.0", note = "Use initial_storage_options() instead")]
    async fn storage_options(&self) -> Option<HashMap<String, String>>;
+    /// Get the initial storage options that were passed in when opening this table.
+    ///
+    /// For dynamically refreshed options (e.g., credential vending), use [`Self::latest_storage_options`].
+    async fn initial_storage_options(&self) -> Option<HashMap<String, String>>;
+    /// Get the latest storage options, refreshing from provider if configured.
+    ///
+    /// Returns `Ok(Some(options))` if storage options are available (static or refreshed),
+    /// `Ok(None)` if no storage options were configured, or `Err(...)` if refresh failed.
+    async fn latest_storage_options(&self) -> Result<Option<HashMap<String, String>>>;
    /// Poll until the columns are fully indexed. Will return Error::Timeout if the columns
    /// are not fully indexed within the timeout.
    async fn wait_for_index(
@@ -620,6 +547,19 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
    ) -> Result<()>;
    /// Get statistics on the table
    async fn stats(&self) -> Result<TableStatistics>;
+    /// Create an ExecutionPlan for inserting data into the table.
+    ///
+    /// This is used by the DataFusion TableProvider implementation to support
+    /// INSERT INTO statements.
+    async fn create_insert_exec(
+        &self,
+        _input: Arc<dyn datafusion_physical_plan::ExecutionPlan>,
+        _write_params: WriteParams,
+    ) -> Result<Arc<dyn datafusion_physical_plan::ExecutionPlan>> {
+        Err(Error::NotSupported {
+            message: "create_insert_exec not implemented".to_string(),
+        })
+    }
 }

 /// A Table is a collection of strong typed Rows.
@@ -1316,20 +1256,43 @@ impl Table {
        self.inner.list_indices().await
    }

-    /// Get the underlying dataset URI
+    /// Get the table URI (storage location)
    ///
-    /// Warning: This is an internal API and the return value is subject to change.
-    pub fn dataset_uri(&self) -> &str {
-        self.inner.dataset_uri()
+    /// Returns the full storage location of the table (e.g., S3/GCS path).
+    /// For remote tables, this fetches the location from the server via describe.
+    pub async fn uri(&self) -> Result<String> {
+        self.inner.uri().await
    }

    /// Get the storage options used when opening this table, if any.
    ///
    /// Warning: This is an internal API and the return value is subject to change.
+    #[deprecated(since = "0.25.0", note = "Use initial_storage_options() instead")]
    pub async fn storage_options(&self) -> Option<HashMap<String, String>> {
+        #[allow(deprecated)]
        self.inner.storage_options().await
    }

+    /// Get the initial storage options that were passed in when opening this table.
+    ///
+    /// For dynamically refreshed options (e.g., credential vending), use [`Self::latest_storage_options`].
+    ///
+    /// Warning: This is an internal API and the return value is subject to change.
+    pub async fn initial_storage_options(&self) -> Option<HashMap<String, String>> {
+        self.inner.initial_storage_options().await
+    }
+
+    /// Get the latest storage options, refreshing from provider if configured.
+    ///
+    /// This method is useful for credential vending scenarios where storage options
+    /// may be refreshed dynamically. If no dynamic provider is configured, this
+    /// returns the initial static options.
+    ///
+    /// Warning: This is an internal API and the return value is subject to change.
+    pub async fn latest_storage_options(&self) -> Result<Option<HashMap<String, String>>> {
+        self.inner.latest_storage_options().await
+    }
+
    /// Get statistics about an index.
    /// Returns None if the index does not exist.
    pub async fn index_stats(
@@ -1423,7 +1386,9 @@ impl Table {
            })
            .collect::<Vec<_>>();

-        let unioned = Arc::new(UnionExec::new(projected_plans));
+        let unioned = UnionExec::try_new(projected_plans).map_err(|err| Error::Runtime {
+            message: err.to_string(),
+        })?;
        // We require 1 partition in the final output
        let repartitioned = RepartitionExec::try_new(
            unioned,
@@ -1611,6 +1576,101 @@ impl NativeTable {
        self
    }

+    /// Opens an existing Table using a namespace client.
+    ///
+    /// This method uses `DatasetBuilder::from_namespace` to open the table, which
+    /// automatically fetches the table location and storage options from the namespace.
+    /// This eliminates the need to pre-fetch and merge storage options before opening.
+    ///
+    /// # Arguments
+    ///
+    /// * `namespace_client` - The namespace client to use for fetching table metadata
+    /// * `name` - The table name
+    /// * `namespace` - The namespace path (e.g., vec!["parent", "child"])
+    /// * `write_store_wrapper` - Optional wrapper for the object store on write path
+    /// * `params` - Optional read parameters
+    /// * `read_consistency_interval` - Optional interval for read consistency
+    /// * `server_side_query_enabled` - Whether to enable server-side query execution.
+    ///   When true, the namespace_client will be stored and queries will be executed
+    ///   on the namespace server. When false, the namespace is only used for opening
+    ///   the table, and queries are executed locally.
+    /// * `session` - Optional session for object stores and caching
+    ///
+    /// # Returns
+    ///
+    /// * A [NativeTable] object.
+    #[allow(clippy::too_many_arguments)]
+    pub async fn open_from_namespace(
+        namespace_client: Arc<dyn LanceNamespace>,
+        name: &str,
+        namespace: Vec<String>,
+        write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
+        params: Option<ReadParams>,
+        read_consistency_interval: Option<std::time::Duration>,
+        server_side_query_enabled: bool,
+        session: Option<Arc<lance::session::Session>>,
+    ) -> Result<Self> {
+        let mut params = params.unwrap_or_default();
+
+        // Set the session in read params
+        if let Some(sess) = session {
+            params.session(sess);
+        }
+
+        // Patch the params if we have a write store wrapper (owned and unused afterwards)
+        let params = match write_store_wrapper {
+            Some(wrapper) => params.patch_with_store_wrapper(wrapper)?,
+            None => params,
+        };
+
+        // Build table_id from namespace + name
+        let mut table_id = namespace.clone();
+        table_id.push(name.to_string());
+
+        // Use DatasetBuilder::from_namespace which automatically fetches location
+        // and storage options from the namespace
+        let builder = DatasetBuilder::from_namespace(namespace_client.clone(), table_id)
+            .await
+            .map_err(|e| match e {
+                lance::Error::Namespace { source, .. } => Error::Runtime {
+                    message: format!("Failed to get table info from namespace: {:?}", source),
+                },
+                source => Error::Lance { source },
+            })?;
+
+        let dataset = builder
+            .with_read_params(params)
+            .load()
+            .await
+            .map_err(|e| match e {
+                lance::Error::DatasetNotFound { .. } => Error::TableNotFound {
+                    name: name.to_string(),
+                    source: Box::new(e),
+                },
+                source => Error::Lance { source },
+            })?;
+
+        let uri = dataset.uri().to_string();
+        let dataset = DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval);
+        let id = Self::build_id(&namespace, name);
+
+        let stored_namespace_client = if server_side_query_enabled {
+            Some(namespace_client)
+        } else {
+            None
+        };
+
+        Ok(Self {
+            name: name.to_string(),
+            namespace,
+            id,
+            uri,
+            dataset,
+            read_consistency_interval,
+            namespace_client: stored_namespace_client,
+        })
+    }
+
    fn get_table_name(uri: &str) -> Result<String> {
        let path = Path::new(uri);
        let name = path
@@ -1722,6 +1782,108 @@ impl NativeTable {
        .await
    }

+    /// Creates a new Table using a namespace client for storage options.
+    ///
+    /// This method sets up a `StorageOptionsProvider` from the namespace client,
+    /// enabling automatic credential refresh for cloud storage. The namespace is
+    /// used to vend storage options and, optionally, for server-side query execution.
+    ///
+    /// # Arguments
+    ///
+    /// * `namespace_client` - The namespace client to use for storage options
+    /// * `uri` - The URI to the table (obtained from create_empty_table response)
+    /// * `name` - The table name
+    /// * `namespace` - The namespace path (e.g., vec!["parent", "child"])
+    /// * `batches` - RecordBatch to be saved in the database
+    /// * `write_store_wrapper` - Optional wrapper for the object store on write path
+    /// * `params` - Optional write parameters
+    /// * `read_consistency_interval` - Optional interval for read consistency
+    /// * `server_side_query_enabled` - Whether to enable server-side query execution.
+    ///   When true, the namespace client is kept on the returned table.
+    /// * `session` - Optional session for object stores and caching
+    ///
+    /// # Returns
+    ///
+    /// * A [NativeTable] object.
+    #[allow(clippy::too_many_arguments)]
+    pub async fn create_from_namespace(
+        namespace_client: Arc<dyn LanceNamespace>,
+        uri: &str,
+        name: &str,
+        namespace: Vec<String>,
+        batches: impl StreamingWriteSource,
+        write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
+        params: Option<WriteParams>,
+        read_consistency_interval: Option<std::time::Duration>,
+        server_side_query_enabled: bool,
+        session: Option<Arc<lance::session::Session>>,
+    ) -> Result<Self> {
+        // Build table_id from namespace + name for the storage options provider
+        let mut table_id = namespace.clone();
+        table_id.push(name.to_string());
+
+        // Set up storage options provider from namespace
+        let storage_options_provider = Arc::new(LanceNamespaceStorageOptionsProvider::new(
+            namespace_client.clone(),
+            table_id,
+        ));
+
+        // Start with provided params or defaults
+        let mut params = params.unwrap_or_default();
+
+        // Set the session in write params
+        if let Some(sess) = session {
+            params.session = Some(sess);
+        }
+
+        // Ensure store_params exists and set the storage options provider
+        let store_params = params
+            .store_params
+            .get_or_insert_with(ObjectStoreParams::default);
+        let accessor = match store_params.storage_options().cloned() {
+            Some(options) => {
+                StorageOptionsAccessor::with_initial_and_provider(options, storage_options_provider)
+            }
+            None => StorageOptionsAccessor::with_provider(storage_options_provider),
+        };
+        store_params.storage_options_accessor = Some(Arc::new(accessor));
+
+        // Patch the params if we have a write store wrapper (owned and unused afterwards)
+        let params = match write_store_wrapper {
+            Some(wrapper) => params.patch_with_store_wrapper(wrapper)?,
+            None => params,
+        };
+
+        let insert_builder = InsertBuilder::new(uri).with_params(&params);
+        let dataset = insert_builder
+            .execute_stream(batches)
+            .await
+            .map_err(|e| match e {
+                lance::Error::DatasetAlreadyExists { .. } => Error::TableAlreadyExists {
+                    name: name.to_string(),
+                },
+                source => Error::Lance { source },
+            })?;
+
+        let id = Self::build_id(&namespace, name);
+
+        let stored_namespace_client = if server_side_query_enabled {
+            Some(namespace_client)
+        } else {
+            None
+        };
+
+        Ok(Self {
+            name: name.to_string(),
+            namespace,
+            id,
+            uri: uri.to_string(),
+            dataset: DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval),
+            read_consistency_interval,
+            namespace_client: stored_namespace_client,
+        })
+    }
+
    async fn optimize_indices(&self, options: &OptimizeOptions) -> Result<()> {
        info!("LanceDB: optimizing indices: {:?}", options);
        self.dataset
@@ -1860,7 +2022,7 @@ impl NativeTable {
            return provided;
        }
        let suggested = suggested_num_sub_vectors(dim);
-        if num_bits.is_some_and(|num_bits| num_bits == 4) && suggested % 2 != 0 {
+        if num_bits.is_some_and(|num_bits| num_bits == 4) && !suggested.is_multiple_of(2) {
            // num_sub_vectors must be even when 4 bits are used
            suggested + 1
        } else {
@@ -2152,7 +2314,10 @@ impl NativeTable {
                // Convert select to columns list
                let columns = match &vq.base.select {
                    Select::All => None,
-                    Select::Columns(cols) => Some(cols.clone()),
+                    Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
+                        column_names: Some(cols.clone()),
+                        column_aliases: None,
+                    })),
                    Select::Dynamic(_) => {
                        return Err(Error::NotSupported {
                            message:
@@ -2205,7 +2370,7 @@ impl NativeTable {
                    with_row_id: Some(vq.base.with_row_id),
                    bypass_vector_index: Some(!vq.use_index),
                    full_text_query,
-                    version: None,
+                    ..Default::default()
                })
            }
            AnyQuery::Query(q) => {
@@ -2225,7 +2390,10 @@ impl NativeTable {

                let columns = match &q.select {
                    Select::All => None,
-                    Select::Columns(cols) => Some(cols.clone()),
+                    Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
+                        column_names: Some(cols.clone()),
+                        column_aliases: None,
+                    })),
                    Select::Dynamic(_) => {
                        return Err(Error::NotSupported {
                            message: "Dynamic columns are not supported for server-side query"
@@ -2264,18 +2432,11 @@ impl NativeTable {
                    columns,
                    prefilter: Some(q.prefilter),
                    offset: q.offset.map(|o| o as i32),
-                    ef: None,
-                    refine_factor: None,
-                    distance_type: None,
-                    nprobes: None,
                    vector_column: None, // No vector column for plain queries
                    with_row_id: Some(q.with_row_id),
                    bypass_vector_index: Some(true), // No vector index for plain queries
                    full_text_query,
-                    version: None,
-                    fast_search: None,
-                    lower_bound: None,
-                    upper_bound: None,
+                    ..Default::default()
                })
            }
        }
@@ -2604,25 +2765,8 @@ impl BaseTable for NativeTable {
    }

    async fn update(&self, update: UpdateBuilder) -> Result<UpdateResult> {
-        let dataset = self.dataset.get().await?.clone();
-        let mut builder = LanceUpdateBuilder::new(Arc::new(dataset));
-        if let Some(predicate) = update.filter {
-            builder = builder.update_where(&predicate)?;
-        }
-
-        for (column, value) in update.columns {
-            builder = builder.set(column, &value)?;
-        }
-
-        let operation = builder.build()?;
-        let res = operation.execute().await?;
-        self.dataset
-            .set_latest(res.new_dataset.as_ref().clone())
-            .await;
-        Ok(UpdateResult {
-            rows_updated: res.rows_updated,
-            version: res.new_dataset.version().version,
-        })
+        // Delegate to the submodule implementation
+        update::execute_update(self, update).await
    }

    async fn create_plan(
@@ -2880,11 +3024,8 @@ impl BaseTable for NativeTable {

    /// Delete rows from the table
    async fn delete(&self, predicate: &str) -> Result<DeleteResult> {
-        let mut dataset = self.dataset.get_mut().await?;
-        dataset.delete(predicate).await?;
-        Ok(DeleteResult {
-            version: dataset.version().version,
-        })
+        // Delegate to the submodule implementation
+        delete::execute_delete(self, predicate).await
    }

    async fn tags(&self) -> Result<Box<dyn Tags + '_>> {
@@ -3028,16 +3169,25 @@ impl BaseTable for NativeTable {
        Ok(results.into_iter().flatten().collect())
    }

-    fn dataset_uri(&self) -> &str {
-        self.uri.as_str()
+    async fn uri(&self) -> Result<String> {
+        Ok(self.uri.clone())
    }

    async fn storage_options(&self) -> Option<HashMap<String, String>> {
+        self.initial_storage_options().await
+    }
+
+    async fn initial_storage_options(&self) -> Option<HashMap<String, String>> {
        self.dataset
            .get()
            .await
            .ok()
-            .and_then(|dataset| dataset.storage_options().cloned())
+            .and_then(|dataset| dataset.initial_storage_options().cloned())
+    }
+
+    async fn latest_storage_options(&self) -> Result<Option<HashMap<String, String>>> {
+        let dataset = self.dataset.get().await?;
+        Ok(dataset.latest_storage_options().await?.map(|o| o.0))
    }

    async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
@@ -3153,6 +3303,21 @@ impl BaseTable for NativeTable {
        };
        Ok(stats)
    }
+
+    /// Build a DataFusion plan node that writes `input` into this table.
+    ///
+    /// The node receives both the consistency wrapper and a copy of the dataset read here.
+    async fn create_insert_exec(
+        &self,
+        input: Arc<dyn datafusion_physical_plan::ExecutionPlan>,
+        write_params: WriteParams,
+    ) -> Result<Arc<dyn datafusion_physical_plan::ExecutionPlan>> {
+        let guard = self.dataset.get().await?;
+        let snapshot = Arc::new((*guard).clone());
+        let wrapper = self.dataset.clone();
+        let exec = datafusion::insert::InsertExec::new(wrapper, snapshot, input, write_params);
+        Ok(Arc::new(exec))
+    }
 }

 #[skip_serializing_none]
@@ -3202,22 +3367,18 @@ pub struct FragmentSummaryStats {
 #[cfg(test)]
 #[allow(deprecated)]
 mod tests {
-    use std::iter;
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::Arc;
    use std::time::Duration;

    use arrow_array::{
        builder::{ListBuilder, StringBuilder},
-        Array, BooleanArray, Date32Array, FixedSizeListArray, Float32Array, Float64Array,
-        Int32Array, Int64Array, LargeStringArray, RecordBatch, RecordBatchIterator,
-        RecordBatchReader, StringArray, TimestampMillisecondArray, TimestampNanosecondArray,
-        UInt32Array,
+        Array, BooleanArray, FixedSizeListArray, Float32Array, Int32Array, LargeStringArray,
+        RecordBatch, RecordBatchIterator, RecordBatchReader, StringArray,
    };
    use arrow_array::{BinaryArray, LargeBinaryArray};
    use arrow_data::ArrayDataBuilder;
-    use arrow_schema::{DataType, Field, Schema, TimeUnit};
-    use futures::TryStreamExt;
+    use arrow_schema::{DataType, Field, Schema};
    use lance::dataset::WriteMode;
    use lance::io::{ObjectStoreParams, WrappingObjectStore};
    use lance::Dataset;
@@ -3229,7 +3390,6 @@ mod tests {
    use crate::connection::ConnectBuilder;
    use crate::index::scalar::{BTreeIndexBuilder, BitmapIndexBuilder};
    use crate::index::vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder};
-    use crate::query::{ExecutableQuery, QueryBase};

    #[tokio::test]
    async fn test_open() {
@@ -3451,306 +3611,6 @@ mod tests {
        assert_eq!(table.name(), "test");
    }

-    #[tokio::test]
-    async fn test_update_with_predicate() {
-        let tmp_dir = tempdir().unwrap();
-        let dataset_path = tmp_dir.path().join("test.lance");
-        let uri = dataset_path.to_str().unwrap();
-        let conn = connect(uri)
-            .read_consistency_interval(Duration::from_secs(0))
-            .execute()
-            .await
-            .unwrap();
-
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("name", DataType::Utf8, false),
-        ]));
-
-        let record_batch_iter = RecordBatchIterator::new(
-            vec![RecordBatch::try_new(
-                schema.clone(),
-                vec![
-                    Arc::new(Int32Array::from_iter_values(0..10)),
-                    Arc::new(StringArray::from_iter_values(vec![
-                        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
-                    ])),
-                ],
-            )
-            .unwrap()]
-            .into_iter()
-            .map(Ok),
-            schema.clone(),
-        );
-
-        let table = conn
-            .create_table("my_table", record_batch_iter)
-            .execute()
-            .await
-            .unwrap();
-
-        table
-            .update()
-            .only_if("id > 5")
-            .column("name", "'foo'")
-            .execute()
-            .await
-            .unwrap();
-
-        let mut batches = table
-            .query()
-            .select(Select::columns(&["id", "name"]))
-            .execute()
-            .await
-            .unwrap()
-            .try_collect::<Vec<_>>()
-            .await
-            .unwrap();
-
-        while let Some(batch) = batches.pop() {
-            let ids = batch
-                .column(0)
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap()
-                .iter()
-                .collect::<Vec<_>>();
-            let names = batch
-                .column(1)
-                .as_any()
-                .downcast_ref::<StringArray>()
-                .unwrap()
-                .iter()
-                .collect::<Vec<_>>();
-            for (i, name) in names.iter().enumerate() {
-                let id = ids[i].unwrap();
-                let name = name.unwrap();
-                if id > 5 {
-                    assert_eq!(name, "foo");
-                } else {
-                    assert_eq!(name, &format!("{}", (b'a' + id as u8) as char));
-                }
-            }
-        }
-    }
-
-    #[tokio::test]
-    async fn test_update_all_types() {
-        let tmp_dir = tempdir().unwrap();
-        let dataset_path = tmp_dir.path().join("test.lance");
-        let uri = dataset_path.to_str().unwrap();
-        let conn = connect(uri)
-            .read_consistency_interval(Duration::from_secs(0))
-            .execute()
-            .await
-            .unwrap();
-
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("int32", DataType::Int32, false),
-            Field::new("int64", DataType::Int64, false),
-            Field::new("uint32", DataType::UInt32, false),
-            Field::new("string", DataType::Utf8, false),
-            Field::new("large_string", DataType::LargeUtf8, false),
-            Field::new("float32", DataType::Float32, false),
-            Field::new("float64", DataType::Float64, false),
-            Field::new("bool", DataType::Boolean, false),
-            Field::new("date32", DataType::Date32, false),
-            Field::new(
-                "timestamp_ns",
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-                false,
-            ),
-            Field::new(
-                "timestamp_ms",
-                DataType::Timestamp(TimeUnit::Millisecond, None),
-                false,
-            ),
-            Field::new(
-                "vec_f32",
-                DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 2),
-                false,
-            ),
-            Field::new(
-                "vec_f64",
-                DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float64, true)), 2),
-                false,
-            ),
-        ]));
-
-        let record_batch_iter = RecordBatchIterator::new(
-            vec![RecordBatch::try_new(
-                schema.clone(),
-                vec![
-                    Arc::new(Int32Array::from_iter_values(0..10)),
-                    Arc::new(Int64Array::from_iter_values(0..10)),
-                    Arc::new(UInt32Array::from_iter_values(0..10)),
-                    Arc::new(StringArray::from_iter_values(vec![
-                        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
-                    ])),
-                    Arc::new(LargeStringArray::from_iter_values(vec![
-                        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
-                    ])),
-                    Arc::new(Float32Array::from_iter_values((0..10).map(|i| i as f32))),
-                    Arc::new(Float64Array::from_iter_values((0..10).map(|i| i as f64))),
-                    Arc::new(Into::<BooleanArray>::into(vec![
-                        true, false, true, false, true, false, true, false, true, false,
-                    ])),
-                    Arc::new(Date32Array::from_iter_values(0..10)),
-                    Arc::new(TimestampNanosecondArray::from_iter_values(0..10)),
-                    Arc::new(TimestampMillisecondArray::from_iter_values(0..10)),
-                    Arc::new(
-                        create_fixed_size_list(
-                            Float32Array::from_iter_values((0..20).map(|i| i as f32)),
-                            2,
-                        )
-                        .unwrap(),
-                    ),
-                    Arc::new(
-                        create_fixed_size_list(
-                            Float64Array::from_iter_values((0..20).map(|i| i as f64)),
-                            2,
-                        )
-                        .unwrap(),
-                    ),
-                ],
-            )
-            .unwrap()]
-            .into_iter()
-            .map(Ok),
-            schema.clone(),
-        );
-
-        let table = conn
-            .create_table("my_table", record_batch_iter)
-            .execute()
-            .await
-            .unwrap();
-
-        // check it can do update for each type
-        let updates: Vec<(&str, &str)> = vec![
-            ("string", "'foo'"),
-            ("large_string", "'large_foo'"),
-            ("int32", "1"),
-            ("int64", "1"),
-            ("uint32", "1"),
-            ("float32", "1.0"),
-            ("float64", "1.0"),
-            ("bool", "true"),
-            ("date32", "1"),
-            ("timestamp_ns", "1"),
-            ("timestamp_ms", "1"),
-            ("vec_f32", "[1.0, 1.0]"),
-            ("vec_f64", "[1.0, 1.0]"),
-        ];
-
-        let mut update_op = table.update();
-        for (column, value) in updates {
-            update_op = update_op.column(column, value);
-        }
-        update_op.execute().await.unwrap();
-
-        let mut batches = table
-            .query()
-            .select(Select::columns(&[
-                "string",
-                "large_string",
-                "int32",
-                "int64",
-                "uint32",
-                "float32",
-                "float64",
-                "bool",
-                "date32",
-                "timestamp_ns",
-                "timestamp_ms",
-                "vec_f32",
-                "vec_f64",
-            ]))
-            .execute()
-            .await
-            .unwrap()
-            .try_collect::<Vec<_>>()
-            .await
-            .unwrap();
-        let batch = batches.pop().unwrap();
-
-        macro_rules! assert_column {
-            ($column:expr, $array_type:ty, $expected:expr) => {
-                let array = $column
-                    .as_any()
-                    .downcast_ref::<$array_type>()
-                    .unwrap()
-                    .iter()
-                    .collect::<Vec<_>>();
-                for v in array {
-                    assert_eq!(v, Some($expected));
-                }
-            };
-        }
-
-        assert_column!(batch.column(0), StringArray, "foo");
-        assert_column!(batch.column(1), LargeStringArray, "large_foo");
-        assert_column!(batch.column(2), Int32Array, 1);
-        assert_column!(batch.column(3), Int64Array, 1);
-        assert_column!(batch.column(4), UInt32Array, 1);
-        assert_column!(batch.column(5), Float32Array, 1.0);
-        assert_column!(batch.column(6), Float64Array, 1.0);
-        assert_column!(batch.column(7), BooleanArray, true);
-        assert_column!(batch.column(8), Date32Array, 1);
-        assert_column!(batch.column(9), TimestampNanosecondArray, 1);
-        assert_column!(batch.column(10), TimestampMillisecondArray, 1);
-
-        let array = batch
-            .column(11)
-            .as_any()
-            .downcast_ref::<FixedSizeListArray>()
-            .unwrap()
-            .iter()
-            .collect::<Vec<_>>();
-        for v in array {
-            let v = v.unwrap();
-            let f32array = v.as_any().downcast_ref::<Float32Array>().unwrap();
-            for v in f32array {
-                assert_eq!(v, Some(1.0));
-            }
-        }
-
-        let array = batch
-            .column(12)
-            .as_any()
-            .downcast_ref::<FixedSizeListArray>()
-            .unwrap()
-            .iter()
-            .collect::<Vec<_>>();
-        for v in array {
-            let v = v.unwrap();
-            let f64array = v.as_any().downcast_ref::<Float64Array>().unwrap();
-            for v in f64array {
-                assert_eq!(v, Some(1.0));
-            }
-        }
-    }
-
-    #[tokio::test]
-    async fn test_update_via_expr() {
-        let tmp_dir = tempdir().unwrap();
-        let dataset_path = tmp_dir.path().join("test.lance");
-        let uri = dataset_path.to_str().unwrap();
-        let conn = connect(uri)
-            .read_consistency_interval(Duration::from_secs(0))
-            .execute()
-            .await
-            .unwrap();
-        let tbl = conn
-            .create_table("my_table", make_test_batches())
-            .execute()
-            .await
-            .unwrap();
-        assert_eq!(1, tbl.count_rows(Some("i == 0".to_string())).await.unwrap());
-        tbl.update().column("i", "i+1").execute().await.unwrap();
-        assert_eq!(0, tbl.count_rows(Some("i == 0".to_string())).await.unwrap());
-    }
-
    #[derive(Default, Debug)]
    struct NoOpCacheWrapper {
        called: AtomicBool,
@@ -3819,7 +3679,7 @@ mod tests {
                schema.clone(),
                vec![
                    Arc::new(Int32Array::from_iter_values(offset..(offset + 10))),
-                    Arc::new(Int32Array::from_iter_values(iter::repeat(age).take(10))),
+                    Arc::new(Int32Array::from_iter_values(std::iter::repeat_n(age, 10))),
                ],
            )],
            schema,
@@ -4950,7 +4810,13 @@ mod tests {
        assert_eq!(ns_request.k, 10);
        assert_eq!(ns_request.offset, Some(5));
        assert_eq!(ns_request.filter, Some("id > 0".to_string()));
-        assert_eq!(ns_request.columns, Some(vec!["id".to_string()]));
+        assert_eq!(
+            ns_request
+                .columns
+                .as_ref()
+                .and_then(|c| c.column_names.as_ref()),
+            Some(&vec!["id".to_string()])
+        );
        assert_eq!(ns_request.vector_column, Some("vector".to_string()));
        assert_eq!(ns_request.distance_type, Some("l2".to_string()));
        assert!(ns_request.vector.single_vector.is_some());
@@ -4991,7 +4857,13 @@ mod tests {
        assert_eq!(ns_request.k, 20);
        assert_eq!(ns_request.offset, Some(5));
        assert_eq!(ns_request.filter, Some("id > 5".to_string()));
-        assert_eq!(ns_request.columns, Some(vec!["id".to_string()]));
+        assert_eq!(
+            ns_request
+                .columns
+                .as_ref()
+                .and_then(|c| c.column_names.as_ref()),
+            Some(&vec!["id".to_string()])
+        );
        assert_eq!(ns_request.with_row_id, Some(true));
        assert_eq!(ns_request.bypass_vector_index, Some(true));
        assert!(ns_request.vector_column.is_none()); // No vector column for plain queries
--- a/rust/lancedb/src/table/datafusion.rs
+++ b/rust/lancedb/src/table/datafusion.rs
@@ -3,6 +3,7 @@

 //! This module contains adapters to allow LanceDB tables to be used as DataFusion table providers.

+pub mod insert;
 pub mod udtf;

 use std::{collections::HashMap, sync::Arc};
@@ -13,11 +14,12 @@ use async_trait::async_trait;
 use datafusion_catalog::{Session, TableProvider};
 use datafusion_common::{DataFusionError, Result as DataFusionResult, Statistics};
 use datafusion_execution::{SendableRecordBatchStream, TaskContext};
-use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType};
+use datafusion_expr::{dml::InsertOp, Expr, TableProviderFilterPushDown, TableType};
 use datafusion_physical_plan::{
    stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties,
 };
 use futures::{TryFutureExt, TryStreamExt};
+use lance::dataset::{WriteMode, WriteParams};

 use super::{AnyQuery, BaseTable};
 use crate::{
@@ -250,6 +252,33 @@ impl TableProvider for BaseTableAdapter {
        // TODO
        None
    }
+
+    /// Plans an insert of `input` into the wrapped table.
+    ///
+    /// `Append` and `Overwrite` map to the matching Lance write mode; `Replace` is rejected.
+    async fn insert_into(
+        &self,
+        _state: &dyn Session,
+        input: Arc<dyn ExecutionPlan>,
+        insert_op: InsertOp,
+    ) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
+        // Translate the DataFusion insert operation into a Lance write mode.
+        let mode = match insert_op {
+            InsertOp::Append => WriteMode::Append,
+            InsertOp::Overwrite => WriteMode::Overwrite,
+            InsertOp::Replace => {
+                let message = "Replace mode is not supported for LanceDB tables";
+                return Err(DataFusionError::NotImplemented(message.to_string()));
+            }
+        };
+        let write_params = WriteParams {
+            mode,
+            ..Default::default()
+        };
+
+        let exec = self.table.create_insert_exec(input, write_params).await;
+        exec.map_err(|e| DataFusionError::External(e.into()))
+    }
 }

 #[cfg(test)]
--- a/rust/lancedb/src/table/datafusion/insert.rs
+++ b/rust/lancedb/src/table/datafusion/insert.rs
@@ -0,0 +1,446 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+//! DataFusion ExecutionPlan for inserting data into LanceDB tables.
+
+use std::any::Any;
+use std::sync::{Arc, LazyLock, Mutex};
+
+use arrow_array::{RecordBatch, UInt64Array};
+use arrow_schema::{DataType, Field, Schema as ArrowSchema, SchemaRef};
+use datafusion_common::{DataFusionError, Result as DataFusionResult};
+use datafusion_execution::{SendableRecordBatchStream, TaskContext};
+use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
+use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType};
+use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
+use datafusion_physical_plan::{
+    DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties,
+};
+use lance::dataset::transaction::{Operation, Transaction};
+use lance::dataset::{CommitBuilder, InsertBuilder, WriteParams};
+use lance::Dataset;
+use lance_table::format::Fragment;
+
+use crate::table::dataset::DatasetConsistencyWrapper;
+
+pub(crate) static COUNT_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(|| {
+    Arc::new(ArrowSchema::new(vec![Field::new(
+        "count",
+        DataType::UInt64,
+        false,
+    )]))
+});
+
+fn operation_fragments(operation: &Operation) -> &[Fragment] {
+    match operation {
+        Operation::Append { fragments } => fragments,
+        Operation::Overwrite { fragments, .. } => fragments,
+        _ => &[],
+    }
+}
+
+fn count_rows_from_operation(operation: &Operation) -> u64 {
+    operation_fragments(operation)
+        .iter()
+        .map(|f| f.num_rows().unwrap_or(0) as u64)
+        .sum()
+}
+
+fn operation_fragments_mut(operation: &mut Operation) -> &mut Vec<Fragment> {
+    match operation {
+        Operation::Append { fragments } => fragments,
+        Operation::Overwrite { fragments, .. } => fragments,
+        _ => panic!("Unsupported operation type for getting mutable fragments"),
+    }
+}
+
+fn merge_transactions(mut transactions: Vec<Transaction>) -> Option<Transaction> {
+    let mut first = transactions.pop()?;
+
+    for txn in transactions {
+        let first_fragments = operation_fragments_mut(&mut first.operation);
+        let txn_fragments = operation_fragments(&txn.operation);
+        first_fragments.extend_from_slice(txn_fragments);
+    }
+
+    Some(first)
+}
+
+/// ExecutionPlan for inserting data into a native LanceDB table.
+///
+/// Execution proceeds in three steps:
+/// 1. Each partition writes data independently using InsertBuilder::execute_uncommitted_stream
+/// 2. The last partition to finish merges every partition's transaction and commits it once
+/// 3. Each partition's stream yields one batch with the row count that partition wrote
+#[derive(Debug)]
+pub struct InsertExec {
+    ds_wrapper: DatasetConsistencyWrapper,
+    dataset: Arc<Dataset>,
+    input: Arc<dyn ExecutionPlan>,
+    write_params: WriteParams,
+    properties: PlanProperties,
+    partial_transactions: Arc<Mutex<Vec<Transaction>>>,
+}
+
+impl InsertExec {
+    pub fn new(
+        ds_wrapper: DatasetConsistencyWrapper,
+        dataset: Arc<Dataset>,
+        input: Arc<dyn ExecutionPlan>,
+        write_params: WriteParams,
+    ) -> Self {
+        let schema = COUNT_SCHEMA.clone();
+        let num_partitions = input.output_partitioning().partition_count();
+        let properties = PlanProperties::new(
+            EquivalenceProperties::new(schema),
+            Partitioning::UnknownPartitioning(num_partitions),
+            EmissionType::Final,
+            Boundedness::Bounded,
+        );
+
+        Self {
+            ds_wrapper,
+            dataset,
+            input,
+            write_params,
+            properties,
+            partial_transactions: Arc::new(Mutex::new(Vec::with_capacity(num_partitions))),
+        }
+    }
+}
+
+impl DisplayAs for InsertExec {
+    fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match t {
+            DisplayFormatType::Default | DisplayFormatType::Verbose => {
+                write!(f, "InsertExec: mode={:?}", self.write_params.mode)
+            }
+            DisplayFormatType::TreeRender => {
+                write!(f, "InsertExec")
+            }
+        }
+    }
+}
+
+impl ExecutionPlan for InsertExec {
+    fn name(&self) -> &str {
+        Self::static_name()
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![&self.input]
+    }
+
+    fn maintains_input_order(&self) -> Vec<bool> {
+        vec![false]
+    }
+
+    fn benefits_from_input_partitioning(&self) -> Vec<bool> {
+        vec![false]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
+        if children.len() != 1 {
+            return Err(DataFusionError::Internal(
+                "InsertExec requires exactly one child".to_string(),
+            ));
+        }
+        Ok(Arc::new(Self::new(
+            self.ds_wrapper.clone(),
+            self.dataset.clone(),
+            children[0].clone(),
+            self.write_params.clone(),
+        )))
+    }
+
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> DataFusionResult<SendableRecordBatchStream> {
+        let input_stream = self.input.execute(partition, context)?;
+        let dataset = self.dataset.clone();
+        let write_params = self.write_params.clone();
+        let partial_transactions = self.partial_transactions.clone();
+        let total_partitions = self.input.output_partitioning().partition_count();
+        let ds_wrapper = self.ds_wrapper.clone();
+
+        let stream = futures::stream::once(async move {
+            let transaction = InsertBuilder::new(dataset.clone())
+                .with_params(&write_params)
+                .execute_uncommitted_stream(input_stream)
+                .await?;
+
+            let num_rows = count_rows_from_operation(&transaction.operation);
+
+            let to_commit = {
+                // Don't hold the lock over an await point.
+                let mut txns = partial_transactions.lock().unwrap();
+                txns.push(transaction);
+                if txns.len() == total_partitions {
+                    Some(std::mem::take(&mut *txns))
+                } else {
+                    None
+                }
+            };
+
+            if let Some(transactions) = to_commit {
+                if let Some(merged_txn) = merge_transactions(transactions) {
+                    let new_dataset = CommitBuilder::new(dataset.clone())
+                        .execute(merged_txn)
+                        .await?;
+                    ds_wrapper.set_latest(new_dataset).await;
+                }
+            }
+
+            Ok(RecordBatch::try_new(
+                COUNT_SCHEMA.clone(),
+                vec![Arc::new(UInt64Array::from(vec![num_rows]))],
+            )?)
+        });
+
+        Ok(Box::pin(RecordBatchStreamAdapter::new(
+            COUNT_SCHEMA.clone(),
+            stream,
+        )))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::vec;
+
+    use super::*;
+    use arrow_array::{record_batch, Int32Array, RecordBatchIterator};
+    use datafusion::prelude::SessionContext;
+    use datafusion_catalog::MemTable;
+    use tempfile::tempdir;
+
+    use crate::connect;
+
+    #[tokio::test]
+    async fn test_insert_via_sql() {
+        let tmp_dir = tempdir().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();
+
+        let db = connect(uri).execute().await.unwrap();
+
+        // Create initial table
+        let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap();
+        let schema = batch.schema();
+        let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
+
+        let table = db
+            .create_table("test_insert", Box::new(reader))
+            .execute()
+            .await
+            .unwrap();
+
+        // Verify initial count
+        assert_eq!(table.count_rows(None).await.unwrap(), 3);
+
+        let ctx = SessionContext::new();
+        let provider =
+            crate::table::datafusion::BaseTableAdapter::try_new(table.base_table().clone())
+                .await
+                .unwrap();
+        ctx.register_table("test_insert", Arc::new(provider))
+            .unwrap();
+
+        ctx.sql("INSERT INTO test_insert VALUES (4), (5), (6)")
+            .await
+            .unwrap()
+            .collect()
+            .await
+            .unwrap();
+
+        // Verify final count
+        table.checkout_latest().await.unwrap();
+        assert_eq!(table.count_rows(None).await.unwrap(), 6);
+    }
+
+    #[tokio::test]
+    async fn test_insert_overwrite_via_sql() {
+        let tmp_dir = tempdir().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();
+
+        let db = connect(uri).execute().await.unwrap();
+
+        // Create initial table with 3 rows
+        let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap();
+        let schema = batch.schema();
+        let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
+
+        let table = db
+            .create_table("test_overwrite", Box::new(reader))
+            .execute()
+            .await
+            .unwrap();
+
+        assert_eq!(table.count_rows(None).await.unwrap(), 3);
+
+        let ctx = SessionContext::new();
+        let provider =
+            crate::table::datafusion::BaseTableAdapter::try_new(table.base_table().clone())
+                .await
+                .unwrap();
+        ctx.register_table("test_overwrite", Arc::new(provider))
+            .unwrap();
+
+        ctx.sql("INSERT OVERWRITE INTO test_overwrite VALUES (10), (20)")
+            .await
+            .unwrap()
+            .collect()
+            .await
+            .unwrap();
+
+        // Verify: should have 2 rows (overwritten, not appended)
+        table.checkout_latest().await.unwrap();
+        assert_eq!(table.count_rows(None).await.unwrap(), 2);
+    }
+
+    #[tokio::test]
+    async fn test_insert_empty_batch() {
+        let tmp_dir = tempdir().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();
+
+        let db = connect(uri).execute().await.unwrap();
+
+        // Create initial table
+        let schema = Arc::new(ArrowSchema::new(vec![Field::new(
+            "id",
+            DataType::Int32,
+            false,
+        )]));
+        let batches = vec![RecordBatch::try_new(
+            schema.clone(),
+            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
+        )
+        .unwrap()];
+        let reader = RecordBatchIterator::new(batches.into_iter().map(Ok), schema.clone());
+
+        let table = db
+            .create_table("test_empty", Box::new(reader))
+            .execute()
+            .await
+            .unwrap();
+
+        assert_eq!(table.count_rows(None).await.unwrap(), 3);
+
+        let ctx = SessionContext::new();
+        let provider =
+            crate::table::datafusion::BaseTableAdapter::try_new(table.base_table().clone())
+                .await
+                .unwrap();
+        ctx.register_table("test_empty", Arc::new(provider))
+            .unwrap();
+
+        let source_schema = Arc::new(ArrowSchema::new(vec![Field::new(
+            "id",
+            DataType::Int32,
+            false,
+        )]));
+        // Empty batches
+        let source_reader = RecordBatchIterator::new(
+            std::iter::empty::<Result<RecordBatch, arrow_schema::ArrowError>>(),
+            source_schema,
+        );
+        let source_table = db
+            .create_table("empty_source", Box::new(source_reader))
+            .execute()
+            .await
+            .unwrap();
+        let source_provider =
+            crate::table::datafusion::BaseTableAdapter::try_new(source_table.base_table().clone())
+                .await
+                .unwrap();
+        ctx.register_table("empty_source", Arc::new(source_provider))
+            .unwrap();
+
+        // Execute INSERT with empty source
+        ctx.sql("INSERT INTO test_empty SELECT * FROM empty_source")
+            .await
+            .unwrap()
+            .collect()
+            .await
+            .unwrap();
+
+        // Verify: should still have 3 rows (nothing inserted)
+        table.checkout_latest().await.unwrap();
+        assert_eq!(table.count_rows(None).await.unwrap(), 3);
+    }
+
+    #[tokio::test]
+    async fn test_insert_multiple_batches() {
+        let tmp_dir = tempdir().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();
+
+        let db = connect(uri).execute().await.unwrap();
+
+        // Create initial table
+        let schema = Arc::new(ArrowSchema::new(vec![Field::new(
+            "id",
+            DataType::Int32,
+            true,
+        )]));
+        let batches =
+            vec![
+                RecordBatch::try_new(schema.clone(), vec![Arc::new(Int32Array::from(vec![1]))])
+                    .unwrap(),
+            ];
+        let reader = RecordBatchIterator::new(batches.into_iter().map(Ok), schema.clone());
+
+        let table = db
+            .create_table("test_multi_batch", Box::new(reader))
+            .execute()
+            .await
+            .unwrap();
+
+        let ctx = SessionContext::new();
+        let provider =
+            crate::table::datafusion::BaseTableAdapter::try_new(table.base_table().clone())
+                .await
+                .unwrap();
+        ctx.register_table("test_multi_batch", Arc::new(provider))
+            .unwrap();
+
+        // Memtable with multiple batches and multiple partitions
+        let source_table = MemTable::try_new(
+            schema.clone(),
+            vec![
+                // Partition 0
+                vec![
+                    record_batch!(("id", Int32, [2, 3])).unwrap(),
+                    record_batch!(("id", Int32, [4, 5])).unwrap(),
+                ],
+                // Partition 1
+                vec![record_batch!(("id", Int32, [6, 7, 8])).unwrap()],
+            ],
+        )
+        .unwrap();
+        ctx.register_table("multi_batch_source", Arc::new(source_table))
+            .unwrap();
+
+        ctx.sql("INSERT INTO test_multi_batch SELECT * FROM multi_batch_source")
+            .await
+            .unwrap()
+            .collect()
+            .await
+            .unwrap();
+
+        // Verify: should have 1 + 2 + 2 + 3 = 8 rows
+        table.checkout_latest().await.unwrap();
+        assert_eq!(table.count_rows(None).await.unwrap(), 8);
+    }
+}
--- a/rust/lancedb/src/table/dataset.rs
+++ b/rust/lancedb/src/table/dataset.rs
@@ -100,7 +100,8 @@ impl DatasetRef {
                let should_checkout = match &target_ref {
                    refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
                    refs::Ref::Version(_, None) => true, // No specific version, always checkout
-                    refs::Ref::Tag(_) => true,           // Always checkout for tags
+                    refs::Ref::VersionNumber(target_ver) => version != target_ver,
+                    refs::Ref::Tag(_) => true, // Always checkout for tags
                };

                if should_checkout {
--- a/rust/lancedb/src/table/delete.rs
+++ b/rust/lancedb/src/table/delete.rs
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+use serde::{Deserialize, Serialize};
+
+use super::NativeTable;
+use crate::Result;
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
+pub struct DeleteResult {
+    /// The commit version associated with the operation.
+    /// A version of `0` indicates compatibility with legacy servers that do not return
+    /// a commit version.
+    #[serde(default)]
+    pub version: u64,
+}
+
+/// Internal implementation of the delete logic
+///
+/// This logic was moved from NativeTable::delete to keep table.rs clean.
+pub(crate) async fn execute_delete(table: &NativeTable, predicate: &str) -> Result<DeleteResult> {
+    // We access the dataset from the table. Since this is in the same module hierarchy (super),
+    // and 'dataset' is pub(crate), we can access it.
+    let mut dataset = table.dataset.get_mut().await?;
+
+    // Perform the actual delete on the Lance dataset
+    dataset.delete(predicate).await?;
+
+    // Return the result with the new version
+    Ok(DeleteResult {
+        version: dataset.version().version,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::connect;
+    use arrow_array::{record_batch, Int32Array, RecordBatch, RecordBatchIterator};
+    use arrow_schema::{DataType, Field, Schema};
+    use std::sync::Arc;
+
+    use crate::query::ExecutableQuery;
+    use futures::TryStreamExt;
+    #[tokio::test]
+    async fn test_delete_simple() {
+        let conn = connect("memory://").execute().await.unwrap();
+
+        // 1. Create a table with values 0 to 9
+        let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, false)]));
+        let batch = RecordBatch::try_new(
+            schema.clone(),
+            vec![Arc::new(Int32Array::from_iter_values(0..10))],
+        )
+        .unwrap();
+
+        let table = conn
+            .create_table(
+                "test_delete",
+                RecordBatchIterator::new(vec![Ok(batch)], schema),
+            )
+            .execute()
+            .await
+            .unwrap();
+
+        // 2. Verify initial state
+        assert_eq!(table.count_rows(None).await.unwrap(), 10);
+
+        // 3. Execute Delete (removes values > 5)
+        table.delete("i > 5").await.unwrap();
+
+        // 4. Verify results
+        assert_eq!(table.count_rows(None).await.unwrap(), 6); // 0, 1, 2, 3, 4, 5 remain
+
+        // 5. Verify specific data consistency
+        let batches = table
+            .query()
+            .execute()
+            .await
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+        let batch = &batches[0];
+        let array = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+
+        // Ensure no value > 5 exists
+        for val in array.iter() {
+            assert!(val.unwrap() <= 5);
+        }
+    }
+    #[tokio::test]
+    async fn rows_removed_schema_same() {
+        let conn = connect("memory://").execute().await.unwrap();
+        let batch = record_batch!(
+            ("id", Int32, [1, 2, 3, 4, 5]),
+            ("name", Utf8, ["a", "b", "c", "d", "e"])
+        )
+        .unwrap();
+        let original_schema = batch.schema();
+
+        let table = conn
+            .create_table(
+                "test_delete_all",
+                RecordBatchIterator::new(vec![Ok(batch)], original_schema.clone()),
+            )
+            .execute()
+            .await
+            .unwrap();
+
+        table.delete("true").await.unwrap();
+
+        assert_eq!(table.count_rows(None).await.unwrap(), 0);
+
+        let current_schema = table.schema().await.unwrap();
+        // Deleting every row must leave the table schema unchanged.
+        assert_eq!(current_schema, original_schema);
+    }
+
+    #[tokio::test]
+    async fn test_delete_false_increments_version() {
+        let conn = connect("memory://").execute().await.unwrap();
+
+        // Create a table with 5 rows
+        let batch = record_batch!(("id", Int32, [1, 2, 3, 4, 5])).unwrap();
+
+        let schema = batch.schema();
+
+        let table = conn
+            .create_table(
+                "test_delete_noop",
+                RecordBatchIterator::new(vec![Ok(batch)], schema),
+            )
+            .execute()
+            .await
+            .unwrap();
+
+        // Capture the initial state (Rows = 5, Version = 1)
+        let initial_rows = table.count_rows(None).await.unwrap();
+        let initial_version = table.version().await.unwrap();
+
+        assert_eq!(initial_rows, 5);
+        table.delete("false").await.unwrap();
+
+        // Rows should still be 5
+        let current_rows = table.count_rows(None).await.unwrap();
+        assert_eq!(
+            current_rows, initial_rows,
+            "Data should not change when predicate is false"
+        );
+
+        // version check
+        let current_version = table.version().await.unwrap();
+        assert!(
+            current_version > initial_version,
+            "Table version must increment after delete operation"
+        );
+    }
+}
--- a/rust/lancedb/src/table/update.rs
+++ b/rust/lancedb/src/table/update.rs
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+use std::sync::Arc;
+
+use lance::dataset::UpdateBuilder as LanceUpdateBuilder;
+use serde::{Deserialize, Serialize};
+
+use super::{BaseTable, NativeTable};
+use crate::Error;
+use crate::Result;
+
+/// The result of an update operation
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
+pub struct UpdateResult {
+    #[serde(default)]
+    pub rows_updated: u64,
+    /// The commit version associated with the operation.
+    #[serde(default)]
+    pub version: u64,
+}
+
+/// A builder for configuring a [`crate::table::Table::update`] operation
+#[derive(Debug, Clone)]
+pub struct UpdateBuilder {
+    parent: Arc<dyn BaseTable>,
+    pub(crate) filter: Option<String>,
+    pub(crate) columns: Vec<(String, String)>,
+}
+
+impl UpdateBuilder {
+    pub(crate) fn new(parent: Arc<dyn BaseTable>) -> Self {
+        Self {
+            parent,
+            filter: None,
+            columns: Vec::new(),
+        }
+    }
+
+    /// Limits the update operation to rows matching the given filter
+    ///
+    /// If a row does not match the filter then it will be left unchanged.
+    pub fn only_if(mut self, filter: impl Into<String>) -> Self {
+        self.filter = Some(filter.into());
+        self
+    }
+
+    /// Specifies a column to update
+    ///
+    /// This method may be called multiple times to update multiple columns
+    ///
+    /// The `update_expr` should be an SQL expression explaining how to calculate
+    /// the new value for the column.  The expression will be evaluated against the
+    /// previous row's value.
+    pub fn column(
+        mut self,
+        column_name: impl Into<String>,
+        update_expr: impl Into<String>,
+    ) -> Self {
+        self.columns.push((column_name.into(), update_expr.into()));
+        self
+    }
+
+    /// Executes the update operation.
+    pub async fn execute(self) -> Result<UpdateResult> {
+        if self.columns.is_empty() {
+            Err(Error::InvalidInput {
+                message: "at least one column must be specified in an update operation".to_string(),
+            })
+        } else {
+            self.parent.clone().update(self).await
+        }
+    }
+}
+
+/// Internal implementation of the update logic
+pub(crate) async fn execute_update(
+    table: &NativeTable,
+    update: UpdateBuilder,
+) -> Result<UpdateResult> {
+    // 1. Snapshot the current dataset
+    let dataset = table.dataset.get().await?.clone();
+
+    // 2. Initialize the Lance Core builder
+    let mut builder = LanceUpdateBuilder::new(Arc::new(dataset));
+
+    // 3. Apply the filter (WHERE clause)
+    if let Some(predicate) = update.filter {
+        builder = builder.update_where(&predicate)?;
+    }
+
+    // 4. Apply the columns (SET clause)
+    for (column, value) in update.columns {
+        builder = builder.set(column, &value)?;
+    }
+
+    // 5. Execute the operation (Write new files)
+    let operation = builder.build()?;
+    let res = operation.execute().await?;
+
+    // 6. Update the table's view of the latest version
+    table
+        .dataset
+        .set_latest(res.new_dataset.as_ref().clone())
+        .await;
+
+    Ok(UpdateResult {
+        rows_updated: res.rows_updated,
+        version: res.new_dataset.version().version,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::connect;
+    use crate::query::QueryBase;
+    use crate::query::{ExecutableQuery, Select};
+    use arrow_array::{
+        record_batch, Array, BooleanArray, Date32Array, FixedSizeListArray, Float32Array,
+        Float64Array, Int32Array, Int64Array, LargeStringArray, RecordBatch, RecordBatchIterator,
+        RecordBatchReader, StringArray, TimestampMillisecondArray, TimestampNanosecondArray,
+        UInt32Array,
+    };
+    use arrow_data::ArrayDataBuilder;
+    use arrow_schema::{ArrowError, DataType, Field, Schema, TimeUnit};
+    use futures::TryStreamExt;
+    use std::sync::Arc;
+    use std::time::Duration;
+
+    #[tokio::test]
+    async fn test_update_all_types() {
+        let conn = connect("memory://")
+            .read_consistency_interval(Duration::from_secs(0))
+            .execute()
+            .await
+            .unwrap();
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("int32", DataType::Int32, false),
+            Field::new("int64", DataType::Int64, false),
+            Field::new("uint32", DataType::UInt32, false),
+            Field::new("string", DataType::Utf8, false),
+            Field::new("large_string", DataType::LargeUtf8, false),
+            Field::new("float32", DataType::Float32, false),
+            Field::new("float64", DataType::Float64, false),
+            Field::new("bool", DataType::Boolean, false),
+            Field::new("date32", DataType::Date32, false),
+            Field::new(
+                "timestamp_ns",
+                DataType::Timestamp(TimeUnit::Nanosecond, None),
+                false,
+            ),
+            Field::new(
+                "timestamp_ms",
+                DataType::Timestamp(TimeUnit::Millisecond, None),
+                false,
+            ),
+            Field::new(
+                "vec_f32",
+                DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 2),
+                false,
+            ),
+            Field::new(
+                "vec_f64",
+                DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float64, true)), 2),
+                false,
+            ),
+        ]));
+
+        let record_batch_iter = RecordBatchIterator::new(
+            vec![RecordBatch::try_new(
+                schema.clone(),
+                vec![
+                    Arc::new(Int32Array::from_iter_values(0..10)),
+                    Arc::new(Int64Array::from_iter_values(0..10)),
+                    Arc::new(UInt32Array::from_iter_values(0..10)),
+                    Arc::new(StringArray::from_iter_values(vec![
+                        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
+                    ])),
+                    Arc::new(LargeStringArray::from_iter_values(vec![
+                        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
+                    ])),
+                    Arc::new(Float32Array::from_iter_values((0..10).map(|i| i as f32))),
+                    Arc::new(Float64Array::from_iter_values((0..10).map(|i| i as f64))),
+                    Arc::new(Into::<BooleanArray>::into(vec![
+                        true, false, true, false, true, false, true, false, true, false,
+                    ])),
+                    Arc::new(Date32Array::from_iter_values(0..10)),
+                    Arc::new(TimestampNanosecondArray::from_iter_values(0..10)),
+                    Arc::new(TimestampMillisecondArray::from_iter_values(0..10)),
+                    Arc::new(
+                        create_fixed_size_list(
+                            Float32Array::from_iter_values((0..20).map(|i| i as f32)),
+                            2,
+                        )
+                        .unwrap(),
+                    ),
+                    Arc::new(
+                        create_fixed_size_list(
+                            Float64Array::from_iter_values((0..20).map(|i| i as f64)),
+                            2,
+                        )
+                        .unwrap(),
+                    ),
+                ],
+            )
+            .unwrap()]
+            .into_iter()
+            .map(Ok),
+            schema.clone(),
+        );
+
+        let table = conn
+            .create_table("my_table", record_batch_iter)
+            .execute()
+            .await
+            .unwrap();
+
+        // check it can do update for each type
+        let updates: Vec<(&str, &str)> = vec![
+            ("string", "'foo'"),
+            ("large_string", "'large_foo'"),
+            ("int32", "1"),
+            ("int64", "1"),
+            ("uint32", "1"),
+            ("float32", "1.0"),
+            ("float64", "1.0"),
+            ("bool", "true"),
+            ("date32", "1"),
+            ("timestamp_ns", "1"),
+            ("timestamp_ms", "1"),
+            ("vec_f32", "[1.0, 1.0]"),
+            ("vec_f64", "[1.0, 1.0]"),
+        ];
+
+        let mut update_op = table.update();
+        for (column, value) in updates {
+            update_op = update_op.column(column, value);
+        }
+        update_op.execute().await.unwrap();
+
+        let mut batches = table
+            .query()
+            .select(Select::columns(&[
+                "string",
+                "large_string",
+                "int32",
+                "int64",
+                "uint32",
+                "float32",
+                "float64",
+                "bool",
+                "date32",
+                "timestamp_ns",
+                "timestamp_ms",
+                "vec_f32",
+                "vec_f64",
+            ]))
+            .execute()
+            .await
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+        let batch = batches.pop().unwrap();
+
+        macro_rules! assert_column {
+            ($column:expr, $array_type:ty, $expected:expr) => {
+                let array = $column
+                    .as_any()
+                    .downcast_ref::<$array_type>()
+                    .unwrap()
+                    .iter()
+                    .collect::<Vec<_>>();
+                for v in array {
+                    assert_eq!(v, Some($expected));
+                }
+            };
+        }
+
+        assert_column!(batch.column(0), StringArray, "foo");
+        assert_column!(batch.column(1), LargeStringArray, "large_foo");
+        assert_column!(batch.column(2), Int32Array, 1);
+        assert_column!(batch.column(3), Int64Array, 1);
+        assert_column!(batch.column(4), UInt32Array, 1);
+        assert_column!(batch.column(5), Float32Array, 1.0);
+        assert_column!(batch.column(6), Float64Array, 1.0);
+        assert_column!(batch.column(7), BooleanArray, true);
+        assert_column!(batch.column(8), Date32Array, 1);
+        assert_column!(batch.column(9), TimestampNanosecondArray, 1);
+        assert_column!(batch.column(10), TimestampMillisecondArray, 1);
+
+        let array = batch
+            .column(11)
+            .as_any()
+            .downcast_ref::<FixedSizeListArray>()
+            .unwrap()
+            .iter()
+            .collect::<Vec<_>>();
+        for v in array {
+            let v = v.unwrap();
+            let f32array = v.as_any().downcast_ref::<Float32Array>().unwrap();
+            for v in f32array {
+                assert_eq!(v, Some(1.0));
+            }
+        }
+
+        let array = batch
+            .column(12)
+            .as_any()
+            .downcast_ref::<FixedSizeListArray>()
+            .unwrap()
+            .iter()
+            .collect::<Vec<_>>();
+        for v in array {
+            let v = v.unwrap();
+            let f64array = v.as_any().downcast_ref::<Float64Array>().unwrap();
+            for v in f64array {
+                assert_eq!(v, Some(1.0));
+            }
+        }
+    }
+    /// Test helper: builds a `FixedSizeListArray` by grouping the flat `values`
+    /// array into consecutive lists of `list_size` elements each.
+    ///
+    /// The child field is named `"item"` and is declared nullable. The number of
+    /// lists is `values.len() / list_size` (integer division).
+    ///
+    /// # Errors
+    ///
+    /// Returns `ArrowError::InvalidArgumentError` when `list_size` is not
+    /// positive, and propagates any error reported by `ArrayDataBuilder::build`
+    /// rather than panicking.
+    fn create_fixed_size_list<T: Array>(
+        values: T,
+        list_size: i32,
+    ) -> Result<FixedSizeListArray, ArrowError> {
+        // Guard first: a zero size would divide by zero below, and a negative
+        // size would wrap around when cast to `usize`.
+        if list_size <= 0 {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "list_size must be positive, got {}",
+                list_size
+            )));
+        }
+        let num_lists = values.len() / list_size as usize;
+
+        let list_type = DataType::FixedSizeList(
+            Arc::new(Field::new("item", values.data_type().clone(), true)),
+            list_size,
+        );
+        // `?` hands build failures back through the declared `Result`.
+        let data = ArrayDataBuilder::new(list_type)
+            .len(num_lists)
+            .add_child_data(values.into_data())
+            .build()?;
+
+        Ok(FixedSizeListArray::from(data))
+    }
+
+    /// Test helper: returns a reader that yields a single batch with one
+    /// non-nullable `Int32` column named `"i"` holding the values `0..10`.
+    fn make_test_batches() -> impl RecordBatchReader + Send + Sync + 'static {
+        let field = Field::new("i", DataType::Int32, false);
+        let schema = Arc::new(Schema::new(vec![field]));
+
+        let values = Int32Array::from_iter_values(0..10);
+        // Left as a `Result`: the iterator carries fallible items.
+        let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(values)]);
+
+        RecordBatchIterator::new(vec![batch], schema)
+    }
+
+    /// Verifies that `update()` combined with `only_if("id > 5")` rewrites the
+    /// `name` column to `"foo"` for matching rows and leaves every other row
+    /// with its original single-letter name.
+    #[tokio::test]
+    async fn test_update_with_predicate() {
+        let conn = connect("memory://")
+            .read_consistency_interval(Duration::from_secs(0))
+            .execute()
+            .await
+            .unwrap();
+
+        // Ten rows: id 0..=9 paired with names "a"..="j".
+        let batch = record_batch!(
+            ("id", Int32, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
+            (
+                "name",
+                Utf8,
+                ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
+            )
+        )
+        .unwrap();
+
+        let schema = batch.schema();
+        // need the iterator for create table
+        let record_batch_iter = RecordBatchIterator::new(vec![Ok(batch)], schema);
+
+        let table = conn
+            .create_table("my_table", record_batch_iter)
+            .execute()
+            .await
+            .unwrap();
+
+        table
+            .update()
+            .only_if("id > 5")
+            .column("name", "'foo'")
+            .execute()
+            .await
+            .unwrap();
+
+        let batches = table
+            .query()
+            .select(Select::columns(&["id", "name"]))
+            .execute()
+            .await
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+
+        // Count the rows we inspect so the test cannot pass vacuously when the
+        // query returns no batches.
+        let mut rows_checked = 0;
+        for batch in &batches {
+            let ids = batch
+                .column(0)
+                .as_any()
+                .downcast_ref::<Int32Array>()
+                .unwrap();
+            let names = batch
+                .column(1)
+                .as_any()
+                .downcast_ref::<StringArray>()
+                .unwrap();
+            // Columns of one record batch have equal length, so zipping pairs
+            // each id with the name from the same row.
+            for (id, name) in ids.iter().zip(names.iter()) {
+                let id = id.unwrap();
+                let name = name.unwrap();
+                if id > 5 {
+                    assert_eq!(name, "foo");
+                } else {
+                    // `id` is in 0..=5 here, so the narrowing cast cannot truncate.
+                    let expected = char::from(b'a' + id as u8).to_string();
+                    assert_eq!(name, expected);
+                }
+                rows_checked += 1;
+            }
+        }
+        assert_eq!(rows_checked, 10);
+    }
+
+    /// Checks that an update whose new value is an expression (`i+1`) and that
+    /// carries no predicate leaves no row with `i == 0`, starting from the
+    /// `0..10` data produced by `make_test_batches`.
+    #[tokio::test]
+    async fn test_update_via_expr() {
+        let db = connect("memory://")
+            .read_consistency_interval(Duration::from_secs(0))
+            .execute()
+            .await
+            .unwrap();
+        let table = db
+            .create_table("my_table", make_test_batches())
+            .execute()
+            .await
+            .unwrap();
+
+        let zero_filter = "i == 0".to_string();
+
+        // Exactly one row starts out with i == 0.
+        let zeros_before = table.count_rows(Some(zero_filter.clone())).await.unwrap();
+        assert_eq!(1, zeros_before);
+
+        table.update().column("i", "i+1").execute().await.unwrap();
+
+        // After the increment no row should still hold zero.
+        let zeros_after = table.count_rows(Some(zero_filter)).await.unwrap();
+        assert_eq!(0, zeros_after);
+    }
+}
--- a/Show More
+++ b/Show More