Mirror of https://github.com/lancedb/lancedb.git — synced 2025-12-24 13:59:58 +00:00

Compare commits: 31 commits, python-v0.… → python-v0.…
| Author | SHA1 | Date |
|---|---|---|
|  | 70958f6366 |  |
|  | 1ac745eb18 |  |
|  | 1357fe8aa1 |  |
|  | 0d78929893 |  |
|  | 9e2a68541e |  |
|  | 1aa0fd16e7 |  |
|  | fec2a05629 |  |
|  | 79a1cd60ee |  |
|  | 88807a59a4 |  |
|  | e0e7e01ea8 |  |
|  | a416ebc11d |  |
|  | f941054baf |  |
|  | 1a81c46505 |  |
|  | 82b25a71e9 |  |
|  | 13c613d45f |  |
|  | e07389a36c |  |
|  | e7e9e80b1d |  |
|  | 247fb58400 |  |
|  | 504bdc471c |  |
|  | d617cdef4a |  |
|  | 356d7046fd |  |
|  | 48e5caabda |  |
|  | d6cc68f671 |  |
|  | 55eacfa685 |  |
|  | 222e3264ab |  |
|  | 13505026cb |  |
|  | b0800b4b71 |  |
|  | 1befebf614 |  |
|  | 1ab60fae7f |  |
|  | e921c90c1b |  |
|  | 05a4ea646a |  |
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.22.1-beta.2"
+current_version = "0.22.2-beta.1"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.
45  .github/actions/create-failure-issue/action.yml  (vendored, new file)
@@ -0,0 +1,45 @@
name: Create Failure Issue
description: Creates a GitHub issue if any jobs in the workflow failed

inputs:
  job-results:
    description: 'JSON string of job results from needs context'
    required: true
  workflow-name:
    description: 'Name of the workflow'
    required: true

runs:
  using: composite
  steps:
    - name: Check for failures and create issue
      shell: bash
      env:
        JOB_RESULTS: ${{ inputs.job-results }}
        WORKFLOW_NAME: ${{ inputs.workflow-name }}
        RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        GH_TOKEN: ${{ github.token }}
      run: |
        # Check if any job failed
        if echo "$JOB_RESULTS" | jq -e 'to_entries | any(.value.result == "failure")' > /dev/null; then
          echo "Detected job failures, creating issue..."

          # Extract failed job names
          FAILED_JOBS=$(echo "$JOB_RESULTS" | jq -r 'to_entries | map(select(.value.result == "failure")) | map(.key) | join(", ")')

          # Create issue with workflow name, failed jobs, and run URL
          gh issue create \
            --title "$WORKFLOW_NAME Failed ($FAILED_JOBS)" \
            --body "The workflow **$WORKFLOW_NAME** failed during execution.

          **Failed jobs:** $FAILED_JOBS

          **Run URL:** $RUN_URL

          Please investigate the failed jobs and address any issues." \
            --label "ci"

          echo "Issue created successfully"
        else
          echo "No job failures detected, skipping issue creation"
        fi
14  .github/workflows/cargo-publish.yml  (vendored)
@@ -38,3 +38,17 @@ jobs:
      - name: Publish the package
        run: |
          cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
  report-failure:
    name: Report Workflow Failure
    runs-on: ubuntu-latest
    needs: [build]
    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
    permissions:
      contents: read
      issues: write
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/create-failure-issue
        with:
          job-results: ${{ toJSON(needs) }}
          workflow-name: ${{ github.workflow }}
3  .github/workflows/docs.yml  (vendored)
@@ -56,8 +56,9 @@ jobs:
        with:
          node-version: 20
          cache: 'npm'
          cache-dependency-path: docs/package-lock.json
      - name: Install node dependencies
-       working-directory: node
+       working-directory: nodejs
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
15  .github/workflows/java-publish.yml  (vendored)
@@ -43,7 +43,6 @@ jobs:
      - uses: Swatinem/rust-cache@v2
      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: "1.81.0"
          cache-workspaces: "./java/core/lancedb-jni"
      # Disable full debug symbol generation to speed up CI build and keep memory down
      # "1" means line tables only, which is useful for panic tracebacks.
@@ -112,3 +111,17 @@ jobs:
        env:
          SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
          SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
  report-failure:
    name: Report Workflow Failure
    runs-on: ubuntu-latest
    needs: [linux-arm64, linux-x86, macos-arm64]
    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
    permissions:
      contents: read
      issues: write
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/create-failure-issue
        with:
          job-results: ${{ toJSON(needs) }}
          workflow-name: ${{ github.workflow }}
3  .github/workflows/nodejs.yml  (vendored)
@@ -6,6 +6,7 @@ on:
      - main
  pull_request:
    paths:
      - Cargo.toml
      - nodejs/**
      - .github/workflows/nodejs.yml
      - docker-compose.yml
@@ -116,7 +117,7 @@ jobs:
          set -e
          npm ci
          npm run docs
-         if ! git diff --exit-code -- . ':(exclude)Cargo.lock'; then
+         if ! git diff --exit-code -- ../ ':(exclude)Cargo.lock'; then
            echo "Docs need to be updated"
            echo "Run 'npm run docs', fix any warnings, and commit the changes."
            exit 1
14  .github/workflows/npm-publish.yml  (vendored)
@@ -365,3 +365,17 @@ jobs:
            ARGS="$ARGS --tag preview"
          fi
          npm publish $ARGS
  report-failure:
    name: Report Workflow Failure
    runs-on: ubuntu-latest
    needs: [build-lancedb, test-lancedb, publish]
    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
    permissions:
      contents: read
      issues: write
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/create-failure-issue
        with:
          job-results: ${{ toJSON(needs) }}
          workflow-name: ${{ github.workflow }}
14  .github/workflows/pypi-publish.yml  (vendored)
@@ -173,3 +173,17 @@ jobs:
          generate_release_notes: false
          name: Python LanceDB v${{ steps.extract_version.outputs.version }}
          body: ${{ steps.python_release_notes.outputs.changelog }}
  report-failure:
    name: Report Workflow Failure
    runs-on: ubuntu-latest
    needs: [linux, mac, windows]
    permissions:
      contents: read
      issues: write
    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/create-failure-issue
        with:
          job-results: ${{ toJSON(needs) }}
          workflow-name: ${{ github.workflow }}
1  .github/workflows/python.yml  (vendored)
@@ -6,6 +6,7 @@ on:
      - main
  pull_request:
    paths:
      - Cargo.toml
      - python/**
      - .github/workflows/python.yml
13  .github/workflows/rust.yml  (vendored)
@@ -96,6 +96,7 @@ jobs:
      # Need up-to-date compilers for kernels
      CC: clang-18
      CXX: clang++-18
      GH_TOKEN: ${{ secrets.SOPHON_READ_TOKEN }}
    steps:
      - uses: actions/checkout@v4
        with:
@@ -117,15 +118,17 @@
          sudo chmod 600 /swapfile
          sudo mkswap /swapfile
          sudo swapon /swapfile
      - name: Start S3 integration test environment
        working-directory: .
        run: docker compose up --detach --wait
      - name: Build
        run: cargo build --all-features --tests --locked --examples
      - name: Run tests
        run: cargo test --all-features --locked
      - name: Run feature tests
        run: make -C ./lancedb feature-tests
      - name: Run examples
        run: cargo run --example simple --locked
      - name: Run remote tests
        # Running this requires access to secrets, so skip if this is
        # a PR from a fork.
        if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
        run: make -C ./lancedb remote-tests

  macos:
    timeout-minutes: 30
26  .github/workflows/trigger-vectordb-recipes.yml  (vendored, deleted file)
@@ -1,26 +0,0 @@
name: Trigger vectordb-recipers workflow
on:
  push:
    branches: [ main ]
  pull_request:
    paths:
      - .github/workflows/trigger-vectordb-recipes.yml
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Trigger vectordb-recipes workflow
        uses: actions/github-script@v6
        with:
          github-token: ${{ secrets.VECTORDB_RECIPES_ACTION_TOKEN }}
          script: |
            const result = await github.rest.actions.createWorkflowDispatch({
              owner: 'lancedb',
              repo: 'vectordb-recipes',
              workflow_id: 'examples-test.yml',
              ref: 'main'
            });
            console.log(result);
857  Cargo.lock  (generated) — file diff suppressed because it is too large.
33  Cargo.toml
@@ -15,14 +15,15 @@ categories = ["database-implementations"]
rust-version = "1.78.0"

[workspace.dependencies]
-lance = { "version" = "=0.35.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-io = { "version" = "=0.35.0", default-features = false, "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-index = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-linalg = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-table = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-testing = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-datafusion = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-encoding = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
+lance = { "version" = "=0.38.0", default-features = false, "features" = ["dynamodb"] }
+lance-io = { "version" = "=0.38.0", default-features = false }
+lance-index = "=0.38.0"
+lance-linalg = "=0.38.0"
+lance-table = "=0.38.0"
+lance-testing = "=0.38.0"
+lance-datafusion = "=0.38.0"
+lance-encoding = "=0.38.0"
+lance-namespace = "0.0.16"
# Note that this one does not include pyarrow
arrow = { version = "55.1", optional = false }
arrow-array = "55.1"
@@ -30,7 +31,6 @@ arrow-data = "55.1"
arrow-ipc = "55.1"
arrow-ord = "55.1"
arrow-schema = "55.1"
arrow-arith = "55.1"
arrow-cast = "55.1"
async-trait = "0"
datafusion = { version = "49.0", default-features = false }
@@ -51,7 +51,6 @@ pin-project = "1.0.7"
snafu = "0.8"
url = "2"
num-traits = "0.2"
rand = "0.9"
regex = "1.10"
lazy_static = "1"
semver = "1.0.25"
@@ -59,7 +58,17 @@ crunchy = "0.2.4"
# Temporary pins to work around downstream issues
# https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
chrono = "=0.4.41"
# https://github.com/RustCrypto/formats/issues/1684
base64ct = "=1.6.0"
# Workaround for: https://github.com/Lokathor/bytemuck/issues/306
bytemuck_derive = ">=1.8.1, <1.9.0"

# This is only needed when we reference preview releases of lance
# [patch.crates-io]
# # Force to use the same lance version as the rest of the project to avoid duplicate dependencies
# lance = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-io = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-index = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-linalg = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-table = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-testing = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-datafusion = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-encoding = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
4  ci/create_lancedb_test_connection.sh  (new executable file)
@@ -0,0 +1,4 @@
#!/usr/bin/env bash

export RUST_LOG=info
exec ./lancedb server --port 0 --sql-port 0 --data-dir "${1}"
18  ci/run_with_docker_compose.sh  (new executable file)
@@ -0,0 +1,18 @@
#!/usr/bin/env bash

#
# A script for running the given command together with a docker compose environment.
#

# Bring down the docker setup once the command is done running.
tear_down() {
  docker compose -p fixture down
}
trap tear_down EXIT

set +xe

# Clean up any existing docker setup and bring up a new one.
docker compose -p fixture up --detach --wait || exit 1

"${@}"
68  ci/run_with_test_connection.sh  (new executable file)
@@ -0,0 +1,68 @@
#!/usr/bin/env bash

#
# A script for running the given command together with the lancedb cli.
#

die() {
  echo $?
  exit 1
}

check_command_exists() {
  command="${1}"
  which ${command} &> /dev/null || \
    die "Unable to locate command: ${command}. Did you install it?"
}

if [[ ! -e ./lancedb ]]; then
  if [[ -v SOPHON_READ_TOKEN ]]; then
    INPUT="lancedb-linux-x64"
    gh release \
      --repo lancedb/lancedb \
      download ci-support-binaries \
      --pattern "${INPUT}" \
      || die "failed to fetch cli."
    check_command_exists openssl
    openssl enc -aes-256-cbc \
      -d -pbkdf2 \
      -pass "env:SOPHON_READ_TOKEN" \
      -in "${INPUT}" \
      -out ./lancedb-linux-x64.tar.gz \
      || die "openssl failed"
    TARGET="${INPUT}.tar.gz"
  else
    ARCH="x64"
    if [[ $OSTYPE == 'darwin'* ]]; then
      UNAME=$(uname -m)
      if [[ $UNAME == 'arm64' ]]; then
        ARCH='arm64'
      fi
      OSTYPE="macos"
    elif [[ $OSTYPE == 'linux'* ]]; then
      if [[ $UNAME == 'aarch64' ]]; then
        ARCH='arm64'
      fi
      OSTYPE="linux"
    else
      die "unknown OSTYPE: $OSTYPE"
    fi

    check_command_exists gh
    TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
    gh release \
      --repo lancedb/sophon \
      download lancedb-cli-v0.0.3 \
      --pattern "${TARGET}" \
      || die "failed to fetch cli."
  fi

  check_command_exists tar
  tar xvf "${TARGET}" || die "tar failed."
  [[ -e ./lancedb ]] || die "failed to extract lancedb."
fi

SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
export CREATE_LANCEDB_TEST_CONNECTION_SCRIPT="${SCRIPT_DIR}/create_lancedb_test_connection.sh"

"${@}"
@@ -1,4 +1,5 @@
import argparse
import re
import sys
import json

@@ -18,8 +19,12 @@ def run_command(command: str) -> str:

def get_latest_stable_version() -> str:
    version_line = run_command("cargo info lance | grep '^version:'")
-   version = version_line.split(" ")[1].strip()
-   return version
+   # Example output: "version: 0.35.0 (latest 0.37.0)"
+   match = re.search(r'\(latest ([0-9.]+)\)', version_line)
+   if match:
+       return match.group(1)
+   # Fallback: use the first version after 'version:'
+   return version_line.split("version:")[1].split()[0].strip()


def get_latest_preview_version() -> str:
@@ -112,7 +117,7 @@ def update_cargo_toml(line_updater):
    lance_line = ""
    is_parsing_lance_line = False
    for line in lines:
-       if line.startswith("lance"):
+       if line.startswith("lance") and not line.startswith("lance-namespace"):
            # Check if this is a single-line or multi-line entry
            # Single-line entries either:
            # 1. End with } (complete inline table)
@@ -70,6 +70,22 @@ plugins:
  - mkdocs-jupyter
  - render_swagger:
      allow_arbitrary_locations: true
  - redirects:
      redirect_maps:
        # Redirect the home page and other top-level markdown files. This enables maximum SEO benefit
        # other sub-pages are handled by the ingected js in overrides/partials/header.html
        'index.md': 'https://lancedb.com/docs/'
        'guides/tables.md': 'https://lancedb.com/docs/tables/'
        'ann_indexes.md': 'https://lancedb.com/docs/indexing/'
        'basic.md': 'https://lancedb.com/docs/quickstart/'
        'faq.md': 'https://lancedb.com/docs/faq/'
        'embeddings/understanding_embeddings.md': 'https://lancedb.com/docs/embedding/'
        'integrations.md': 'https://lancedb.com/docs/integrations/'
        'examples.md': 'https://lancedb.com/docs/tutorials/'
        'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
        'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'

markdown_extensions:
  - admonition
@@ -386,4 +402,4 @@ extra:
    - icon: fontawesome/brands/x-twitter
      link: https://twitter.com/lancedb
    - icon: fontawesome/brands/linkedin
      link: https://www.linkedin.com/company/lancedb
      link: https://www.linkedin.com/company/lancedb
@@ -19,7 +19,13 @@
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
IN THE SOFTWARE.
|
||||
-->
|
||||
|
||||
<div id="deprecation-banner" style="background-color: #f8d7da; color: #721c24; padding: 1em; text-align: center;">
|
||||
<p style="margin: 0; font-size: 1.1em;">
|
||||
<strong>This documentation site is deprecated.</strong>
|
||||
Please visit our new documentation site at <a href="https://lancedb.com/docs" style="color: #721c24; text-decoration: underline;">
|
||||
lancedb.com/docs</a> for the latest information.
|
||||
</p>
|
||||
</div>
|
||||
{% set class = "md-header" %}
|
||||
{% if "navigation.tabs.sticky" in features %}
|
||||
{% set class = class ~ " md-header--shadow md-header--lifted" %}
|
||||
@@ -150,9 +156,9 @@
|
||||
|
||||
<div style="margin-left: 10px; margin-right: 5px;">
|
||||
<a href="https://discord.com/invite/zMM32dvNtd" target="_blank" rel="noopener noreferrer">
|
||||
<svg fill="#FFFFFF" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" width="25px" height="25px"><path d="M 41.625 10.769531 C 37.644531 7.566406 31.347656 7.023438 31.078125 7.003906 C 30.660156 6.96875 30.261719 7.203125 30.089844 7.589844 C 30.074219 7.613281 29.9375 7.929688 29.785156 8.421875 C 32.417969 8.867188 35.652344 9.761719 38.578125 11.578125 C 39.046875 11.867188 39.191406 12.484375 38.902344 12.953125 C 38.710938 13.261719 38.386719 13.429688 38.050781 13.429688 C 37.871094 13.429688 37.6875 13.378906 37.523438 13.277344 C 32.492188 10.15625 26.210938 10 25 10 C 23.789063 10 17.503906 10.15625 12.476563 13.277344 C 12.007813 13.570313 11.390625 13.425781 11.101563 12.957031 C 10.808594 12.484375 10.953125 11.871094 11.421875 11.578125 C 14.347656 9.765625 17.582031 8.867188 20.214844 8.425781 C 20.0625 7.929688 19.925781 7.617188 19.914063 7.589844 C 19.738281 7.203125 19.34375 6.960938 18.921875 7.003906 C 18.652344 7.023438 12.355469 7.566406 8.320313 10.8125 C 6.214844 12.761719 2 24.152344 2 34 C 2 34.175781 2.046875 34.34375 2.132813 34.496094 C 5.039063 39.605469 12.972656 40.941406 14.78125 41 C 14.789063 41 14.800781 41 14.8125 41 C 15.132813 41 15.433594 40.847656 15.621094 40.589844 L 17.449219 38.074219 C 12.515625 36.800781 9.996094 34.636719 9.851563 34.507813 C 9.4375 34.144531 9.398438 33.511719 9.765625 33.097656 C 10.128906 32.683594 10.761719 32.644531 11.175781 33.007813 C 11.234375 33.0625 15.875 37 25 37 C 34.140625 37 38.78125 33.046875 38.828125 33.007813 C 39.242188 32.648438 39.871094 32.683594 40.238281 33.101563 C 40.601563 33.515625 40.5625 34.144531 40.148438 34.507813 C 40.003906 34.636719 37.484375 36.800781 32.550781 38.074219 L 34.378906 40.589844 C 34.566406 40.847656 34.867188 41 35.1875 41 C 35.199219 41 35.210938 41 35.21875 41 C 37.027344 40.941406 44.960938 39.605469 47.867188 34.496094 C 47.953125 34.34375 48 34.175781 48 34 C 48 24.152344 43.785156 12.761719 41.625 10.769531 Z M 18.5 30 C 16.566406 30 15 28.210938 15 26 C 15 23.789063 16.566406 22 18.5 22 C 20.433594 22 22 23.789063 22 26 C 22 28.210938 20.433594 30 18.5 30 Z M 31.5 30 C 29.566406 30 28 28.210938 28 26 C 28 23.789063 29.566406 22 31.5 22 C 33.433594 22 35 23.789063 35 26 C 35 28.210938 33.433594 30 31.5 30 Z"/></svg>
|
||||
</a>
|
||||
</div>
|
||||
<svg fill="#FFFFFF" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" width="25px" height="25px"><path d="M 41.625 10.769531 C 37.644531 7.566406 31.347656 7.023438 31.078125 7.003906 C 30.660156 6.96875 30.261719 7.203125 30.089844 7.589844 C 30.074219 7.613281 29.9375 7.929688 29.785156 8.421875 C 32.417969 8.867188 35.652344 9.761719 38.578125 11.578125 C 39.046875 11.867188 39.191406 12.484375 38.902344 12.953125 C 38.710938 13.261719 38.386719 13.429688 38.050781 13.429688 C 37.871094 13.429688 37.6875 13.378906 37.523438 13.277344 C 32.492188 10.15625 26.210938 10 25 10 C 23.789063 10 17.503906 10.15625 12.476563 13.277344 C 12.007813 13.570313 11.390625 13.425781 11.101563 12.957031 C 10.808594 12.484375 10.953125 11.871094 11.421875 11.578125 C 14.347656 9.765625 17.582031 8.867188 20.214844 8.425781 C 20.0625 7.929688 19.925781 7.617188 19.914063 7.589844 C 19.738281 7.203125 19.34375 6.960938 18.921875 7.003906 C 18.652344 7.023438 12.355469 7.566406 8.320313 10.8125 C 6.214844 12.761719 2 24.152344 2 34 C 2 34.175781 2.046875 34.34375 2.132813 34.496094 C 5.039063 39.605469 12.972656 40.941406 14.78125 41 C 14.789063 41 14.800781 41 14.8125 41 C 15.132813 41 15.433594 40.847656 15.621094 40.589844 L 17.449219 38.074219 C 12.515625 36.800781 9.996094 34.636719 9.851563 34.507813 C 9.4375 34.144531 9.398438 33.511719 9.765625 33.097656 C 10.128906 32.683594 10.761719 32.644531 11.175781 33.007813 C 11.234375 33.0625 15.875 37 25 37 C 34.140625 37 38.78125 33.046875 38.828125 33.007813 C 39.242188 32.648438 39.871094 32.683594 40.238281 33.101563 C 40.601563 33.515625 40.5625 34.144531 40.148438 34.507813 C 40.003906 34.636719 37.484375 36.800781 32.550781 38.074219 L 34.378906 40.589844 C 34.566406 40.847656 34.867188 41 35.1875 41 C 35.199219 41 35.210938 41 35.21875 41 C 37.027344 40.941406 44.960938 39.605469 47.867188 34.496094 C 47.953125 34.34375 48 34.175781 48 34 C 48 24.152344 43.785156 12.761719 41.625 10.769531 Z M 18.5 30 C 16.566406 30 15 28.210938 15 26 C 15 23.789063 16.566406 22 18.5 22 C 20.433594 22 22 23.789063 22 26 C 22 28.210938 20.433594 30 18.5 30 Z M 31.5 30 C 29.566406 30 28 28.210938 28 26 C 28 23.789063 29.566406 22 31.5 22 C 33.433594 22 35 23.789063 35 26 C 35 28.210938 33.433594 30 31.5 30 Z"/></svg>
|
||||
</a>
|
||||
</div>
|
||||
<div style="margin-left: 5px; margin-right: 5px;">
|
||||
<a href="https://twitter.com/lancedb" target="_blank" rel="noopener noreferrer">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0,0,256,256" width="25px" height="25px" fill-rule="nonzero"><g fill-opacity="0" fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><path d="M0,256v-256h256v256z" id="bgRectangle"></path></g><g fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><g transform="scale(4,4)"><path d="M57,17.114c-1.32,1.973 -2.991,3.707 -4.916,5.097c0.018,0.423 0.028,0.847 0.028,1.274c0,13.013 -9.902,28.018 -28.016,28.018c-5.562,0 -12.81,-1.948 -15.095,-4.423c0.772,0.092 1.556,0.138 2.35,0.138c4.615,0 8.861,-1.575 12.23,-4.216c-4.309,-0.079 -7.946,-2.928 -9.199,-6.84c1.96,0.308 4.447,-0.17 4.447,-0.17c0,0 -7.7,-1.322 -7.899,-9.779c2.226,1.291 4.46,1.231 4.46,1.231c0,0 -4.441,-2.734 -4.379,-8.195c0.037,-3.221 1.331,-4.953 1.331,-4.953c8.414,10.361 20.298,10.29 20.298,10.29c0,0 -0.255,-1.471 -0.255,-2.243c0,-5.437 4.408,-9.847 9.847,-9.847c2.832,0 5.391,1.196 7.187,3.111c2.245,-0.443 4.353,-1.263 6.255,-2.391c-0.859,3.44 -4.329,5.448 -4.329,5.448c0,0 2.969,-0.329 5.655,-1.55z"></path></g></g></svg>
|
||||
@@ -173,4 +179,77 @@
|
||||
{% include "partials/tabs.html" %}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</header>
|
||||
</header>
|
||||
|
||||
<script>
|
||||
(function() {
|
||||
function checkPathAndRedirect() {
|
||||
var banner = document.getElementById('deprecation-banner');
|
||||
|
||||
if (document.querySelector('meta[http-equiv="refresh"]')) {
|
||||
return; // The redirects plugin is already handling this page.
|
||||
}
|
||||
|
||||
var currentPath = window.location.pathname;
|
||||
|
||||
var cleanPath = currentPath.endsWith('/') && currentPath.length > 1
|
||||
? currentPath.slice(0, -1)
|
||||
: currentPath;
|
||||
|
||||
// These are the ONLY paths that should remain on the old site
|
||||
var apiPaths = [
|
||||
'/lancedb/python',
|
||||
'/lancedb/javascript',
|
||||
'/lancedb/js',
|
||||
'/lancedb/api_reference'
|
||||
];
|
||||
|
||||
var isApiPage = apiPaths.some(function(apiPath) {
|
||||
return cleanPath.startsWith(apiPath);
|
||||
});
|
||||
|
||||
if (isApiPage) {
|
||||
if (banner) {
|
||||
banner.style.display = 'none';
|
||||
}
|
||||
} else {
|
||||
if (banner) {
|
||||
banner.style.display = 'block';
|
||||
}
|
||||
|
||||
// Add noindex meta tag to prevent indexing of old docs for seo
|
||||
var noindexMeta = document.createElement('meta');
|
||||
noindexMeta.setAttribute('name', 'robots');
|
||||
noindexMeta.setAttribute('content', 'noindex, follow');
|
||||
document.head.appendChild(noindexMeta);
|
||||
|
||||
// Add canonical link to point to the new docs to reward new site for seo
|
||||
var canonicalLink = document.createElement('link');
|
||||
canonicalLink.setAttribute('rel', 'canonical');
|
||||
canonicalLink.setAttribute('href', 'https://lancedb.com/docs');
|
||||
document.head.appendChild(canonicalLink);
|
||||
|
||||
window.location.replace('https://lancedb.com/docs');
|
||||
}
|
||||
}
|
||||
|
||||
// Run the check only if doc is ready. This makes sure we catch the initial load
|
||||
// and redirect.
|
||||
if (document.readyState === 'loading') {
|
||||
document.addEventListener('DOMContentLoaded', checkPathAndRedirect);
|
||||
} else {
|
||||
checkPathAndRedirect();
|
||||
}
|
||||
|
||||
// Use an interval to handle subsequent navigation clicks.
|
||||
var lastPath = window.location.pathname;
|
||||
setInterval(function() {
|
||||
if (window.location.pathname !== lastPath) {
|
||||
lastPath = window.location.pathname;
|
||||
checkPathAndRedirect();
|
||||
}
|
||||
}, 2000); // keeping it 2 second to make it easy for user to understand
|
||||
// what's happening
|
||||
|
||||
})();
|
||||
</script>
|
||||
@@ -5,3 +5,4 @@ mkdocstrings[python]==0.25.2
griffe
mkdocs-render-swagger-plugin
pydantic
mkdocs-redirects
@@ -25,6 +25,51 @@ the underlying connection has been closed.

## Methods

### cloneTable()

```ts
abstract cloneTable(
  targetTableName,
  sourceUri,
  options?): Promise<Table>
```

Clone a table from a source table.

A shallow clone creates a new table that shares the underlying data files
with the source table but has its own independent manifest. This allows
both the source and cloned tables to evolve independently while initially
sharing the same data, deletion, and index files.

#### Parameters

* **targetTableName**: `string`
    The name of the target table to create.

* **sourceUri**: `string`
    The URI of the source table to clone from.

* **options?**
    Clone options.

* **options.isShallow?**: `boolean`
    Whether to perform a shallow clone (defaults to true).

* **options.sourceTag?**: `string`
    The tag of the source table to clone.

* **options.sourceVersion?**: `number`
    The version of the source table to clone.

* **options.targetNamespace?**: `string`[]
    The namespace for the target table (defaults to root namespace).

#### Returns

`Promise`<[`Table`](Table.md)>
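
For orientation, a minimal usage sketch of the new `cloneTable()` API (illustrative only — the connection path, table names, and version number are placeholders, not taken from this diff):

```ts
import * as lancedb from "@lancedb/lancedb";

// Create a shallow clone that reuses the source table's data files.
const db = await lancedb.connect("/tmp/lancedb");
const clone = await db.cloneTable(
  "events_clone",               // target table to create
  "/tmp/lancedb/events.lance",  // URI of the source table
  { isShallow: true, sourceVersion: 3 },
);
console.log(await clone.countRows());
```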
***

### close()

```ts
@@ -52,6 +52,30 @@ the merge result

***

### useIndex()

```ts
useIndex(useIndex): MergeInsertBuilder
```

Controls whether to use indexes for the merge operation.

When set to `true` (the default), the operation will use an index if available
on the join key for improved performance. When set to `false`, it forces a full
table scan even if an index exists. This can be useful for benchmarking or when
the query optimizer chooses a suboptimal path.

#### Parameters

* **useIndex**: `boolean`
    Whether to use indices for the merge operation. Defaults to `true`.

#### Returns

[`MergeInsertBuilder`](MergeInsertBuilder.md)
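
A short usage sketch (illustrative only — the table, the `"id"` join key, and the rows are placeholders, not part of this diff):

```ts
// Upsert rows by "id"; force a full-table scan instead of the index,
// e.g. to compare performance against the indexed path.
const result = await table
  .mergeInsert("id")
  .whenMatchedUpdateAll()
  .whenNotMatchedInsertAll()
  .useIndex(false)
  .execute([{ id: 1, text: "updated" }, { id: 2, text: "new" }]);
console.log(result.numInsertedRows);
```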
***

### whenMatchedUpdateAll()

```ts
@@ -13,7 +13,7 @@
  metadata?): ArrowTable
```

-An enhanced version of the makeTable function from Apache Arrow
+An enhanced version of the apache-arrow makeTable function from Apache Arrow
that supports nested fields and embeddings columns.

(typically you do not need to call this function. It will be called automatically
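
A hedged sketch of calling `makeArrowTable` directly with an explicit schema (the import path and field names below are assumptions for illustration; in this repository the function lives in the package's `arrow` module):

```ts
import { Field, Int32, Schema, Utf8 } from "apache-arrow";
import { makeArrowTable } from "@lancedb/lancedb/arrow";

// Build an Arrow table whose columns follow the supplied schema;
// nullable fields may be null in the row objects.
const schema = new Schema([
  new Field("id", new Int32(), true),
  new Field("text", new Utf8(), true),
]);
const table = makeArrowTable(
  [
    { id: 1, text: "hello" },
    { id: 2, text: null },
  ],
  { schema },
);
console.log(table.numRows); // 2
```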
@@ -78,6 +78,7 @@
- [TableNamesOptions](interfaces/TableNamesOptions.md)
- [TableStatistics](interfaces/TableStatistics.md)
- [TimeoutConfig](interfaces/TimeoutConfig.md)
- [TlsConfig](interfaces/TlsConfig.md)
- [TokenResponse](interfaces/TokenResponse.md)
- [UpdateOptions](interfaces/UpdateOptions.md)
- [UpdateResult](interfaces/UpdateResult.md)
@@ -40,6 +40,14 @@ optional timeoutConfig: TimeoutConfig;

***

### tlsConfig?

```ts
optional tlsConfig: TlsConfig;
```

***

### userAgent?

```ts
49  docs/src/js/interfaces/TlsConfig.md  (new file)
@@ -0,0 +1,49 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / TlsConfig

# Interface: TlsConfig

TLS/mTLS configuration for the remote HTTP client.

## Properties

### assertHostname?

```ts
optional assertHostname: boolean;
```

Whether to verify the hostname in the server's certificate.

***

### certFile?

```ts
optional certFile: string;
```

Path to the client certificate file (PEM format) for mTLS authentication.

***

### keyFile?

```ts
optional keyFile: string;
```

Path to the client private key file (PEM format) for mTLS authentication.

***

### sslCaCert?

```ts
optional sslCaCert: string;
```

Path to the CA certificate file (PEM format) for server verification.
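
A hedged connection sketch using this interface (the URI, file paths, and the exact nesting of `tlsConfig` under `clientConfig` are assumptions for illustration, not taken from this diff):

```ts
import { connect } from "@lancedb/lancedb";

// Connect to a remote LanceDB endpoint with mutual TLS.
const db = await connect("db://my-remote-db", {
  clientConfig: {
    tlsConfig: {
      certFile: "/etc/lancedb/client.crt",
      keyFile: "/etc/lancedb/client.key",
      sslCaCert: "/etc/lancedb/ca.crt",
      assertHostname: true,
    },
  },
});
```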
@@ -8,7 +8,7 @@
  <parent>
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-   <version>0.22.1-beta.2</version>
+   <version>0.22.2-beta.1</version>
    <relativePath>../pom.xml</relativePath>
  </parent>

@@ -8,7 +8,7 @@
  <parent>
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-   <version>0.22.1-beta.2</version>
+   <version>0.22.2-beta.1</version>
    <relativePath>../pom.xml</relativePath>
  </parent>

@@ -6,7 +6,7 @@

  <groupId>com.lancedb</groupId>
  <artifactId>lancedb-parent</artifactId>
- <version>0.22.1-beta.2</version>
+ <version>0.22.2-beta.1</version>
  <packaging>pom</packaging>
  <name>${project.artifactId}</name>
  <description>LanceDB Java SDK Parent POM</description>

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
-version = "0.22.1-beta.2"
+version = "0.22.2-beta.1"
license.workspace = true
description.workspace = true
repository.workspace = true
@@ -1,17 +1,5 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import {
|
||||
Bool,
|
||||
Field,
|
||||
Int32,
|
||||
List,
|
||||
Schema,
|
||||
Struct,
|
||||
Uint8,
|
||||
Utf8,
|
||||
} from "apache-arrow";
|
||||
|
||||
import * as arrow15 from "apache-arrow-15";
|
||||
import * as arrow16 from "apache-arrow-16";
|
||||
import * as arrow17 from "apache-arrow-17";
|
||||
@@ -25,11 +13,9 @@ import {
|
||||
fromTableToBuffer,
|
||||
makeArrowTable,
|
||||
makeEmptyTable,
|
||||
tableFromIPC,
|
||||
} from "../lancedb/arrow";
|
||||
import {
|
||||
EmbeddingFunction,
|
||||
FieldOptions,
|
||||
FunctionOptions,
|
||||
} from "../lancedb/embedding/embedding_function";
|
||||
import { EmbeddingFunctionConfig } from "../lancedb/embedding/registry";
|
||||
@@ -1008,5 +994,64 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
expect(result).toEqual(null);
|
||||
});
|
||||
});
|
||||
|
||||
describe("boolean null handling", function () {
|
||||
it("should handle null values in nullable boolean fields", () => {
|
||||
const { makeArrowTable } = require("../lancedb/arrow");
|
||||
const schema = new Schema([new Field("test", new arrow.Bool(), true)]);
|
||||
|
||||
// Test with all null values
|
||||
const data = [{ test: null }];
|
||||
const table = makeArrowTable(data, { schema });
|
||||
|
||||
expect(table.numRows).toBe(1);
|
||||
expect(table.schema.names).toEqual(["test"]);
|
||||
expect(table.getChild("test")!.get(0)).toBeNull();
|
||||
});
|
||||
|
||||
it("should handle mixed null and non-null boolean values", () => {
|
||||
const { makeArrowTable } = require("../lancedb/arrow");
|
||||
const schema = new Schema([new Field("test", new Bool(), true)]);
|
||||
|
||||
// Test with mixed values
|
||||
const data = [{ test: true }, { test: null }, { test: false }];
|
||||
const table = makeArrowTable(data, { schema });
|
||||
|
||||
expect(table.numRows).toBe(3);
|
||||
expect(table.getChild("test")!.get(0)).toBe(true);
|
||||
expect(table.getChild("test")!.get(1)).toBeNull();
|
||||
expect(table.getChild("test")!.get(2)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// Test for the undefined values bug fix
|
||||
describe("undefined values handling", () => {
|
||||
it("should handle mixed undefined and actual values", () => {
|
||||
const schema = new Schema([
|
||||
new Field("text", new Utf8(), true), // nullable
|
||||
new Field("number", new Int32(), true), // nullable
|
||||
new Field("bool", new Bool(), true), // nullable
|
||||
]);
|
||||
|
||||
const data = [
|
||||
{ text: undefined, number: 42, bool: true },
|
||||
{ text: "hello", number: undefined, bool: false },
|
||||
{ text: "world", number: 123, bool: undefined },
|
||||
];
|
||||
const table = makeArrowTable(data, { schema });
|
||||
|
||||
const result = table.toArray();
|
||||
expect(result).toHaveLength(3);
|
||||
expect(result[0].text).toBe(null);
|
||||
expect(result[0].number).toBe(42);
|
||||
expect(result[0].bool).toBe(true);
|
||||
expect(result[1].text).toBe("hello");
|
||||
expect(result[1].number).toBe(null);
|
||||
expect(result[1].bool).toBe(false);
|
||||
expect(result[2].text).toBe("world");
|
||||
expect(result[2].number).toBe(123);
|
||||
expect(result[2].bool).toBe(null);
|
||||
});
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
@@ -7,7 +7,6 @@ import {
|
||||
ClientConfig,
|
||||
Connection,
|
||||
ConnectionOptions,
|
||||
NativeJsHeaderProvider,
|
||||
TlsConfig,
|
||||
connect,
|
||||
} from "../lancedb";
|
||||
|
||||
@@ -39,7 +39,6 @@ import {
|
||||
Operator,
|
||||
instanceOfFullTextQuery,
|
||||
} from "../lancedb/query";
|
||||
import exp = require("constants");
|
||||
|
||||
describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
"Given a table",
|
||||
@@ -212,8 +211,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
},
|
||||
);
|
||||
|
||||
// TODO: https://github.com/lancedb/lancedb/issues/1832
|
||||
it.skip("should be able to omit nullable fields", async () => {
|
||||
it("should be able to omit nullable fields", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field(
|
||||
@@ -237,23 +235,36 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
await table.add([data3]);
|
||||
|
||||
let res = await table.query().limit(10).toArray();
|
||||
const resVector = res.map((r) => r.get("vector").toArray());
|
||||
const resVector = res.map((r) =>
|
||||
r.vector ? Array.from(r.vector) : null,
|
||||
);
|
||||
expect(resVector).toEqual([null, data2.vector, data3.vector]);
|
||||
const resItem = res.map((r) => r.get("item").toArray());
|
||||
const resItem = res.map((r) => r.item);
|
||||
expect(resItem).toEqual(["foo", null, "bar"]);
|
||||
const resPrice = res.map((r) => r.get("price").toArray());
|
||||
const resPrice = res.map((r) => r.price);
|
||||
expect(resPrice).toEqual([10.0, 2.0, 3.0]);
|
||||
|
||||
const data4 = { item: "foo" };
|
||||
// We can't omit a column if it's not nullable
|
||||
await expect(table.add([data4])).rejects.toThrow("Invalid user input");
|
||||
await expect(table.add([data4])).rejects.toThrow(
|
||||
"Append with different schema",
|
||||
);
|
||||
|
||||
// But we can alter columns to make them nullable
|
||||
await table.alterColumns([{ path: "price", nullable: true }]);
|
||||
await table.add([data4]);
|
||||
|
||||
res = (await table.query().limit(10).toArray()).map((r) => r.toJSON());
|
||||
expect(res).toEqual([data1, data2, data3, data4]);
|
||||
res = (await table.query().limit(10).toArray()).map((r) => ({
|
||||
...r.toJSON(),
|
||||
vector: r.vector ? Array.from(r.vector) : null,
|
||||
}));
|
||||
// Rust fills missing nullable fields with null
|
||||
expect(res).toEqual([
|
||||
{ ...data1, vector: null },
|
||||
{ ...data2, item: null },
|
||||
data3,
|
||||
{ ...data4, price: null, vector: null },
|
||||
]);
|
||||
});
|
||||
|
||||
it("should be able to insert nullable data for non-nullable fields", async () => {
|
||||
@@ -331,6 +342,43 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
const table = await db.createTable("my_table", data);
|
||||
expect(await table.countRows()).toEqual(2);
|
||||
});
|
||||
|
||||
it("should allow undefined and omitted nullable vector fields", async () => {
|
||||
// Test for the bug: can't pass undefined or omit vector column
|
||||
const db = await connect("memory://");
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field("id", new arrow.Int32(), true),
|
||||
new arrow.Field(
|
||||
"vector",
|
||||
new arrow.FixedSizeList(
|
||||
32,
|
||||
new arrow.Field("item", new arrow.Float32(), true),
|
||||
),
|
||||
true, // nullable = true
|
||||
),
|
||||
]);
|
||||
const table = await db.createEmptyTable("test_table", schema);
|
||||
|
||||
// Should not throw error for undefined value
|
||||
await table.add([{ id: 0, vector: undefined }]);
|
||||
|
||||
// Should not throw error for omitted field
|
||||
await table.add([{ id: 1 }]);
|
||||
|
||||
// Should still work for null
|
||||
await table.add([{ id: 2, vector: null }]);
|
||||
|
||||
// Should still work for actual vector
|
||||
const testVector = new Array(32).fill(0.5);
|
||||
await table.add([{ id: 3, vector: testVector }]);
|
||||
expect(await table.countRows()).toEqual(4);
|
||||
|
||||
const res = await table.query().limit(10).toArray();
|
||||
const resVector = res.map((r) =>
|
||||
r.vector ? Array.from(r.vector) : null,
|
||||
);
|
||||
expect(resVector).toEqual([null, null, null, testVector]);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
@@ -488,6 +536,32 @@ describe("merge insert", () => {
|
||||
.execute(newData, { timeoutMs: 0 }),
|
||||
).rejects.toThrow("merge insert timed out");
|
||||
});
|
||||
|
||||
test("useIndex", async () => {
|
||||
const newData = [
|
||||
{ a: 2, b: "x" },
|
||||
{ a: 4, b: "z" },
|
||||
];
|
||||
|
||||
// Test with useIndex(true) - should work fine
|
||||
const result1 = await table
|
||||
.mergeInsert("a")
|
||||
.whenNotMatchedInsertAll()
|
||||
.useIndex(true)
|
||||
.execute(newData);
|
||||
|
||||
expect(result1.numInsertedRows).toBe(1); // Only a=4 should be inserted
|
||||
|
||||
// Test with useIndex(false) - should also work fine
|
||||
const newData2 = [{ a: 5, b: "w" }];
|
||||
const result2 = await table
|
||||
.mergeInsert("a")
|
||||
.whenNotMatchedInsertAll()
|
||||
.useIndex(false)
|
||||
.execute(newData2);
|
||||
|
||||
expect(result2.numInsertedRows).toBe(1); // a=5 should be inserted
|
||||
});
|
||||
});
|
||||
|
||||
describe("When creating an index", () => {
|
||||
@@ -1429,7 +1503,9 @@ describe("when optimizing a dataset", () => {
|
||||
|
||||
it("delete unverified", async () => {
|
||||
const version = await table.version();
|
||||
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${version - 1}.manifest`;
|
||||
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
|
||||
version - 1
|
||||
}.manifest`;
|
||||
fs.rmSync(versionFile);
|
||||
|
||||
let stats = await table.optimize({ deleteUnverified: false });
|
||||
|
||||
@@ -48,6 +48,7 @@
|
||||
"noUnreachableSuper": "error",
|
||||
"noUnsafeFinally": "error",
|
||||
"noUnsafeOptionalChaining": "error",
|
||||
"noUnusedImports": "error",
|
||||
"noUnusedLabels": "error",
|
||||
"noUnusedVariables": "warn",
|
||||
"useIsNan": "error",
|
||||
|
||||
@@ -41,7 +41,6 @@ import {
|
||||
vectorFromArray as badVectorFromArray,
|
||||
makeBuilder,
|
||||
makeData,
|
||||
makeTable,
|
||||
} from "apache-arrow";
|
||||
import { Buffers } from "apache-arrow/data";
|
||||
import { type EmbeddingFunction } from "./embedding/embedding_function";
|
||||
@@ -279,7 +278,7 @@ export class MakeArrowTableOptions {
|
||||
}
|
||||
|
||||
/**
|
||||
* An enhanced version of the {@link makeTable} function from Apache Arrow
|
||||
* An enhanced version of the apache-arrow makeTable function from Apache Arrow
|
||||
* that supports nested fields and embeddings columns.
|
||||
*
|
||||
* (typically you do not need to call this function. It will be called automatically
|
||||
@@ -705,7 +704,7 @@ function transposeData(
|
||||
}
|
||||
return current;
|
||||
});
|
||||
return makeVector(values, field.type);
|
||||
return makeVector(values, field.type, undefined, field.nullable);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -752,9 +751,30 @@ function makeVector(
|
||||
values: unknown[],
|
||||
type?: DataType,
|
||||
stringAsDictionary?: boolean,
|
||||
nullable?: boolean,
|
||||
// biome-ignore lint/suspicious/noExplicitAny: skip
|
||||
): Vector<any> {
|
||||
if (type !== undefined) {
|
||||
// Convert undefined values to null for nullable fields
|
||||
if (nullable) {
|
||||
values = values.map((v) => (v === undefined ? null : v));
|
||||
}
|
||||
|
||||
// workaround for: https://github.com/apache/arrow-js/issues/68
|
||||
if (DataType.isBool(type)) {
|
||||
const hasNonNullValue = values.some((v) => v !== null && v !== undefined);
|
||||
if (!hasNonNullValue) {
|
||||
const nullBitmap = new Uint8Array(Math.ceil(values.length / 8));
|
||||
const data = makeData({
|
||||
type: type,
|
||||
length: values.length,
|
||||
nullCount: values.length,
|
||||
nullBitmap,
|
||||
});
|
||||
return arrowMakeVector(data);
|
||||
}
|
||||
}
|
||||
|
||||
// No need for inference, let Arrow create it
|
||||
if (type instanceof Int) {
|
||||
if (DataType.isInt(type) && type.bitWidth === 64) {
|
||||
@@ -879,7 +899,12 @@ async function applyEmbeddingsFromMetadata(
|
||||
for (const field of schema.fields) {
|
||||
if (!(field.name in columns)) {
|
||||
const nullValues = new Array(table.numRows).fill(null);
|
||||
columns[field.name] = makeVector(nullValues, field.type);
|
||||
columns[field.name] = makeVector(
|
||||
nullValues,
|
||||
field.type,
|
||||
undefined,
|
||||
field.nullable,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -943,7 +968,12 @@ async function applyEmbeddings<T>(
|
||||
} else if (schema != null) {
|
||||
const destField = schema.fields.find((f) => f.name === destColumn);
|
||||
if (destField != null) {
|
||||
newColumns[destColumn] = makeVector([], destField.type);
|
||||
newColumns[destColumn] = makeVector(
|
||||
[],
|
||||
destField.type,
|
||||
undefined,
|
||||
destField.nullable,
|
||||
);
|
||||
} else {
|
||||
throw new Error(
|
||||
`Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`,
|
||||
@@ -1255,19 +1285,36 @@ function validateSchemaEmbeddings(
|
||||
if (isFixedSizeList(field.type)) {
|
||||
field = sanitizeField(field);
|
||||
if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
|
||||
// Check if there's an embedding function registered for this field
|
||||
let hasEmbeddingFunction = false;
|
||||
|
||||
// Check schema metadata for embedding functions
|
||||
if (schema.metadata.has("embedding_functions")) {
|
||||
const embeddings = JSON.parse(
|
||||
schema.metadata.get("embedding_functions")!,
|
||||
);
|
||||
if (
|
||||
// biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
|
||||
embeddings.find((f: any) => f["vectorColumn"] === field.name) ===
|
||||
undefined
|
||||
) {
|
||||
// biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
|
||||
if (embeddings.find((f: any) => f["vectorColumn"] === field.name)) {
|
||||
hasEmbeddingFunction = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check passed embedding function parameter
|
||||
if (embeddings && embeddings.vectorColumn === field.name) {
|
||||
hasEmbeddingFunction = true;
|
||||
}
|
||||
|
||||
// If the field is nullable AND there's no embedding function, allow undefined/omitted values
|
||||
if (field.nullable && !hasEmbeddingFunction) {
|
||||
fields.push(field);
|
||||
} else {
|
||||
// Either not nullable OR has embedding function - require explicit values
|
||||
if (hasEmbeddingFunction) {
|
||||
// Don't add to missingEmbeddingFields since this is expected to be filled by embedding function
|
||||
fields.push(field);
|
||||
} else {
|
||||
missingEmbeddingFields.push(field);
|
||||
}
|
||||
} else {
|
||||
missingEmbeddingFields.push(field);
|
||||
}
|
||||
} else {
|
||||
fields.push(field);
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
|
||||
import {
|
||||
Data,
|
||||
Schema,
|
||||
SchemaLike,
|
||||
TableLike,
|
||||
fromTableToStreamBuffer,
|
||||
|
||||
@@ -70,6 +70,23 @@ export class MergeInsertBuilder {
|
||||
this.#schema,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Controls whether to use indexes for the merge operation.
|
||||
*
|
||||
* When set to `true` (the default), the operation will use an index if available
|
||||
* on the join key for improved performance. When set to `false`, it forces a full
|
||||
* table scan even if an index exists. This can be useful for benchmarking or when
|
||||
* the query optimizer chooses a suboptimal path.
|
||||
*
|
||||
* @param useIndex - Whether to use indices for the merge operation. Defaults to `true`.
|
||||
*/
|
||||
useIndex(useIndex: boolean): MergeInsertBuilder {
|
||||
return new MergeInsertBuilder(
|
||||
this.#native.useIndex(useIndex),
|
||||
this.#schema,
|
||||
);
|
||||
}
|
||||
/**
|
||||
* Executes the merge insert operation
|
||||
*
|
||||
|
||||
@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-darwin-arm64",
- "version": "0.22.1-beta.2",
+ "version": "0.22.2-beta.1",
  "os": ["darwin"],
  "cpu": ["arm64"],
  "main": "lancedb.darwin-arm64.node",

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-darwin-x64",
- "version": "0.22.1-beta.2",
+ "version": "0.22.2-beta.1",
  "os": ["darwin"],
  "cpu": ["x64"],
  "main": "lancedb.darwin-x64.node",

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-linux-arm64-gnu",
- "version": "0.22.1-beta.2",
+ "version": "0.22.2-beta.1",
  "os": ["linux"],
  "cpu": ["arm64"],
  "main": "lancedb.linux-arm64-gnu.node",

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-linux-arm64-musl",
- "version": "0.22.1-beta.2",
+ "version": "0.22.2-beta.1",
  "os": ["linux"],
  "cpu": ["arm64"],
  "main": "lancedb.linux-arm64-musl.node",

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-linux-x64-gnu",
- "version": "0.22.1-beta.2",
+ "version": "0.22.2-beta.1",
  "os": ["linux"],
  "cpu": ["x64"],
  "main": "lancedb.linux-x64-gnu.node",

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-linux-x64-musl",
- "version": "0.22.1-beta.2",
+ "version": "0.22.2-beta.1",
  "os": ["linux"],
  "cpu": ["x64"],
  "main": "lancedb.linux-x64-musl.node",

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-win32-arm64-msvc",
- "version": "0.22.1-beta.2",
+ "version": "0.22.2-beta.1",
  "os": [
    "win32"
  ],

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-win32-x64-msvc",
- "version": "0.22.1-beta.2",
+ "version": "0.22.2-beta.1",
  "os": ["win32"],
  "cpu": ["x64"],
  "main": "lancedb.win32-x64-msvc.node",
4  nodejs/package-lock.json  (generated)
@@ -1,12 +1,12 @@
{
  "name": "@lancedb/lancedb",
- "version": "0.22.1-beta.2",
+ "version": "0.22.2-beta.1",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-     "version": "0.22.1-beta.2",
+     "version": "0.22.2-beta.1",
      "cpu": [
        "x64",
        "arm64"
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
- "version": "0.22.1-beta.2",
+ "version": "0.22.2-beta.1",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
@@ -43,6 +43,13 @@ impl NativeMergeInsertBuilder {
        self.inner.timeout(Duration::from_millis(timeout as u64));
    }

    #[napi]
    pub fn use_index(&self, use_index: bool) -> Self {
        let mut this = self.clone();
        this.inner.use_index(use_index);
        this
    }

    #[napi(catch_unwind)]
    pub async fn execute(&self, buf: Buffer) -> napi::Result<MergeResult> {
        let data = ipc_file_to_batches(buf.to_vec())
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.25.1-beta.3"
+current_version = "0.25.2-beta.2"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.25.1-beta.3"
+version = "0.25.2-beta.2"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
@@ -10,7 +10,7 @@ dependencies = [
    "pyarrow>=16",
    "pydantic>=1.10",
    "tqdm>=4.27.0",
-   "lance-namespace==0.0.6"
+   "lance-namespace>=0.0.16"
]
description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
@@ -33,6 +33,7 @@ class LanceMergeInsertBuilder(object):
        self._when_not_matched_by_source_delete = False
        self._when_not_matched_by_source_condition = None
        self._timeout = None
        self._use_index = True

    def when_matched_update_all(
        self, *, where: Optional[str] = None
@@ -78,6 +79,23 @@ class LanceMergeInsertBuilder(object):
        self._when_not_matched_by_source_condition = condition
        return self

    def use_index(self, use_index: bool) -> LanceMergeInsertBuilder:
        """
        Controls whether to use indexes for the merge operation.

        When set to `True` (the default), the operation will use an index if available
        on the join key for improved performance. When set to `False`, it forces a full
        table scan even if an index exists. This can be useful for benchmarking or when
        the query optimizer chooses a suboptimal path.

        Parameters
        ----------
        use_index: bool
            Whether to use indices for the merge operation. Defaults to `True`.
        """
        self._use_index = use_index
        return self

    def execute(
        self,
        new_data: DATA,
@@ -9,6 +9,7 @@ from .linear_combination import LinearCombinationReranker
from .openai import OpenaiReranker
from .jinaai import JinaReranker
from .rrf import RRFReranker
from .mrr import MRRReranker
from .answerdotai import AnswerdotaiRerankers
from .voyageai import VoyageAIReranker
@@ -23,4 +24,5 @@ __all__ = [
    "RRFReranker",
    "AnswerdotaiRerankers",
    "VoyageAIReranker",
    "MRRReranker",
]
169  python/python/lancedb/rerankers/mrr.py  (new file)
@@ -0,0 +1,169 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors


from typing import Union, List, TYPE_CHECKING
import pyarrow as pa
import numpy as np

from collections import defaultdict
from .base import Reranker

if TYPE_CHECKING:
    from ..table import LanceVectorQueryBuilder


class MRRReranker(Reranker):
    """
    Reranks the results using Mean Reciprocal Rank (MRR) algorithm based
    on the scores of vector and FTS search.
    Algorithm reference - https://en.wikipedia.org/wiki/Mean_reciprocal_rank

    MRR calculates the average of reciprocal ranks across different search results.
    For each document, it computes the reciprocal of its rank in each system,
    then takes the mean of these reciprocal ranks as the final score.

    Parameters
    ----------
    weight_vector : float, default 0.5
        Weight for vector search results (0.0 to 1.0)
    weight_fts : float, default 0.5
        Weight for FTS search results (0.0 to 1.0)
        Note: weight_vector + weight_fts should equal 1.0
    return_score : str, default "relevance"
        Options are "relevance" or "all"
        The type of score to return. If "relevance", will return only the relevance
        score. If "all", will return all scores from the vector and FTS search along
        with the relevance score.
    """
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
weight_vector: float = 0.5,
|
||||
weight_fts: float = 0.5,
|
||||
return_score="relevance",
|
||||
):
|
||||
if not (0.0 <= weight_vector <= 1.0):
|
||||
raise ValueError("weight_vector must be between 0.0 and 1.0")
|
||||
if not (0.0 <= weight_fts <= 1.0):
|
||||
raise ValueError("weight_fts must be between 0.0 and 1.0")
|
||||
if abs(weight_vector + weight_fts - 1.0) > 1e-6:
|
||||
raise ValueError("weight_vector + weight_fts must equal 1.0")
|
||||
|
||||
super().__init__(return_score)
|
||||
self.weight_vector = weight_vector
|
||||
self.weight_fts = weight_fts
|
||||
|
||||
def rerank_hybrid(
|
||||
self,
|
||||
query: str, # noqa: F821
|
||||
vector_results: pa.Table,
|
||||
fts_results: pa.Table,
|
||||
):
|
||||
vector_ids = vector_results["_rowid"].to_pylist() if vector_results else []
|
||||
fts_ids = fts_results["_rowid"].to_pylist() if fts_results else []
|
||||
|
||||
# Maps result_id to list of (type, reciprocal_rank)
|
||||
mrr_score_map = defaultdict(list)
|
||||
|
||||
if vector_ids:
|
||||
for rank, result_id in enumerate(vector_ids, 1):
|
||||
reciprocal_rank = 1.0 / rank
|
||||
mrr_score_map[result_id].append(("vector", reciprocal_rank))
|
||||
|
||||
if fts_ids:
|
||||
for rank, result_id in enumerate(fts_ids, 1):
|
||||
reciprocal_rank = 1.0 / rank
|
||||
mrr_score_map[result_id].append(("fts", reciprocal_rank))
|
||||
|
||||
final_mrr_scores = {}
|
||||
for result_id, scores in mrr_score_map.items():
|
||||
vector_rr = 0.0
|
||||
fts_rr = 0.0
|
||||
|
||||
for score_type, reciprocal_rank in scores:
|
||||
if score_type == "vector":
|
||||
vector_rr = reciprocal_rank
|
||||
elif score_type == "fts":
|
||||
fts_rr = reciprocal_rank
|
||||
|
||||
# If a document doesn't appear, its reciprocal rank is 0
|
||||
weighted_mrr = self.weight_vector * vector_rr + self.weight_fts * fts_rr
|
||||
final_mrr_scores[result_id] = weighted_mrr
|
||||
|
||||
combined_results = self.merge_results(vector_results, fts_results)
|
||||
combined_row_ids = combined_results["_rowid"].to_pylist()
|
||||
relevance_scores = [final_mrr_scores[row_id] for row_id in combined_row_ids]
|
||||
combined_results = combined_results.append_column(
|
||||
"_relevance_score", pa.array(relevance_scores, type=pa.float32())
|
||||
)
|
||||
combined_results = combined_results.sort_by(
|
||||
[("_relevance_score", "descending")]
|
||||
)
|
||||
|
||||
if self.score == "relevance":
|
||||
combined_results = self._keep_relevance_score(combined_results)
|
||||
|
||||
return combined_results
|
||||
|
||||
def rerank_multivector(
|
||||
self,
|
||||
vector_results: Union[List[pa.Table], List["LanceVectorQueryBuilder"]],
|
||||
query: str = None,
|
||||
deduplicate: bool = True, # noqa: F821
|
||||
):
|
||||
"""
|
||||
Reranks the results from multiple vector searches using MRR algorithm.
|
||||
Each vector search result is treated as a separate ranking system,
|
||||
and MRR calculates the mean of reciprocal ranks across all systems.
|
||||
This cannot reuse rerank_hybrid because MRR semantics require treating
|
||||
each vector result as a separate ranking system.
|
||||
"""
|
||||
if not all(isinstance(v, type(vector_results[0])) for v in vector_results):
|
||||
raise ValueError(
|
||||
"All elements in vector_results should be of the same type"
|
||||
)
|
||||
|
||||
# avoid circular import
|
||||
if type(vector_results[0]).__name__ == "LanceVectorQueryBuilder":
|
||||
vector_results = [result.to_arrow() for result in vector_results]
|
||||
elif not isinstance(vector_results[0], pa.Table):
|
||||
raise ValueError(
|
||||
"vector_results should be a list of pa.Table or LanceVectorQueryBuilder"
|
||||
)
|
||||
|
||||
if not all("_rowid" in result.column_names for result in vector_results):
|
||||
raise ValueError(
|
||||
"'_rowid' is required for deduplication. \
|
||||
add _rowid to search results like this: \
|
||||
`search().with_row_id(True)`"
|
||||
)
|
||||
|
||||
mrr_score_map = defaultdict(list)
|
||||
|
||||
for result_table in vector_results:
|
||||
result_ids = result_table["_rowid"].to_pylist()
|
||||
for rank, result_id in enumerate(result_ids, 1):
|
||||
reciprocal_rank = 1.0 / rank
|
||||
mrr_score_map[result_id].append(reciprocal_rank)
|
||||
|
||||
final_mrr_scores = {}
|
||||
for result_id, reciprocal_ranks in mrr_score_map.items():
|
||||
mean_rr = np.mean(reciprocal_ranks)
|
||||
final_mrr_scores[result_id] = mean_rr
|
||||
|
||||
combined = pa.concat_tables(vector_results, **self._concat_tables_args)
|
||||
combined = self._deduplicate(combined)
|
||||
|
||||
combined_row_ids = combined["_rowid"].to_pylist()
|
||||
|
||||
relevance_scores = [final_mrr_scores[row_id] for row_id in combined_row_ids]
|
||||
combined = combined.append_column(
|
||||
"_relevance_score", pa.array(relevance_scores, type=pa.float32())
|
||||
)
|
||||
combined = combined.sort_by([("_relevance_score", "descending")])
|
||||
|
||||
if self.score == "relevance":
|
||||
combined = self._keep_relevance_score(combined)
|
||||
|
||||
return combined
|
||||
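To make the scoring above concrete: each row receives `weight_vector * 1/rank_vector + weight_fts * 1/rank_fts`, and a missing ranking contributes 0. With the default 0.5/0.5 weights, a row ranked 2nd by vector search and 5th by FTS scores 0.5*(1/2) + 0.5*(1/5) = 0.35. Below is a hedged sketch of plugging the new reranker into a hybrid query; the database, table, and indexed columns are hypothetical and follow the usual LanceDB hybrid-search flow:

```python
import lancedb
from lancedb.rerankers import MRRReranker

db = lancedb.connect("data/sample-lancedb")   # hypothetical database
tbl = db.open_table("documents")              # hypothetical table with a vector column and an FTS index

# Weight vector hits more heavily than FTS hits; the weights must sum to 1.0.
reranker = MRRReranker(weight_vector=0.7, weight_fts=0.3)

results = (
    tbl.search("single player experience", query_type="hybrid")
    .rerank(reranker=reranker)
    .limit(10)
    .to_pandas()
)
```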
@@ -1470,10 +1470,7 @@ class Table(ABC):
|
||||
be deleted unless they are at least 7 days old. If delete_unverified is True
|
||||
then these files will be deleted regardless of their age.
|
||||
retrain: bool, default False
|
||||
If True, retrain the vector indices, this would refine the IVF clustering
|
||||
and quantization, which may improve the search accuracy. It's faster than
|
||||
re-creating the index from scratch, so it's recommended to try this first,
|
||||
when the data distribution has changed significantly.
|
||||
This parameter is no longer used and is deprecated.
|
||||
|
||||
Experimental API
|
||||
----------------
|
||||
@@ -2835,10 +2832,7 @@ class LanceTable(Table):
|
||||
be deleted unless they are at least 7 days old. If delete_unverified is True
|
||||
then these files will be deleted regardless of their age.
|
||||
retrain: bool, default False
|
||||
If True, retrain the vector indices, this would refine the IVF clustering
|
||||
and quantization, which may improve the search accuracy. It's faster than
|
||||
re-creating the index from scratch, so it's recommended to try this first,
|
||||
when the data distribution has changed significantly.
|
||||
This parameter is no longer used and is deprecated.
|
||||
|
||||
Experimental API
|
||||
----------------
|
||||
@@ -3926,6 +3920,7 @@ class AsyncTable:
|
||||
when_not_matched_by_source_delete=merge._when_not_matched_by_source_delete,
|
||||
when_not_matched_by_source_condition=merge._when_not_matched_by_source_condition,
|
||||
timeout=merge._timeout,
|
||||
use_index=merge._use_index,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -4298,10 +4293,7 @@ class AsyncTable:
|
||||
be deleted unless they are at least 7 days old. If delete_unverified is True
|
||||
then these files will be deleted regardless of their age.
|
||||
retrain: bool, default False
|
||||
If True, retrain the vector indices, this would refine the IVF clustering
|
||||
and quantization, which may improve the search accuracy. It's faster than
|
||||
re-creating the index from scratch, so it's recommended to try this first,
|
||||
when the data distribution has changed significantly.
|
||||
This parameter is no longer used and is deprecated.
|
||||
|
||||
Experimental API
|
||||
----------------
|
||||
@@ -4324,10 +4316,19 @@ class AsyncTable:
|
||||
cleanup_since_ms: Optional[int] = None
|
||||
if cleanup_older_than is not None:
|
||||
cleanup_since_ms = round(cleanup_older_than.total_seconds() * 1000)
|
||||
|
||||
if retrain:
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"The 'retrain' parameter is deprecated and will be removed in a "
|
||||
"future version.",
|
||||
DeprecationWarning,
|
||||
)
|
||||
|
||||
return await self._inner.optimize(
|
||||
cleanup_since_ms=cleanup_since_ms,
|
||||
delete_unverified=delete_unverified,
|
||||
retrain=retrain,
|
||||
)
|
||||
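A minimal sketch of the call pattern after this change, assuming an already-open `AsyncTable` named `table`; passing `retrain=True` is still accepted but now only emits a `DeprecationWarning`, and index optimization always runs with default options:

```python
from datetime import timedelta

# (inside an async function)
# Compact small fragments and prune versions older than one week; no retrain flag needed.
stats = await table.optimize(cleanup_older_than=timedelta(days=7))
print(stats)
```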
|
||||
async def list_indices(self) -> Iterable[IndexConfig]:
|
||||
|
||||
@@ -35,6 +35,8 @@ async def some_table(db_async):
|
||||
"tags": [
|
||||
[f"tag{random.randint(0, 8)}" for _ in range(2)] for _ in range(NROWS)
|
||||
],
|
||||
"is_active": [random.choice([True, False]) for _ in range(NROWS)],
|
||||
"data": [random.randbytes(random.randint(0, 128)) for _ in range(NROWS)],
|
||||
}
|
||||
)
|
||||
return await db_async.create_table(
|
||||
@@ -99,10 +101,17 @@ async def test_create_fixed_size_binary_index(some_table: AsyncTable):
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_bitmap_index(some_table: AsyncTable):
|
||||
await some_table.create_index("id", config=Bitmap())
|
||||
await some_table.create_index("is_active", config=Bitmap())
|
||||
await some_table.create_index("data", config=Bitmap())
|
||||
indices = await some_table.list_indices()
|
||||
assert str(indices) == '[Index(Bitmap, columns=["id"], name="id_idx")]'
|
||||
indices = await some_table.list_indices()
|
||||
assert len(indices) == 1
|
||||
assert len(indices) == 3
|
||||
assert indices[0].index_type == "Bitmap"
|
||||
assert indices[0].columns == ["id"]
|
||||
assert indices[1].index_type == "Bitmap"
|
||||
assert indices[1].columns == ["is_active"]
|
||||
assert indices[2].index_type == "Bitmap"
|
||||
assert indices[2].columns == ["data"]
|
||||
|
||||
index_name = indices[0].name
|
||||
stats = await some_table.index_stats(index_name)
|
||||
assert stats.index_type == "BITMAP"
|
||||
@@ -111,6 +120,11 @@ async def test_create_bitmap_index(some_table: AsyncTable):
|
||||
assert stats.num_unindexed_rows == 0
|
||||
assert stats.num_indices == 1
|
||||
|
||||
assert (
|
||||
"ScalarIndexQuery"
|
||||
in await some_table.query().where("is_active = TRUE").explain_plan()
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_label_list_index(some_table: AsyncTable):
|
||||
|
||||
@@ -22,6 +22,7 @@ from lancedb.rerankers import (
|
||||
JinaReranker,
|
||||
AnswerdotaiRerankers,
|
||||
VoyageAIReranker,
|
||||
MRRReranker,
|
||||
)
|
||||
from lancedb.table import LanceTable
|
||||
|
||||
@@ -46,6 +47,7 @@ def get_test_table(tmp_path, use_tantivy):
|
||||
db,
|
||||
"my_table",
|
||||
schema=MyTable,
|
||||
mode="overwrite",
|
||||
)
|
||||
|
||||
# Need to test with a bunch of phrases to make sure sorting is consistent
|
||||
@@ -96,7 +98,7 @@ def get_test_table(tmp_path, use_tantivy):
|
||||
)
|
||||
|
||||
# Create a fts index
|
||||
table.create_fts_index("text", use_tantivy=use_tantivy)
|
||||
table.create_fts_index("text", use_tantivy=use_tantivy, replace=True)
|
||||
|
||||
return table, MyTable
|
||||
|
||||
@@ -320,6 +322,34 @@ def test_rrf_reranker(tmp_path, use_tantivy):
|
||||
_run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_mrr_reranker(tmp_path, use_tantivy):
|
||||
reranker = MRRReranker()
|
||||
_run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
|
||||
|
||||
# Test multi-vector part
|
||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
||||
query = "single player experience"
|
||||
rs1 = table.search(query, vector_column_name="vector").limit(10).with_row_id(True)
|
||||
rs2 = (
|
||||
table.search(query, vector_column_name="meta_vector")
|
||||
.limit(10)
|
||||
.with_row_id(True)
|
||||
)
|
||||
result = reranker.rerank_multivector([rs1, rs2])
|
||||
assert "_relevance_score" in result.column_names
|
||||
assert len(result) <= 20
|
||||
|
||||
if len(result) > 1:
|
||||
assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
|
||||
"The _relevance_score should be descending."
|
||||
)
|
||||
|
||||
# Test with duplicate results
|
||||
result_deduped = reranker.rerank_multivector([rs1, rs2, rs1])
|
||||
assert len(result_deduped) == len(result)
|
||||
|
||||
|
||||
def test_rrf_reranker_distance():
|
||||
data = pa.table(
|
||||
{
|
||||
|
||||
@@ -591,12 +591,11 @@ impl Table {
|
||||
}
|
||||
|
||||
/// Optimize the on-disk data by compacting and pruning old data, for better performance.
|
||||
#[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None, retrain=None))]
|
||||
#[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None))]
|
||||
pub fn optimize(
|
||||
self_: PyRef<'_, Self>,
|
||||
cleanup_since_ms: Option<u64>,
|
||||
delete_unverified: Option<bool>,
|
||||
retrain: Option<bool>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
let older_than = if let Some(ms) = cleanup_since_ms {
|
||||
@@ -632,10 +631,9 @@ impl Table {
|
||||
.prune
|
||||
.unwrap();
|
||||
inner
|
||||
.optimize(lancedb::table::OptimizeAction::Index(match retrain {
|
||||
Some(true) => OptimizeOptions::retrain(),
|
||||
_ => OptimizeOptions::default(),
|
||||
}))
|
||||
.optimize(lancedb::table::OptimizeAction::Index(
|
||||
OptimizeOptions::default(),
|
||||
))
|
||||
.await
|
||||
.infer_error()?;
|
||||
Ok(OptimizeStats {
|
||||
@@ -674,6 +672,9 @@ impl Table {
|
||||
if let Some(timeout) = parameters.timeout {
|
||||
builder.timeout(timeout);
|
||||
}
|
||||
if let Some(use_index) = parameters.use_index {
|
||||
builder.use_index(use_index);
|
||||
}
|
||||
|
||||
future_into_py(self_.py(), async move {
|
||||
let res = builder.execute(Box::new(batches)).await.infer_error()?;
|
||||
@@ -833,6 +834,7 @@ pub struct MergeInsertParams {
|
||||
when_not_matched_by_source_delete: bool,
|
||||
when_not_matched_by_source_condition: Option<String>,
|
||||
timeout: Option<std::time::Duration>,
|
||||
use_index: Option<bool>,
|
||||
}
|
||||
|
||||
#[pyclass]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.22.1-beta.2"
|
||||
version = "0.22.2-beta.1"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
@@ -36,6 +36,7 @@ lance-table = { workspace = true }
|
||||
lance-linalg = { workspace = true }
|
||||
lance-testing = { workspace = true }
|
||||
lance-encoding = { workspace = true }
|
||||
lance-namespace = { workspace = true }
|
||||
moka = { workspace = true }
|
||||
pin-project = { workspace = true }
|
||||
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
||||
@@ -81,6 +82,7 @@ crunchy.workspace = true
|
||||
bytemuck_derive.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
anyhow = "1"
|
||||
tempfile = "3.5.0"
|
||||
rand = { version = "0.9", features = ["small_rng"] }
|
||||
random_word = { version = "0.4.3", features = ["en"] }
|
||||
|
||||
rust/lancedb/Makefile (new file, 19 lines)
@@ -0,0 +1,19 @@
|
||||
#
|
||||
# Makefile for running tests.
|
||||
#
|
||||
|
||||
# Run all tests.
|
||||
all-tests: feature-tests remote-tests
|
||||
|
||||
# Run tests for every feature. This requires using docker compose to set up
|
||||
# the environment.
|
||||
feature-tests:
|
||||
../../ci/run_with_docker_compose.sh \
|
||||
cargo test --all-features --tests --locked --examples
|
||||
.PHONY: feature-tests
|
||||
|
||||
# Run tests against remote endpoints.
|
||||
remote-tests:
|
||||
../../ci/run_with_test_connection.sh \
|
||||
cargo test --features remote --locked
|
||||
.PHONY: remote-tests
|
||||
@@ -1015,6 +1015,117 @@ pub fn connect(uri: &str) -> ConnectBuilder {
|
||||
ConnectBuilder::new(uri)
|
||||
}
|
||||
|
||||
pub struct ConnectNamespaceBuilder {
|
||||
ns_impl: String,
|
||||
properties: HashMap<String, String>,
|
||||
storage_options: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
}
|
||||
|
||||
impl ConnectNamespaceBuilder {
|
||||
fn new(ns_impl: &str, properties: HashMap<String, String>) -> Self {
|
||||
Self {
|
||||
ns_impl: ns_impl.to_string(),
|
||||
properties,
|
||||
storage_options: HashMap::new(),
|
||||
read_consistency_interval: None,
|
||||
embedding_registry: None,
|
||||
session: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
self.storage_options.insert(key.into(), value.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
) -> Self {
|
||||
for (key, value) in pairs {
|
||||
self.storage_options.insert(key.into(), value.into());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// The interval at which to check for updates from other processes.
|
||||
///
|
||||
/// If left unset, consistency is not checked. For maximum read
|
||||
/// performance, this is the default. For strong consistency, set this to
|
||||
/// zero seconds. Then every read will check for updates from other processes.
|
||||
/// As a compromise, set this to a non-zero duration for eventual consistency.
|
||||
pub fn read_consistency_interval(
|
||||
mut self,
|
||||
read_consistency_interval: std::time::Duration,
|
||||
) -> Self {
|
||||
self.read_consistency_interval = Some(read_consistency_interval);
|
||||
self
|
||||
}
|
||||
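The same consistency setting is exposed to Python users through `lancedb.connect`; a brief sketch of the three modes described above (the database path is hypothetical):

```python
from datetime import timedelta
import lancedb

# Default: no consistency checks, fastest reads.
db_fast = lancedb.connect("data/sample-lancedb")

# Strong consistency: check for updates from other writers on every read.
db_strong = lancedb.connect("data/sample-lancedb", read_consistency_interval=timedelta(0))

# Eventual consistency: pick up external writes at most every 5 seconds.
db_eventual = lancedb.connect("data/sample-lancedb", read_consistency_interval=timedelta(seconds=5))
```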
|
||||
/// Provide a custom [`EmbeddingRegistry`] to use for this connection.
|
||||
pub fn embedding_registry(mut self, registry: Arc<dyn EmbeddingRegistry>) -> Self {
|
||||
self.embedding_registry = Some(registry);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set a custom session for object stores and caching.
|
||||
///
|
||||
/// By default, a new session with default configuration will be created.
|
||||
/// This method allows you to provide a custom session with your own
|
||||
/// configuration for object store registries, caching, etc.
|
||||
pub fn session(mut self, session: Arc<lance::session::Session>) -> Self {
|
||||
self.session = Some(session);
|
||||
self
|
||||
}
|
||||
|
||||
/// Execute the connection
|
||||
pub async fn execute(self) -> Result<Connection> {
|
||||
use crate::database::namespace::LanceNamespaceDatabase;
|
||||
|
||||
let internal = Arc::new(
|
||||
LanceNamespaceDatabase::connect(
|
||||
&self.ns_impl,
|
||||
self.properties,
|
||||
self.storage_options,
|
||||
self.read_consistency_interval,
|
||||
self.session,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
|
||||
Ok(Connection {
|
||||
internal,
|
||||
uri: format!("namespace://{}", self.ns_impl),
|
||||
embedding_registry: self
|
||||
.embedding_registry
|
||||
.unwrap_or_else(|| Arc::new(MemoryRegistry::new())),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Connect to a LanceDB database through a namespace.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `ns_impl` - The namespace implementation to use (e.g., "dir" for directory-based, "rest" for REST API)
|
||||
/// * `properties` - Configuration properties for the namespace implementation
|
||||
/// ```
|
||||
pub fn connect_namespace(
|
||||
ns_impl: &str,
|
||||
properties: HashMap<String, String>,
|
||||
) -> ConnectNamespaceBuilder {
|
||||
ConnectNamespaceBuilder::new(ns_impl, properties)
|
||||
}
|
||||
|
||||
#[cfg(all(test, feature = "remote"))]
|
||||
mod test_utils {
|
||||
use super::*;
|
||||
@@ -1059,6 +1170,7 @@ mod tests {
|
||||
use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
|
||||
use crate::query::QueryBase;
|
||||
use crate::query::{ExecutableQuery, QueryExecutionOptions};
|
||||
use crate::test_connection::test_utils::new_test_connection;
|
||||
use arrow::compute::concat_batches;
|
||||
use arrow_array::RecordBatchReader;
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
@@ -1074,11 +1186,8 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_connect() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let db = connect(uri).execute().await.unwrap();
|
||||
|
||||
assert_eq!(db.uri, uri);
|
||||
let tc = new_test_connection().await.unwrap();
|
||||
assert_eq!(tc.connection.uri, tc.uri);
|
||||
}
|
||||
|
||||
#[cfg(not(windows))]
|
||||
@@ -1144,16 +1253,10 @@ mod tests {
|
||||
assert_eq!(tables, names[..7]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_connect_s3() {
|
||||
// let db = Database::connect("s3://bucket/path/to/database").await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_open_table() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let db = connect(uri).execute().await.unwrap();
|
||||
let tc = new_test_connection().await.unwrap();
|
||||
let db = tc.connection;
|
||||
|
||||
assert_eq!(db.table_names().execute().await.unwrap().len(), 0);
|
||||
// open non-exist table
|
||||
|
||||
@@ -52,13 +52,13 @@ pub fn infer_vector_columns(
|
||||
for field in reader.schema().fields() {
|
||||
match field.data_type() {
|
||||
DataType::FixedSizeList(sub_field, _) if sub_field.data_type().is_floating() => {
|
||||
columns.push(field.name().to_string());
|
||||
columns.push(field.name().clone());
|
||||
}
|
||||
DataType::List(sub_field) if sub_field.data_type().is_floating() && !strict => {
|
||||
columns_to_infer.insert(field.name().to_string(), None);
|
||||
columns_to_infer.insert(field.name().clone(), None);
|
||||
}
|
||||
DataType::LargeList(sub_field) if sub_field.data_type().is_floating() && !strict => {
|
||||
columns_to_infer.insert(field.name().to_string(), None);
|
||||
columns_to_infer.insert(field.name().clone(), None);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
@@ -29,6 +29,7 @@ use crate::error::Result;
|
||||
use crate::table::{BaseTable, TableDefinition, WriteOptions};
|
||||
|
||||
pub mod listing;
|
||||
pub mod namespace;
|
||||
|
||||
pub trait DatabaseOptions {
|
||||
fn serialize_into_map(&self, map: &mut HashMap<String, String>);
|
||||
|
||||
@@ -718,9 +718,9 @@ impl Database for ListingDatabase {
|
||||
.map_err(|e| Error::Lance { source: e })?;
|
||||
|
||||
let version_ref = match (request.source_version, request.source_tag) {
|
||||
(Some(v), None) => Ok(Ref::Version(v)),
|
||||
(Some(v), None) => Ok(Ref::Version(None, Some(v))),
|
||||
(None, Some(tag)) => Ok(Ref::Tag(tag)),
|
||||
(None, None) => Ok(Ref::Version(source_dataset.version().version)),
|
||||
(None, None) => Ok(Ref::Version(None, Some(source_dataset.version().version))),
|
||||
_ => Err(Error::InvalidInput {
|
||||
message: "Cannot specify both source_version and source_tag".to_string(),
|
||||
}),
|
||||
@@ -728,7 +728,7 @@ impl Database for ListingDatabase {
|
||||
|
||||
let target_uri = self.table_uri(&request.target_table_name)?;
|
||||
source_dataset
|
||||
.shallow_clone(&target_uri, version_ref, storage_params)
|
||||
.shallow_clone(&target_uri, version_ref, Some(storage_params))
|
||||
.await
|
||||
.map_err(|e| Error::Lance { source: e })?;
|
||||
|
||||
|
||||
rust/lancedb/src/database/namespace.rs (new file, 840 lines)
@@ -0,0 +1,840 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
//! Namespace-based database implementation that delegates table management to lance-namespace
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use lance_namespace::{
|
||||
connect as connect_namespace,
|
||||
models::{
|
||||
CreateEmptyTableRequest, CreateNamespaceRequest, DescribeTableRequest,
|
||||
DropNamespaceRequest, DropTableRequest, ListNamespacesRequest, ListTablesRequest,
|
||||
},
|
||||
LanceNamespace,
|
||||
};
|
||||
|
||||
use crate::connection::ConnectRequest;
|
||||
use crate::database::listing::ListingDatabase;
|
||||
use crate::error::{Error, Result};
|
||||
|
||||
use super::{
|
||||
BaseTable, CloneTableRequest, CreateNamespaceRequest as DbCreateNamespaceRequest,
|
||||
CreateTableMode, CreateTableRequest as DbCreateTableRequest, Database,
|
||||
DropNamespaceRequest as DbDropNamespaceRequest,
|
||||
ListNamespacesRequest as DbListNamespacesRequest, OpenTableRequest, TableNamesRequest,
|
||||
};
|
||||
|
||||
/// A database implementation that uses lance-namespace for table management
|
||||
pub struct LanceNamespaceDatabase {
|
||||
namespace: Arc<dyn LanceNamespace>,
|
||||
// Storage options to be inherited by tables
|
||||
storage_options: HashMap<String, String>,
|
||||
// Read consistency interval for tables
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
// Optional session for object stores and caching
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
}
|
||||
|
||||
impl LanceNamespaceDatabase {
|
||||
pub async fn connect(
|
||||
ns_impl: &str,
|
||||
ns_properties: HashMap<String, String>,
|
||||
storage_options: HashMap<String, String>,
|
||||
read_consistency_interval: Option<std::time::Duration>,
|
||||
session: Option<Arc<lance::session::Session>>,
|
||||
) -> Result<Self> {
|
||||
let namespace = connect_namespace(ns_impl, ns_properties.clone())
|
||||
.await
|
||||
.map_err(|e| Error::InvalidInput {
|
||||
message: format!("Failed to connect to namespace: {:?}", e),
|
||||
})?;
|
||||
|
||||
Ok(Self {
|
||||
namespace,
|
||||
storage_options,
|
||||
read_consistency_interval,
|
||||
session,
|
||||
})
|
||||
}
|
||||
|
||||
/// Helper method to create a ListingDatabase from a table location
|
||||
///
|
||||
/// This method:
|
||||
/// 1. Validates that the location ends with <table_name>.lance
|
||||
/// 2. Extracts the parent directory from the location
|
||||
/// 3. Creates a ListingDatabase at that parent directory
|
||||
async fn create_listing_database(
|
||||
&self,
|
||||
table_name: &str,
|
||||
location: &str,
|
||||
additional_storage_options: Option<HashMap<String, String>>,
|
||||
) -> Result<Arc<ListingDatabase>> {
|
||||
let expected_suffix = format!("{}.lance", table_name);
|
||||
if !location.ends_with(&expected_suffix) {
|
||||
return Err(Error::Runtime {
|
||||
message: format!(
|
||||
"Invalid table location '{}': expected to end with '{}'",
|
||||
location, expected_suffix
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
let parent_dir = location
|
||||
.rsplit_once('/')
|
||||
.map(|(parent, _)| parent.to_string())
|
||||
.ok_or_else(|| Error::Runtime {
|
||||
message: format!("Invalid table location '{}': no parent directory", location),
|
||||
})?;
|
||||
|
||||
let mut merged_storage_options = self.storage_options.clone();
|
||||
if let Some(opts) = additional_storage_options {
|
||||
merged_storage_options.extend(opts);
|
||||
}
|
||||
|
||||
let connect_request = ConnectRequest {
|
||||
uri: parent_dir,
|
||||
options: merged_storage_options,
|
||||
read_consistency_interval: self.read_consistency_interval,
|
||||
session: self.session.clone(),
|
||||
#[cfg(feature = "remote")]
|
||||
client_config: Default::default(),
|
||||
};
|
||||
|
||||
let listing_db = ListingDatabase::connect_with_options(&connect_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to create listing database: {}", e),
|
||||
})?;
|
||||
|
||||
Ok(Arc::new(listing_db))
|
||||
}
|
||||
}
|
||||
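As a worked illustration of the location rule enforced by `create_listing_database` above (all values are hypothetical):

```python
# A table named "users" must live at a URI ending in "users.lance";
# the listing database is then rooted at the parent directory.
location = "s3://bucket/warehouse/users.lance"
table_name = "users"

assert location.endswith(f"{table_name}.lance")
parent_dir = location.rsplit("/", 1)[0]
assert parent_dir == "s3://bucket/warehouse"
```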
|
||||
impl std::fmt::Debug for LanceNamespaceDatabase {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("LanceNamespaceDatabase")
|
||||
.field("storage_options", &self.storage_options)
|
||||
.field("read_consistency_interval", &self.read_consistency_interval)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for LanceNamespaceDatabase {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "LanceNamespaceDatabase")
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Database for LanceNamespaceDatabase {
|
||||
async fn list_namespaces(&self, request: DbListNamespacesRequest) -> Result<Vec<String>> {
|
||||
let ns_request = ListNamespacesRequest {
|
||||
id: if request.namespace.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(request.namespace)
|
||||
},
|
||||
page_token: request.page_token,
|
||||
limit: request.limit.map(|l| l as i32),
|
||||
};
|
||||
|
||||
let response = self
|
||||
.namespace
|
||||
.list_namespaces(ns_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to list namespaces: {}", e),
|
||||
})?;
|
||||
|
||||
Ok(response.namespaces)
|
||||
}
|
||||
|
||||
async fn create_namespace(&self, request: DbCreateNamespaceRequest) -> Result<()> {
|
||||
let ns_request = CreateNamespaceRequest {
|
||||
id: if request.namespace.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(request.namespace)
|
||||
},
|
||||
mode: None,
|
||||
properties: None,
|
||||
};
|
||||
|
||||
self.namespace
|
||||
.create_namespace(ns_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to create namespace: {}", e),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn drop_namespace(&self, request: DbDropNamespaceRequest) -> Result<()> {
|
||||
let ns_request = DropNamespaceRequest {
|
||||
id: if request.namespace.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(request.namespace)
|
||||
},
|
||||
mode: None,
|
||||
behavior: None,
|
||||
};
|
||||
|
||||
self.namespace
|
||||
.drop_namespace(ns_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to drop namespace: {}", e),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>> {
|
||||
let ns_request = ListTablesRequest {
|
||||
id: if request.namespace.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(request.namespace)
|
||||
},
|
||||
page_token: request.start_after,
|
||||
limit: request.limit.map(|l| l as i32),
|
||||
};
|
||||
|
||||
let response =
|
||||
self.namespace
|
||||
.list_tables(ns_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to list tables: {}", e),
|
||||
})?;
|
||||
|
||||
Ok(response.tables)
|
||||
}
|
||||
|
||||
async fn create_table(&self, request: DbCreateTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
let mut table_id = request.namespace.clone();
|
||||
table_id.push(request.name.clone());
|
||||
let describe_request = DescribeTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
version: None,
|
||||
};
|
||||
|
||||
let describe_result = self.namespace.describe_table(describe_request).await;
|
||||
|
||||
match request.mode {
|
||||
CreateTableMode::Create => {
|
||||
if describe_result.is_ok() {
|
||||
return Err(Error::TableAlreadyExists {
|
||||
name: request.name.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
CreateTableMode::Overwrite => {
|
||||
if describe_result.is_ok() {
|
||||
// Drop the existing table - must succeed
|
||||
let drop_request = DropTableRequest {
|
||||
id: Some(table_id.clone()),
|
||||
};
|
||||
self.namespace
|
||||
.drop_table(drop_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to drop existing table for overwrite: {}", e),
|
||||
})?;
|
||||
}
|
||||
}
|
||||
CreateTableMode::ExistOk(_) => {
|
||||
if let Ok(response) = describe_result {
|
||||
let location = response.location.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from namespace response".to_string(),
|
||||
})?;
|
||||
|
||||
let listing_db = self
|
||||
.create_listing_database(&request.name, &location, response.storage_options)
|
||||
.await?;
|
||||
|
||||
return listing_db
|
||||
.open_table(OpenTableRequest {
|
||||
name: request.name.clone(),
|
||||
namespace: request.namespace.clone(),
|
||||
index_cache_size: None,
|
||||
lance_read_params: None,
|
||||
})
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut table_id = request.namespace.clone();
|
||||
table_id.push(request.name.clone());
|
||||
|
||||
let create_empty_request = CreateEmptyTableRequest {
|
||||
id: Some(table_id),
|
||||
location: None,
|
||||
properties: if self.storage_options.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(self.storage_options.clone())
|
||||
},
|
||||
};
|
||||
|
||||
let create_empty_response = self
|
||||
.namespace
|
||||
.create_empty_table(create_empty_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to create empty table: {}", e),
|
||||
})?;
|
||||
|
||||
let location = create_empty_response
|
||||
.location
|
||||
.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from create_empty_table response".to_string(),
|
||||
})?;
|
||||
|
||||
let listing_db = self
|
||||
.create_listing_database(
|
||||
&request.name,
|
||||
&location,
|
||||
create_empty_response.storage_options,
|
||||
)
|
||||
.await?;
|
||||
|
||||
listing_db.create_table(request).await
|
||||
}
|
||||
|
||||
async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
let mut table_id = request.namespace.clone();
|
||||
table_id.push(request.name.clone());
|
||||
|
||||
let describe_request = DescribeTableRequest {
|
||||
id: Some(table_id),
|
||||
version: None,
|
||||
};
|
||||
let response = self
|
||||
.namespace
|
||||
.describe_table(describe_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to describe table: {}", e),
|
||||
})?;
|
||||
|
||||
let location = response.location.ok_or_else(|| Error::Runtime {
|
||||
message: "Table location is missing from namespace response".to_string(),
|
||||
})?;
|
||||
|
||||
let listing_db = self
|
||||
.create_listing_database(&request.name, &location, response.storage_options)
|
||||
.await?;
|
||||
|
||||
listing_db.open_table(request).await
|
||||
}
|
||||
|
||||
async fn clone_table(&self, _request: CloneTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
Err(Error::NotSupported {
|
||||
message: "clone_table is not supported for namespace connections".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn rename_table(
|
||||
&self,
|
||||
_cur_name: &str,
|
||||
_new_name: &str,
|
||||
_cur_namespace: &[String],
|
||||
_new_namespace: &[String],
|
||||
) -> Result<()> {
|
||||
Err(Error::NotSupported {
|
||||
message: "rename_table is not supported for namespace connections".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn drop_table(&self, name: &str, namespace: &[String]) -> Result<()> {
|
||||
let mut table_id = namespace.to_vec();
|
||||
table_id.push(name.to_string());
|
||||
|
||||
let drop_request = DropTableRequest { id: Some(table_id) };
|
||||
self.namespace
|
||||
.drop_table(drop_request)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to drop table: {}", e),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn drop_all_tables(&self, namespace: &[String]) -> Result<()> {
|
||||
let tables = self
|
||||
.table_names(TableNamesRequest {
|
||||
namespace: namespace.to_vec(),
|
||||
start_after: None,
|
||||
limit: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
for table in tables {
|
||||
self.drop_table(&table, namespace).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(not(windows))] // TODO: support windows for lance-namespace
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::connect_namespace;
|
||||
use crate::query::ExecutableQuery;
|
||||
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use futures::TryStreamExt;
|
||||
use tempfile::tempdir;
|
||||
|
||||
/// Helper function to create test data
|
||||
fn create_test_data() -> RecordBatchIterator<
|
||||
std::vec::IntoIter<std::result::Result<RecordBatch, arrow_schema::ArrowError>>,
|
||||
> {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("name", DataType::Utf8, false),
|
||||
]));
|
||||
|
||||
let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
|
||||
let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie", "David", "Eve"]);
|
||||
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![Arc::new(id_array), Arc::new(name_array)],
|
||||
)
|
||||
.unwrap();
|
||||
RecordBatchIterator::new(vec![std::result::Result::Ok(batch)].into_iter(), schema)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_connection_simple() {
|
||||
// Test that namespace connections work with simple connect_namespace(impl_type, properties)
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
// This should succeed with directory-based namespace
|
||||
let result = connect_namespace("dir", properties).execute().await;
|
||||
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_connection_with_storage_options() {
|
||||
// Test namespace connections with storage options
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
// This should succeed with directory-based namespace and storage options
|
||||
let result = connect_namespace("dir", properties)
|
||||
.storage_option("timeout", "30s")
|
||||
.execute()
|
||||
.await;
|
||||
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_connection_with_all_options() {
|
||||
use crate::embeddings::MemoryRegistry;
|
||||
use std::time::Duration;
|
||||
|
||||
// Test namespace connections with all configuration options
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let embedding_registry = Arc::new(MemoryRegistry::new());
|
||||
let session = Arc::new(lance::session::Session::default());
|
||||
|
||||
// Test with all options set
|
||||
let result = connect_namespace("dir", properties)
|
||||
.storage_option("timeout", "30s")
|
||||
.storage_options([("cache_size", "1gb"), ("region", "us-east-1")])
|
||||
.read_consistency_interval(Duration::from_secs(5))
|
||||
.embedding_registry(embedding_registry.clone())
|
||||
.session(session.clone())
|
||||
.execute()
|
||||
.await;
|
||||
|
||||
assert!(result.is_ok());
|
||||
|
||||
let conn = result.unwrap();
|
||||
|
||||
// Verify embedding registry is set correctly
|
||||
assert!(std::ptr::eq(
|
||||
conn.embedding_registry() as *const _,
|
||||
embedding_registry.as_ref() as *const _
|
||||
));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_create_table_basic() {
|
||||
// Setup: Create a temporary directory for the namespace
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
// Connect to namespace using DirectoryNamespace
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let conn = connect_namespace("dir", properties)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to connect to namespace");
|
||||
|
||||
// Test: Create a table
|
||||
let test_data = create_test_data();
|
||||
let table = conn
|
||||
.create_table("test_table", test_data)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create table");
|
||||
|
||||
// Verify: Table was created and can be queried
|
||||
let results = table
|
||||
.query()
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to query table")
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.expect("Failed to collect results");
|
||||
|
||||
assert_eq!(results.len(), 1);
|
||||
assert_eq!(results[0].num_rows(), 5);
|
||||
|
||||
// Verify: Table appears in table_names
|
||||
let table_names = conn
|
||||
.table_names()
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to list tables");
|
||||
assert!(table_names.contains(&"test_table".to_string()));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_describe_table() {
|
||||
// Setup: Create a temporary directory for the namespace
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
// Connect to namespace
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let conn = connect_namespace("dir", properties)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to connect to namespace");
|
||||
|
||||
// Create a table first
|
||||
let test_data = create_test_data();
|
||||
let _table = conn
|
||||
.create_table("describe_test", test_data)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create table");
|
||||
|
||||
// Test: Open the table (which internally uses describe_table)
|
||||
let opened_table = conn
|
||||
.open_table("describe_test")
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to open table");
|
||||
|
||||
// Verify: Can query the opened table
|
||||
let results = opened_table
|
||||
.query()
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to query table")
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.expect("Failed to collect results");
|
||||
|
||||
assert_eq!(results.len(), 1);
|
||||
assert_eq!(results[0].num_rows(), 5);
|
||||
|
||||
// Verify schema matches
|
||||
let schema = opened_table.schema().await.expect("Failed to get schema");
|
||||
assert_eq!(schema.fields.len(), 2);
|
||||
assert_eq!(schema.field(0).name(), "id");
|
||||
assert_eq!(schema.field(1).name(), "name");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_create_table_overwrite_mode() {
|
||||
// Setup: Create a temporary directory for the namespace
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let conn = connect_namespace("dir", properties)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to connect to namespace");
|
||||
|
||||
// Create initial table with 5 rows
|
||||
let test_data1 = create_test_data();
|
||||
let _table1 = conn
|
||||
.create_table("overwrite_test", test_data1)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create table");
|
||||
|
||||
// Create new data with 3 rows
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("name", DataType::Utf8, false),
|
||||
]));
|
||||
let id_array = Int32Array::from(vec![10, 20, 30]);
|
||||
let name_array = StringArray::from(vec!["New1", "New2", "New3"]);
|
||||
let test_data2 = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![Arc::new(id_array), Arc::new(name_array)],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Test: Overwrite the table
|
||||
let table2 = conn
|
||||
.create_table(
|
||||
"overwrite_test",
|
||||
RecordBatchIterator::new(
|
||||
vec![std::result::Result::Ok(test_data2)].into_iter(),
|
||||
schema,
|
||||
),
|
||||
)
|
||||
.mode(CreateTableMode::Overwrite)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to overwrite table");
|
||||
|
||||
// Verify: Table has new data (3 rows instead of 5)
|
||||
let results = table2
|
||||
.query()
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to query table")
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.expect("Failed to collect results");
|
||||
|
||||
assert_eq!(results.len(), 1);
|
||||
assert_eq!(results[0].num_rows(), 3);
|
||||
|
||||
// Verify the data is actually the new data
|
||||
let id_col = results[0]
|
||||
.column(0)
|
||||
.as_any()
|
||||
.downcast_ref::<Int32Array>()
|
||||
.unwrap();
|
||||
assert_eq!(id_col.value(0), 10);
|
||||
assert_eq!(id_col.value(1), 20);
|
||||
assert_eq!(id_col.value(2), 30);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_create_table_exist_ok_mode() {
|
||||
// Setup: Create a temporary directory for the namespace
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let conn = connect_namespace("dir", properties)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to connect to namespace");
|
||||
|
||||
// Create initial table with test data
|
||||
let test_data1 = create_test_data();
|
||||
let _table1 = conn
|
||||
.create_table("exist_ok_test", test_data1)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create table");
|
||||
|
||||
// Try to create again with exist_ok mode
|
||||
let test_data2 = create_test_data();
|
||||
let table2 = conn
|
||||
.create_table("exist_ok_test", test_data2)
|
||||
.mode(CreateTableMode::exist_ok(|req| req))
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed with exist_ok mode");
|
||||
|
||||
// Verify: Table still has original data (5 rows)
|
||||
let results = table2
|
||||
.query()
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to query table")
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.expect("Failed to collect results");
|
||||
|
||||
assert_eq!(results.len(), 1);
|
||||
assert_eq!(results[0].num_rows(), 5);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_create_multiple_tables() {
|
||||
// Setup: Create a temporary directory for the namespace
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let conn = connect_namespace("dir", properties)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to connect to namespace");
|
||||
|
||||
// Create first table
|
||||
let test_data1 = create_test_data();
|
||||
let _table1 = conn
|
||||
.create_table("table1", test_data1)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create first table");
|
||||
|
||||
// Create second table
|
||||
let test_data2 = create_test_data();
|
||||
let _table2 = conn
|
||||
.create_table("table2", test_data2)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create second table");
|
||||
|
||||
// Verify: Both tables appear in table list
|
||||
let table_names = conn
|
||||
.table_names()
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to list tables");
|
||||
|
||||
assert!(table_names.contains(&"table1".to_string()));
|
||||
assert!(table_names.contains(&"table2".to_string()));
|
||||
|
||||
// Verify: Can open both tables
|
||||
let opened_table1 = conn
|
||||
.open_table("table1")
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to open table1");
|
||||
|
||||
let opened_table2 = conn
|
||||
.open_table("table2")
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to open table2");
|
||||
|
||||
// Verify both tables work
|
||||
let count1 = opened_table1
|
||||
.count_rows(None)
|
||||
.await
|
||||
.expect("Failed to count rows in table1");
|
||||
assert_eq!(count1, 5);
|
||||
|
||||
let count2 = opened_table2
|
||||
.count_rows(None)
|
||||
.await
|
||||
.expect("Failed to count rows in table2");
|
||||
assert_eq!(count2, 5);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_table_not_found() {
|
||||
// Setup: Create a temporary directory for the namespace
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let conn = connect_namespace("dir", properties)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to connect to namespace");
|
||||
|
||||
// Test: Try to open a non-existent table
|
||||
let result = conn.open_table("non_existent_table").execute().await;
|
||||
|
||||
// Verify: Should return an error
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_drop_table() {
|
||||
// Setup: Create a temporary directory for the namespace
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let conn = connect_namespace("dir", properties)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to connect to namespace");
|
||||
|
||||
// Create a table first
|
||||
let test_data = create_test_data();
|
||||
let _table = conn
|
||||
.create_table("drop_test", test_data)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create table");
|
||||
|
||||
// Verify table exists
|
||||
let table_names_before = conn
|
||||
.table_names()
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to list tables");
|
||||
assert!(table_names_before.contains(&"drop_test".to_string()));
|
||||
|
||||
// Test: Drop the table
|
||||
conn.drop_table("drop_test", &[])
|
||||
.await
|
||||
.expect("Failed to drop table");
|
||||
|
||||
// Verify: Table no longer exists
|
||||
let table_names_after = conn
|
||||
.table_names()
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to list tables");
|
||||
assert!(!table_names_after.contains(&"drop_test".to_string()));
|
||||
|
||||
// Verify: Cannot open dropped table
|
||||
let open_result = conn.open_table("drop_test").execute().await;
|
||||
assert!(open_result.is_err());
|
||||
}
|
||||
}
|
||||
@@ -8,7 +8,7 @@
|
||||
//! values
|
||||
use std::cmp::max;
|
||||
|
||||
use lance::table::format::{Index, Manifest};
|
||||
use lance::table::format::{IndexMetadata, Manifest};
|
||||
|
||||
use crate::DistanceType;
|
||||
|
||||
@@ -19,7 +19,7 @@ pub struct VectorIndex {
|
||||
}
|
||||
|
||||
impl VectorIndex {
|
||||
pub fn new_from_format(manifest: &Manifest, index: &Index) -> Self {
|
||||
pub fn new_from_format(manifest: &Manifest, index: &IndexMetadata) -> Self {
|
||||
let fields = index
|
||||
.fields
|
||||
.iter()
|
||||
|
||||
@@ -206,13 +206,14 @@ pub mod query;
|
||||
pub mod remote;
|
||||
pub mod rerankers;
|
||||
pub mod table;
|
||||
pub mod test_connection;
|
||||
pub mod utils;
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub use connection::Connection;
|
||||
pub use connection::{ConnectNamespaceBuilder, Connection};
|
||||
pub use error::{Error, Result};
|
||||
use lance_linalg::distance::DistanceType as LanceDistanceType;
|
||||
pub use table::Table;
|
||||
@@ -289,6 +290,8 @@ impl Display for DistanceType {
|
||||
|
||||
/// Connect to a database
|
||||
pub use connection::connect;
|
||||
/// Connect to a namespace-backed database
|
||||
pub use connection::connect_namespace;
|
||||
|
||||
/// Re-export Lance Session and ObjectStoreRegistry for custom session creation
|
||||
pub use lance::session::Session;
|
||||
|
||||
@@ -647,7 +647,7 @@ impl From<StorageOptions> for RemoteOptions {
|
||||
let mut filtered = HashMap::new();
|
||||
for opt in supported_opts {
|
||||
if let Some(v) = options.0.get(opt) {
|
||||
filtered.insert(opt.to_string(), v.to_string());
|
||||
filtered.insert(opt.to_string(), v.clone());
|
||||
}
|
||||
}
|
||||
Self::new(filtered)
|
||||
|
||||
@@ -1452,6 +1452,14 @@ struct MergeInsertRequest {
|
||||
when_not_matched_insert_all: bool,
|
||||
when_not_matched_by_source_delete: bool,
|
||||
when_not_matched_by_source_delete_filt: Option<String>,
|
||||
// For backwards compatibility, only serialize use_index when it's false
|
||||
// (the default is true)
|
||||
#[serde(skip_serializing_if = "is_true")]
|
||||
use_index: bool,
|
||||
}
|
||||
|
||||
fn is_true(b: &bool) -> bool {
|
||||
*b
|
||||
}
|
||||
|
||||
impl TryFrom<MergeInsertBuilder> for MergeInsertRequest {
|
||||
@@ -1476,6 +1484,8 @@ impl TryFrom<MergeInsertBuilder> for MergeInsertRequest {
|
||||
when_not_matched_insert_all: value.when_not_matched_insert_all,
|
||||
when_not_matched_by_source_delete: value.when_not_matched_by_source_delete,
|
||||
when_not_matched_by_source_delete_filt: value.when_not_matched_by_source_delete_filt,
|
||||
// Only serialize use_index when it's false for backwards compatibility
|
||||
use_index: value.use_index,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1942,6 +1952,7 @@ mod tests {
|
||||
assert_eq!(params["when_not_matched_by_source_delete"], "false");
|
||||
assert!(!params.contains_key("when_matched_update_all_filt"));
|
||||
assert!(!params.contains_key("when_not_matched_by_source_delete_filt"));
|
||||
assert!(!params.contains_key("use_index"));
|
||||
|
||||
if old_server {
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
|
||||
@@ -1383,30 +1383,35 @@ impl Table {
|
||||
}
|
||||
|
||||
pub struct NativeTags {
|
||||
inner: LanceTags,
|
||||
dataset: dataset::DatasetConsistencyWrapper,
|
||||
}
|
||||
#[async_trait]
|
||||
impl Tags for NativeTags {
|
||||
async fn list(&self) -> Result<HashMap<String, TagContents>> {
|
||||
Ok(self.inner.list().await?)
|
||||
let dataset = self.dataset.get().await?;
|
||||
Ok(dataset.tags().list().await?)
|
||||
}
|
||||
|
||||
async fn get_version(&self, tag: &str) -> Result<u64> {
|
||||
Ok(self.inner.get_version(tag).await?)
|
||||
let dataset = self.dataset.get().await?;
|
||||
Ok(dataset.tags().get_version(tag).await?)
|
||||
}
|
||||
|
||||
async fn create(&mut self, tag: &str, version: u64) -> Result<()> {
|
||||
self.inner.create(tag, version).await?;
|
||||
let dataset = self.dataset.get().await?;
|
||||
dataset.tags().create(tag, version).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn delete(&mut self, tag: &str) -> Result<()> {
|
||||
self.inner.delete(tag).await?;
|
||||
let dataset = self.dataset.get().await?;
|
||||
dataset.tags().delete(tag).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn update(&mut self, tag: &str, version: u64) -> Result<()> {
|
||||
self.inner.update(tag, version).await?;
|
||||
let dataset = self.dataset.get().await?;
|
||||
dataset.tags().update(tag, version).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1780,13 +1785,13 @@ impl NativeTable {
|
||||
BuiltinIndexType::BTree,
|
||||
)))
|
||||
} else {
|
||||
return Err(Error::InvalidInput {
|
||||
Err(Error::InvalidInput {
|
||||
message: format!(
|
||||
"there are no indices supported for the field `{}` with the data type {}",
|
||||
field.name(),
|
||||
field.data_type()
|
||||
),
|
||||
});
|
||||
})?
|
||||
}
|
||||
}
|
||||
Index::BTree(_) => {
|
||||
@@ -1976,6 +1981,8 @@ impl NativeTable {
|
||||
/// Delete keys from the config
|
||||
pub async fn delete_config_keys(&self, delete_keys: &[&str]) -> Result<()> {
|
||||
let mut dataset = self.dataset.get_mut().await?;
|
||||
// TODO: update this when we implement metadata APIs
|
||||
#[allow(deprecated)]
|
||||
dataset.delete_config_keys(delete_keys).await?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -1986,6 +1993,8 @@ impl NativeTable {
|
||||
upsert_values: impl IntoIterator<Item = (String, String)>,
|
||||
) -> Result<()> {
|
||||
let mut dataset = self.dataset.get_mut().await?;
|
||||
// TODO: update this when we implement metadata APIs
|
||||
#[allow(deprecated)]
|
||||
dataset.replace_schema_metadata(upsert_values).await?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -2395,6 +2404,7 @@ impl BaseTable for NativeTable {
|
||||
} else {
|
||||
builder.when_not_matched_by_source(WhenNotMatchedBySource::Keep);
|
||||
}
|
||||
builder.use_index(params.use_index);
|
||||
|
||||
let future = if let Some(timeout) = params.timeout {
|
||||
// The default retry timeout is 30s, so we pass the full timeout down
|
||||
@@ -2435,10 +2445,8 @@ impl BaseTable for NativeTable {
|
||||
}
|
||||
|
||||
async fn tags(&self) -> Result<Box<dyn Tags + '_>> {
|
||||
let dataset = self.dataset.get().await?;
|
||||
|
||||
Ok(Box::new(NativeTags {
|
||||
inner: dataset.tags.clone(),
|
||||
dataset: self.dataset.clone(),
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -2755,6 +2763,7 @@ mod tests {
        RecordBatchReader, StringArray, TimestampMillisecondArray, TimestampNanosecondArray,
        UInt32Array,
    };
    use arrow_array::{BinaryArray, LargeBinaryArray};
    use arrow_data::ArrayDataBuilder;
    use arrow_schema::{DataType, Field, Schema, TimeUnit};
    use futures::TryStreamExt;
@@ -2902,6 +2911,38 @@ mod tests {
        );
    }

    #[tokio::test]
    async fn test_merge_insert_use_index() {
        let tmp_dir = tempdir().unwrap();
        let uri = tmp_dir.path().to_str().unwrap();
        let conn = connect(uri).execute().await.unwrap();

        // Create a dataset with i=0..10
        let batches = merge_insert_test_batches(0, 0);
        let table = conn
            .create_table("my_table", batches)
            .execute()
            .await
            .unwrap();
        assert_eq!(table.count_rows(None).await.unwrap(), 10);

        // Test use_index=true (default behavior)
        let new_batches = Box::new(merge_insert_test_batches(5, 1));
        let mut merge_insert_builder = table.merge_insert(&["i"]);
        merge_insert_builder.when_not_matched_insert_all();
        merge_insert_builder.use_index(true);
        merge_insert_builder.execute(new_batches).await.unwrap();
        assert_eq!(table.count_rows(None).await.unwrap(), 15);

        // Test use_index=false (force table scan)
        let new_batches = Box::new(merge_insert_test_batches(15, 2));
        let mut merge_insert_builder = table.merge_insert(&["i"]);
        merge_insert_builder.when_not_matched_insert_all();
        merge_insert_builder.use_index(false);
        merge_insert_builder.execute(new_batches).await.unwrap();
        assert_eq!(table.count_rows(None).await.unwrap(), 25);
    }

    #[tokio::test]
    async fn test_add_overwrite() {
        let tmp_dir = tempdir().unwrap();
@@ -3688,6 +3729,10 @@ mod tests {
        let schema = Arc::new(Schema::new(vec![
            Field::new("id", DataType::Int32, false),
            Field::new("category", DataType::Utf8, true),
            Field::new("large_category", DataType::LargeUtf8, true),
            Field::new("is_active", DataType::Boolean, true),
            Field::new("data", DataType::Binary, true),
            Field::new("large_data", DataType::LargeBinary, true),
        ]));

        let batch = RecordBatch::try_new(
@@ -3697,6 +3742,16 @@ mod tests {
                Arc::new(StringArray::from_iter_values(
                    (0..100).map(|i| format!("category_{}", i % 5)),
                )),
                Arc::new(LargeStringArray::from_iter_values(
                    (0..100).map(|i| format!("large_category_{}", i % 5)),
                )),
                Arc::new(BooleanArray::from_iter((0..100).map(|i| Some(i % 2 == 0)))),
                Arc::new(BinaryArray::from_iter_values(
                    (0_u32..100).map(|i| i.to_le_bytes()),
                )),
                Arc::new(LargeBinaryArray::from_iter_values(
                    (0_u32..100).map(|i| i.to_le_bytes()),
                )),
            ],
        )
        .unwrap();
@@ -3717,12 +3772,58 @@ mod tests {
            .await
            .unwrap();

        // Create bitmap index on the "is_active" column
        table
            .create_index(&["is_active"], Index::Bitmap(Default::default()))
            .execute()
            .await
            .unwrap();

        // Create bitmap index on the "data" column
        table
            .create_index(&["data"], Index::Bitmap(Default::default()))
            .execute()
            .await
            .unwrap();

        // Create bitmap index on the "large_data" column
        table
            .create_index(&["large_data"], Index::Bitmap(Default::default()))
            .execute()
            .await
            .unwrap();

        // Create bitmap index on the "large_category" column
        table
            .create_index(&["large_category"], Index::Bitmap(Default::default()))
            .execute()
            .await
            .unwrap();

        // Verify the index was created
        let index_configs = table.list_indices().await.unwrap();
        assert_eq!(index_configs.len(), 1);
        let index = index_configs.into_iter().next().unwrap();
        assert_eq!(index_configs.len(), 5);

        let mut configs_iter = index_configs.into_iter();
        let index = configs_iter.next().unwrap();
        assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
        assert_eq!(index.columns, vec!["category".to_string()]);

        let index = configs_iter.next().unwrap();
        assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
        assert_eq!(index.columns, vec!["is_active".to_string()]);

        let index = configs_iter.next().unwrap();
        assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
        assert_eq!(index.columns, vec!["data".to_string()]);

        let index = configs_iter.next().unwrap();
        assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
        assert_eq!(index.columns, vec!["large_data".to_string()]);

        let index = configs_iter.next().unwrap();
        assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
        assert_eq!(index.columns, vec!["large_category".to_string()]);
    }

    #[tokio::test]
@@ -172,7 +172,7 @@ impl TableProvider for BaseTableAdapter {
        if let Some(projection) = projection {
            let field_names = projection
                .iter()
                .map(|i| self.schema.field(*i).name().to_string())
                .map(|i| self.schema.field(*i).name().clone())
                .collect();
            query.select = Select::Columns(field_names);
        }
@@ -98,8 +98,9 @@ impl DatasetRef {
            }
            Self::TimeTravel { dataset, version } => {
                let should_checkout = match &target_ref {
                    refs::Ref::Version(target_ver) => version != target_ver,
                    refs::Ref::Tag(_) => true, // Always checkout for tags
                    refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
                    refs::Ref::Version(_, None) => true, // No specific version, always checkout
                    refs::Ref::Tag(_) => true, // Always checkout for tags
                };

                if should_checkout {
@@ -22,6 +22,7 @@ pub struct MergeInsertBuilder {
    pub(crate) when_not_matched_by_source_delete: bool,
    pub(crate) when_not_matched_by_source_delete_filt: Option<String>,
    pub(crate) timeout: Option<Duration>,
    pub(crate) use_index: bool,
}

impl MergeInsertBuilder {
@@ -35,6 +36,7 @@ impl MergeInsertBuilder {
            when_not_matched_by_source_delete: false,
            when_not_matched_by_source_delete_filt: None,
            timeout: None,
            use_index: true,
        }
    }
@@ -101,6 +103,19 @@ impl MergeInsertBuilder {
        self
    }

    /// Controls whether to use indexes for the merge operation.
    ///
    /// When set to `true` (the default), the operation will use an index if available
    /// on the join key for improved performance. When set to `false`, it forces a full
    /// table scan even if an index exists. This can be useful for benchmarking or when
    /// the query optimizer chooses a suboptimal path.
    ///
    /// If not set, defaults to `true` (use index if available).
    pub fn use_index(&mut self, use_index: bool) -> &mut Self {
        self.use_index = use_index;
        self
    }

    /// Executes the merge insert operation
    ///
    /// Returns version and statistics about the merge operation including the number of rows
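Putting the new builder option together with the doc comment above, a caller that wants to force the non-indexed path could look roughly like the sketch below; `table` and `new_data` are assumed to exist, and the join key "i" simply mirrors the test earlier in this diff.

// Hedged sketch of the public merge-insert flow with the index disabled.
let mut builder = table.merge_insert(&["i"]);
builder.when_not_matched_insert_all();
builder.use_index(false); // force a full table scan even if "i" is indexed
builder.execute(Box::new(new_data)).await?;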
126 rust/lancedb/src/test_connection.rs Normal file
@@ -0,0 +1,126 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

//! Functions for testing connections.

#[cfg(test)]
pub mod test_utils {
    use regex::Regex;
    use std::env;
    use std::io::{BufRead, BufReader};
    use std::process::{Child, ChildStdout, Command, Stdio};

    use crate::{connect, Connection};
    use anyhow::{bail, Result};
    use tempfile::{tempdir, TempDir};

    pub struct TestConnection {
        pub uri: String,
        pub connection: Connection,
        _temp_dir: Option<TempDir>,
        _process: Option<TestProcess>,
    }

    struct TestProcess {
        child: Child,
    }

    impl Drop for TestProcess {
        #[allow(unused_must_use)]
        fn drop(&mut self) {
            self.child.kill();
        }
    }

    pub async fn new_test_connection() -> Result<TestConnection> {
        match env::var("CREATE_LANCEDB_TEST_CONNECTION_SCRIPT") {
            Ok(script_path) => new_remote_connection(&script_path).await,
            Err(_e) => new_local_connection().await,
        }
    }

    async fn new_remote_connection(script_path: &str) -> Result<TestConnection> {
        let temp_dir = tempdir()?;
        let data_path = temp_dir.path().to_str().unwrap().to_string();
        let child_result = Command::new(script_path)
            .stdin(Stdio::null())
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .arg(data_path.clone())
            .spawn();
        if child_result.is_err() {
            bail!(format!(
                "Unable to run {}: {:?}",
                script_path,
                child_result.err()
            ));
        }
        let mut process = TestProcess {
            child: child_result.unwrap(),
        };
        let stdout = BufReader::new(process.child.stdout.take().unwrap());
        let port = read_process_port(stdout)?;
        let uri = "db://test";
        let host_override = format!("http://localhost:{}", port);
        let connection = create_new_connection(uri, &host_override).await?;
        Ok(TestConnection {
            uri: uri.to_string(),
            connection,
            _temp_dir: Some(temp_dir),
            _process: Some(process),
        })
    }

    fn read_process_port(mut stdout: BufReader<ChildStdout>) -> Result<String> {
        let mut line = String::new();
        let re = Regex::new(r"Query node now listening on 0.0.0.0:(.*)").unwrap();
        loop {
            let result = stdout.read_line(&mut line);
            if let Err(err) = result {
                bail!(format!(
                    "read_process_port: error while reading from process output: {}",
                    err
                ));
            } else if result.unwrap() == 0 {
                bail!("read_process_port: hit EOF before reading port from process output.");
            }
            if re.is_match(&line) {
                let caps = re.captures(&line).unwrap();
                return Ok(caps[1].to_string());
            }
        }
    }

    #[cfg(feature = "remote")]
    async fn create_new_connection(
        uri: &str,
        host_override: &str,
    ) -> crate::error::Result<Connection> {
        connect(uri)
            .region("us-east-1")
            .api_key("sk_localtest")
            .host_override(host_override)
            .execute()
            .await
    }

    #[cfg(not(feature = "remote"))]
    async fn create_new_connection(
        _uri: &str,
        _host_override: &str,
    ) -> crate::error::Result<Connection> {
        panic!("remote feature not supported");
    }

    async fn new_local_connection() -> Result<TestConnection> {
        let temp_dir = tempdir()?;
        let uri = temp_dir.path().to_str().unwrap();
        let connection = connect(uri).execute().await?;
        Ok(TestConnection {
            uri: uri.to_string(),
            connection,
            _temp_dir: Some(temp_dir),
            _process: None,
        })
    }
}
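The new module picks a backend at runtime: if CREATE_LANCEDB_TEST_CONNECTION_SCRIPT is set it spawns that script, scrapes the listening port from its stdout, and connects through the host override; otherwise it falls back to a local temp-directory database. A hedged sketch of a test consuming it (the test name and the empty-table assertion are illustrative assumptions):

#[tokio::test]
async fn smoke_test_connection() -> anyhow::Result<()> {
    // Works against either the local or the scripted remote backend.
    let test_conn = test_utils::new_test_connection().await?;
    let tables = test_conn.connection.table_names().execute().await?;
    assert!(tables.is_empty()); // a fresh database starts with no tables
    Ok(())
}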
@@ -174,7 +174,7 @@ pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result
            ),
        })
    } else {
        Ok(candidates[0].to_string())
        Ok(candidates[0].clone())
    }
}
@@ -195,7 +195,15 @@ pub fn supported_btree_data_type(dtype: &DataType) -> bool {
}

pub fn supported_bitmap_data_type(dtype: &DataType) -> bool {
    dtype.is_integer() || matches!(dtype, DataType::Utf8)
    dtype.is_integer()
        || matches!(
            dtype,
            DataType::Utf8
                | DataType::LargeUtf8
                | DataType::Binary
                | DataType::LargeBinary
                | DataType::Boolean
        )
}
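The widened predicate now admits the same large-string, binary, and boolean columns that the new bitmap-index test exercises. A small sketch of what it accepts and rejects, using `arrow_schema::DataType` directly:

use arrow_schema::DataType;

assert!(supported_bitmap_data_type(&DataType::LargeUtf8));
assert!(supported_bitmap_data_type(&DataType::Boolean));
assert!(!supported_bitmap_data_type(&DataType::Float64)); // floats remain unsupported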
pub fn supported_label_list_data_type(dtype: &DataType) -> bool {