Mirror of https://github.com/lancedb/lancedb.git (synced 2026-01-03 10:22:56 +00:00)

Compare commits: python-v0. ... python-v0. (61 commits)
Commits in this comparison (SHA1):

143184c0ae, dadb042978, 5a19cf15a6, 3dcec724b7, 86a6bb9fcb, b59d1007d3, 56a16b1728, b7afed9beb,
5cbbaa2e4a, 1b6bd2498e, 285da9db1d, ad8306c96b, 3594538509, 917aabd077, 5ec12c9971, d0ce489b21,
d7e02c8181, 70958f6366, 1ac745eb18, 1357fe8aa1, 0d78929893, 9e2a68541e, 1aa0fd16e7, fec2a05629,
79a1cd60ee, 88807a59a4, e0e7e01ea8, a416ebc11d, f941054baf, 1a81c46505, 82b25a71e9, 13c613d45f,
e07389a36c, e7e9e80b1d, 247fb58400, 504bdc471c, d617cdef4a, 356d7046fd, 48e5caabda, d6cc68f671,
55eacfa685, 222e3264ab, 13505026cb, b0800b4b71, 1befebf614, 1ab60fae7f, e921c90c1b, 05a4ea646a,
ebbeeff4e0, 407ca53f92, ff71d7e552, 2261eb95a0, 5b397e410b, b5a39bffec, 5e1e9add07, 97e9938dfe,
1d4b92e01e, 4c9fc3044b, 0ebc8d45a8, f7d78c3420, 6ea6884260

@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.22.1-beta.0"
+current_version = "0.22.2"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.

.github/actions/create-failure-issue/action.yml (new file, 45 lines)
@@ -0,0 +1,45 @@
name: Create Failure Issue
description: Creates a GitHub issue if any jobs in the workflow failed

inputs:
  job-results:
    description: 'JSON string of job results from needs context'
    required: true
  workflow-name:
    description: 'Name of the workflow'
    required: true

runs:
  using: composite
  steps:
    - name: Check for failures and create issue
      shell: bash
      env:
        JOB_RESULTS: ${{ inputs.job-results }}
        WORKFLOW_NAME: ${{ inputs.workflow-name }}
        RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        GH_TOKEN: ${{ github.token }}
      run: |
        # Check if any job failed
        if echo "$JOB_RESULTS" | jq -e 'to_entries | any(.value.result == "failure")' > /dev/null; then
          echo "Detected job failures, creating issue..."

          # Extract failed job names
          FAILED_JOBS=$(echo "$JOB_RESULTS" | jq -r 'to_entries | map(select(.value.result == "failure")) | map(.key) | join(", ")')

          # Create issue with workflow name, failed jobs, and run URL
          gh issue create \
            --title "$WORKFLOW_NAME Failed ($FAILED_JOBS)" \
            --body "The workflow **$WORKFLOW_NAME** failed during execution.

        **Failed jobs:** $FAILED_JOBS

        **Run URL:** $RUN_URL

        Please investigate the failed jobs and address any issues." \
            --label "ci"

          echo "Issue created successfully"
        else
          echo "No job failures detected, skipping issue creation"
        fi

.github/workflows/cargo-publish.yml (14 lines changed)
@@ -38,3 +38,17 @@ jobs:
       - name: Publish the package
         run: |
           cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [build]
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}

.github/workflows/docs.yml (3 lines changed)
@@ -56,8 +56,9 @@ jobs:
         with:
           node-version: 20
           cache: 'npm'
+          cache-dependency-path: docs/package-lock.json
       - name: Install node dependencies
-        working-directory: node
+        working-directory: nodejs
         run: |
           sudo apt update
           sudo apt install -y protobuf-compiler libssl-dev

.github/workflows/docs_test.yml (4 lines changed)
@@ -24,7 +24,8 @@ env:
 jobs:
   test-python:
     name: Test doc python code
-    runs-on: ubuntu-24.04
+    runs-on: warp-ubuntu-2204-x64-8x
+    timeout-minutes: 60
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -48,7 +49,6 @@ jobs:
         uses: swatinem/rust-cache@v2
       - name: Build Python
         working-directory: docs/test
-        timeout-minutes: 60
         run:
           python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r requirements.txt
       - name: Create test files

.github/workflows/java-publish.yml (15 lines changed)
@@ -43,7 +43,6 @@ jobs:
       - uses: Swatinem/rust-cache@v2
       - uses: actions-rust-lang/setup-rust-toolchain@v1
         with:
-          toolchain: "1.81.0"
           cache-workspaces: "./java/core/lancedb-jni"
       # Disable full debug symbol generation to speed up CI build and keep memory down
       # "1" means line tables only, which is useful for panic tracebacks.
@@ -112,3 +111,17 @@ jobs:
         env:
           SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
           SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [linux-arm64, linux-x86, macos-arm64]
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}

.github/workflows/nodejs.yml (3 lines changed)
@@ -6,6 +6,7 @@ on:
       - main
   pull_request:
     paths:
+      - Cargo.toml
       - nodejs/**
       - .github/workflows/nodejs.yml
       - docker-compose.yml
@@ -116,7 +117,7 @@ jobs:
           set -e
           npm ci
           npm run docs
-          if ! git diff --exit-code -- . ':(exclude)Cargo.lock'; then
+          if ! git diff --exit-code -- ../ ':(exclude)Cargo.lock'; then
             echo "Docs need to be updated"
             echo "Run 'npm run docs', fix any warnings, and commit the changes."
             exit 1

.github/workflows/npm-publish.yml (14 lines changed)
@@ -365,3 +365,17 @@ jobs:
             ARGS="$ARGS --tag preview"
           fi
           npm publish $ARGS
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [build-lancedb, test-lancedb, publish]
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}

.github/workflows/pypi-publish.yml (18 lines changed)
@@ -56,7 +56,7 @@ jobs:
       pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
       fury_token: ${{ secrets.FURY_TOKEN }}
   mac:
-    timeout-minutes: 60
+    timeout-minutes: 90
     runs-on: ${{ matrix.config.runner }}
     strategy:
       matrix:
@@ -64,7 +64,7 @@ jobs:
         - target: x86_64-apple-darwin
           runner: macos-13
         - target: aarch64-apple-darwin
-          runner: macos-14
+          runner: warp-macos-14-arm64-6x
     env:
       MACOSX_DEPLOYMENT_TARGET: 10.15
     steps:
@@ -173,3 +173,17 @@ jobs:
         generate_release_notes: false
         name: Python LanceDB v${{ steps.extract_version.outputs.version }}
         body: ${{ steps.python_release_notes.outputs.changelog }}
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [linux, mac, windows]
+    permissions:
+      contents: read
+      issues: write
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}

.github/workflows/python.yml (1 line changed)
@@ -6,6 +6,7 @@ on:
       - main
   pull_request:
     paths:
+      - Cargo.toml
      - python/**
      - .github/workflows/python.yml

.github/workflows/rust.yml (13 lines changed)
@@ -96,6 +96,7 @@ jobs:
       # Need up-to-date compilers for kernels
       CC: clang-18
       CXX: clang++-18
+      GH_TOKEN: ${{ secrets.SOPHON_READ_TOKEN }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -117,15 +118,17 @@ jobs:
           sudo chmod 600 /swapfile
           sudo mkswap /swapfile
           sudo swapon /swapfile
-      - name: Start S3 integration test environment
-        working-directory: .
-        run: docker compose up --detach --wait
       - name: Build
         run: cargo build --all-features --tests --locked --examples
-      - name: Run tests
-        run: cargo test --all-features --locked
+      - name: Run feature tests
+        run: make -C ./lancedb feature-tests
       - name: Run examples
         run: cargo run --example simple --locked
+      - name: Run remote tests
+        # Running this requires access to secrets, so skip if this is
+        # a PR from a fork.
+        if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
+        run: make -C ./lancedb remote-tests
+
   macos:
     timeout-minutes: 30

.github/workflows/trigger-vectordb-recipes.yml (file deleted, 26 lines)
@@ -1,26 +0,0 @@
name: Trigger vectordb-recipers workflow
on:
  push:
    branches: [ main ]
  pull_request:
    paths:
      - .github/workflows/trigger-vectordb-recipes.yml
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Trigger vectordb-recipes workflow
        uses: actions/github-script@v6
        with:
          github-token: ${{ secrets.VECTORDB_RECIPES_ACTION_TOKEN }}
          script: |
            const result = await github.rest.actions.createWorkflowDispatch({
              owner: 'lancedb',
              repo: 'vectordb-recipes',
              workflow_id: 'examples-test.yml',
              ref: 'main'
            });
            console.log(result);

Cargo.lock (generated, 1307 lines changed): file diff suppressed because it is too large.

Cargo.toml (68 lines changed)
@@ -15,30 +15,34 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"
 
 [workspace.dependencies]
-lance = { "version" = "=0.35.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-io = { "version" = "=0.35.0", default-features = false, "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-index = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-linalg = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-table = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-testing = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-datafusion = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-encoding = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
+lance = { "version" = "=0.38.2", default-features = false, "features" = ["dynamodb"], "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-core = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-datagen = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-file = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-io = { "version" = "=0.38.2", default-features = false, "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-index = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-linalg = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-table = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-testing = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-datafusion = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-encoding = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-namespace = "0.0.18"
+ahash = "0.8"
 # Note that this one does not include pyarrow
-arrow = { version = "55.1", optional = false }
-arrow-array = "55.1"
-arrow-data = "55.1"
-arrow-ipc = "55.1"
-arrow-ord = "55.1"
-arrow-schema = "55.1"
-arrow-arith = "55.1"
-arrow-cast = "55.1"
+arrow = { version = "56.2", optional = false }
+arrow-array = "56.2"
+arrow-data = "56.2"
+arrow-ipc = "56.2"
+arrow-ord = "56.2"
+arrow-schema = "56.2"
+arrow-cast = "56.2"
 async-trait = "0"
-datafusion = { version = "49.0", default-features = false }
-datafusion-catalog = "49.0"
-datafusion-common = { version = "49.0", default-features = false }
-datafusion-execution = "49.0"
-datafusion-expr = "49.0"
-datafusion-physical-plan = "49.0"
+datafusion = { version = "50.1", default-features = false }
+datafusion-catalog = "50.1"
+datafusion-common = { version = "50.1", default-features = false }
+datafusion-execution = "50.1"
+datafusion-expr = "50.1"
+datafusion-physical-plan = "50.1"
 env_logger = "0.11"
 half = { "version" = "2.6.0", default-features = false, features = [
     "num-traits",
@@ -48,18 +52,26 @@ log = "0.4"
 moka = { version = "0.12", features = ["future"] }
 object_store = "0.12.0"
 pin-project = "1.0.7"
+rand = "0.9"
 snafu = "0.8"
 url = "2"
 num-traits = "0.2"
-rand = "0.9"
 regex = "1.10"
 lazy_static = "1"
 semver = "1.0.25"
 crunchy = "0.2.4"
-# Temporary pins to work around downstream issues
-# https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
-chrono = "=0.4.41"
-# https://github.com/RustCrypto/formats/issues/1684
-base64ct = "=1.6.0"
+chrono = "0.4"
 # Workaround for: https://github.com/Lokathor/bytemuck/issues/306
 bytemuck_derive = ">=1.8.1, <1.9.0"
+
+# This is only needed when we reference preview releases of lance
+# Force to use the same lance version as the rest of the project to avoid duplicate dependencies
+[patch.crates-io]
+lance = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-io = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-index = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-linalg = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-table = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-testing = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-datafusion = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-encoding = { "version" = "=0.38.2", "tag" = "v0.38.3-beta.2", "git" = "https://github.com/lancedb/lance.git" }

ci/create_lancedb_test_connection.sh (new executable file, 4 lines)
@@ -0,0 +1,4 @@
#!/usr/bin/env bash

export RUST_LOG=info
exec ./lancedb server --port 0 --sql-port 0 --data-dir "${1}"

ci/run_with_docker_compose.sh (new executable file, 18 lines)
@@ -0,0 +1,18 @@
#!/usr/bin/env bash

#
# A script for running the given command together with a docker compose environment.
#

# Bring down the docker setup once the command is done running.
tear_down() {
  docker compose -p fixture down
}
trap tear_down EXIT

set +xe

# Clean up any existing docker setup and bring up a new one.
docker compose -p fixture up --detach --wait || exit 1

"${@}"

ci/run_with_test_connection.sh (new executable file, 68 lines)
@@ -0,0 +1,68 @@
#!/usr/bin/env bash

#
# A script for running the given command together with the lancedb cli.
#

die() {
  echo $?
  exit 1
}

check_command_exists() {
  command="${1}"
  which ${command} &> /dev/null || \
    die "Unable to locate command: ${command}. Did you install it?"
}

if [[ ! -e ./lancedb ]]; then
  if [[ -v SOPHON_READ_TOKEN ]]; then
    INPUT="lancedb-linux-x64"
    gh release \
      --repo lancedb/lancedb \
      download ci-support-binaries \
      --pattern "${INPUT}" \
      || die "failed to fetch cli."
    check_command_exists openssl
    openssl enc -aes-256-cbc \
      -d -pbkdf2 \
      -pass "env:SOPHON_READ_TOKEN" \
      -in "${INPUT}" \
      -out ./lancedb-linux-x64.tar.gz \
      || die "openssl failed"
    TARGET="${INPUT}.tar.gz"
  else
    ARCH="x64"
    if [[ $OSTYPE == 'darwin'* ]]; then
      UNAME=$(uname -m)
      if [[ $UNAME == 'arm64' ]]; then
        ARCH='arm64'
      fi
      OSTYPE="macos"
    elif [[ $OSTYPE == 'linux'* ]]; then
      if [[ $UNAME == 'aarch64' ]]; then
        ARCH='arm64'
      fi
      OSTYPE="linux"
    else
      die "unknown OSTYPE: $OSTYPE"
    fi

    check_command_exists gh
    TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
    gh release \
      --repo lancedb/sophon \
      download lancedb-cli-v0.0.3 \
      --pattern "${TARGET}" \
      || die "failed to fetch cli."
  fi

  check_command_exists tar
  tar xvf "${TARGET}" || die "tar failed."
  [[ -e ./lancedb ]] || die "failed to extract lancedb."
fi

SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
export CREATE_LANCEDB_TEST_CONNECTION_SCRIPT="${SCRIPT_DIR}/create_lancedb_test_connection.sh"

"${@}"

@@ -1,4 +1,5 @@
 import argparse
+import re
 import sys
 import json
 
@@ -18,8 +19,12 @@ def run_command(command: str) -> str:
 
 def get_latest_stable_version() -> str:
     version_line = run_command("cargo info lance | grep '^version:'")
-    version = version_line.split(" ")[1].strip()
-    return version
+    # Example output: "version: 0.35.0 (latest 0.37.0)"
+    match = re.search(r'\(latest ([0-9.]+)\)', version_line)
+    if match:
+        return match.group(1)
+    # Fallback: use the first version after 'version:'
+    return version_line.split("version:")[1].split()[0].strip()
 
 
 def get_latest_preview_version() -> str:
@@ -112,7 +117,7 @@ def update_cargo_toml(line_updater):
     lance_line = ""
     is_parsing_lance_line = False
     for line in lines:
-        if line.startswith("lance"):
+        if line.startswith("lance") and not line.startswith("lance-namespace"):
             # Check if this is a single-line or multi-line entry
             # Single-line entries either:
             # 1. End with } (complete inline table)

@@ -70,6 +70,23 @@ plugins:
   - mkdocs-jupyter
   - render_swagger:
       allow_arbitrary_locations: true
+  - redirects:
+      redirect_maps:
+        # Redirect the home page and other top-level markdown files. This enables maximum SEO benefit
+        # other sub-pages are handled by the ingected js in overrides/partials/header.html
+        'index.md': 'https://lancedb.com/docs/'
+        'guides/tables.md': 'https://lancedb.com/docs/tables/'
+        'ann_indexes.md': 'https://lancedb.com/docs/indexing/'
+        'basic.md': 'https://lancedb.com/docs/quickstart/'
+        'faq.md': 'https://lancedb.com/docs/faq/'
+        'embeddings/understanding_embeddings.md': 'https://lancedb.com/docs/embedding/'
+        'integrations.md': 'https://lancedb.com/docs/integrations/'
+        'examples.md': 'https://lancedb.com/docs/tutorials/'
+        'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
+        'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'
+        'guides/storage.md': 'https://lancedb.com/docs/storage/integrations'
+
+
 
 markdown_extensions:
   - admonition

@@ -19,7 +19,13 @@
 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 IN THE SOFTWARE.
 -->
+<div id="deprecation-banner" style="background-color: #f8d7da; color: #721c24; padding: 1em; text-align: center;">
+  <p style="margin: 0; font-size: 1.1em;">
+    <strong>This documentation site is deprecated.</strong>
+    Please visit our new documentation site at <a href="https://lancedb.com/docs" style="color: #721c24; text-decoration: underline;">
+    lancedb.com/docs</a> for the latest information.
+  </p>
+</div>
 {% set class = "md-header" %}
 {% if "navigation.tabs.sticky" in features %}
   {% set class = class ~ " md-header--shadow md-header--lifted" %}
@@ -150,9 +156,9 @@

<div style="margin-left: 10px; margin-right: 5px;">
<a href="https://discord.com/invite/zMM32dvNtd" target="_blank" rel="noopener noreferrer">
<svg fill="#FFFFFF" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" width="25px" height="25px"><path d="M 41.625 10.769531 C 37.644531 7.566406 31.347656 7.023438 31.078125 7.003906 C 30.660156 6.96875 30.261719 7.203125 30.089844 7.589844 C 30.074219 7.613281 29.9375 7.929688 29.785156 8.421875 C 32.417969 8.867188 35.652344 9.761719 38.578125 11.578125 C 39.046875 11.867188 39.191406 12.484375 38.902344 12.953125 C 38.710938 13.261719 38.386719 13.429688 38.050781 13.429688 C 37.871094 13.429688 37.6875 13.378906 37.523438 13.277344 C 32.492188 10.15625 26.210938 10 25 10 C 23.789063 10 17.503906 10.15625 12.476563 13.277344 C 12.007813 13.570313 11.390625 13.425781 11.101563 12.957031 C 10.808594 12.484375 10.953125 11.871094 11.421875 11.578125 C 14.347656 9.765625 17.582031 8.867188 20.214844 8.425781 C 20.0625 7.929688 19.925781 7.617188 19.914063 7.589844 C 19.738281 7.203125 19.34375 6.960938 18.921875 7.003906 C 18.652344 7.023438 12.355469 7.566406 8.320313 10.8125 C 6.214844 12.761719 2 24.152344 2 34 C 2 34.175781 2.046875 34.34375 2.132813 34.496094 C 5.039063 39.605469 12.972656 40.941406 14.78125 41 C 14.789063 41 14.800781 41 14.8125 41 C 15.132813 41 15.433594 40.847656 15.621094 40.589844 L 17.449219 38.074219 C 12.515625 36.800781 9.996094 34.636719 9.851563 34.507813 C 9.4375 34.144531 9.398438 33.511719 9.765625 33.097656 C 10.128906 32.683594 10.761719 32.644531 11.175781 33.007813 C 11.234375 33.0625 15.875 37 25 37 C 34.140625 37 38.78125 33.046875 38.828125 33.007813 C 39.242188 32.648438 39.871094 32.683594 40.238281 33.101563 C 40.601563 33.515625 40.5625 34.144531 40.148438 34.507813 C 40.003906 34.636719 37.484375 36.800781 32.550781 38.074219 L 34.378906 40.589844 C 34.566406 40.847656 34.867188 41 35.1875 41 C 35.199219 41 35.210938 41 35.21875 41 C 37.027344 40.941406 44.960938 39.605469 47.867188 34.496094 C 47.953125 34.34375 48 34.175781 48 34 C 48 24.152344 43.785156 12.761719 41.625 10.769531 Z M 18.5 30 C 16.566406 30 15 28.210938 15 26 C 15 23.789063 16.566406 22 18.5 22 C 20.433594 22 22 23.789063 22 26 C 22 28.210938 20.433594 30 18.5 30 Z M 31.5 30 C 29.566406 30 28 28.210938 28 26 C 28 23.789063 29.566406 22 31.5 22 C 33.433594 22 35 23.789063 35 26 C 35 28.210938 33.433594 30 31.5 30 Z"/></svg>
</a>
</div>
<div style="margin-left: 5px; margin-right: 5px;">
<a href="https://twitter.com/lancedb" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0,0,256,256" width="25px" height="25px" fill-rule="nonzero"><g fill-opacity="0" fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><path d="M0,256v-256h256v256z" id="bgRectangle"></path></g><g fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><g transform="scale(4,4)"><path d="M57,17.114c-1.32,1.973 -2.991,3.707 -4.916,5.097c0.018,0.423 0.028,0.847 0.028,1.274c0,13.013 -9.902,28.018 -28.016,28.018c-5.562,0 -12.81,-1.948 -15.095,-4.423c0.772,0.092 1.556,0.138 2.35,0.138c4.615,0 8.861,-1.575 12.23,-4.216c-4.309,-0.079 -7.946,-2.928 -9.199,-6.84c1.96,0.308 4.447,-0.17 4.447,-0.17c0,0 -7.7,-1.322 -7.899,-9.779c2.226,1.291 4.46,1.231 4.46,1.231c0,0 -4.441,-2.734 -4.379,-8.195c0.037,-3.221 1.331,-4.953 1.331,-4.953c8.414,10.361 20.298,10.29 20.298,10.29c0,0 -0.255,-1.471 -0.255,-2.243c0,-5.437 4.408,-9.847 9.847,-9.847c2.832,0 5.391,1.196 7.187,3.111c2.245,-0.443 4.353,-1.263 6.255,-2.391c-0.859,3.44 -4.329,5.448 -4.329,5.448c0,0 2.969,-0.329 5.655,-1.55z"></path></g></g></svg>
@@ -173,4 +179,77 @@
 {% include "partials/tabs.html" %}
 {% endif %}
 {% endif %}
 </header>
+
+<script>
+  (function() {
+    function checkPathAndRedirect() {
+      var banner = document.getElementById('deprecation-banner');
+
+      if (document.querySelector('meta[http-equiv="refresh"]')) {
+        return; // The redirects plugin is already handling this page.
+      }
+
+      var currentPath = window.location.pathname;
+
+      var cleanPath = currentPath.endsWith('/') && currentPath.length > 1
+        ? currentPath.slice(0, -1)
+        : currentPath;
+
+      // These are the ONLY paths that should remain on the old site
+      var apiPaths = [
+        '/lancedb/python',
+        '/lancedb/javascript',
+        '/lancedb/js',
+        '/lancedb/api_reference'
+      ];
+
+      var isApiPage = apiPaths.some(function(apiPath) {
+        return cleanPath.startsWith(apiPath);
+      });
+
+      if (isApiPage) {
+        if (banner) {
+          banner.style.display = 'none';
+        }
+      } else {
+        if (banner) {
+          banner.style.display = 'block';
+        }
+
+        // Add noindex meta tag to prevent indexing of old docs for seo
+        var noindexMeta = document.createElement('meta');
+        noindexMeta.setAttribute('name', 'robots');
+        noindexMeta.setAttribute('content', 'noindex, follow');
+        document.head.appendChild(noindexMeta);
+
+        // Add canonical link to point to the new docs to reward new site for seo
+        var canonicalLink = document.createElement('link');
+        canonicalLink.setAttribute('rel', 'canonical');
+        canonicalLink.setAttribute('href', 'https://lancedb.com/docs');
+        document.head.appendChild(canonicalLink);
+
+        window.location.replace('https://lancedb.com/docs');
+      }
+    }
+
+    // Run the check only if doc is ready. This makes sure we catch the initial load
+    // and redirect.
+    if (document.readyState === 'loading') {
+      document.addEventListener('DOMContentLoaded', checkPathAndRedirect);
+    } else {
+      checkPathAndRedirect();
+    }
+
+    // Use an interval to handle subsequent navigation clicks.
+    var lastPath = window.location.pathname;
+    setInterval(function() {
+      if (window.location.pathname !== lastPath) {
+        lastPath = window.location.pathname;
+        checkPathAndRedirect();
+      }
+    }, 2000); // keeping it 2 second to make it easy for user to understand
+    // what's happening
+
+  })();
+</script>

@@ -5,3 +5,4 @@ mkdocstrings[python]==0.25.2
 griffe
 mkdocs-render-swagger-plugin
 pydantic
+mkdocs-redirects

@@ -25,6 +25,51 @@ the underlying connection has been closed.
 
 ## Methods
 
+### cloneTable()
+
+```ts
+abstract cloneTable(
+   targetTableName,
+   sourceUri,
+   options?): Promise<Table>
+```
+
+Clone a table from a source table.
+
+A shallow clone creates a new table that shares the underlying data files
+with the source table but has its own independent manifest. This allows
+both the source and cloned tables to evolve independently while initially
+sharing the same data, deletion, and index files.
+
+#### Parameters
+
+* **targetTableName**: `string`
+    The name of the target table to create.
+
+* **sourceUri**: `string`
+    The URI of the source table to clone from.
+
+* **options?**
+    Clone options.
+
+* **options.isShallow?**: `boolean`
+    Whether to perform a shallow clone (defaults to true).
+
+* **options.sourceTag?**: `string`
+    The tag of the source table to clone.
+
+* **options.sourceVersion?**: `number`
+    The version of the source table to clone.
+
+* **options.targetNamespace?**: `string`[]
+    The namespace for the target table (defaults to root namespace).
+
+#### Returns
+
+`Promise`<[`Table`](Table.md)>
+
+***
+
 ### close()
 
 ```ts
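
For orientation, a minimal usage sketch of the `cloneTable()` API documented above; the connection handle, table name, source URI, and version number are illustrative assumptions, not values taken from this changeset.

```ts
// Hypothetical example: shallow-clone an existing table (the default mode).
// `db` is assumed to be an already-open lancedb Connection.
const cloned = await db.cloneTable("reviews_clone", "s3://my-bucket/reviews.lance", {
  isShallow: true,    // share data, deletion, and index files with the source
  sourceVersion: 42,  // optionally pin the source version to clone from
});
console.log(await cloned.countRows());
```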

@@ -194,6 +194,37 @@ currently is also a memory intensive operation.
 
 ***
 
+### ivfRq()
+
+```ts
+static ivfRq(options?): Index
+```
+
+Create an IvfRq index
+
+IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
+and organizes them into IVF partitions.
+
+The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
+The parameters `num_bits` and `num_partitions` control this process, providing a tradeoff
+between index size (and thus search speed) and index accuracy.
+
+The partitioning process is called IVF and the `num_partitions` parameter controls how
+many groups to create.
+
+Note that training an IVF RQ index on a large dataset is a slow operation and
+currently is also a memory intensive operation.
+
+#### Parameters
+
+* **options?**: `Partial`<[`IvfRqOptions`](../interfaces/IvfRqOptions.md)>
+
+#### Returns
+
+[`Index`](Index.md)
+
+***
+
 ### labelList()
 
 ```ts
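
A hedged sketch of building the new IVF-RQ index on a vector column follows; the database path, table, column name, and the camel-cased option names (`numPartitions`, `numBits`) are assumptions made for illustration rather than values confirmed by this diff.

```ts
import * as lancedb from "@lancedb/lancedb";
import { Index } from "@lancedb/lancedb";

// Hypothetical example: create an IVF-RQ index on the "vector" column.
// numPartitions/numBits mirror the num_partitions/num_bits knobs described above.
const db = await lancedb.connect("./data/sample-lancedb");
const table = await db.openTable("my_vectors");
await table.createIndex("vector", {
  config: Index.ivfRq({ numPartitions: 256, numBits: 1 }),
});
```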

@@ -52,6 +52,30 @@ the merge result
 
 ***
 
+### useIndex()
+
+```ts
+useIndex(useIndex): MergeInsertBuilder
+```
+
+Controls whether to use indexes for the merge operation.
+
+When set to `true` (the default), the operation will use an index if available
+on the join key for improved performance. When set to `false`, it forces a full
+table scan even if an index exists. This can be useful for benchmarking or when
+the query optimizer chooses a suboptimal path.
+
+#### Parameters
+
+* **useIndex**: `boolean`
+    Whether to use indices for the merge operation. Defaults to `true`.
+
+#### Returns
+
+[`MergeInsertBuilder`](MergeInsertBuilder.md)
+
+***
+
 ### whenMatchedUpdateAll()
 
 ```ts
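
A short sketch of the new `useIndex()` toggle inside a merge-insert chain; the table handle, key column, and row data are illustrative assumptions.

```ts
// Hypothetical example: upsert rows keyed by "id" while forcing a full-table
// scan instead of the join-key index (useful for benchmarking comparisons).
const newRows = [{ id: 1, vector: [0.1, 0.2], label: "a" }];
await table
  .mergeInsert("id")
  .useIndex(false)
  .whenMatchedUpdateAll()
  .whenNotMatchedInsertAll()
  .execute(newRows);
```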

docs/src/js/classes/PermutationBuilder.md (new file, 220 lines)
@@ -0,0 +1,220 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / PermutationBuilder

# Class: PermutationBuilder

A PermutationBuilder for creating data permutations with splits, shuffling, and filtering.

This class provides a TypeScript wrapper around the native Rust PermutationBuilder,
offering methods to configure data splits, shuffling, and filtering before executing
the permutation to create a new table.

## Methods

### execute()

```ts
execute(): Promise<Table>
```

Execute the permutation and create the destination table.

#### Returns

`Promise`<[`Table`](Table.md)>

A Promise that resolves to the new Table instance

#### Example

```ts
const permutationTable = await builder.execute();
console.log(`Created table: ${permutationTable.name}`);
```

***

### filter()

```ts
filter(filter): PermutationBuilder
```

Configure filtering for the permutation.

#### Parameters

* **filter**: `string`
    SQL filter expression

#### Returns

[`PermutationBuilder`](PermutationBuilder.md)

A new PermutationBuilder instance

#### Example

```ts
builder.filter("age > 18 AND status = 'active'");
```

***

### shuffle()

```ts
shuffle(options): PermutationBuilder
```

Configure shuffling for the permutation.

#### Parameters

* **options**: [`ShuffleOptions`](../interfaces/ShuffleOptions.md)
    Configuration for shuffling

#### Returns

[`PermutationBuilder`](PermutationBuilder.md)

A new PermutationBuilder instance

#### Example

```ts
// Basic shuffle
builder.shuffle({ seed: 42 });

// Shuffle with clump size
builder.shuffle({ seed: 42, clumpSize: 10 });
```

***

### splitCalculated()

```ts
splitCalculated(calculation): PermutationBuilder
```

Configure calculated splits for the permutation.

#### Parameters

* **calculation**: `string`
    SQL expression for calculating splits

#### Returns

[`PermutationBuilder`](PermutationBuilder.md)

A new PermutationBuilder instance

#### Example

```ts
builder.splitCalculated("user_id % 3");
```

***

### splitHash()

```ts
splitHash(options): PermutationBuilder
```

Configure hash-based splits for the permutation.

#### Parameters

* **options**: [`SplitHashOptions`](../interfaces/SplitHashOptions.md)
    Configuration for hash-based splitting

#### Returns

[`PermutationBuilder`](PermutationBuilder.md)

A new PermutationBuilder instance

#### Example

```ts
builder.splitHash({
  columns: ["user_id"],
  splitWeights: [70, 30],
  discardWeight: 0
});
```

***

### splitRandom()

```ts
splitRandom(options): PermutationBuilder
```

Configure random splits for the permutation.

#### Parameters

* **options**: [`SplitRandomOptions`](../interfaces/SplitRandomOptions.md)
    Configuration for random splitting

#### Returns

[`PermutationBuilder`](PermutationBuilder.md)

A new PermutationBuilder instance

#### Example

```ts
// Split by ratios
builder.splitRandom({ ratios: [0.7, 0.3], seed: 42 });

// Split by counts
builder.splitRandom({ counts: [1000, 500], seed: 42 });

// Split with fixed size
builder.splitRandom({ fixed: 100, seed: 42 });
```

***

### splitSequential()

```ts
splitSequential(options): PermutationBuilder
```

Configure sequential splits for the permutation.

#### Parameters

* **options**: [`SplitSequentialOptions`](../interfaces/SplitSequentialOptions.md)
    Configuration for sequential splitting

#### Returns

[`PermutationBuilder`](PermutationBuilder.md)

A new PermutationBuilder instance

#### Example

```ts
// Split by ratios
builder.splitSequential({ ratios: [0.8, 0.2] });

// Split by counts
builder.splitSequential({ counts: [800, 200] });

// Split with fixed size
builder.splitSequential({ fixed: 1000 });
```

@@ -13,7 +13,7 @@ function makeArrowTable(
   metadata?): ArrowTable
 ```
 
-An enhanced version of the makeTable function from Apache Arrow
+An enhanced version of the apache-arrow makeTable function from Apache Arrow
 that supports nested fields and embeddings columns.
 
 (typically you do not need to call this function. It will be called automatically

docs/src/js/functions/permutationBuilder.md (new file, 37 lines)
@@ -0,0 +1,37 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / permutationBuilder

# Function: permutationBuilder()

```ts
function permutationBuilder(table, destTableName): PermutationBuilder
```

Create a permutation builder for the given table.

## Parameters

* **table**: [`Table`](../classes/Table.md)
    The source table to create a permutation from

* **destTableName**: `string`
    The name for the destination permutation table

## Returns

[`PermutationBuilder`](../classes/PermutationBuilder.md)

A PermutationBuilder instance

## Example

```ts
const builder = permutationBuilder(sourceTable, "training_data")
  .splitRandom({ ratios: [0.8, 0.2], seed: 42 })
  .shuffle({ seed: 123 });

const trainingTable = await builder.execute();
```

@@ -28,6 +28,7 @@
 - [MultiMatchQuery](classes/MultiMatchQuery.md)
 - [NativeJsHeaderProvider](classes/NativeJsHeaderProvider.md)
 - [OAuthHeaderProvider](classes/OAuthHeaderProvider.md)
+- [PermutationBuilder](classes/PermutationBuilder.md)
 - [PhraseQuery](classes/PhraseQuery.md)
 - [Query](classes/Query.md)
 - [QueryBase](classes/QueryBase.md)
@@ -68,6 +69,7 @@
 - [IndexStatistics](interfaces/IndexStatistics.md)
 - [IvfFlatOptions](interfaces/IvfFlatOptions.md)
 - [IvfPqOptions](interfaces/IvfPqOptions.md)
+- [IvfRqOptions](interfaces/IvfRqOptions.md)
 - [MergeResult](interfaces/MergeResult.md)
 - [OpenTableOptions](interfaces/OpenTableOptions.md)
 - [OptimizeOptions](interfaces/OptimizeOptions.md)
@@ -75,9 +77,14 @@
 - [QueryExecutionOptions](interfaces/QueryExecutionOptions.md)
 - [RemovalStats](interfaces/RemovalStats.md)
 - [RetryConfig](interfaces/RetryConfig.md)
+- [ShuffleOptions](interfaces/ShuffleOptions.md)
+- [SplitHashOptions](interfaces/SplitHashOptions.md)
+- [SplitRandomOptions](interfaces/SplitRandomOptions.md)
+- [SplitSequentialOptions](interfaces/SplitSequentialOptions.md)
 - [TableNamesOptions](interfaces/TableNamesOptions.md)
 - [TableStatistics](interfaces/TableStatistics.md)
 - [TimeoutConfig](interfaces/TimeoutConfig.md)
+- [TlsConfig](interfaces/TlsConfig.md)
 - [TokenResponse](interfaces/TokenResponse.md)
 - [UpdateOptions](interfaces/UpdateOptions.md)
 - [UpdateResult](interfaces/UpdateResult.md)
@@ -101,3 +108,4 @@
 - [connect](functions/connect.md)
 - [makeArrowTable](functions/makeArrowTable.md)
 - [packBits](functions/packBits.md)
+- [permutationBuilder](functions/permutationBuilder.md)

@@ -40,6 +40,14 @@ optional timeoutConfig: TimeoutConfig;
 
 ***
 
+### tlsConfig?
+
+```ts
+optional tlsConfig: TlsConfig;
+```
+
+***
+
 ### userAgent?
 
 ```ts

docs/src/js/interfaces/ShuffleOptions.md (new file, 23 lines)
@@ -0,0 +1,23 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / ShuffleOptions

# Interface: ShuffleOptions

## Properties

### clumpSize?

```ts
optional clumpSize: number;
```

***

### seed?

```ts
optional seed: number;
```

docs/src/js/interfaces/SplitHashOptions.md (new file, 31 lines)
@@ -0,0 +1,31 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / SplitHashOptions

# Interface: SplitHashOptions

## Properties

### columns

```ts
columns: string[];
```

***

### discardWeight?

```ts
optional discardWeight: number;
```

***

### splitWeights

```ts
splitWeights: number[];
```
docs/src/js/interfaces/SplitRandomOptions.md (new file, 39 lines)
@@ -0,0 +1,39 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / SplitRandomOptions

# Interface: SplitRandomOptions

## Properties

### counts?

```ts
optional counts: number[];
```

***

### fixed?

```ts
optional fixed: number;
```

***

### ratios?

```ts
optional ratios: number[];
```

***

### seed?

```ts
optional seed: number;
```
docs/src/js/interfaces/SplitSequentialOptions.md (new file, 31 lines)
@@ -0,0 +1,31 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / SplitSequentialOptions

# Interface: SplitSequentialOptions

## Properties

### counts?

```ts
optional counts: number[];
```

***

### fixed?

```ts
optional fixed: number;
```

***

### ratios?

```ts
optional ratios: number[];
```
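The split and shuffle option interfaces documented above are consumed by the new `permutationBuilder` API that this changeset also exports from the package root. A minimal sketch of how they fit together, assuming a table already exists; the database path and table name are illustrative:

```ts
import { connect, permutationBuilder } from "@lancedb/lancedb";

// Sketch only: build a shuffled 80/20 permutation of an existing table.
const db = await connect("data/sample-lancedb"); // illustrative path
const table = await db.openTable("my_table"); // illustrative table name

const permutation = await permutationBuilder(table, "my_table_permutation")
  .splitRandom({ ratios: [0.8, 0.2], seed: 42 }) // SplitRandomOptions
  .shuffle({ seed: 7, clumpSize: 2 }) // ShuffleOptions
  .execute();

// Rows are assigned a split_id column that can be filtered on.
console.log(await permutation.countRows("split_id = 0"));
```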
docs/src/js/interfaces/TlsConfig.md (new file, 49 lines)
@@ -0,0 +1,49 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / TlsConfig

# Interface: TlsConfig

TLS/mTLS configuration for the remote HTTP client.

## Properties

### assertHostname?

```ts
optional assertHostname: boolean;
```

Whether to verify the hostname in the server's certificate.

***

### certFile?

```ts
optional certFile: string;
```

Path to the client certificate file (PEM format) for mTLS authentication.

***

### keyFile?

```ts
optional keyFile: string;
```

Path to the client private key file (PEM format) for mTLS authentication.

***

### sslCaCert?

```ts
optional sslCaCert: string;
```

Path to the CA certificate file (PEM format) for server verification.
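For orientation, a hedged sketch of how this interface is meant to be wired in: the `tlsConfig` field added to the client configuration earlier in this changeset carries a `TlsConfig` when opening a remote connection. The URI, credentials, and certificate paths below are placeholders:

```ts
import { connect } from "@lancedb/lancedb";

// Sketch only: mTLS configuration for a remote connection.
const db = await connect("db://my-database", {
  apiKey: process.env.LANCEDB_API_KEY, // placeholder credentials
  clientConfig: {
    tlsConfig: {
      certFile: "/etc/ssl/client.pem", // client certificate for mTLS
      keyFile: "/etc/ssl/client.key", // client private key for mTLS
      sslCaCert: "/etc/ssl/ca.pem", // CA bundle for server verification
      assertHostname: true, // verify the hostname in the server certificate
    },
  },
});
```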
@@ -16,6 +16,7 @@ pub trait JNIEnvExt {
     fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>>;
 
     /// Get strings from Java List<String> object.
+    #[allow(dead_code)]
     fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>>;
 
     /// Get strings from Java String[] object.
@@ -6,6 +6,7 @@ use jni::JNIEnv;
 
 use crate::Result;
 
+#[allow(dead_code)]
 pub trait FromJObject<T> {
     fn extract(&self) -> Result<T>;
 }
@@ -39,6 +40,7 @@ impl FromJObject<f64> for JObject<'_> {
     }
 }
 
+#[allow(dead_code)]
 pub trait FromJString {
     fn extract(&self, env: &mut JNIEnv) -> Result<String>;
 }
@@ -66,6 +68,7 @@ pub trait JMapExt {
     fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>>;
 }
 
+#[allow(dead_code)]
fn get_map_value<T>(env: &mut JNIEnv, map: &JMap, key: &str) -> Result<Option<T>>
where
    for<'a> JObject<'a>: FromJObject<T>,
@@ -8,7 +8,7 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.22.1-beta.0</version>
+    <version>0.22.2-final.0</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
@@ -8,7 +8,7 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.22.1-beta.0</version>
+    <version>0.22.2-final.0</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
@@ -6,7 +6,7 @@
 
   <groupId>com.lancedb</groupId>
   <artifactId>lancedb-parent</artifactId>
-  <version>0.22.1-beta.0</version>
+  <version>0.22.2-final.0</version>
   <packaging>pom</packaging>
   <name>${project.artifactId}</name>
   <description>LanceDB Java SDK Parent POM</description>
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.22.1-beta.0"
+version = "0.22.2"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
@@ -1,17 +1,5 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-import {
-  Bool,
-  Field,
-  Int32,
-  List,
-  Schema,
-  Struct,
-  Uint8,
-  Utf8,
-} from "apache-arrow";
-
 import * as arrow15 from "apache-arrow-15";
 import * as arrow16 from "apache-arrow-16";
 import * as arrow17 from "apache-arrow-17";
@@ -25,11 +13,9 @@ import {
   fromTableToBuffer,
   makeArrowTable,
   makeEmptyTable,
-  tableFromIPC,
 } from "../lancedb/arrow";
 import {
   EmbeddingFunction,
-  FieldOptions,
   FunctionOptions,
 } from "../lancedb/embedding/embedding_function";
 import { EmbeddingFunctionConfig } from "../lancedb/embedding/registry";
@@ -1008,5 +994,64 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        expect(result).toEqual(null);
      });
    });
+
+    describe("boolean null handling", function () {
+      it("should handle null values in nullable boolean fields", () => {
+        const { makeArrowTable } = require("../lancedb/arrow");
+        const schema = new Schema([new Field("test", new arrow.Bool(), true)]);
+
+        // Test with all null values
+        const data = [{ test: null }];
+        const table = makeArrowTable(data, { schema });
+
+        expect(table.numRows).toBe(1);
+        expect(table.schema.names).toEqual(["test"]);
+        expect(table.getChild("test")!.get(0)).toBeNull();
+      });
+
+      it("should handle mixed null and non-null boolean values", () => {
+        const { makeArrowTable } = require("../lancedb/arrow");
+        const schema = new Schema([new Field("test", new Bool(), true)]);
+
+        // Test with mixed values
+        const data = [{ test: true }, { test: null }, { test: false }];
+        const table = makeArrowTable(data, { schema });
+
+        expect(table.numRows).toBe(3);
+        expect(table.getChild("test")!.get(0)).toBe(true);
+        expect(table.getChild("test")!.get(1)).toBeNull();
+        expect(table.getChild("test")!.get(2)).toBe(false);
+      });
+    });
+
+    // Test for the undefined values bug fix
+    describe("undefined values handling", () => {
+      it("should handle mixed undefined and actual values", () => {
+        const schema = new Schema([
+          new Field("text", new Utf8(), true), // nullable
+          new Field("number", new Int32(), true), // nullable
+          new Field("bool", new Bool(), true), // nullable
+        ]);
+
+        const data = [
+          { text: undefined, number: 42, bool: true },
+          { text: "hello", number: undefined, bool: false },
+          { text: "world", number: 123, bool: undefined },
+        ];
+        const table = makeArrowTable(data, { schema });
+
+        const result = table.toArray();
+        expect(result).toHaveLength(3);
+        expect(result[0].text).toBe(null);
+        expect(result[0].number).toBe(42);
+        expect(result[0].bool).toBe(true);
+        expect(result[1].text).toBe("hello");
+        expect(result[1].number).toBe(null);
+        expect(result[1].bool).toBe(false);
+        expect(result[2].text).toBe("world");
+        expect(result[2].number).toBe(123);
+        expect(result[2].bool).toBe(null);
+      });
+    });
  },
);
@@ -203,3 +203,106 @@ describe("given a connection", () => {
     });
   });
 });
+
+describe("clone table functionality", () => {
+  let tmpDir: tmp.DirResult;
+  let db: Connection;
+  beforeEach(async () => {
+    tmpDir = tmp.dirSync({ unsafeCleanup: true });
+    db = await connect(tmpDir.name);
+  });
+  afterEach(() => tmpDir.removeCallback());
+
+  it("should clone a table with latest version (default behavior)", async () => {
+    // Create source table with some data
+    const data = [
+      { id: 1, text: "hello", vector: [1.0, 2.0] },
+      { id: 2, text: "world", vector: [3.0, 4.0] },
+    ];
+    const sourceTable = await db.createTable("source", data);
+
+    // Add more data to create a new version
+    const moreData = [{ id: 3, text: "test", vector: [5.0, 6.0] }];
+    await sourceTable.add(moreData);
+
+    // Clone the table (should get latest version with 3 rows)
+    const sourceUri = `${tmpDir.name}/source.lance`;
+    const clonedTable = await db.cloneTable("cloned", sourceUri);
+
+    // Verify cloned table has all 3 rows
+    expect(await clonedTable.countRows()).toBe(3);
+    expect((await db.tableNames()).includes("cloned")).toBe(true);
+  });
+
+  it("should clone a table from a specific version", async () => {
+    // Create source table with initial data
+    const data = [
+      { id: 1, text: "hello", vector: [1.0, 2.0] },
+      { id: 2, text: "world", vector: [3.0, 4.0] },
+    ];
+    const sourceTable = await db.createTable("source", data);
+
+    // Get the initial version
+    const initialVersion = await sourceTable.version();
+
+    // Add more data to create a new version
+    const moreData = [{ id: 3, text: "test", vector: [5.0, 6.0] }];
+    await sourceTable.add(moreData);
+
+    // Verify source now has 3 rows
+    expect(await sourceTable.countRows()).toBe(3);
+
+    // Clone from the initial version (should have only 2 rows)
+    const sourceUri = `${tmpDir.name}/source.lance`;
+    const clonedTable = await db.cloneTable("cloned", sourceUri, {
+      sourceVersion: initialVersion,
+    });
+
+    // Verify cloned table has only the initial 2 rows
+    expect(await clonedTable.countRows()).toBe(2);
+  });
+
+  it("should clone a table from a tagged version", async () => {
+    // Create source table with initial data
+    const data = [
+      { id: 1, text: "hello", vector: [1.0, 2.0] },
+      { id: 2, text: "world", vector: [3.0, 4.0] },
+    ];
+    const sourceTable = await db.createTable("source", data);
+
+    // Create a tag for the current version
+    const tags = await sourceTable.tags();
+    await tags.create("v1.0", await sourceTable.version());
+
+    // Add more data after the tag
+    const moreData = [{ id: 3, text: "test", vector: [5.0, 6.0] }];
+    await sourceTable.add(moreData);
+
+    // Verify source now has 3 rows
+    expect(await sourceTable.countRows()).toBe(3);
+
+    // Clone from the tagged version (should have only 2 rows)
+    const sourceUri = `${tmpDir.name}/source.lance`;
+    const clonedTable = await db.cloneTable("cloned", sourceUri, {
+      sourceTag: "v1.0",
+    });
+
+    // Verify cloned table has only the tagged version's 2 rows
+    expect(await clonedTable.countRows()).toBe(2);
+  });
+
+  it("should fail when attempting deep clone", async () => {
+    // Create source table with some data
+    const data = [
+      { id: 1, text: "hello", vector: [1.0, 2.0] },
+      { id: 2, text: "world", vector: [3.0, 4.0] },
+    ];
+    await db.createTable("source", data);
+
+    // Try to create a deep clone (should fail)
+    const sourceUri = `${tmpDir.name}/source.lance`;
+    await expect(
+      db.cloneTable("cloned", sourceUri, { isShallow: false }),
+    ).rejects.toThrow("Deep clone is not yet implemented");
+  });
+});
@@ -256,6 +256,60 @@ describe("embedding functions", () => {
     expect(actual).toHaveProperty("text");
   });
+
+  it("should handle undefined vector field with embedding function correctly", async () => {
+    @register("undefined_test")
+    class MockEmbeddingFunction extends EmbeddingFunction<string> {
+      ndims() {
+        return 3;
+      }
+      embeddingDataType(): Float {
+        return new Float32();
+      }
+      async computeQueryEmbeddings(_data: string) {
+        return [1, 2, 3];
+      }
+      async computeSourceEmbeddings(data: string[]) {
+        return Array.from({ length: data.length }).fill([
+          1, 2, 3,
+        ]) as number[][];
+      }
+    }
+    const func = getRegistry()
+      .get<MockEmbeddingFunction>("undefined_test")!
+      .create();
+    const schema = new Schema([
+      new Field("text", new Utf8(), true),
+      new Field(
+        "vector",
+        new FixedSizeList(3, new Field("item", new Float32(), true)),
+        true,
+      ),
+    ]);
+
+    const db = await connect(tmpDir.name);
+    const table = await db.createEmptyTable("test_undefined", schema, {
+      embeddingFunction: {
+        function: func,
+        sourceColumn: "text",
+        vectorColumn: "vector",
+      },
+    });
+
+    // Test that undefined, null, and omitted vector fields all work
+    await table.add([{ text: "test1", vector: undefined }]);
+    await table.add([{ text: "test2", vector: null }]);
+    await table.add([{ text: "test3" }]);
+
+    const rows = await table.query().toArray();
+    expect(rows.length).toBe(3);
+
+    // All rows should have vectors computed by the embedding function
+    for (const row of rows) {
+      expect(row.vector).toBeDefined();
+      expect(JSON.parse(JSON.stringify(row.vector))).toEqual([1, 2, 3]);
+    }
+  });
+
   test.each([new Float16(), new Float32(), new Float64()])(
     "should be able to provide manual embeddings with multiple float datatype",
     async (floatType) => {
nodejs/__test__/permutation.test.ts (new file, 234 lines)
@@ -0,0 +1,234 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

import * as tmp from "tmp";
import { Table, connect, permutationBuilder } from "../lancedb";
import { makeArrowTable } from "../lancedb/arrow";

describe("PermutationBuilder", () => {
  let tmpDir: tmp.DirResult;
  let table: Table;

  beforeEach(async () => {
    tmpDir = tmp.dirSync({ unsafeCleanup: true });
    const db = await connect(tmpDir.name);

    // Create test data
    const data = makeArrowTable(
      [
        { id: 1, value: 10 },
        { id: 2, value: 20 },
        { id: 3, value: 30 },
        { id: 4, value: 40 },
        { id: 5, value: 50 },
        { id: 6, value: 60 },
        { id: 7, value: 70 },
        { id: 8, value: 80 },
        { id: 9, value: 90 },
        { id: 10, value: 100 },
      ],
      { vectorColumns: {} },
    );

    table = await db.createTable("test_table", data);
  });

  afterEach(() => {
    tmpDir.removeCallback();
  });

  test("should create permutation builder", () => {
    const builder = permutationBuilder(table, "permutation_table");
    expect(builder).toBeDefined();
  });

  test("should execute basic permutation", async () => {
    const builder = permutationBuilder(table, "permutation_table");
    const permutationTable = await builder.execute();

    expect(permutationTable).toBeDefined();
    expect(permutationTable.name).toBe("permutation_table");

    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);
  });

  test("should create permutation with random splits", async () => {
    const builder = permutationBuilder(table, "permutation_table").splitRandom({
      ratios: [1.0],
      seed: 42,
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);
  });

  test("should create permutation with percentage splits", async () => {
    const builder = permutationBuilder(table, "permutation_table").splitRandom({
      ratios: [0.3, 0.7],
      seed: 42,
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);

    // Check split distribution
    const split0Count = await permutationTable.countRows("split_id = 0");
    const split1Count = await permutationTable.countRows("split_id = 1");

    expect(split0Count).toBeGreaterThan(0);
    expect(split1Count).toBeGreaterThan(0);
    expect(split0Count + split1Count).toBe(10);
  });

  test("should create permutation with count splits", async () => {
    const builder = permutationBuilder(table, "permutation_table").splitRandom({
      counts: [3, 7],
      seed: 42,
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);

    // Check split distribution
    const split0Count = await permutationTable.countRows("split_id = 0");
    const split1Count = await permutationTable.countRows("split_id = 1");

    expect(split0Count).toBe(3);
    expect(split1Count).toBe(7);
  });

  test("should create permutation with hash splits", async () => {
    const builder = permutationBuilder(table, "permutation_table").splitHash({
      columns: ["id"],
      splitWeights: [50, 50],
      discardWeight: 0,
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);

    // Check that splits exist
    const split0Count = await permutationTable.countRows("split_id = 0");
    const split1Count = await permutationTable.countRows("split_id = 1");

    expect(split0Count).toBeGreaterThan(0);
    expect(split1Count).toBeGreaterThan(0);
    expect(split0Count + split1Count).toBe(10);
  });

  test("should create permutation with sequential splits", async () => {
    const builder = permutationBuilder(
      table,
      "permutation_table",
    ).splitSequential({ ratios: [0.5, 0.5] });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);

    // Check split distribution - sequential should give exactly 5 and 5
    const split0Count = await permutationTable.countRows("split_id = 0");
    const split1Count = await permutationTable.countRows("split_id = 1");

    expect(split0Count).toBe(5);
    expect(split1Count).toBe(5);
  });

  test("should create permutation with calculated splits", async () => {
    const builder = permutationBuilder(
      table,
      "permutation_table",
    ).splitCalculated("id % 2");

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);

    // Check split distribution
    const split0Count = await permutationTable.countRows("split_id = 0");
    const split1Count = await permutationTable.countRows("split_id = 1");

    expect(split0Count).toBeGreaterThan(0);
    expect(split1Count).toBeGreaterThan(0);
    expect(split0Count + split1Count).toBe(10);
  });

  test("should create permutation with shuffle", async () => {
    const builder = permutationBuilder(table, "permutation_table").shuffle({
      seed: 42,
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);
  });

  test("should create permutation with shuffle and clump size", async () => {
    const builder = permutationBuilder(table, "permutation_table").shuffle({
      seed: 42,
      clumpSize: 2,
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);
  });

  test("should create permutation with filter", async () => {
    const builder = permutationBuilder(table, "permutation_table").filter(
      "value > 50",
    );

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(5); // Values 60, 70, 80, 90, 100
  });

  test("should chain multiple operations", async () => {
    const builder = permutationBuilder(table, "permutation_table")
      .filter("value <= 80")
      .splitRandom({ ratios: [0.5, 0.5], seed: 42 })
      .shuffle({ seed: 123 });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(8); // Values 10, 20, 30, 40, 50, 60, 70, 80

    // Check split distribution
    const split0Count = await permutationTable.countRows("split_id = 0");
    const split1Count = await permutationTable.countRows("split_id = 1");

    expect(split0Count).toBeGreaterThan(0);
    expect(split1Count).toBeGreaterThan(0);
    expect(split0Count + split1Count).toBe(8);
  });

  test("should throw error for invalid split arguments", () => {
    const builder = permutationBuilder(table, "permutation_table");

    // Test no arguments provided
    expect(() => builder.splitRandom({})).toThrow(
      "Exactly one of 'ratios', 'counts', or 'fixed' must be provided",
    );

    // Test multiple arguments provided
    expect(() =>
      builder.splitRandom({ ratios: [0.5, 0.5], counts: [3, 7], seed: 42 }),
    ).toThrow("Exactly one of 'ratios', 'counts', or 'fixed' must be provided");
  });

  test("should throw error when builder is consumed", async () => {
    const builder = permutationBuilder(table, "permutation_table");

    // Execute once
    await builder.execute();

    // Should throw error on second execution
    await expect(builder.execute()).rejects.toThrow("Builder already consumed");
  });
});
@@ -7,7 +7,6 @@ import {
   ClientConfig,
   Connection,
   ConnectionOptions,
-  NativeJsHeaderProvider,
   TlsConfig,
   connect,
 } from "../lancedb";
nodejs/__test__/sanitize.test.ts (new file, 184 lines)
@@ -0,0 +1,184 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

import * as arrow from "../lancedb/arrow";
import { sanitizeField, sanitizeType } from "../lancedb/sanitize";

describe("sanitize", function () {
  describe("sanitizeType function", function () {
    it("should handle type objects", function () {
      const type = new arrow.Int32();
      const result = sanitizeType(type);

      expect(result.typeId).toBe(arrow.Type.Int);
      expect((result as arrow.Int).bitWidth).toBe(32);
      expect((result as arrow.Int).isSigned).toBe(true);

      const floatType = {
        typeId: 3, // Type.Float = 3
        precision: 2,
        toString: () => "Float",
        isFloat: true,
        isFixedWidth: true,
      };

      const floatResult = sanitizeType(floatType);
      expect(floatResult).toBeInstanceOf(arrow.DataType);
      expect(floatResult.typeId).toBe(arrow.Type.Float);

      const floatResult2 = sanitizeType({ ...floatType, typeId: () => 3 });
      expect(floatResult2).toBeInstanceOf(arrow.DataType);
      expect(floatResult2.typeId).toBe(arrow.Type.Float);
    });

    const allTypeNameTestCases = [
      ["null", new arrow.Null()],
      ["binary", new arrow.Binary()],
      ["utf8", new arrow.Utf8()],
      ["bool", new arrow.Bool()],
      ["int8", new arrow.Int8()],
      ["int16", new arrow.Int16()],
      ["int32", new arrow.Int32()],
      ["int64", new arrow.Int64()],
      ["uint8", new arrow.Uint8()],
      ["uint16", new arrow.Uint16()],
      ["uint32", new arrow.Uint32()],
      ["uint64", new arrow.Uint64()],
      ["float16", new arrow.Float16()],
      ["float32", new arrow.Float32()],
      ["float64", new arrow.Float64()],
      ["datemillisecond", new arrow.DateMillisecond()],
      ["dateday", new arrow.DateDay()],
      ["timenanosecond", new arrow.TimeNanosecond()],
      ["timemicrosecond", new arrow.TimeMicrosecond()],
      ["timemillisecond", new arrow.TimeMillisecond()],
      ["timesecond", new arrow.TimeSecond()],
      ["intervaldaytime", new arrow.IntervalDayTime()],
      ["intervalyearmonth", new arrow.IntervalYearMonth()],
      ["durationnanosecond", new arrow.DurationNanosecond()],
      ["durationmicrosecond", new arrow.DurationMicrosecond()],
      ["durationmillisecond", new arrow.DurationMillisecond()],
      ["durationsecond", new arrow.DurationSecond()],
    ] as const;

    it.each(allTypeNameTestCases)(
      'should map type name "%s" to %s',
      function (name, expected) {
        const result = sanitizeType(name);
        expect(result).toBeInstanceOf(expected.constructor);
      },
    );

    const caseVariationTestCases = [
      ["NULL", new arrow.Null()],
      ["Utf8", new arrow.Utf8()],
      ["FLOAT32", new arrow.Float32()],
      ["DaTedAy", new arrow.DateDay()],
    ] as const;

    it.each(caseVariationTestCases)(
      'should be case insensitive for type name "%s" mapped to %s',
      function (name, expected) {
        const result = sanitizeType(name);
        expect(result).toBeInstanceOf(expected.constructor);
      },
    );

    it("should throw error for unrecognized type name", function () {
      expect(() => sanitizeType("invalid_type")).toThrow(
        "Unrecognized type name in schema: invalid_type",
      );
    });
  });

  describe("sanitizeField function", function () {
    it("should handle field with string type name", function () {
      const field = sanitizeField({
        name: "string_field",
        type: "utf8",
        nullable: true,
        metadata: new Map([["key", "value"]]),
      });

      expect(field).toBeInstanceOf(arrow.Field);
      expect(field.name).toBe("string_field");
      expect(field.type).toBeInstanceOf(arrow.Utf8);
      expect(field.nullable).toBe(true);
      expect(field.metadata?.get("key")).toBe("value");
    });

    it("should handle field with type object", function () {
      const floatType = {
        typeId: 3, // Float
        precision: 32,
      };

      const field = sanitizeField({
        name: "float_field",
        type: floatType,
        nullable: false,
      });

      expect(field).toBeInstanceOf(arrow.Field);
      expect(field.name).toBe("float_field");
      expect(field.type).toBeInstanceOf(arrow.DataType);
      expect(field.type.typeId).toBe(arrow.Type.Float);
      expect((field.type as arrow.Float64).precision).toBe(32);
      expect(field.nullable).toBe(false);
    });

    it("should handle field with direct Type instance", function () {
      const field = sanitizeField({
        name: "bool_field",
        type: new arrow.Bool(),
        nullable: true,
      });

      expect(field).toBeInstanceOf(arrow.Field);
      expect(field.name).toBe("bool_field");
      expect(field.type).toBeInstanceOf(arrow.Bool);
      expect(field.nullable).toBe(true);
    });

    it("should throw error for invalid field object", function () {
      expect(() =>
        sanitizeField({
          type: "int32",
          nullable: true,
        }),
      ).toThrow(
        "The field passed in is missing a `type`/`name`/`nullable` property",
      );

      // Invalid type
      expect(() =>
        sanitizeField({
          name: "invalid",
          type: { invalid: true },
          nullable: true,
        }),
      ).toThrow("Expected a Type to have a typeId property");

      // Invalid nullable
      expect(() =>
        sanitizeField({
          name: "invalid_nullable",
          type: "int32",
          nullable: "not a boolean",
        }),
      ).toThrow("The field passed in had a non-boolean `nullable` property");
    });

    it("should report error for invalid type name", function () {
      expect(() =>
        sanitizeField({
          name: "invalid_field",
          type: "invalid_type",
          nullable: true,
        }),
      ).toThrow(
        "Unable to sanitize type for field: invalid_field due to error: Error: Unrecognized type name in schema: invalid_type",
      );
    });
  });
});
@@ -10,7 +10,13 @@ import * as arrow16 from "apache-arrow-16";
 import * as arrow17 from "apache-arrow-17";
 import * as arrow18 from "apache-arrow-18";
 
-import { MatchQuery, PhraseQuery, Table, connect } from "../lancedb";
+import {
+  Connection,
+  MatchQuery,
+  PhraseQuery,
+  Table,
+  connect,
+} from "../lancedb";
 import {
   Table as ArrowTable,
   Field,
@@ -21,6 +27,8 @@ import {
   Int64,
   List,
   Schema,
+  SchemaLike,
+  Type,
   Uint8,
   Utf8,
   makeArrowTable,
@@ -39,7 +47,6 @@ import {
   Operator,
   instanceOfFullTextQuery,
 } from "../lancedb/query";
-import exp = require("constants");
 
 describe.each([arrow15, arrow16, arrow17, arrow18])(
   "Given a table",
@@ -212,8 +219,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      },
    );

-    // TODO: https://github.com/lancedb/lancedb/issues/1832
-    it.skip("should be able to omit nullable fields", async () => {
+    it("should be able to omit nullable fields", async () => {
      const db = await connect(tmpDir.name);
      const schema = new arrow.Schema([
        new arrow.Field(
@@ -237,23 +243,36 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      await table.add([data3]);

      let res = await table.query().limit(10).toArray();
-      const resVector = res.map((r) => r.get("vector").toArray());
+      const resVector = res.map((r) =>
+        r.vector ? Array.from(r.vector) : null,
+      );
      expect(resVector).toEqual([null, data2.vector, data3.vector]);
-      const resItem = res.map((r) => r.get("item").toArray());
+      const resItem = res.map((r) => r.item);
      expect(resItem).toEqual(["foo", null, "bar"]);
-      const resPrice = res.map((r) => r.get("price").toArray());
+      const resPrice = res.map((r) => r.price);
      expect(resPrice).toEqual([10.0, 2.0, 3.0]);

      const data4 = { item: "foo" };
      // We can't omit a column if it's not nullable
-      await expect(table.add([data4])).rejects.toThrow("Invalid user input");
+      await expect(table.add([data4])).rejects.toThrow(
+        "Append with different schema",
+      );

      // But we can alter columns to make them nullable
      await table.alterColumns([{ path: "price", nullable: true }]);
      await table.add([data4]);

-      res = (await table.query().limit(10).toArray()).map((r) => r.toJSON());
-      expect(res).toEqual([data1, data2, data3, data4]);
+      res = (await table.query().limit(10).toArray()).map((r) => ({
+        ...r.toJSON(),
+        vector: r.vector ? Array.from(r.vector) : null,
+      }));
+      // Rust fills missing nullable fields with null
+      expect(res).toEqual([
+        { ...data1, vector: null },
+        { ...data2, item: null },
+        data3,
+        { ...data4, price: null, vector: null },
+      ]);
    });

    it("should be able to insert nullable data for non-nullable fields", async () => {
@@ -331,6 +350,43 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      const table = await db.createTable("my_table", data);
      expect(await table.countRows()).toEqual(2);
    });
+
+    it("should allow undefined and omitted nullable vector fields", async () => {
+      // Test for the bug: can't pass undefined or omit vector column
+      const db = await connect("memory://");
+      const schema = new arrow.Schema([
+        new arrow.Field("id", new arrow.Int32(), true),
+        new arrow.Field(
+          "vector",
+          new arrow.FixedSizeList(
+            32,
+            new arrow.Field("item", new arrow.Float32(), true),
+          ),
+          true, // nullable = true
+        ),
+      ]);
+      const table = await db.createEmptyTable("test_table", schema);
+
+      // Should not throw error for undefined value
+      await table.add([{ id: 0, vector: undefined }]);
+
+      // Should not throw error for omitted field
+      await table.add([{ id: 1 }]);
+
+      // Should still work for null
+      await table.add([{ id: 2, vector: null }]);
+
+      // Should still work for actual vector
+      const testVector = new Array(32).fill(0.5);
+      await table.add([{ id: 3, vector: testVector }]);
+      expect(await table.countRows()).toEqual(4);
+
+      const res = await table.query().limit(10).toArray();
+      const resVector = res.map((r) =>
+        r.vector ? Array.from(r.vector) : null,
+      );
+      expect(resVector).toEqual([null, null, null, testVector]);
+    });
  },
);

@@ -488,6 +544,32 @@ describe("merge insert", () => {
        .execute(newData, { timeoutMs: 0 }),
    ).rejects.toThrow("merge insert timed out");
  });
+
+  test("useIndex", async () => {
+    const newData = [
+      { a: 2, b: "x" },
+      { a: 4, b: "z" },
+    ];
+
+    // Test with useIndex(true) - should work fine
+    const result1 = await table
+      .mergeInsert("a")
+      .whenNotMatchedInsertAll()
+      .useIndex(true)
+      .execute(newData);
+
+    expect(result1.numInsertedRows).toBe(1); // Only a=4 should be inserted
+
+    // Test with useIndex(false) - should also work fine
+    const newData2 = [{ a: 5, b: "w" }];
+    const result2 = await table
+      .mergeInsert("a")
+      .whenNotMatchedInsertAll()
+      .useIndex(false)
+      .execute(newData2);
+
+    expect(result2.numInsertedRows).toBe(1); // a=5 should be inserted
+  });
});

describe("When creating an index", () => {
@@ -779,6 +861,15 @@ describe("When creating an index", () => {
    });
  });

+  it("should be able to create IVF_RQ", async () => {
+    await tbl.createIndex("vec", {
+      config: Index.ivfRq({
+        numPartitions: 10,
+        numBits: 1,
+      }),
+    });
+  });
+
  it("should allow me to replace (or not) an existing index", async () => {
    await tbl.createIndex("id");
    // Default is replace=true
@@ -1429,7 +1520,9 @@ describe("when optimizing a dataset", () => {

  it("delete unverified", async () => {
    const version = await table.version();
-    const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${version - 1}.manifest`;
+    const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
+      version - 1
+    }.manifest`;
    fs.rmSync(versionFile);

    let stats = await table.optimize({ deleteUnverified: false });
@@ -1943,3 +2036,52 @@ describe("column name options", () => {
    expect(results2.length).toBe(10);
  });
});
+
+describe("when creating an empty table", () => {
+  let con: Connection;
+  beforeEach(async () => {
+    const tmpDir = tmp.dirSync({ unsafeCleanup: true });
+    con = await connect(tmpDir.name);
+  });
+  afterEach(() => {
+    con.close();
+  });
+
+  it("can create an empty table from an arrow Schema", async () => {
+    const schema = new Schema([
+      new Field("id", new Int64()),
+      new Field("vector", new Float64()),
+    ]);
+    const table = await con.createEmptyTable("test", schema);
+    const actualSchema = await table.schema();
+    expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
+    expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
+    expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
+    expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
+  });
+
+  it("can create an empty table from schema that specifies field types by name", async () => {
+    const schemaLike = {
+      fields: [
+        {
+          name: "id",
+          type: "int64",
+          nullable: true,
+        },
+        {
+          name: "vector",
+          type: "float64",
+          nullable: true,
+        },
+      ],
+      metadata: new Map(),
+      names: ["id", "vector"],
+    } satisfies SchemaLike;
+    const table = await con.createEmptyTable("test", schemaLike);
+    const actualSchema = await table.schema();
+    expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
+    expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
+    expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
+    expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
+  });
+});
@@ -48,6 +48,7 @@
        "noUnreachableSuper": "error",
        "noUnsafeFinally": "error",
        "noUnsafeOptionalChaining": "error",
+        "noUnusedImports": "error",
        "noUnusedLabels": "error",
        "noUnusedVariables": "warn",
        "useIsNan": "error",
@@ -41,7 +41,6 @@ import {
  vectorFromArray as badVectorFromArray,
  makeBuilder,
  makeData,
-  makeTable,
} from "apache-arrow";
import { Buffers } from "apache-arrow/data";
import { type EmbeddingFunction } from "./embedding/embedding_function";
@@ -74,7 +73,7 @@ export type FieldLike =
  | {
      type: string;
      name: string;
-      nullable?: boolean;
+      nullable: boolean;
      metadata?: Map<string, string>;
    };

@@ -279,7 +278,7 @@ export class MakeArrowTableOptions {
}

/**
- * An enhanced version of the {@link makeTable} function from Apache Arrow
+ * An enhanced version of the apache-arrow makeTable function from Apache Arrow
 * that supports nested fields and embeddings columns.
 *
 * (typically you do not need to call this function. It will be called automatically
@@ -512,7 +511,11 @@ function* rowPathsAndValues(
    if (isObject(value)) {
      yield* rowPathsAndValues(value, [...basePath, key]);
    } else {
-      yield [[...basePath, key], value];
+      // Skip undefined values - they should be treated the same as missing fields
+      // for embedding function purposes
+      if (value !== undefined) {
+        yield [[...basePath, key], value];
+      }
    }
  }
}
@@ -701,7 +704,7 @@ function transposeData(
      }
      return current;
    });
-    return makeVector(values, field.type);
+    return makeVector(values, field.type, undefined, field.nullable);
  }
}

@@ -748,9 +751,30 @@ function makeVector(
  values: unknown[],
  type?: DataType,
  stringAsDictionary?: boolean,
+  nullable?: boolean,
  // biome-ignore lint/suspicious/noExplicitAny: skip
): Vector<any> {
  if (type !== undefined) {
+    // Convert undefined values to null for nullable fields
+    if (nullable) {
+      values = values.map((v) => (v === undefined ? null : v));
+    }
+
+    // workaround for: https://github.com/apache/arrow-js/issues/68
+    if (DataType.isBool(type)) {
+      const hasNonNullValue = values.some((v) => v !== null && v !== undefined);
+      if (!hasNonNullValue) {
+        const nullBitmap = new Uint8Array(Math.ceil(values.length / 8));
+        const data = makeData({
+          type: type,
+          length: values.length,
+          nullCount: values.length,
+          nullBitmap,
+        });
+        return arrowMakeVector(data);
+      }
+    }
+
    // No need for inference, let Arrow create it
    if (type instanceof Int) {
      if (DataType.isInt(type) && type.bitWidth === 64) {
@@ -875,7 +899,12 @@ async function applyEmbeddingsFromMetadata(
  for (const field of schema.fields) {
    if (!(field.name in columns)) {
      const nullValues = new Array(table.numRows).fill(null);
-      columns[field.name] = makeVector(nullValues, field.type);
+      columns[field.name] = makeVector(
+        nullValues,
+        field.type,
+        undefined,
+        field.nullable,
+      );
    }
  }

@@ -939,7 +968,12 @@ async function applyEmbeddings<T>(
  } else if (schema != null) {
    const destField = schema.fields.find((f) => f.name === destColumn);
    if (destField != null) {
-      newColumns[destColumn] = makeVector([], destField.type);
+      newColumns[destColumn] = makeVector(
+        [],
+        destField.type,
+        undefined,
+        destField.nullable,
+      );
    } else {
      throw new Error(
        `Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`,
@@ -1251,19 +1285,36 @@ function validateSchemaEmbeddings(
    if (isFixedSizeList(field.type)) {
      field = sanitizeField(field);
      if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
+        // Check if there's an embedding function registered for this field
+        let hasEmbeddingFunction = false;
+
+        // Check schema metadata for embedding functions
        if (schema.metadata.has("embedding_functions")) {
          const embeddings = JSON.parse(
            schema.metadata.get("embedding_functions")!,
          );
-          if (
-            // biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
-            embeddings.find((f: any) => f["vectorColumn"] === field.name) ===
-              undefined
-          ) {
+          // biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
+          if (embeddings.find((f: any) => f["vectorColumn"] === field.name)) {
+            hasEmbeddingFunction = true;
+          }
+        }
+
+        // Check passed embedding function parameter
+        if (embeddings && embeddings.vectorColumn === field.name) {
+          hasEmbeddingFunction = true;
+        }
+
+        // If the field is nullable AND there's no embedding function, allow undefined/omitted values
+        if (field.nullable && !hasEmbeddingFunction) {
+          fields.push(field);
+        } else {
+          // Either not nullable OR has embedding function - require explicit values
+          if (hasEmbeddingFunction) {
+            // Don't add to missingEmbeddingFields since this is expected to be filled by embedding function
+            fields.push(field);
+          } else {
            missingEmbeddingFields.push(field);
          }
-        } else {
-          missingEmbeddingFields.push(field);
        }
      } else {
        fields.push(field);
@@ -3,7 +3,6 @@

import {
  Data,
-  Schema,
  SchemaLike,
  TableLike,
  fromTableToStreamBuffer,
@@ -268,6 +267,33 @@ export abstract class Connection {
   * @param {string[]} namespace The namespace to drop tables from (defaults to root namespace).
   */
  abstract dropAllTables(namespace?: string[]): Promise<void>;
+
+  /**
+   * Clone a table from a source table.
+   *
+   * A shallow clone creates a new table that shares the underlying data files
+   * with the source table but has its own independent manifest. This allows
+   * both the source and cloned tables to evolve independently while initially
+   * sharing the same data, deletion, and index files.
+   *
+   * @param {string} targetTableName - The name of the target table to create.
+   * @param {string} sourceUri - The URI of the source table to clone from.
+   * @param {object} options - Clone options.
+   * @param {string[]} options.targetNamespace - The namespace for the target table (defaults to root namespace).
+   * @param {number} options.sourceVersion - The version of the source table to clone.
+   * @param {string} options.sourceTag - The tag of the source table to clone.
+   * @param {boolean} options.isShallow - Whether to perform a shallow clone (defaults to true).
+   */
+  abstract cloneTable(
+    targetTableName: string,
+    sourceUri: string,
+    options?: {
+      targetNamespace?: string[];
+      sourceVersion?: number;
+      sourceTag?: string;
+      isShallow?: boolean;
+    },
+  ): Promise<Table>;
}

/** @hideconstructor */
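A brief usage sketch of the API declared above, mirroring the calls exercised in the connection tests earlier in this changeset (the target name, source URI, and tag are illustrative):

```ts
// Sketch only: shallow-clone an existing table, optionally pinning a version or tag.
const cloned = await db.cloneTable(
  "events_clone", // target table name (illustrative)
  "/data/my-db/events.lance", // URI of the source table (illustrative)
  { sourceTag: "v1.0" }, // or { sourceVersion: 3 }; isShallow defaults to true
);
console.log(await cloned.countRows());
```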
@@ -332,6 +358,28 @@ export class LocalConnection extends Connection {
     return new LocalTable(innerTable);
   }

+  async cloneTable(
+    targetTableName: string,
+    sourceUri: string,
+    options?: {
+      targetNamespace?: string[];
+      sourceVersion?: number;
+      sourceTag?: string;
+      isShallow?: boolean;
+    },
+  ): Promise<Table> {
+    const innerTable = await this.inner.cloneTable(
+      targetTableName,
+      sourceUri,
+      options?.targetNamespace ?? [],
+      options?.sourceVersion ?? null,
+      options?.sourceTag ?? null,
+      options?.isShallow ?? true,
+    );
+
+    return new LocalTable(innerTable);
+  }
+
   private getStorageOptions(
     options?: Partial<CreateTableOptions>,
   ): Record<string, string> | undefined {
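Taken together, the two hunks above add cloneTable to the abstract Connection and to its local implementation. A minimal usage sketch, assuming a local database and an existing source table URI (the path and table names are illustrative):

import * as lancedb from "@lancedb/lancedb";

const db = await lancedb.connect("/tmp/sample-db");
// Shallow clone: the new table shares data files with the source but gets its
// own manifest, so the two tables can evolve independently from here on.
const clone = await db.cloneTable("my_table_clone", "/tmp/sample-db/source.lance", {
  isShallow: true,
});
console.log(await clone.countRows());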
@@ -43,6 +43,10 @@ export {
   DeleteResult,
   DropColumnsResult,
   UpdateResult,
+  SplitRandomOptions,
+  SplitHashOptions,
+  SplitSequentialOptions,
+  ShuffleOptions,
 } from "./native.js";

 export {
@@ -85,6 +89,7 @@ export {
   Index,
   IndexOptions,
   IvfPqOptions,
+  IvfRqOptions,
   IvfFlatOptions,
   HnswPqOptions,
   HnswSqOptions,
@@ -110,6 +115,7 @@ export {
 export { MergeInsertBuilder, WriteExecutionOptions } from "./merge";

 export * as embedding from "./embedding";
+export { permutationBuilder, PermutationBuilder } from "./permutation";
 export * as rerankers from "./rerankers";
 export {
   SchemaLike,
@@ -112,6 +112,77 @@ export interface IvfPqOptions {
   sampleRate?: number;
 }

+export interface IvfRqOptions {
+  /**
+   * The number of IVF partitions to create.
+   *
+   * This value should generally scale with the number of rows in the dataset.
+   * By default the number of partitions is the square root of the number of
+   * rows.
+   *
+   * If this value is too large then the first part of the search (picking the
+   * right partition) will be slow. If this value is too small then the second
+   * part of the search (searching within a partition) will be slow.
+   */
+  numPartitions?: number;
+
+  /**
+   * Number of bits per dimension for residual quantization.
+   *
+   * This value controls how much each residual component is compressed. The more
+   * bits, the more accurate the index will be but the slower search. Typical values
+   * are small integers; the default is 1 bit per dimension.
+   */
+  numBits?: number;
+
+  /**
+   * Distance type to use to build the index.
+   *
+   * Default value is "l2".
+   *
+   * This is used when training the index to calculate the IVF partitions
+   * (vectors are grouped in partitions with similar vectors according to this
+   * distance type) and during quantization.
+   *
+   * The distance type used to train an index MUST match the distance type used
+   * to search the index. Failure to do so will yield inaccurate results.
+   *
+   * The following distance types are available:
+   *
+   * "l2" - Euclidean distance.
+   * "cosine" - Cosine distance.
+   * "dot" - Dot product.
+   */
+  distanceType?: "l2" | "cosine" | "dot";
+
+  /**
+   * Max iterations to train IVF kmeans.
+   *
+   * When training an IVF index we use kmeans to calculate the partitions. This parameter
+   * controls how many iterations of kmeans to run.
+   *
+   * The default value is 50.
+   */
+  maxIterations?: number;
+
+  /**
+   * The number of vectors, per partition, to sample when training IVF kmeans.
+   *
+   * When an IVF index is trained, we need to calculate partitions. These are groups
+   * of vectors that are similar to each other. To do this we use an algorithm called kmeans.
+   *
+   * Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
+   * random sample of the data. This parameter controls the size of the sample. The total
+   * number of vectors used to train the index is `sample_rate * num_partitions`.
+   *
+   * Increasing this value might improve the quality of the index but in most cases the
+   * default should be sufficient.
+   *
+   * The default value is 256.
+   */
+  sampleRate?: number;
+}
+
 /**
  * Options to create an `HNSW_PQ` index
  */
@@ -523,6 +594,35 @@ export class Index {
         options?.distanceType,
         options?.numPartitions,
         options?.numSubVectors,
+        options?.numBits,
+        options?.maxIterations,
+        options?.sampleRate,
+      ),
+    );
+  }
+
+  /**
+   * Create an IvfRq index
+   *
+   * IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
+   * and organizes them into IVF partitions.
+   *
+   * The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
+   * The parameters `num_bits` and `num_partitions` control this process, providing a tradeoff
+   * between index size (and thus search speed) and index accuracy.
+   *
+   * The partitioning process is called IVF and the `num_partitions` parameter controls how
+   * many groups to create.
+   *
+   * Note that training an IVF RQ index on a large dataset is a slow operation and
+   * currently is also a memory intensive operation.
+   */
+  static ivfRq(options?: Partial<IvfRqOptions>) {
+    return new Index(
+      LanceDbIndex.ivfRq(
+        options?.distanceType,
+        options?.numPartitions,
+        options?.numBits,
         options?.maxIterations,
         options?.sampleRate,
       ),
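A hedged sketch of creating the new index type through the public API, assuming a table with a vector column named "vector" (the table and column names are illustrative):

import * as lancedb from "@lancedb/lancedb";
import { Index } from "@lancedb/lancedb";

const db = await lancedb.connect("/tmp/sample-db");
const table = await db.openTable("embeddings");
// numBits trades index size against accuracy; numPartitions follows the same
// guidance as IVF-PQ (roughly the square root of the row count).
await table.createIndex("vector", {
  config: Index.ivfRq({ numPartitions: 256, numBits: 1, distanceType: "cosine" }),
});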
@@ -70,6 +70,23 @@ export class MergeInsertBuilder {
       this.#schema,
     );
   }

+  /**
+   * Controls whether to use indexes for the merge operation.
+   *
+   * When set to `true` (the default), the operation will use an index if available
+   * on the join key for improved performance. When set to `false`, it forces a full
+   * table scan even if an index exists. This can be useful for benchmarking or when
+   * the query optimizer chooses a suboptimal path.
+   *
+   * @param useIndex - Whether to use indices for the merge operation. Defaults to `true`.
+   */
+  useIndex(useIndex: boolean): MergeInsertBuilder {
+    return new MergeInsertBuilder(
+      this.#native.useIndex(useIndex),
+      this.#schema,
+    );
+  }
   /**
    * Executes the merge insert operation
    *
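As a usage sketch (assuming a table keyed on an "id" column; the data is illustrative), the new option slots into the existing merge-insert chain:

const newRows = [{ id: 1, value: "updated" }];
// Force a full table scan for this merge, even if "id" has a scalar index.
await table
  .mergeInsert("id")
  .whenMatchedUpdateAll()
  .whenNotMatchedInsertAll()
  .useIndex(false)
  .execute(newRows);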
nodejs/lancedb/permutation.ts (new file, 188 lines)
@@ -0,0 +1,188 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

import {
  PermutationBuilder as NativePermutationBuilder,
  Table as NativeTable,
  ShuffleOptions,
  SplitHashOptions,
  SplitRandomOptions,
  SplitSequentialOptions,
  permutationBuilder as nativePermutationBuilder,
} from "./native.js";
import { LocalTable, Table } from "./table";

/**
 * A PermutationBuilder for creating data permutations with splits, shuffling, and filtering.
 *
 * This class provides a TypeScript wrapper around the native Rust PermutationBuilder,
 * offering methods to configure data splits, shuffling, and filtering before executing
 * the permutation to create a new table.
 */
export class PermutationBuilder {
  private inner: NativePermutationBuilder;

  /**
   * @hidden
   */
  constructor(inner: NativePermutationBuilder) {
    this.inner = inner;
  }

  /**
   * Configure random splits for the permutation.
   *
   * @param options - Configuration for random splitting
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * // Split by ratios
   * builder.splitRandom({ ratios: [0.7, 0.3], seed: 42 });
   *
   * // Split by counts
   * builder.splitRandom({ counts: [1000, 500], seed: 42 });
   *
   * // Split with fixed size
   * builder.splitRandom({ fixed: 100, seed: 42 });
   * ```
   */
  splitRandom(options: SplitRandomOptions): PermutationBuilder {
    const newInner = this.inner.splitRandom(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure hash-based splits for the permutation.
   *
   * @param options - Configuration for hash-based splitting
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * builder.splitHash({
   *   columns: ["user_id"],
   *   splitWeights: [70, 30],
   *   discardWeight: 0
   * });
   * ```
   */
  splitHash(options: SplitHashOptions): PermutationBuilder {
    const newInner = this.inner.splitHash(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure sequential splits for the permutation.
   *
   * @param options - Configuration for sequential splitting
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * // Split by ratios
   * builder.splitSequential({ ratios: [0.8, 0.2] });
   *
   * // Split by counts
   * builder.splitSequential({ counts: [800, 200] });
   *
   * // Split with fixed size
   * builder.splitSequential({ fixed: 1000 });
   * ```
   */
  splitSequential(options: SplitSequentialOptions): PermutationBuilder {
    const newInner = this.inner.splitSequential(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure calculated splits for the permutation.
   *
   * @param calculation - SQL expression for calculating splits
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * builder.splitCalculated("user_id % 3");
   * ```
   */
  splitCalculated(calculation: string): PermutationBuilder {
    const newInner = this.inner.splitCalculated(calculation);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure shuffling for the permutation.
   *
   * @param options - Configuration for shuffling
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * // Basic shuffle
   * builder.shuffle({ seed: 42 });
   *
   * // Shuffle with clump size
   * builder.shuffle({ seed: 42, clumpSize: 10 });
   * ```
   */
  shuffle(options: ShuffleOptions): PermutationBuilder {
    const newInner = this.inner.shuffle(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure filtering for the permutation.
   *
   * @param filter - SQL filter expression
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * builder.filter("age > 18 AND status = 'active'");
   * ```
   */
  filter(filter: string): PermutationBuilder {
    const newInner = this.inner.filter(filter);
    return new PermutationBuilder(newInner);
  }

  /**
   * Execute the permutation and create the destination table.
   *
   * @returns A Promise that resolves to the new Table instance
   * @example
   * ```ts
   * const permutationTable = await builder.execute();
   * console.log(`Created table: ${permutationTable.name}`);
   * ```
   */
  async execute(): Promise<Table> {
    const nativeTable: NativeTable = await this.inner.execute();
    return new LocalTable(nativeTable);
  }
}

/**
 * Create a permutation builder for the given table.
 *
 * @param table - The source table to create a permutation from
 * @param destTableName - The name for the destination permutation table
 * @returns A PermutationBuilder instance
 * @example
 * ```ts
 * const builder = permutationBuilder(sourceTable, "training_data")
 *   .splitRandom({ ratios: [0.8, 0.2], seed: 42 })
 *   .shuffle({ seed: 123 });
 *
 * const trainingTable = await builder.execute();
 * ```
 */
export function permutationBuilder(
  table: Table,
  destTableName: string,
): PermutationBuilder {
  // Extract the inner native table from the TypeScript wrapper
  const localTable = table as LocalTable;
  // Access inner through type assertion since it's private
  const nativeBuilder = nativePermutationBuilder(
    // biome-ignore lint/suspicious/noExplicitAny: need access to private variable
    (localTable as any).inner,
    destTableName,
  );
  return new PermutationBuilder(nativeBuilder);
}
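Pulling the new wrapper together, a short end-to-end sketch beyond the file's own examples, assuming an existing "events" table with "status" and "user_id" columns (all names are illustrative):

import * as lancedb from "@lancedb/lancedb";
import { permutationBuilder } from "@lancedb/lancedb";

const db = await lancedb.connect("/tmp/sample-db");
const events = await db.openTable("events");
// Keep only active users, split them 70/30 by a hash of user_id, and
// materialize the result as a new table named "events_split".
const splitTable = await permutationBuilder(events, "events_split")
  .filter("status = 'active'")
  .splitHash({ columns: ["user_id"], splitWeights: [70, 30], discardWeight: 0 })
  .execute();
console.log(await splitTable.countRows());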
@@ -326,6 +326,9 @@ export function sanitizeDictionary(typeLike: object) {

 // biome-ignore lint/suspicious/noExplicitAny: skip
 export function sanitizeType(typeLike: unknown): DataType<any> {
+  if (typeof typeLike === "string") {
+    return dataTypeFromName(typeLike);
+  }
   if (typeof typeLike !== "object" || typeLike === null) {
     throw Error("Expected a Type but object was null/undefined");
   }
@@ -447,7 +450,7 @@ export function sanitizeType(typeLike: unknown): DataType<any> {
     case Type.DurationSecond:
       return new DurationSecond();
     default:
-      throw new Error("Unrecoginized type id in schema: " + typeId);
+      throw new Error("Unrecognized type id in schema: " + typeId);
   }
 }

@@ -467,7 +470,15 @@ export function sanitizeField(fieldLike: unknown): Field {
       "The field passed in is missing a `type`/`name`/`nullable` property",
     );
   }
-  const type = sanitizeType(fieldLike.type);
+  let type: DataType;
+  try {
+    type = sanitizeType(fieldLike.type);
+  } catch (error: unknown) {
+    throw Error(
+      `Unable to sanitize type for field: ${fieldLike.name} due to error: ${error}`,
+      { cause: error },
+    );
+  }
   const name = fieldLike.name;
   if (!(typeof name === "string")) {
     throw Error("The field passed in had a non-string `name` property");
@@ -581,3 +592,46 @@ function sanitizeData(
     },
   );
 }
+
+const constructorsByTypeName = {
+  null: () => new Null(),
+  binary: () => new Binary(),
+  utf8: () => new Utf8(),
+  bool: () => new Bool(),
+  int8: () => new Int8(),
+  int16: () => new Int16(),
+  int32: () => new Int32(),
+  int64: () => new Int64(),
+  uint8: () => new Uint8(),
+  uint16: () => new Uint16(),
+  uint32: () => new Uint32(),
+  uint64: () => new Uint64(),
+  float16: () => new Float16(),
+  float32: () => new Float32(),
+  float64: () => new Float64(),
+  datemillisecond: () => new DateMillisecond(),
+  dateday: () => new DateDay(),
+  timenanosecond: () => new TimeNanosecond(),
+  timemicrosecond: () => new TimeMicrosecond(),
+  timemillisecond: () => new TimeMillisecond(),
+  timesecond: () => new TimeSecond(),
+  intervaldaytime: () => new IntervalDayTime(),
+  intervalyearmonth: () => new IntervalYearMonth(),
+  durationnanosecond: () => new DurationNanosecond(),
+  durationmicrosecond: () => new DurationMicrosecond(),
+  durationmillisecond: () => new DurationMillisecond(),
+  durationsecond: () => new DurationSecond(),
+} as const;
+
+type MappableTypeName = keyof typeof constructorsByTypeName;
+
+export function dataTypeFromName(typeName: string): DataType {
+  const normalizedTypeName = typeName.toLowerCase() as MappableTypeName;
+  const _constructor = constructorsByTypeName[normalizedTypeName];
+
+  if (!_constructor) {
+    throw new Error("Unrecognized type name in schema: " + typeName);
+  }
+
+  return _constructor();
+}
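With sanitizeType now accepting plain strings, a schema-like field can spell its type by name and dataTypeFromName resolves it case-insensitively. A small sketch (field names are illustrative, and such objects are only useful where the library already accepts a schema-like value):

// Each "type" below would be resolved via dataTypeFromName, e.g. "Utf8" -> new Utf8().
const schemaLike = {
  fields: [
    { name: "id", type: "int64", nullable: false },
    { name: "title", type: "Utf8", nullable: true },
    { name: "score", type: "float32", nullable: true },
  ],
};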
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-arm64",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.2",
   "os": ["darwin"],
   "cpu": ["arm64"],
   "main": "lancedb.darwin-arm64.node",

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-x64",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.2",
   "os": ["darwin"],
   "cpu": ["x64"],
   "main": "lancedb.darwin-x64.node",

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-gnu",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.2",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-gnu.node",

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-musl",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.2",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-musl.node",

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-gnu",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.2",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-gnu.node",

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-musl",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.2",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-musl.node",

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.2",
   "os": [
     "win32"
   ],

@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-x64-msvc",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.2",
   "os": ["win32"],
   "cpu": ["x64"],
   "main": "lancedb.win32-x64-msvc.node",
nodejs/package-lock.json (generated, 4 changed lines)
@@ -1,12 +1,12 @@
 {
   "name": "@lancedb/lancedb",
-  "version": "0.22.1-beta.0",
+  "version": "0.22.2",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@lancedb/lancedb",
-      "version": "0.22.1-beta.0",
+      "version": "0.22.2",
       "cpu": [
         "x64",
         "arm64"

@@ -11,7 +11,7 @@
     "ann"
   ],
   "private": false,
-  "version": "0.22.1-beta.0",
+  "version": "0.22.2",
   "main": "dist/index.js",
   "exports": {
     ".": "./dist/index.js",
@@ -213,6 +213,36 @@ impl Connection {
         Ok(Table::new(tbl))
     }

+    #[napi(catch_unwind)]
+    pub async fn clone_table(
+        &self,
+        target_table_name: String,
+        source_uri: String,
+        target_namespace: Vec<String>,
+        source_version: Option<i64>,
+        source_tag: Option<String>,
+        is_shallow: bool,
+    ) -> napi::Result<Table> {
+        let mut builder = self
+            .get_inner()?
+            .clone_table(&target_table_name, &source_uri);
+
+        builder = builder.target_namespace(target_namespace);
+
+        if let Some(version) = source_version {
+            builder = builder.source_version(version as u64);
+        }
+
+        if let Some(tag) = source_tag {
+            builder = builder.source_tag(tag);
+        }
+
+        builder = builder.is_shallow(is_shallow);
+
+        let tbl = builder.execute().await.default_error()?;
+        Ok(Table::new(tbl))
+    }
+
     /// Drop table with the name. Or raise an error if the table does not exist.
     #[napi(catch_unwind)]
     pub async fn drop_table(&self, name: String, namespace: Vec<String>) -> napi::Result<()> {
@@ -6,6 +6,7 @@ use std::sync::Mutex;
 use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder};
 use lancedb::index::vector::{
     IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder,
+    IvfRqIndexBuilder,
 };
 use lancedb::index::Index as LanceDbIndex;
 use napi_derive::napi;
@@ -65,6 +66,36 @@ impl Index {
         })
     }

+    #[napi(factory)]
+    pub fn ivf_rq(
+        distance_type: Option<String>,
+        num_partitions: Option<u32>,
+        num_bits: Option<u32>,
+        max_iterations: Option<u32>,
+        sample_rate: Option<u32>,
+    ) -> napi::Result<Self> {
+        let mut ivf_rq_builder = IvfRqIndexBuilder::default();
+        if let Some(distance_type) = distance_type {
+            let distance_type = parse_distance_type(distance_type)?;
+            ivf_rq_builder = ivf_rq_builder.distance_type(distance_type);
+        }
+        if let Some(num_partitions) = num_partitions {
+            ivf_rq_builder = ivf_rq_builder.num_partitions(num_partitions);
+        }
+        if let Some(num_bits) = num_bits {
+            ivf_rq_builder = ivf_rq_builder.num_bits(num_bits);
+        }
+        if let Some(max_iterations) = max_iterations {
+            ivf_rq_builder = ivf_rq_builder.max_iterations(max_iterations);
+        }
+        if let Some(sample_rate) = sample_rate {
+            ivf_rq_builder = ivf_rq_builder.sample_rate(sample_rate);
+        }
+        Ok(Self {
+            inner: Mutex::new(Some(LanceDbIndex::IvfRq(ivf_rq_builder))),
+        })
+    }
+
     #[napi(factory)]
     pub fn ivf_flat(
         distance_type: Option<String>,
@@ -12,6 +12,7 @@ mod header;
 mod index;
 mod iterator;
 pub mod merge;
+pub mod permutation;
 mod query;
 pub mod remote;
 mod rerankers;
@@ -43,6 +43,13 @@ impl NativeMergeInsertBuilder {
         self.inner.timeout(Duration::from_millis(timeout as u64));
     }

+    #[napi]
+    pub fn use_index(&self, use_index: bool) -> Self {
+        let mut this = self.clone();
+        this.inner.use_index(use_index);
+        this
+    }
+
     #[napi(catch_unwind)]
     pub async fn execute(&self, buf: Buffer) -> napi::Result<MergeResult> {
         let data = ipc_file_to_batches(buf.to_vec())
nodejs/src/permutation.rs (new file, 222 lines)
@@ -0,0 +1,222 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

use std::sync::{Arc, Mutex};

use crate::{error::NapiErrorExt, table::Table};
use lancedb::dataloader::{
    permutation::{PermutationBuilder as LancePermutationBuilder, ShuffleStrategy},
    split::{SplitSizes, SplitStrategy},
};
use napi_derive::napi;

#[napi(object)]
pub struct SplitRandomOptions {
    pub ratios: Option<Vec<f64>>,
    pub counts: Option<Vec<i64>>,
    pub fixed: Option<i64>,
    pub seed: Option<i64>,
}

#[napi(object)]
pub struct SplitHashOptions {
    pub columns: Vec<String>,
    pub split_weights: Vec<i64>,
    pub discard_weight: Option<i64>,
}

#[napi(object)]
pub struct SplitSequentialOptions {
    pub ratios: Option<Vec<f64>>,
    pub counts: Option<Vec<i64>>,
    pub fixed: Option<i64>,
}

#[napi(object)]
pub struct ShuffleOptions {
    pub seed: Option<i64>,
    pub clump_size: Option<i64>,
}

pub struct PermutationBuilderState {
    pub builder: Option<LancePermutationBuilder>,
    pub dest_table_name: String,
}

#[napi]
pub struct PermutationBuilder {
    state: Arc<Mutex<PermutationBuilderState>>,
}

impl PermutationBuilder {
    pub fn new(builder: LancePermutationBuilder, dest_table_name: String) -> Self {
        Self {
            state: Arc::new(Mutex::new(PermutationBuilderState {
                builder: Some(builder),
                dest_table_name,
            })),
        }
    }
}

impl PermutationBuilder {
    fn modify(
        &self,
        func: impl FnOnce(LancePermutationBuilder) -> LancePermutationBuilder,
    ) -> napi::Result<Self> {
        let mut state = self.state.lock().unwrap();
        let builder = state
            .builder
            .take()
            .ok_or_else(|| napi::Error::from_reason("Builder already consumed"))?;
        state.builder = Some(func(builder));
        Ok(Self {
            state: self.state.clone(),
        })
    }
}

#[napi]
impl PermutationBuilder {
    /// Configure random splits
    #[napi]
    pub fn split_random(&self, options: SplitRandomOptions) -> napi::Result<Self> {
        // Check that exactly one split type is provided
        let split_args_count = [
            options.ratios.is_some(),
            options.counts.is_some(),
            options.fixed.is_some(),
        ]
        .iter()
        .filter(|&&x| x)
        .count();

        if split_args_count != 1 {
            return Err(napi::Error::from_reason(
                "Exactly one of 'ratios', 'counts', or 'fixed' must be provided",
            ));
        }

        let sizes = if let Some(ratios) = options.ratios {
            SplitSizes::Percentages(ratios)
        } else if let Some(counts) = options.counts {
            SplitSizes::Counts(counts.into_iter().map(|c| c as u64).collect())
        } else if let Some(fixed) = options.fixed {
            SplitSizes::Fixed(fixed as u64)
        } else {
            unreachable!("One of the split arguments must be provided");
        };

        let seed = options.seed.map(|s| s as u64);

        self.modify(|builder| builder.with_split_strategy(SplitStrategy::Random { seed, sizes }))
    }

    /// Configure hash-based splits
    #[napi]
    pub fn split_hash(&self, options: SplitHashOptions) -> napi::Result<Self> {
        let split_weights = options
            .split_weights
            .into_iter()
            .map(|w| w as u64)
            .collect();
        let discard_weight = options.discard_weight.unwrap_or(0) as u64;

        self.modify(|builder| {
            builder.with_split_strategy(SplitStrategy::Hash {
                columns: options.columns,
                split_weights,
                discard_weight,
            })
        })
    }

    /// Configure sequential splits
    #[napi]
    pub fn split_sequential(&self, options: SplitSequentialOptions) -> napi::Result<Self> {
        // Check that exactly one split type is provided
        let split_args_count = [
            options.ratios.is_some(),
            options.counts.is_some(),
            options.fixed.is_some(),
        ]
        .iter()
        .filter(|&&x| x)
        .count();

        if split_args_count != 1 {
            return Err(napi::Error::from_reason(
                "Exactly one of 'ratios', 'counts', or 'fixed' must be provided",
            ));
        }

        let sizes = if let Some(ratios) = options.ratios {
            SplitSizes::Percentages(ratios)
        } else if let Some(counts) = options.counts {
            SplitSizes::Counts(counts.into_iter().map(|c| c as u64).collect())
        } else if let Some(fixed) = options.fixed {
            SplitSizes::Fixed(fixed as u64)
        } else {
            unreachable!("One of the split arguments must be provided");
        };

        self.modify(|builder| builder.with_split_strategy(SplitStrategy::Sequential { sizes }))
    }

    /// Configure calculated splits
    #[napi]
    pub fn split_calculated(&self, calculation: String) -> napi::Result<Self> {
        self.modify(|builder| {
            builder.with_split_strategy(SplitStrategy::Calculated { calculation })
        })
    }

    /// Configure shuffling
    #[napi]
    pub fn shuffle(&self, options: ShuffleOptions) -> napi::Result<Self> {
        let seed = options.seed.map(|s| s as u64);
        let clump_size = options.clump_size.map(|c| c as u64);

        self.modify(|builder| {
            builder.with_shuffle_strategy(ShuffleStrategy::Random { seed, clump_size })
        })
    }

    /// Configure filtering
    #[napi]
    pub fn filter(&self, filter: String) -> napi::Result<Self> {
        self.modify(|builder| builder.with_filter(filter))
    }

    /// Execute the permutation builder and create the table
    #[napi]
    pub async fn execute(&self) -> napi::Result<Table> {
        let (builder, dest_table_name) = {
            let mut state = self.state.lock().unwrap();
            let builder = state
                .builder
                .take()
                .ok_or_else(|| napi::Error::from_reason("Builder already consumed"))?;

            let dest_table_name = std::mem::take(&mut state.dest_table_name);
            (builder, dest_table_name)
        };

        let table = builder.build(&dest_table_name).await.default_error()?;
        Ok(Table::new(table))
    }
}

/// Create a permutation builder for the given table
#[napi]
pub fn permutation_builder(
    table: &crate::table::Table,
    dest_table_name: String,
) -> napi::Result<PermutationBuilder> {
    use lancedb::dataloader::permutation::PermutationBuilder as LancePermutationBuilder;

    let inner_table = table.inner_ref()?.clone();
    let inner_builder = LancePermutationBuilder::new(inner_table);

    Ok(PermutationBuilder::new(inner_builder, dest_table_name))
}
@@ -26,7 +26,7 @@ pub struct Table {
 }

 impl Table {
-    fn inner_ref(&self) -> napi::Result<&LanceDbTable> {
+    pub(crate) fn inner_ref(&self) -> napi::Result<&LanceDbTable> {
         self.inner
             .as_ref()
             .ok_or_else(|| napi::Error::from_reason(format!("Table {} is closed", self.name)))
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.25.1-beta.1"
+current_version = "0.25.3-beta.0"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.
@@ -24,6 +24,19 @@ commit = true
 message = "Bump version: {current_version} → {new_version}"
 commit_args = ""

+# Update Cargo.lock after version bump
+pre_commit_hooks = [
+    """
+    cd python && cargo update -p lancedb-python
+    if git diff --quiet ../Cargo.lock; then
+        echo "Cargo.lock unchanged"
+    else
+        git add ../Cargo.lock
+        echo "Updated and staged Cargo.lock"
+    fi
+    """,
+]
+
 [tool.bumpversion.parts.pre_l]
 values = ["beta", "final"]
 optional_value = "final"
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.25.1-beta.1"
+version = "0.25.3-beta.0"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
@@ -14,12 +14,12 @@ name = "_lancedb"
 crate-type = ["cdylib"]

 [dependencies]
-arrow = { version = "55.1", features = ["pyarrow"] }
+arrow = { version = "56.2", features = ["pyarrow"] }
 async-trait = "0.1"
 lancedb = { path = "../rust/lancedb", default-features = false }
 env_logger.workspace = true
-pyo3 = { version = "0.24", features = ["extension-module", "abi3-py39"] }
+pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
-pyo3-async-runtimes = { version = "0.24", features = [
+pyo3-async-runtimes = { version = "0.25", features = [
     "attributes",
     "tokio-runtime",
 ] }
@@ -28,7 +28,7 @@ futures.workspace = true
 tokio = { version = "1.40", features = ["sync"] }

 [build-dependencies]
-pyo3-build-config = { version = "0.24", features = [
+pyo3-build-config = { version = "0.25", features = [
     "extension-module",
     "abi3-py39",
 ] }
@@ -5,12 +5,12 @@ dynamic = ["version"]
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"deprecation",
|
"deprecation",
|
||||||
"numpy",
|
"numpy",
|
||||||
"overrides>=0.7",
|
"overrides>=0.7; python_version<'3.12'",
|
||||||
"packaging",
|
"packaging",
|
||||||
"pyarrow>=16",
|
"pyarrow>=16",
|
||||||
"pydantic>=1.10",
|
"pydantic>=1.10",
|
||||||
"tqdm>=4.27.0",
|
"tqdm>=4.27.0",
|
||||||
"lance-namespace==0.0.6"
|
"lance-namespace>=0.0.16"
|
||||||
]
|
]
|
||||||
description = "lancedb"
|
description = "lancedb"
|
||||||
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
|
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
|
||||||
|
|||||||
@@ -60,6 +60,15 @@ class Connection(object):
         storage_options: Optional[Dict[str, str]] = None,
         index_cache_size: Optional[int] = None,
     ) -> Table: ...
+    async def clone_table(
+        self,
+        target_table_name: str,
+        source_uri: str,
+        target_namespace: List[str] = [],
+        source_version: Optional[int] = None,
+        source_tag: Optional[str] = None,
+        is_shallow: bool = True,
+    ) -> Table: ...
     async def rename_table(
         self,
         cur_name: str,
@@ -124,6 +133,7 @@ class Tags:
     async def update(self, tag: str, version: int): ...

 class IndexConfig:
+    name: str
     index_type: str
     columns: List[str]

@@ -286,3 +296,34 @@ class AlterColumnsResult:

 class DropColumnsResult:
     version: int
+
+class AsyncPermutationBuilder:
+    def select(self, projections: Dict[str, str]) -> "AsyncPermutationBuilder": ...
+    def split_random(
+        self,
+        *,
+        ratios: Optional[List[float]] = None,
+        counts: Optional[List[int]] = None,
+        fixed: Optional[int] = None,
+        seed: Optional[int] = None,
+    ) -> "AsyncPermutationBuilder": ...
+    def split_hash(
+        self, columns: List[str], split_weights: List[int], *, discard_weight: int = 0
+    ) -> "AsyncPermutationBuilder": ...
+    def split_sequential(
+        self,
+        *,
+        ratios: Optional[List[float]] = None,
+        counts: Optional[List[int]] = None,
+        fixed: Optional[int] = None,
+    ) -> "AsyncPermutationBuilder": ...
+    def split_calculated(self, calculation: str) -> "AsyncPermutationBuilder": ...
+    def shuffle(
+        self, seed: Optional[int], clump_size: Optional[int]
+    ) -> "AsyncPermutationBuilder": ...
+    def filter(self, filter: str) -> "AsyncPermutationBuilder": ...
+    async def execute(self) -> Table: ...
+
+def async_permutation_builder(
+    table: Table, dest_table_name: str
+) -> AsyncPermutationBuilder: ...
@@ -5,11 +5,20 @@
 from __future__ import annotations

 from abc import abstractmethod
+from datetime import timedelta
 from pathlib import Path
+import sys
 from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union

+if sys.version_info >= (3, 12):
+    from typing import override
+
+    class EnforceOverrides:
+        pass
+else:
+    from overrides import EnforceOverrides, override  # type: ignore
+
 from lancedb.embeddings.registry import EmbeddingFunctionRegistry
-from overrides import EnforceOverrides, override  # type: ignore

 from lancedb.common import data_to_reader, sanitize_uri, validate_schema
 from lancedb.background_loop import LOOP
@@ -32,7 +41,6 @@ import deprecation
 if TYPE_CHECKING:
     import pyarrow as pa
     from .pydantic import LanceModel
-    from datetime import timedelta

 from ._lancedb import Connection as LanceDbConnection
 from .common import DATA, URI
@@ -444,7 +452,12 @@ class LanceDBConnection(DBConnection):
         read_consistency_interval: Optional[timedelta] = None,
         storage_options: Optional[Dict[str, str]] = None,
         session: Optional[Session] = None,
+        _inner: Optional[LanceDbConnection] = None,
     ):
+        if _inner is not None:
+            self._conn = _inner
+            return
+
         if not isinstance(uri, Path):
             scheme = get_uri_scheme(uri)
         is_local = isinstance(uri, Path) or scheme == "file"
@@ -453,11 +466,6 @@ class LanceDBConnection(DBConnection):
             uri = Path(uri)
             uri = uri.expanduser().absolute()
             Path(uri).mkdir(parents=True, exist_ok=True)
-        self._uri = str(uri)
-        self._entered = False
-        self.read_consistency_interval = read_consistency_interval
-        self.storage_options = storage_options
-        self.session = session

         if read_consistency_interval is not None:
             read_consistency_interval_secs = read_consistency_interval.total_seconds()
@@ -476,10 +484,32 @@ class LanceDBConnection(DBConnection):
             session,
         )
+
+        # TODO: It would be nice if we didn't store self.storage_options but it is
+        # currently used by the LanceTable.to_lance method. This doesn't _really_
+        # work because some paths like LanceDBConnection.from_inner will lose the
+        # storage_options. Also, this class really shouldn't be holding any state
+        # beyond _conn.
+        self.storage_options = storage_options
         self._conn = AsyncConnection(LOOP.run(do_connect()))

+    @property
+    def read_consistency_interval(self) -> Optional[timedelta]:
+        return LOOP.run(self._conn.get_read_consistency_interval())
+
+    @property
+    def session(self) -> Optional[Session]:
+        return self._conn.session
+
+    @property
+    def uri(self) -> str:
+        return self._conn.uri
+
+    @classmethod
+    def from_inner(cls, inner: LanceDbConnection):
+        return cls(None, _inner=inner)
+
     def __repr__(self) -> str:
-        val = f"{self.__class__.__name__}(uri={self._uri!r}"
+        val = f"{self.__class__.__name__}(uri={self._conn.uri!r}"
         if self.read_consistency_interval is not None:
             val += f", read_consistency_interval={repr(self.read_consistency_interval)}"
         val += ")"
@@ -489,6 +519,10 @@ class LanceDBConnection(DBConnection):
             conn = AsyncConnection(await lancedb_connect(self.uri))
             return await conn.table_names(start_after=start_after, limit=limit)

+    @property
+    def _inner(self) -> LanceDbConnection:
+        return self._conn._inner
+
     @override
     def list_namespaces(
         self,
@@ -665,6 +699,60 @@ class LanceDBConnection(DBConnection):
             index_cache_size=index_cache_size,
         )

+    def clone_table(
+        self,
+        target_table_name: str,
+        source_uri: str,
+        *,
+        target_namespace: List[str] = [],
+        source_version: Optional[int] = None,
+        source_tag: Optional[str] = None,
+        is_shallow: bool = True,
+    ) -> LanceTable:
+        """Clone a table from a source table.
+
+        A shallow clone creates a new table that shares the underlying data files
+        with the source table but has its own independent manifest. This allows
+        both the source and cloned tables to evolve independently while initially
+        sharing the same data, deletion, and index files.
+
+        Parameters
+        ----------
+        target_table_name: str
+            The name of the target table to create.
+        source_uri: str
+            The URI of the source table to clone from.
+        target_namespace: List[str], optional
+            The namespace for the target table.
+            None or empty list represents root namespace.
+        source_version: int, optional
+            The version of the source table to clone.
+        source_tag: str, optional
+            The tag of the source table to clone.
+        is_shallow: bool, default True
+            Whether to perform a shallow clone (True) or deep clone (False).
+            Currently only shallow clone is supported.
+
+        Returns
+        -------
+        A LanceTable object representing the cloned table.
+        """
+        LOOP.run(
+            self._conn.clone_table(
+                target_table_name,
+                source_uri,
+                target_namespace=target_namespace,
+                source_version=source_version,
+                source_tag=source_tag,
+                is_shallow=is_shallow,
+            )
+        )
+        return LanceTable.open(
+            self,
+            target_table_name,
+            namespace=target_namespace,
+        )
+
     @override
     def drop_table(
         self,
@@ -794,6 +882,13 @@ class AsyncConnection(object):
     def uri(self) -> str:
         return self._inner.uri

+    async def get_read_consistency_interval(self) -> Optional[timedelta]:
+        interval_secs = await self._inner.get_read_consistency_interval()
+        if interval_secs is not None:
+            return timedelta(seconds=interval_secs)
+        else:
+            return None
+
     async def list_namespaces(
         self,
         namespace: List[str] = [],
@@ -1136,6 +1231,54 @@ class AsyncConnection(object):
         )
         return AsyncTable(table)

+    async def clone_table(
+        self,
+        target_table_name: str,
+        source_uri: str,
+        *,
+        target_namespace: List[str] = [],
+        source_version: Optional[int] = None,
+        source_tag: Optional[str] = None,
+        is_shallow: bool = True,
+    ) -> AsyncTable:
+        """Clone a table from a source table.
+
+        A shallow clone creates a new table that shares the underlying data files
+        with the source table but has its own independent manifest. This allows
+        both the source and cloned tables to evolve independently while initially
+        sharing the same data, deletion, and index files.
+
+        Parameters
+        ----------
+        target_table_name: str
+            The name of the target table to create.
+        source_uri: str
+            The URI of the source table to clone from.
+        target_namespace: List[str], optional
+            The namespace for the target table.
+            None or empty list represents root namespace.
+        source_version: int, optional
+            The version of the source table to clone.
+        source_tag: str, optional
+            The tag of the source table to clone.
+        is_shallow: bool, default True
+            Whether to perform a shallow clone (True) or deep clone (False).
+            Currently only shallow clone is supported.
+
+        Returns
+        -------
+        An AsyncTable object representing the cloned table.
+        """
+        table = await self._inner.clone_table(
+            target_table_name,
+            source_uri,
+            target_namespace=target_namespace,
+            source_version=source_version,
+            source_tag=source_tag,
+            is_shallow=is_shallow,
+        )
+        return AsyncTable(table)
+
     async def rename_table(
         self,
         cur_name: str,
@@ -122,7 +122,7 @@ class EmbeddingFunctionRegistry:
             obj["vector_column"]: EmbeddingFunctionConfig(
                 vector_column=obj["vector_column"],
                 source_column=obj["source_column"],
-                function=self.get(obj["name"])(**obj["model"]),
+                function=self.get(obj["name"]).create(**obj["model"]),
             )
             for obj in raw_list
         }
@@ -251,6 +251,13 @@ class HnswPq:
|
|||||||
results. In most cases, there is no benefit to setting this higher than 500.
|
results. In most cases, there is no benefit to setting this higher than 500.
|
||||||
This value should be set to a value that is not less than `ef` in the
|
This value should be set to a value that is not less than `ef` in the
|
||||||
search phase.
|
search phase.
|
||||||
|
|
||||||
|
target_partition_size, default is 1,048,576
|
||||||
|
|
||||||
|
The target size of each partition.
|
||||||
|
|
||||||
|
This value controls the tradeoff between search performance and accuracy.
|
||||||
|
faster search but less accurate results as higher value.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
distance_type: Literal["l2", "cosine", "dot"] = "l2"
|
distance_type: Literal["l2", "cosine", "dot"] = "l2"
|
||||||
@@ -261,6 +268,7 @@ class HnswPq:
|
|||||||
sample_rate: int = 256
|
sample_rate: int = 256
|
||||||
m: int = 20
|
m: int = 20
|
||||||
ef_construction: int = 300
|
ef_construction: int = 300
|
||||||
|
target_partition_size: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -351,6 +359,12 @@ class HnswSq:
|
|||||||
This value should be set to a value that is not less than `ef` in the search
|
This value should be set to a value that is not less than `ef` in the search
|
||||||
phase.
|
phase.
|
||||||
|
|
||||||
|
target_partition_size, default is 1,048,576
|
||||||
|
|
||||||
|
The target size of each partition.
|
||||||
|
|
||||||
|
This value controls the tradeoff between search performance and accuracy.
|
||||||
|
faster search but less accurate results as higher value.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
distance_type: Literal["l2", "cosine", "dot"] = "l2"
|
distance_type: Literal["l2", "cosine", "dot"] = "l2"
|
||||||
@@ -359,6 +373,7 @@ class HnswSq:
|
|||||||
sample_rate: int = 256
|
sample_rate: int = 256
|
||||||
m: int = 20
|
m: int = 20
|
||||||
ef_construction: int = 300
|
ef_construction: int = 300
|
||||||
|
target_partition_size: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -444,12 +459,20 @@ class IvfFlat:
|
|||||||
cases the default should be sufficient.
|
cases the default should be sufficient.
|
||||||
|
|
||||||
The default value is 256.
|
The default value is 256.
|
||||||
|
|
||||||
|
target_partition_size, default is 8192
|
||||||
|
|
||||||
|
The target size of each partition.
|
||||||
|
|
||||||
|
This value controls the tradeoff between search performance and accuracy.
|
||||||
|
faster search but less accurate results as higher value.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
distance_type: Literal["l2", "cosine", "dot", "hamming"] = "l2"
|
distance_type: Literal["l2", "cosine", "dot", "hamming"] = "l2"
|
||||||
num_partitions: Optional[int] = None
|
num_partitions: Optional[int] = None
|
||||||
max_iterations: int = 50
|
max_iterations: int = 50
|
||||||
sample_rate: int = 256
|
sample_rate: int = 256
|
||||||
|
target_partition_size: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -564,6 +587,13 @@ class IvfPq:
|
|||||||
cases the default should be sufficient.
|
cases the default should be sufficient.
|
||||||
|
|
||||||
The default value is 256.
|
The default value is 256.
|
||||||
|
|
||||||
|
target_partition_size, default is 8192
|
||||||
|
|
||||||
|
The target size of each partition.
|
||||||
|
|
||||||
|
This value controls the tradeoff between search performance and accuracy.
|
||||||
|
faster search but less accurate results as higher value.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
distance_type: Literal["l2", "cosine", "dot"] = "l2"
|
distance_type: Literal["l2", "cosine", "dot"] = "l2"
|
||||||
@@ -572,11 +602,56 @@ class IvfPq:
|
|||||||
num_bits: int = 8
|
num_bits: int = 8
|
||||||
max_iterations: int = 50
|
max_iterations: int = 50
|
||||||
sample_rate: int = 256
|
sample_rate: int = 256
|
||||||
|
target_partition_size: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class IvfRq:
|
||||||
|
"""Describes an IVF RQ Index
|
||||||
|
|
||||||
|
IVF-RQ (Residual Quantization) stores a compressed copy of each vector using
|
||||||
|
residual quantization and organizes the vectors into IVF partitions. Parameters
|
||||||
|
largely mirror IVF-PQ for consistency.
|
||||||
|
|
||||||
|
Attributes
|
||||||
|
----------
|
||||||
|
distance_type: str, default "l2"
|
||||||
|
Distance metric used to train the index and for quantization.
|
||||||
|
|
||||||
|
The following distance types are available:
|
||||||
|
|
||||||
|
"l2" - Euclidean distance.
|
||||||
|
"cosine" - Cosine distance.
|
||||||
|
"dot" - Dot product.
|
||||||
|
|
||||||
|
num_partitions: int, default sqrt(num_rows)
|
||||||
|
Number of IVF partitions to create.
|
||||||
|
|
||||||
|
num_bits: int, default 1
|
||||||
|
Number of bits to encode each dimension.
|
||||||
|
|
||||||
|
max_iterations: int, default 50
|
||||||
|
Max iterations to train kmeans when computing IVF partitions.
|
||||||
|
|
||||||
|
sample_rate: int, default 256
|
||||||
|
Controls the number of training vectors: sample_rate * num_partitions.
|
||||||
|
|
||||||
|
target_partition_size: int, default 8192
|
||||||
|
Target size of each partition.
|
||||||
|
"""
|
||||||
|
|
||||||
|
distance_type: Literal["l2", "cosine", "dot"] = "l2"
|
||||||
|
num_partitions: Optional[int] = None
|
||||||
|
num_bits: int = 1
|
||||||
|
max_iterations: int = 50
|
||||||
|
sample_rate: int = 256
|
||||||
|
target_partition_size: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"BTree",
|
"BTree",
|
||||||
"IvfPq",
|
"IvfPq",
|
||||||
|
"IvfRq",
|
||||||
"IvfFlat",
|
"IvfFlat",
|
||||||
"HnswPq",
|
"HnswPq",
|
||||||
"HnswSq",
|
"HnswSq",
|
||||||
|
|||||||
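The new IvfRq config mirrors IvfPq, and the test added later in this change exercises it through the async create_index API. A minimal sketch, assuming an open async table with a "vector" column; the sync call is an equally hedged sketch of the index_type="IVF_RQ" route added below:

from lancedb.index import IvfRq

# Async path: pass the config object directly (mirrors the new test below).
await table.create_index("vector", config=IvfRq(num_bits=1))

# Sync path: LanceTable.create_index now also routes index_type="IVF_RQ" to IvfRq,
# forwarding num_bits, sample_rate, and target_partition_size.
sync_table.create_index(index_type="IVF_RQ", num_bits=1)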
@@ -33,6 +33,7 @@ class LanceMergeInsertBuilder(object):
|
|||||||
self._when_not_matched_by_source_delete = False
|
self._when_not_matched_by_source_delete = False
|
||||||
self._when_not_matched_by_source_condition = None
|
self._when_not_matched_by_source_condition = None
|
||||||
self._timeout = None
|
self._timeout = None
|
||||||
|
self._use_index = True
|
||||||
|
|
||||||
def when_matched_update_all(
|
def when_matched_update_all(
|
||||||
self, *, where: Optional[str] = None
|
self, *, where: Optional[str] = None
|
||||||
@@ -78,6 +79,23 @@ class LanceMergeInsertBuilder(object):
|
|||||||
self._when_not_matched_by_source_condition = condition
|
self._when_not_matched_by_source_condition = condition
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def use_index(self, use_index: bool) -> LanceMergeInsertBuilder:
|
||||||
|
"""
|
||||||
|
Controls whether to use indexes for the merge operation.
|
||||||
|
|
||||||
|
When set to `True` (the default), the operation will use an index if available
|
||||||
|
on the join key for improved performance. When set to `False`, it forces a full
|
||||||
|
table scan even if an index exists. This can be useful for benchmarking or when
|
||||||
|
the query optimizer chooses a suboptimal path.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
use_index: bool
|
||||||
|
Whether to use indices for the merge operation. Defaults to `True`.
|
||||||
|
"""
|
||||||
|
self._use_index = use_index
|
||||||
|
return self
|
||||||
|
|
||||||
def execute(
|
def execute(
|
||||||
self,
|
self,
|
||||||
new_data: DATA,
|
new_data: DATA,
|
||||||
|
|||||||
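The use_index toggle documented above slots into the existing merge-insert builder chain. A hedged sketch, where table, the "id" join key, and new_rows are placeholders:

# Force a full-table scan for the merge, e.g. to benchmark against the indexed path.
(
    table.merge_insert("id")
    .when_matched_update_all()
    .when_not_matched_insert_all()
    .use_index(False)  # default is True; False bypasses any index on the join key
    .execute(new_rows)
)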
@@ -12,13 +12,18 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from typing import Dict, Iterable, List, Optional, Union
|
from typing import Dict, Iterable, List, Optional, Union
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
if sys.version_info >= (3, 12):
|
||||||
|
from typing import override
|
||||||
|
else:
|
||||||
|
from overrides import override
|
||||||
|
|
||||||
from lancedb.db import DBConnection
|
from lancedb.db import DBConnection
|
||||||
from lancedb.table import LanceTable, Table
|
from lancedb.table import LanceTable, Table
|
||||||
from lancedb.util import validate_table_name
|
from lancedb.util import validate_table_name
|
||||||
from lancedb.common import validate_schema
|
from lancedb.common import validate_schema
|
||||||
from lancedb.table import sanitize_create_table
|
from lancedb.table import sanitize_create_table
|
||||||
from overrides import override
|
|
||||||
|
|
||||||
from lance_namespace import LanceNamespace, connect as namespace_connect
|
from lance_namespace import LanceNamespace, connect as namespace_connect
|
||||||
from lance_namespace_urllib3_client.models import (
|
from lance_namespace_urllib3_client.models import (
|
||||||
|
|||||||
python/python/lancedb/permutation.py (new file, 72 lines)
@@ -0,0 +1,72 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
from ._lancedb import async_permutation_builder
|
||||||
|
from .table import LanceTable
|
||||||
|
from .background_loop import LOOP
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
class PermutationBuilder:
|
||||||
|
def __init__(self, table: LanceTable, dest_table_name: str):
|
||||||
|
self._async = async_permutation_builder(table, dest_table_name)
|
||||||
|
|
||||||
|
def select(self, projections: dict[str, str]) -> "PermutationBuilder":
|
||||||
|
self._async.select(projections)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def split_random(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
ratios: Optional[list[float]] = None,
|
||||||
|
counts: Optional[list[int]] = None,
|
||||||
|
fixed: Optional[int] = None,
|
||||||
|
seed: Optional[int] = None,
|
||||||
|
) -> "PermutationBuilder":
|
||||||
|
self._async.split_random(ratios=ratios, counts=counts, fixed=fixed, seed=seed)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def split_hash(
|
||||||
|
self,
|
||||||
|
columns: list[str],
|
||||||
|
split_weights: list[int],
|
||||||
|
*,
|
||||||
|
discard_weight: Optional[int] = None,
|
||||||
|
) -> "PermutationBuilder":
|
||||||
|
self._async.split_hash(columns, split_weights, discard_weight=discard_weight)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def split_sequential(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
ratios: Optional[list[float]] = None,
|
||||||
|
counts: Optional[list[int]] = None,
|
||||||
|
fixed: Optional[int] = None,
|
||||||
|
) -> "PermutationBuilder":
|
||||||
|
self._async.split_sequential(ratios=ratios, counts=counts, fixed=fixed)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def split_calculated(self, calculation: str) -> "PermutationBuilder":
|
||||||
|
self._async.split_calculated(calculation)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def shuffle(
|
||||||
|
self, *, seed: Optional[int] = None, clump_size: Optional[int] = None
|
||||||
|
) -> "PermutationBuilder":
|
||||||
|
self._async.shuffle(seed=seed, clump_size=clump_size)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def filter(self, filter: str) -> "PermutationBuilder":
|
||||||
|
self._async.filter(filter)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def execute(self) -> LanceTable:
|
||||||
|
async def do_execute():
|
||||||
|
inner_tbl = await self._async.execute()
|
||||||
|
return LanceTable.from_inner(inner_tbl)
|
||||||
|
|
||||||
|
return LOOP.run(do_execute())
|
||||||
|
|
||||||
|
|
||||||
|
def permutation_builder(table: LanceTable, dest_table_name: str) -> PermutationBuilder:
|
||||||
|
return PermutationBuilder(table, dest_table_name)
|
||||||
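The builder above is exercised end to end by the new test_permutation.py further down. A short sketch of the intended chaining, with the destination name and split parameters chosen only for illustration:

from lancedb.permutation import permutation_builder

# Create a shuffled 80/20 permutation of `table` as a new table.
perm_tbl = (
    permutation_builder(table, "train_eval_permutation")
    .split_random(ratios=[0.8, 0.2], seed=42)
    .shuffle(seed=42)
    .execute()
)
assert perm_tbl.count_rows() == table.count_rows()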
@@ -5,15 +5,20 @@
|
|||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
import logging
|
import logging
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
import sys
|
||||||
from typing import Any, Dict, Iterable, List, Optional, Union
|
from typing import Any, Dict, Iterable, List, Optional, Union
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
|
if sys.version_info >= (3, 12):
|
||||||
|
from typing import override
|
||||||
|
else:
|
||||||
|
from overrides import override
|
||||||
|
|
||||||
# Remove this import to fix circular dependency
|
# Remove this import to fix circular dependency
|
||||||
# from lancedb import connect_async
|
# from lancedb import connect_async
|
||||||
from lancedb.remote import ClientConfig
|
from lancedb.remote import ClientConfig
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
from overrides import override
|
|
||||||
|
|
||||||
from ..common import DATA
|
from ..common import DATA
|
||||||
from ..db import DBConnection, LOOP
|
from ..db import DBConnection, LOOP
|
||||||
@@ -212,6 +217,53 @@ class RemoteDBConnection(DBConnection):
|
|||||||
table = LOOP.run(self._conn.open_table(name, namespace=namespace))
|
table = LOOP.run(self._conn.open_table(name, namespace=namespace))
|
||||||
return RemoteTable(table, self.db_name)
|
return RemoteTable(table, self.db_name)
|
||||||
|
|
||||||
|
def clone_table(
|
||||||
|
self,
|
||||||
|
target_table_name: str,
|
||||||
|
source_uri: str,
|
||||||
|
*,
|
||||||
|
target_namespace: List[str] = [],
|
||||||
|
source_version: Optional[int] = None,
|
||||||
|
source_tag: Optional[str] = None,
|
||||||
|
is_shallow: bool = True,
|
||||||
|
) -> Table:
|
||||||
|
"""Clone a table from a source table.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
target_table_name: str
|
||||||
|
The name of the target table to create.
|
||||||
|
source_uri: str
|
||||||
|
The URI of the source table to clone from.
|
||||||
|
target_namespace: List[str], optional
|
||||||
|
The namespace for the target table.
|
||||||
|
None or empty list represents root namespace.
|
||||||
|
source_version: int, optional
|
||||||
|
The version of the source table to clone.
|
||||||
|
source_tag: str, optional
|
||||||
|
The tag of the source table to clone.
|
||||||
|
is_shallow: bool, default True
|
||||||
|
Whether to perform a shallow clone (True) or deep clone (False).
|
||||||
|
Currently only shallow clone is supported.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
A RemoteTable object representing the cloned table.
|
||||||
|
"""
|
||||||
|
from .table import RemoteTable
|
||||||
|
|
||||||
|
table = LOOP.run(
|
||||||
|
self._conn.clone_table(
|
||||||
|
target_table_name,
|
||||||
|
source_uri,
|
||||||
|
target_namespace=target_namespace,
|
||||||
|
source_version=source_version,
|
||||||
|
source_tag=source_tag,
|
||||||
|
is_shallow=is_shallow,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return RemoteTable(table, self.db_name)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def create_table(
|
def create_table(
|
||||||
self,
|
self,
|
||||||
|
|||||||
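clone_table on the remote connection mirrors the local behaviour shown in the new tests later in this change. A sketch, where the target name, source URI, and tag are placeholders:

# Shallow-clone a table from an existing Lance dataset URI at a tagged version.
cloned = db.clone_table(
    "articles_snapshot",
    "s3://my-bucket/datasets/articles.lance",
    source_tag="v1.0",  # or source_version=<int>; omit both to clone the latest
    is_shallow=True,    # deep clones are not supported yet
)
print(cloned.count_rows())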
@@ -114,7 +114,7 @@ class RemoteTable(Table):
|
|||||||
index_type: Literal["BTREE", "BITMAP", "LABEL_LIST", "scalar"] = "scalar",
|
index_type: Literal["BTREE", "BITMAP", "LABEL_LIST", "scalar"] = "scalar",
|
||||||
*,
|
*,
|
||||||
replace: bool = False,
|
replace: bool = False,
|
||||||
wait_timeout: timedelta = None,
|
wait_timeout: Optional[timedelta] = None,
|
||||||
name: Optional[str] = None,
|
name: Optional[str] = None,
|
||||||
):
|
):
|
||||||
"""Creates a scalar index
|
"""Creates a scalar index
|
||||||
@@ -153,7 +153,7 @@ class RemoteTable(Table):
|
|||||||
column: str,
|
column: str,
|
||||||
*,
|
*,
|
||||||
replace: bool = False,
|
replace: bool = False,
|
||||||
wait_timeout: timedelta = None,
|
wait_timeout: Optional[timedelta] = None,
|
||||||
with_position: bool = False,
|
with_position: bool = False,
|
||||||
# tokenizer configs:
|
# tokenizer configs:
|
||||||
base_tokenizer: str = "simple",
|
base_tokenizer: str = "simple",
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ from .linear_combination import LinearCombinationReranker
|
|||||||
from .openai import OpenaiReranker
|
from .openai import OpenaiReranker
|
||||||
from .jinaai import JinaReranker
|
from .jinaai import JinaReranker
|
||||||
from .rrf import RRFReranker
|
from .rrf import RRFReranker
|
||||||
|
from .mrr import MRRReranker
|
||||||
from .answerdotai import AnswerdotaiRerankers
|
from .answerdotai import AnswerdotaiRerankers
|
||||||
from .voyageai import VoyageAIReranker
|
from .voyageai import VoyageAIReranker
|
||||||
|
|
||||||
@@ -23,4 +24,5 @@ __all__ = [
|
|||||||
"RRFReranker",
|
"RRFReranker",
|
||||||
"AnswerdotaiRerankers",
|
"AnswerdotaiRerankers",
|
||||||
"VoyageAIReranker",
|
"VoyageAIReranker",
|
||||||
|
"MRRReranker",
|
||||||
]
|
]
|
||||||
|
|||||||
python/python/lancedb/rerankers/mrr.py (new file, 169 lines)
@@ -0,0 +1,169 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
|
||||||
|
from typing import Union, List, TYPE_CHECKING
|
||||||
|
import pyarrow as pa
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
|
from .base import Reranker
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from ..table import LanceVectorQueryBuilder
|
||||||
|
|
||||||
|
|
||||||
|
class MRRReranker(Reranker):
|
||||||
|
"""
|
||||||
|
Reranks the results using the Mean Reciprocal Rank (MRR) algorithm based
|
||||||
|
on the ranks of results from the vector and FTS searches.
|
||||||
|
Algorithm reference - https://en.wikipedia.org/wiki/Mean_reciprocal_rank
|
||||||
|
|
||||||
|
MRR calculates the average of reciprocal ranks across different search results.
|
||||||
|
For each document, it computes the reciprocal of its rank in each system,
|
||||||
|
then takes the mean of these reciprocal ranks as the final score.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
weight_vector : float, default 0.5
|
||||||
|
Weight for vector search results (0.0 to 1.0)
|
||||||
|
weight_fts : float, default 0.5
|
||||||
|
Weight for FTS search results (0.0 to 1.0)
|
||||||
|
Note: weight_vector + weight_fts should equal 1.0
|
||||||
|
return_score : str, default "relevance"
|
||||||
|
Options are "relevance" or "all"
|
||||||
|
The type of score to return. If "relevance", will return only the relevance
|
||||||
|
score. If "all", will return all scores from the vector and FTS search along
|
||||||
|
with the relevance score.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
weight_vector: float = 0.5,
|
||||||
|
weight_fts: float = 0.5,
|
||||||
|
return_score="relevance",
|
||||||
|
):
|
||||||
|
if not (0.0 <= weight_vector <= 1.0):
|
||||||
|
raise ValueError("weight_vector must be between 0.0 and 1.0")
|
||||||
|
if not (0.0 <= weight_fts <= 1.0):
|
||||||
|
raise ValueError("weight_fts must be between 0.0 and 1.0")
|
||||||
|
if abs(weight_vector + weight_fts - 1.0) > 1e-6:
|
||||||
|
raise ValueError("weight_vector + weight_fts must equal 1.0")
|
||||||
|
|
||||||
|
super().__init__(return_score)
|
||||||
|
self.weight_vector = weight_vector
|
||||||
|
self.weight_fts = weight_fts
|
||||||
|
|
||||||
|
def rerank_hybrid(
|
||||||
|
self,
|
||||||
|
query: str, # noqa: F821
|
||||||
|
vector_results: pa.Table,
|
||||||
|
fts_results: pa.Table,
|
||||||
|
):
|
||||||
|
vector_ids = vector_results["_rowid"].to_pylist() if vector_results else []
|
||||||
|
fts_ids = fts_results["_rowid"].to_pylist() if fts_results else []
|
||||||
|
|
||||||
|
# Maps result_id to list of (type, reciprocal_rank)
|
||||||
|
mrr_score_map = defaultdict(list)
|
||||||
|
|
||||||
|
if vector_ids:
|
||||||
|
for rank, result_id in enumerate(vector_ids, 1):
|
||||||
|
reciprocal_rank = 1.0 / rank
|
||||||
|
mrr_score_map[result_id].append(("vector", reciprocal_rank))
|
||||||
|
|
||||||
|
if fts_ids:
|
||||||
|
for rank, result_id in enumerate(fts_ids, 1):
|
||||||
|
reciprocal_rank = 1.0 / rank
|
||||||
|
mrr_score_map[result_id].append(("fts", reciprocal_rank))
|
||||||
|
|
||||||
|
final_mrr_scores = {}
|
||||||
|
for result_id, scores in mrr_score_map.items():
|
||||||
|
vector_rr = 0.0
|
||||||
|
fts_rr = 0.0
|
||||||
|
|
||||||
|
for score_type, reciprocal_rank in scores:
|
||||||
|
if score_type == "vector":
|
||||||
|
vector_rr = reciprocal_rank
|
||||||
|
elif score_type == "fts":
|
||||||
|
fts_rr = reciprocal_rank
|
||||||
|
|
||||||
|
# If a document doesn't appear, its reciprocal rank is 0
|
||||||
|
weighted_mrr = self.weight_vector * vector_rr + self.weight_fts * fts_rr
|
||||||
|
final_mrr_scores[result_id] = weighted_mrr
|
||||||
|
|
||||||
|
combined_results = self.merge_results(vector_results, fts_results)
|
||||||
|
combined_row_ids = combined_results["_rowid"].to_pylist()
|
||||||
|
relevance_scores = [final_mrr_scores[row_id] for row_id in combined_row_ids]
|
||||||
|
combined_results = combined_results.append_column(
|
||||||
|
"_relevance_score", pa.array(relevance_scores, type=pa.float32())
|
||||||
|
)
|
||||||
|
combined_results = combined_results.sort_by(
|
||||||
|
[("_relevance_score", "descending")]
|
||||||
|
)
|
||||||
|
|
||||||
|
if self.score == "relevance":
|
||||||
|
combined_results = self._keep_relevance_score(combined_results)
|
||||||
|
|
||||||
|
return combined_results
|
||||||
|
|
||||||
|
def rerank_multivector(
|
||||||
|
self,
|
||||||
|
vector_results: Union[List[pa.Table], List["LanceVectorQueryBuilder"]],
|
||||||
|
query: str = None,
|
||||||
|
deduplicate: bool = True, # noqa: F821
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Reranks the results from multiple vector searches using MRR algorithm.
|
||||||
|
Each vector search result is treated as a separate ranking system,
|
||||||
|
and MRR calculates the mean of reciprocal ranks across all systems.
|
||||||
|
This cannot reuse rerank_hybrid because MRR semantics require treating
|
||||||
|
each vector result as a separate ranking system.
|
||||||
|
"""
|
||||||
|
if not all(isinstance(v, type(vector_results[0])) for v in vector_results):
|
||||||
|
raise ValueError(
|
||||||
|
"All elements in vector_results should be of the same type"
|
||||||
|
)
|
||||||
|
|
||||||
|
# avoid circular import
|
||||||
|
if type(vector_results[0]).__name__ == "LanceVectorQueryBuilder":
|
||||||
|
vector_results = [result.to_arrow() for result in vector_results]
|
||||||
|
elif not isinstance(vector_results[0], pa.Table):
|
||||||
|
raise ValueError(
|
||||||
|
"vector_results should be a list of pa.Table or LanceVectorQueryBuilder"
|
||||||
|
)
|
||||||
|
|
||||||
|
if not all("_rowid" in result.column_names for result in vector_results):
|
||||||
|
raise ValueError(
|
||||||
|
"'_rowid' is required for deduplication. \
|
||||||
|
add _rowid to search results like this: \
|
||||||
|
`search().with_row_id(True)`"
|
||||||
|
)
|
||||||
|
|
||||||
|
mrr_score_map = defaultdict(list)
|
||||||
|
|
||||||
|
for result_table in vector_results:
|
||||||
|
result_ids = result_table["_rowid"].to_pylist()
|
||||||
|
for rank, result_id in enumerate(result_ids, 1):
|
||||||
|
reciprocal_rank = 1.0 / rank
|
||||||
|
mrr_score_map[result_id].append(reciprocal_rank)
|
||||||
|
|
||||||
|
final_mrr_scores = {}
|
||||||
|
for result_id, reciprocal_ranks in mrr_score_map.items():
|
||||||
|
mean_rr = np.mean(reciprocal_ranks)
|
||||||
|
final_mrr_scores[result_id] = mean_rr
|
||||||
|
|
||||||
|
combined = pa.concat_tables(vector_results, **self._concat_tables_args)
|
||||||
|
combined = self._deduplicate(combined)
|
||||||
|
|
||||||
|
combined_row_ids = combined["_rowid"].to_pylist()
|
||||||
|
|
||||||
|
relevance_scores = [final_mrr_scores[row_id] for row_id in combined_row_ids]
|
||||||
|
combined = combined.append_column(
|
||||||
|
"_relevance_score", pa.array(relevance_scores, type=pa.float32())
|
||||||
|
)
|
||||||
|
combined = combined.sort_by([("_relevance_score", "descending")])
|
||||||
|
|
||||||
|
if self.score == "relevance":
|
||||||
|
combined = self._keep_relevance_score(combined)
|
||||||
|
|
||||||
|
return combined
|
||||||
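MRRReranker plugs into the same rerank hook as the other rerankers in this package. A sketch of hybrid-search usage, assuming a table that already has both a vector index and an FTS index; the query text and weights are illustrative:

from lancedb.rerankers import MRRReranker

reranker = MRRReranker(weight_vector=0.7, weight_fts=0.3, return_score="relevance")
results = (
    table.search("solar power storage", query_type="hybrid")
    .rerank(reranker=reranker)
    .limit(10)
    .to_pandas()
)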
@@ -44,7 +44,7 @@ import numpy as np
|
|||||||
|
|
||||||
from .common import DATA, VEC, VECTOR_COLUMN_NAME
|
from .common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||||
from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
|
from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
|
||||||
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
|
from .index import BTree, IvfFlat, IvfPq, Bitmap, IvfRq, LabelList, HnswPq, HnswSq, FTS
|
||||||
from .merge import LanceMergeInsertBuilder
|
from .merge import LanceMergeInsertBuilder
|
||||||
from .pydantic import LanceModel, model_to_dict
|
from .pydantic import LanceModel, model_to_dict
|
||||||
from .query import (
|
from .query import (
|
||||||
@@ -74,6 +74,7 @@ from .index import lang_mapping
|
|||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
from .db import LanceDBConnection
|
||||||
from ._lancedb import (
|
from ._lancedb import (
|
||||||
Table as LanceDBTable,
|
Table as LanceDBTable,
|
||||||
OptimizeStats,
|
OptimizeStats,
|
||||||
@@ -88,7 +89,6 @@ if TYPE_CHECKING:
|
|||||||
MergeResult,
|
MergeResult,
|
||||||
UpdateResult,
|
UpdateResult,
|
||||||
)
|
)
|
||||||
from .db import LanceDBConnection
|
|
||||||
from .index import IndexConfig
|
from .index import IndexConfig
|
||||||
import pandas
|
import pandas
|
||||||
import PIL
|
import PIL
|
||||||
@@ -691,6 +691,7 @@ class Table(ABC):
|
|||||||
ef_construction: int = 300,
|
ef_construction: int = 300,
|
||||||
name: Optional[str] = None,
|
name: Optional[str] = None,
|
||||||
train: bool = True,
|
train: bool = True,
|
||||||
|
target_partition_size: Optional[int] = None,
|
||||||
):
|
):
|
||||||
"""Create an index on the table.
|
"""Create an index on the table.
|
||||||
|
|
||||||
@@ -1469,10 +1470,7 @@ class Table(ABC):
|
|||||||
be deleted unless they are at least 7 days old. If delete_unverified is True
|
be deleted unless they are at least 7 days old. If delete_unverified is True
|
||||||
then these files will be deleted regardless of their age.
|
then these files will be deleted regardless of their age.
|
||||||
retrain: bool, default False
|
retrain: bool, default False
|
||||||
If True, retrain the vector indices, this would refine the IVF clustering
|
This parameter is no longer used and is deprecated.
|
||||||
and quantization, which may improve the search accuracy. It's faster than
|
|
||||||
re-creating the index from scratch, so it's recommended to try this first,
|
|
||||||
when the data distribution has changed significantly.
|
|
||||||
|
|
||||||
Experimental API
|
Experimental API
|
||||||
----------------
|
----------------
|
||||||
@@ -1709,22 +1707,38 @@ class LanceTable(Table):
|
|||||||
namespace: List[str] = [],
|
namespace: List[str] = [],
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
|
_async: AsyncTable = None,
|
||||||
):
|
):
|
||||||
self._conn = connection
|
self._conn = connection
|
||||||
self._namespace = namespace
|
self._namespace = namespace
|
||||||
-        self._table = LOOP.run(
-            connection._conn.open_table(
-                name,
-                namespace=namespace,
-                storage_options=storage_options,
-                index_cache_size=index_cache_size,
-            )
-        )
+        if _async is not None:
+            self._table = _async
+        else:
+            self._table = LOOP.run(
+                connection._conn.open_table(
+                    name,
+                    namespace=namespace,
+                    storage_options=storage_options,
+                    index_cache_size=index_cache_size,
+                )
+            )
|
|
||||||
@property
|
@property
|
||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
return self._table.name
|
return self._table.name
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_inner(cls, tbl: LanceDBTable):
|
||||||
|
from .db import LanceDBConnection
|
||||||
|
|
||||||
|
async_tbl = AsyncTable(tbl)
|
||||||
|
conn = LanceDBConnection.from_inner(tbl.database())
|
||||||
|
return cls(
|
||||||
|
conn,
|
||||||
|
async_tbl.name,
|
||||||
|
_async=async_tbl,
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def open(cls, db, name, *, namespace: List[str] = [], **kwargs):
|
def open(cls, db, name, *, namespace: List[str] = [], **kwargs):
|
||||||
tbl = cls(db, name, namespace=namespace, **kwargs)
|
tbl = cls(db, name, namespace=namespace, **kwargs)
|
||||||
@@ -1993,7 +2007,7 @@ class LanceTable(Table):
|
|||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
num_bits: int = 8,
|
num_bits: int = 8,
|
||||||
index_type: Literal[
|
index_type: Literal[
|
||||||
"IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
|
"IVF_FLAT", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
|
||||||
] = "IVF_PQ",
|
] = "IVF_PQ",
|
||||||
max_iterations: int = 50,
|
max_iterations: int = 50,
|
||||||
sample_rate: int = 256,
|
sample_rate: int = 256,
|
||||||
@@ -2002,6 +2016,7 @@ class LanceTable(Table):
|
|||||||
*,
|
*,
|
||||||
name: Optional[str] = None,
|
name: Optional[str] = None,
|
||||||
train: bool = True,
|
train: bool = True,
|
||||||
|
target_partition_size: Optional[int] = None,
|
||||||
):
|
):
|
||||||
"""Create an index on the table."""
|
"""Create an index on the table."""
|
||||||
if accelerator is not None:
|
if accelerator is not None:
|
||||||
@@ -2018,6 +2033,7 @@ class LanceTable(Table):
|
|||||||
num_bits=num_bits,
|
num_bits=num_bits,
|
||||||
m=m,
|
m=m,
|
||||||
ef_construction=ef_construction,
|
ef_construction=ef_construction,
|
||||||
|
target_partition_size=target_partition_size,
|
||||||
)
|
)
|
||||||
self.checkout_latest()
|
self.checkout_latest()
|
||||||
return
|
return
|
||||||
@@ -2027,6 +2043,7 @@ class LanceTable(Table):
|
|||||||
num_partitions=num_partitions,
|
num_partitions=num_partitions,
|
||||||
max_iterations=max_iterations,
|
max_iterations=max_iterations,
|
||||||
sample_rate=sample_rate,
|
sample_rate=sample_rate,
|
||||||
|
target_partition_size=target_partition_size,
|
||||||
)
|
)
|
||||||
elif index_type == "IVF_PQ":
|
elif index_type == "IVF_PQ":
|
||||||
config = IvfPq(
|
config = IvfPq(
|
||||||
@@ -2036,6 +2053,16 @@ class LanceTable(Table):
|
|||||||
num_bits=num_bits,
|
num_bits=num_bits,
|
||||||
max_iterations=max_iterations,
|
max_iterations=max_iterations,
|
||||||
sample_rate=sample_rate,
|
sample_rate=sample_rate,
|
||||||
|
target_partition_size=target_partition_size,
|
||||||
|
)
|
||||||
|
elif index_type == "IVF_RQ":
|
||||||
|
config = IvfRq(
|
||||||
|
distance_type=metric,
|
||||||
|
num_partitions=num_partitions,
|
||||||
|
num_bits=num_bits,
|
||||||
|
max_iterations=max_iterations,
|
||||||
|
sample_rate=sample_rate,
|
||||||
|
target_partition_size=target_partition_size,
|
||||||
)
|
)
|
||||||
elif index_type == "IVF_HNSW_PQ":
|
elif index_type == "IVF_HNSW_PQ":
|
||||||
config = HnswPq(
|
config = HnswPq(
|
||||||
@@ -2047,6 +2074,7 @@ class LanceTable(Table):
|
|||||||
sample_rate=sample_rate,
|
sample_rate=sample_rate,
|
||||||
m=m,
|
m=m,
|
||||||
ef_construction=ef_construction,
|
ef_construction=ef_construction,
|
||||||
|
target_partition_size=target_partition_size,
|
||||||
)
|
)
|
||||||
elif index_type == "IVF_HNSW_SQ":
|
elif index_type == "IVF_HNSW_SQ":
|
||||||
config = HnswSq(
|
config = HnswSq(
|
||||||
@@ -2056,6 +2084,7 @@ class LanceTable(Table):
|
|||||||
sample_rate=sample_rate,
|
sample_rate=sample_rate,
|
||||||
m=m,
|
m=m,
|
||||||
ef_construction=ef_construction,
|
ef_construction=ef_construction,
|
||||||
|
target_partition_size=target_partition_size,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown index type {index_type}")
|
raise ValueError(f"Unknown index type {index_type}")
|
||||||
@@ -2743,6 +2772,10 @@ class LanceTable(Table):
|
|||||||
self._table._do_merge(merge, new_data, on_bad_vectors, fill_value)
|
self._table._do_merge(merge, new_data, on_bad_vectors, fill_value)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _inner(self) -> LanceDBTable:
|
||||||
|
return self._table._inner
|
||||||
|
|
||||||
@deprecation.deprecated(
|
@deprecation.deprecated(
|
||||||
deprecated_in="0.21.0",
|
deprecated_in="0.21.0",
|
||||||
current_version=__version__,
|
current_version=__version__,
|
||||||
@@ -2828,10 +2861,7 @@ class LanceTable(Table):
|
|||||||
be deleted unless they are at least 7 days old. If delete_unverified is True
|
be deleted unless they are at least 7 days old. If delete_unverified is True
|
||||||
then these files will be deleted regardless of their age.
|
then these files will be deleted regardless of their age.
|
||||||
retrain: bool, default False
|
retrain: bool, default False
|
||||||
If True, retrain the vector indices, this would refine the IVF clustering
|
This parameter is no longer used and is deprecated.
|
||||||
and quantization, which may improve the search accuracy. It's faster than
|
|
||||||
re-creating the index from scratch, so it's recommended to try this first,
|
|
||||||
when the data distribution has changed significantly.
|
|
||||||
|
|
||||||
Experimental API
|
Experimental API
|
||||||
----------------
|
----------------
|
||||||
@@ -3329,7 +3359,7 @@ class AsyncTable:
|
|||||||
*,
|
*,
|
||||||
replace: Optional[bool] = None,
|
replace: Optional[bool] = None,
|
||||||
config: Optional[
|
config: Optional[
|
||||||
Union[IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
|
Union[IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
|
||||||
] = None,
|
] = None,
|
||||||
wait_timeout: Optional[timedelta] = None,
|
wait_timeout: Optional[timedelta] = None,
|
||||||
name: Optional[str] = None,
|
name: Optional[str] = None,
|
||||||
@@ -3368,11 +3398,12 @@ class AsyncTable:
|
|||||||
"""
|
"""
|
||||||
if config is not None:
|
if config is not None:
|
||||||
if not isinstance(
|
if not isinstance(
|
||||||
config, (IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS)
|
config,
|
||||||
|
(IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS),
|
||||||
):
|
):
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
"config must be an instance of IvfPq, HnswPq, HnswSq, BTree,"
|
"config must be an instance of IvfPq, IvfRq, HnswPq, HnswSq, BTree,"
|
||||||
" Bitmap, LabelList, or FTS"
|
" Bitmap, LabelList, or FTS, but got " + str(type(config))
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
await self._inner.create_index(
|
await self._inner.create_index(
|
||||||
@@ -3919,6 +3950,7 @@ class AsyncTable:
|
|||||||
when_not_matched_by_source_delete=merge._when_not_matched_by_source_delete,
|
when_not_matched_by_source_delete=merge._when_not_matched_by_source_delete,
|
||||||
when_not_matched_by_source_condition=merge._when_not_matched_by_source_condition,
|
when_not_matched_by_source_condition=merge._when_not_matched_by_source_condition,
|
||||||
timeout=merge._timeout,
|
timeout=merge._timeout,
|
||||||
|
use_index=merge._use_index,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -4291,10 +4323,7 @@ class AsyncTable:
|
|||||||
be deleted unless they are at least 7 days old. If delete_unverified is True
|
be deleted unless they are at least 7 days old. If delete_unverified is True
|
||||||
then these files will be deleted regardless of their age.
|
then these files will be deleted regardless of their age.
|
||||||
retrain: bool, default False
|
retrain: bool, default False
|
||||||
If True, retrain the vector indices, this would refine the IVF clustering
|
This parameter is no longer used and is deprecated.
|
||||||
and quantization, which may improve the search accuracy. It's faster than
|
|
||||||
re-creating the index from scratch, so it's recommended to try this first,
|
|
||||||
when the data distribution has changed significantly.
|
|
||||||
|
|
||||||
Experimental API
|
Experimental API
|
||||||
----------------
|
----------------
|
||||||
@@ -4317,10 +4346,19 @@ class AsyncTable:
|
|||||||
cleanup_since_ms: Optional[int] = None
|
cleanup_since_ms: Optional[int] = None
|
||||||
if cleanup_older_than is not None:
|
if cleanup_older_than is not None:
|
||||||
cleanup_since_ms = round(cleanup_older_than.total_seconds() * 1000)
|
cleanup_since_ms = round(cleanup_older_than.total_seconds() * 1000)
|
||||||
|
|
||||||
|
if retrain:
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
warnings.warn(
|
||||||
|
"The 'retrain' parameter is deprecated and will be removed in a "
|
||||||
|
"future version.",
|
||||||
|
DeprecationWarning,
|
||||||
|
)
|
||||||
|
|
||||||
return await self._inner.optimize(
|
return await self._inner.optimize(
|
||||||
cleanup_since_ms=cleanup_since_ms,
|
cleanup_since_ms=cleanup_since_ms,
|
||||||
delete_unverified=delete_unverified,
|
delete_unverified=delete_unverified,
|
||||||
retrain=retrain,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
async def list_indices(self) -> Iterable[IndexConfig]:
|
async def list_indices(self) -> Iterable[IndexConfig]:
|
||||||
|
|||||||
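With retrain deprecated, optimize reduces to the compaction and cleanup options. A sketch for the async table, where the 7-day window simply mirrors the documented default:

from datetime import timedelta

# Compact fragments and prune versions older than a week; no retrain flag needed.
stats = await table.optimize(cleanup_older_than=timedelta(days=7))
print(stats)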
@@ -18,10 +18,17 @@ AddMode = Literal["append", "overwrite"]
|
|||||||
CreateMode = Literal["create", "overwrite"]
|
CreateMode = Literal["create", "overwrite"]
|
||||||
|
|
||||||
# Index type literals
|
# Index type literals
|
||||||
VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"]
|
VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ", "IVF_RQ"]
|
||||||
ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"]
|
ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"]
|
||||||
IndexType = Literal[
|
IndexType = Literal[
|
||||||
"IVF_PQ", "IVF_HNSW_PQ", "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP", "LABEL_LIST"
|
"IVF_PQ",
|
||||||
|
"IVF_HNSW_PQ",
|
||||||
|
"IVF_HNSW_SQ",
|
||||||
|
"FTS",
|
||||||
|
"BTREE",
|
||||||
|
"BITMAP",
|
||||||
|
"LABEL_LIST",
|
||||||
|
"IVF_RQ",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Tokenizer literals
|
# Tokenizer literals
|
||||||
|
|||||||
@@ -747,15 +747,16 @@ def test_local_namespace_operations(tmp_path):
|
|||||||
# Create a local database connection
|
# Create a local database connection
|
||||||
db = lancedb.connect(tmp_path)
|
db = lancedb.connect(tmp_path)
|
||||||
|
|
||||||
-    # Test list_namespaces returns empty list
+    # Test list_namespaces returns empty list for root namespace
     namespaces = list(db.list_namespaces())
     assert namespaces == []
 
-    # Test list_namespaces with parameters still returns empty list
-    namespaces_with_params = list(
-        db.list_namespaces(namespace=["test"], page_token="token", limit=5)
-    )
-    assert namespaces_with_params == []
+    # Test list_namespaces with non-empty namespace raises NotImplementedError
+    with pytest.raises(
+        NotImplementedError,
+        match="Namespace operations are not supported for listing database",
+    ):
+        list(db.list_namespaces(namespace=["test"]))
|
|
||||||
|
|
||||||
def test_local_create_namespace_not_supported(tmp_path):
|
def test_local_create_namespace_not_supported(tmp_path):
|
||||||
@@ -830,3 +831,119 @@ def test_local_table_operations_with_namespace_raise_error(tmp_path):
|
|||||||
# Test table_names without namespace - should work normally
|
# Test table_names without namespace - should work normally
|
||||||
tables_root = list(db.table_names())
|
tables_root = list(db.table_names())
|
||||||
assert "test_table" in tables_root
|
assert "test_table" in tables_root
|
||||||
|
|
||||||
|
|
||||||
|
def test_clone_table_latest_version(tmp_path):
|
||||||
|
"""Test cloning a table with the latest version (default behavior)"""
|
||||||
|
import os
|
||||||
|
|
||||||
|
db = lancedb.connect(tmp_path)
|
||||||
|
|
||||||
|
# Create source table with some data
|
||||||
|
data = [
|
||||||
|
{"id": 1, "text": "hello", "vector": [1.0, 2.0]},
|
||||||
|
{"id": 2, "text": "world", "vector": [3.0, 4.0]},
|
||||||
|
]
|
||||||
|
source_table = db.create_table("source", data=data)
|
||||||
|
|
||||||
|
# Add more data to create a new version
|
||||||
|
more_data = [{"id": 3, "text": "test", "vector": [5.0, 6.0]}]
|
||||||
|
source_table.add(more_data)
|
||||||
|
|
||||||
|
# Clone the table (should get latest version with 3 rows)
|
||||||
|
source_uri = os.path.join(tmp_path, "source.lance")
|
||||||
|
cloned_table = db.clone_table("cloned", source_uri)
|
||||||
|
|
||||||
|
# Verify cloned table has all 3 rows
|
||||||
|
assert cloned_table.count_rows() == 3
|
||||||
|
assert "cloned" in db.table_names()
|
||||||
|
|
||||||
|
# Verify data matches
|
||||||
|
cloned_data = cloned_table.to_pandas()
|
||||||
|
assert len(cloned_data) == 3
|
||||||
|
assert set(cloned_data["id"].tolist()) == {1, 2, 3}
|
||||||
|
|
||||||
|
|
||||||
|
def test_clone_table_specific_version(tmp_path):
|
||||||
|
"""Test cloning a table from a specific version"""
|
||||||
|
import os
|
||||||
|
|
||||||
|
db = lancedb.connect(tmp_path)
|
||||||
|
|
||||||
|
# Create source table with initial data
|
||||||
|
data = [
|
||||||
|
{"id": 1, "text": "hello", "vector": [1.0, 2.0]},
|
||||||
|
{"id": 2, "text": "world", "vector": [3.0, 4.0]},
|
||||||
|
]
|
||||||
|
source_table = db.create_table("source", data=data)
|
||||||
|
|
||||||
|
# Get the initial version
|
||||||
|
initial_version = source_table.version
|
||||||
|
|
||||||
|
# Add more data to create a new version
|
||||||
|
more_data = [{"id": 3, "text": "test", "vector": [5.0, 6.0]}]
|
||||||
|
source_table.add(more_data)
|
||||||
|
|
||||||
|
# Verify source now has 3 rows
|
||||||
|
assert source_table.count_rows() == 3
|
||||||
|
|
||||||
|
# Clone from the initial version (should have only 2 rows)
|
||||||
|
source_uri = os.path.join(tmp_path, "source.lance")
|
||||||
|
cloned_table = db.clone_table("cloned", source_uri, source_version=initial_version)
|
||||||
|
|
||||||
|
# Verify cloned table has only the initial 2 rows
|
||||||
|
assert cloned_table.count_rows() == 2
|
||||||
|
cloned_data = cloned_table.to_pandas()
|
||||||
|
assert set(cloned_data["id"].tolist()) == {1, 2}
|
||||||
|
|
||||||
|
|
||||||
|
def test_clone_table_with_tag(tmp_path):
|
||||||
|
"""Test cloning a table from a tagged version"""
|
||||||
|
import os
|
||||||
|
|
||||||
|
db = lancedb.connect(tmp_path)
|
||||||
|
|
||||||
|
# Create source table with initial data
|
||||||
|
data = [
|
||||||
|
{"id": 1, "text": "hello", "vector": [1.0, 2.0]},
|
||||||
|
{"id": 2, "text": "world", "vector": [3.0, 4.0]},
|
||||||
|
]
|
||||||
|
source_table = db.create_table("source", data=data)
|
||||||
|
|
||||||
|
# Create a tag for the current version
|
||||||
|
source_table.tags.create("v1.0", source_table.version)
|
||||||
|
|
||||||
|
# Add more data after the tag
|
||||||
|
more_data = [{"id": 3, "text": "test", "vector": [5.0, 6.0]}]
|
||||||
|
source_table.add(more_data)
|
||||||
|
|
||||||
|
# Verify source now has 3 rows
|
||||||
|
assert source_table.count_rows() == 3
|
||||||
|
|
||||||
|
# Clone from the tagged version (should have only 2 rows)
|
||||||
|
source_uri = os.path.join(tmp_path, "source.lance")
|
||||||
|
cloned_table = db.clone_table("cloned", source_uri, source_tag="v1.0")
|
||||||
|
|
||||||
|
# Verify cloned table has only the tagged version's 2 rows
|
||||||
|
assert cloned_table.count_rows() == 2
|
||||||
|
cloned_data = cloned_table.to_pandas()
|
||||||
|
assert set(cloned_data["id"].tolist()) == {1, 2}
|
||||||
|
|
||||||
|
|
||||||
|
def test_clone_table_deep_clone_fails(tmp_path):
|
||||||
|
"""Test that deep clone raises an unsupported error"""
|
||||||
|
import os
|
||||||
|
|
||||||
|
db = lancedb.connect(tmp_path)
|
||||||
|
|
||||||
|
# Create source table with some data
|
||||||
|
data = [
|
||||||
|
{"id": 1, "text": "hello", "vector": [1.0, 2.0]},
|
||||||
|
{"id": 2, "text": "world", "vector": [3.0, 4.0]},
|
||||||
|
]
|
||||||
|
db.create_table("source", data=data)
|
||||||
|
|
||||||
|
# Try to create a deep clone (should fail)
|
||||||
|
source_uri = os.path.join(tmp_path, "source.lance")
|
||||||
|
with pytest.raises(Exception, match="Deep clone is not yet implemented"):
|
||||||
|
db.clone_table("cloned", source_uri, is_shallow=False)
|
||||||
|
|||||||
@@ -114,6 +114,63 @@ def test_embedding_function_variables():
|
|||||||
assert func.safe_model_dump()["secret_key"] == "$var:secret"
|
assert func.safe_model_dump()["secret_key"] == "$var:secret"
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_functions_with_variables():
|
||||||
|
@register("variable-parsing-test")
|
||||||
|
class VariableParsingFunction(TextEmbeddingFunction):
|
||||||
|
api_key: str
|
||||||
|
base_url: Optional[str] = None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def sensitive_keys():
|
||||||
|
return ["api_key"]
|
||||||
|
|
||||||
|
def ndims(self):
|
||||||
|
return 10
|
||||||
|
|
||||||
|
def generate_embeddings(self, texts):
|
||||||
|
# Mock implementation that just returns random embeddings
|
||||||
|
# In real usage, this would use the api_key to call an API
|
||||||
|
return [np.random.rand(self.ndims()).tolist() for _ in texts]
|
||||||
|
|
||||||
|
registry = EmbeddingFunctionRegistry.get_instance()
|
||||||
|
|
||||||
|
registry.set_var("test_api_key", "sk-test-key-12345")
|
||||||
|
registry.set_var("test_base_url", "https://api.example.com")
|
||||||
|
|
||||||
|
conf = EmbeddingFunctionConfig(
|
||||||
|
source_column="text",
|
||||||
|
vector_column="vector",
|
||||||
|
function=registry.get("variable-parsing-test").create(
|
||||||
|
api_key="$var:test_api_key", base_url="$var:test_base_url"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
metadata = registry.get_table_metadata([conf])
|
||||||
|
|
||||||
|
# Create a mock arrow table with the metadata
|
||||||
|
schema = pa.schema(
|
||||||
|
[pa.field("text", pa.string()), pa.field("vector", pa.list_(pa.float32(), 10))]
|
||||||
|
)
|
||||||
|
table = pa.table({"text": [], "vector": []}, schema=schema)
|
||||||
|
table = table.replace_schema_metadata(metadata)
|
||||||
|
|
||||||
|
ds = lance.write_dataset(table, "memory://")
|
||||||
|
|
||||||
|
configs = registry.parse_functions(ds.schema.metadata)
|
||||||
|
|
||||||
|
assert "vector" in configs
|
||||||
|
parsed_func = configs["vector"].function
|
||||||
|
|
||||||
|
assert parsed_func.api_key == "sk-test-key-12345"
|
||||||
|
assert parsed_func.base_url == "https://api.example.com"
|
||||||
|
|
||||||
|
embeddings = parsed_func.generate_embeddings(["test text"])
|
||||||
|
assert len(embeddings) == 1
|
||||||
|
assert len(embeddings[0]) == 10
|
||||||
|
|
||||||
|
assert parsed_func.safe_model_dump()["api_key"] == "$var:test_api_key"
|
||||||
|
|
||||||
|
|
||||||
def test_embedding_with_bad_results(tmp_path):
|
def test_embedding_with_bad_results(tmp_path):
|
||||||
@register("null-embedding")
|
@register("null-embedding")
|
||||||
class NullEmbeddingFunction(TextEmbeddingFunction):
|
class NullEmbeddingFunction(TextEmbeddingFunction):
|
||||||
|
|||||||
@@ -8,7 +8,17 @@ import pyarrow as pa
|
|||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
from lancedb import AsyncConnection, AsyncTable, connect_async
|
from lancedb import AsyncConnection, AsyncTable, connect_async
|
||||||
from lancedb.index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
|
from lancedb.index import (
|
||||||
|
BTree,
|
||||||
|
IvfFlat,
|
||||||
|
IvfPq,
|
||||||
|
IvfRq,
|
||||||
|
Bitmap,
|
||||||
|
LabelList,
|
||||||
|
HnswPq,
|
||||||
|
HnswSq,
|
||||||
|
FTS,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest_asyncio.fixture
|
@pytest_asyncio.fixture
|
||||||
@@ -35,6 +45,8 @@ async def some_table(db_async):
|
|||||||
"tags": [
|
"tags": [
|
||||||
[f"tag{random.randint(0, 8)}" for _ in range(2)] for _ in range(NROWS)
|
[f"tag{random.randint(0, 8)}" for _ in range(2)] for _ in range(NROWS)
|
||||||
],
|
],
|
||||||
|
"is_active": [random.choice([True, False]) for _ in range(NROWS)],
|
||||||
|
"data": [random.randbytes(random.randint(0, 128)) for _ in range(NROWS)],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return await db_async.create_table(
|
return await db_async.create_table(
|
||||||
@@ -99,10 +111,17 @@ async def test_create_fixed_size_binary_index(some_table: AsyncTable):
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
 async def test_create_bitmap_index(some_table: AsyncTable):
     await some_table.create_index("id", config=Bitmap())
+    await some_table.create_index("is_active", config=Bitmap())
+    await some_table.create_index("data", config=Bitmap())
     indices = await some_table.list_indices()
-    assert str(indices) == '[Index(Bitmap, columns=["id"], name="id_idx")]'
-    indices = await some_table.list_indices()
-    assert len(indices) == 1
+    assert len(indices) == 3
+    assert indices[0].index_type == "Bitmap"
+    assert indices[0].columns == ["id"]
+    assert indices[1].index_type == "Bitmap"
+    assert indices[1].columns == ["is_active"]
+    assert indices[2].index_type == "Bitmap"
+    assert indices[2].columns == ["data"]
|
|
||||||
index_name = indices[0].name
|
index_name = indices[0].name
|
||||||
stats = await some_table.index_stats(index_name)
|
stats = await some_table.index_stats(index_name)
|
||||||
assert stats.index_type == "BITMAP"
|
assert stats.index_type == "BITMAP"
|
||||||
@@ -111,6 +130,11 @@ async def test_create_bitmap_index(some_table: AsyncTable):
|
|||||||
assert stats.num_unindexed_rows == 0
|
assert stats.num_unindexed_rows == 0
|
||||||
assert stats.num_indices == 1
|
assert stats.num_indices == 1
|
||||||
|
|
||||||
|
assert (
|
||||||
|
"ScalarIndexQuery"
|
||||||
|
in await some_table.query().where("is_active = TRUE").explain_plan()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_create_label_list_index(some_table: AsyncTable):
|
async def test_create_label_list_index(some_table: AsyncTable):
|
||||||
@@ -181,6 +205,16 @@ async def test_create_4bit_ivfpq_index(some_table: AsyncTable):
|
|||||||
assert stats.loss >= 0.0
|
assert stats.loss >= 0.0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_create_ivfrq_index(some_table: AsyncTable):
|
||||||
|
await some_table.create_index("vector", config=IvfRq(num_bits=1))
|
||||||
|
indices = await some_table.list_indices()
|
||||||
|
assert len(indices) == 1
|
||||||
|
assert indices[0].index_type == "IvfRq"
|
||||||
|
assert indices[0].columns == ["vector"]
|
||||||
|
assert indices[0].name == "vector_idx"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_create_hnswpq_index(some_table: AsyncTable):
|
async def test_create_hnswpq_index(some_table: AsyncTable):
|
||||||
await some_table.create_index("vector", config=HnswPq(num_partitions=10))
|
await some_table.create_index("vector", config=HnswPq(num_partitions=10))
|
||||||
|
|||||||
python/python/tests/test_permutation.py (new file, 496 lines)
@@ -0,0 +1,496 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
import pyarrow as pa
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from lancedb.permutation import permutation_builder
|
||||||
|
|
||||||
|
|
||||||
|
def test_split_random_ratios(mem_db):
|
||||||
|
"""Test random splitting with ratios."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table", pa.table({"x": range(100), "y": range(100)})
|
||||||
|
)
|
||||||
|
permutation_tbl = (
|
||||||
|
permutation_builder(tbl, "test_permutation")
|
||||||
|
.split_random(ratios=[0.3, 0.7])
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check that the table was created and has data
|
||||||
|
assert permutation_tbl.count_rows() == 100
|
||||||
|
|
||||||
|
# Check that split_id column exists and has correct values
|
||||||
|
data = permutation_tbl.search(None).to_arrow().to_pydict()
|
||||||
|
split_ids = data["split_id"]
|
||||||
|
assert set(split_ids) == {0, 1}
|
||||||
|
|
||||||
|
# Check approximate split sizes (allowing for rounding)
|
||||||
|
split_0_count = split_ids.count(0)
|
||||||
|
split_1_count = split_ids.count(1)
|
||||||
|
assert 25 <= split_0_count <= 35 # ~30% ± tolerance
|
||||||
|
assert 65 <= split_1_count <= 75 # ~70% ± tolerance
|
||||||
|
|
||||||
|
|
||||||
|
def test_split_random_counts(mem_db):
|
||||||
|
"""Test random splitting with absolute counts."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table", pa.table({"x": range(100), "y": range(100)})
|
||||||
|
)
|
||||||
|
permutation_tbl = (
|
||||||
|
permutation_builder(tbl, "test_permutation")
|
||||||
|
.split_random(counts=[20, 30])
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check that we have exactly the requested counts
|
||||||
|
assert permutation_tbl.count_rows() == 50
|
||||||
|
|
||||||
|
data = permutation_tbl.search(None).to_arrow().to_pydict()
|
||||||
|
split_ids = data["split_id"]
|
||||||
|
assert split_ids.count(0) == 20
|
||||||
|
assert split_ids.count(1) == 30
|
||||||
|
|
||||||
|
|
||||||
|
def test_split_random_fixed(mem_db):
|
||||||
|
"""Test random splitting with fixed number of splits."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table", pa.table({"x": range(100), "y": range(100)})
|
||||||
|
)
|
||||||
|
permutation_tbl = (
|
||||||
|
permutation_builder(tbl, "test_permutation").split_random(fixed=4).execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check that we have 4 splits with 25 rows each
|
||||||
|
assert permutation_tbl.count_rows() == 100
|
||||||
|
|
||||||
|
data = permutation_tbl.search(None).to_arrow().to_pydict()
|
||||||
|
split_ids = data["split_id"]
|
||||||
|
assert set(split_ids) == {0, 1, 2, 3}
|
||||||
|
|
||||||
|
for split_id in range(4):
|
||||||
|
assert split_ids.count(split_id) == 25
|
||||||
|
|
||||||
|
|
||||||
|
def test_split_random_with_seed(mem_db):
|
||||||
|
"""Test that seeded random splits are reproducible."""
|
||||||
|
tbl = mem_db.create_table("test_table", pa.table({"x": range(50), "y": range(50)}))
|
||||||
|
|
||||||
|
# Create two identical permutations with same seed
|
||||||
|
perm1 = (
|
||||||
|
permutation_builder(tbl, "perm1")
|
||||||
|
.split_random(ratios=[0.6, 0.4], seed=42)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
perm2 = (
|
||||||
|
permutation_builder(tbl, "perm2")
|
||||||
|
.split_random(ratios=[0.6, 0.4], seed=42)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Results should be identical
|
||||||
|
data1 = perm1.search(None).to_arrow().to_pydict()
|
||||||
|
data2 = perm2.search(None).to_arrow().to_pydict()
|
||||||
|
|
||||||
|
assert data1["row_id"] == data2["row_id"]
|
||||||
|
assert data1["split_id"] == data2["split_id"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_split_hash(mem_db):
|
||||||
|
"""Test hash-based splitting."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table",
|
||||||
|
pa.table(
|
||||||
|
{
|
||||||
|
"id": range(100),
|
||||||
|
"category": (["A", "B", "C"] * 34)[:100], # Repeating pattern
|
||||||
|
"value": range(100),
|
||||||
|
}
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
permutation_tbl = (
|
||||||
|
permutation_builder(tbl, "test_permutation")
|
||||||
|
.split_hash(["category"], [1, 1], discard_weight=0)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should have all 100 rows (no discard)
|
||||||
|
assert permutation_tbl.count_rows() == 100
|
||||||
|
|
||||||
|
data = permutation_tbl.search(None).to_arrow().to_pydict()
|
||||||
|
split_ids = data["split_id"]
|
||||||
|
assert set(split_ids) == {0, 1}
|
||||||
|
|
||||||
|
# Verify that each split has roughly 50 rows (allowing for hash variance)
|
||||||
|
split_0_count = split_ids.count(0)
|
||||||
|
split_1_count = split_ids.count(1)
|
||||||
|
assert 30 <= split_0_count <= 70 # ~50 ± 20 tolerance for hash distribution
|
||||||
|
assert 30 <= split_1_count <= 70 # ~50 ± 20 tolerance for hash distribution
|
||||||
|
|
||||||
|
# Hash splits should be deterministic - same category should go to same split
|
||||||
|
# Let's verify by creating another permutation and checking consistency
|
||||||
|
perm2 = (
|
||||||
|
permutation_builder(tbl, "test_permutation2")
|
||||||
|
.split_hash(["category"], [1, 1], discard_weight=0)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
data2 = perm2.search(None).to_arrow().to_pydict()
|
||||||
|
assert data["split_id"] == data2["split_id"] # Should be identical
|
||||||
|
|
||||||
|
|
||||||
|
def test_split_hash_with_discard(mem_db):
|
||||||
|
"""Test hash-based splitting with discard weight."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table",
|
||||||
|
pa.table({"id": range(100), "category": ["A", "B"] * 50, "value": range(100)}),
|
||||||
|
)
|
||||||
|
|
||||||
|
permutation_tbl = (
|
||||||
|
permutation_builder(tbl, "test_permutation")
|
||||||
|
.split_hash(["category"], [1, 1], discard_weight=2) # Should discard ~50%
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should have fewer than 100 rows due to discard
|
||||||
|
row_count = permutation_tbl.count_rows()
|
||||||
|
assert row_count < 100
|
||||||
|
assert row_count > 0 # But not empty
|
||||||
|
|
||||||
|
|
||||||
|
def test_split_sequential(mem_db):
|
||||||
|
"""Test sequential splitting."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table", pa.table({"x": range(100), "y": range(100)})
|
||||||
|
)
|
||||||
|
|
||||||
|
permutation_tbl = (
|
||||||
|
permutation_builder(tbl, "test_permutation")
|
||||||
|
.split_sequential(counts=[30, 40])
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
assert permutation_tbl.count_rows() == 70
|
||||||
|
|
||||||
|
data = permutation_tbl.search(None).to_arrow().to_pydict()
|
||||||
|
row_ids = data["row_id"]
|
||||||
|
split_ids = data["split_id"]
|
||||||
|
|
||||||
|
# Sequential should maintain order
|
||||||
|
assert row_ids == sorted(row_ids)
|
||||||
|
|
||||||
|
# First 30 should be split 0, next 40 should be split 1
|
||||||
|
assert split_ids[:30] == [0] * 30
|
||||||
|
assert split_ids[30:] == [1] * 40
|
||||||
|
|
||||||
|
|
||||||
|
def test_split_calculated(mem_db):
|
||||||
|
"""Test calculated splitting."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table", pa.table({"id": range(100), "value": range(100)})
|
||||||
|
)
|
||||||
|
|
||||||
|
permutation_tbl = (
|
||||||
|
permutation_builder(tbl, "test_permutation")
|
||||||
|
.split_calculated("id % 3") # Split based on id modulo 3
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
assert permutation_tbl.count_rows() == 100
|
||||||
|
|
||||||
|
data = permutation_tbl.search(None).to_arrow().to_pydict()
|
||||||
|
row_ids = data["row_id"]
|
||||||
|
split_ids = data["split_id"]
|
||||||
|
|
||||||
|
# Verify the calculation: each row's split_id should equal row_id % 3
|
||||||
|
for row_id, split_id in zip(row_ids, split_ids):
|
||||||
|
assert split_id == row_id % 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_split_error_cases(mem_db):
|
||||||
|
"""Test error handling for invalid split parameters."""
|
||||||
|
tbl = mem_db.create_table("test_table", pa.table({"x": range(10), "y": range(10)}))
|
||||||
|
|
||||||
|
# Test split_random with no parameters
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
permutation_builder(tbl, "error1").split_random().execute()
|
||||||
|
|
||||||
|
# Test split_random with multiple parameters
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
permutation_builder(tbl, "error2").split_random(
|
||||||
|
ratios=[0.5, 0.5], counts=[5, 5]
|
||||||
|
).execute()
|
||||||
|
|
||||||
|
# Test split_sequential with no parameters
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
permutation_builder(tbl, "error3").split_sequential().execute()
|
||||||
|
|
||||||
|
# Test split_sequential with multiple parameters
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
permutation_builder(tbl, "error4").split_sequential(
|
||||||
|
ratios=[0.5, 0.5], fixed=2
|
||||||
|
).execute()
|
||||||
|
|
||||||
|
|
||||||
|
def test_shuffle_no_seed(mem_db):
|
||||||
|
"""Test shuffling without a seed."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table", pa.table({"id": range(100), "value": range(100)})
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create a permutation with shuffling (no seed)
|
||||||
|
permutation_tbl = permutation_builder(tbl, "test_permutation").shuffle().execute()
|
||||||
|
|
||||||
|
assert permutation_tbl.count_rows() == 100
|
||||||
|
|
||||||
|
data = permutation_tbl.search(None).to_arrow().to_pydict()
|
||||||
|
row_ids = data["row_id"]
|
||||||
|
|
||||||
|
# Row IDs should not be in sequential order due to shuffling
|
||||||
|
# This is probabilistic but with 100 rows, it's extremely unlikely they'd stay
|
||||||
|
# in order
|
||||||
|
assert row_ids != list(range(100))
|
||||||
|
|
||||||
|
|
||||||
|
def test_shuffle_with_seed(mem_db):
|
||||||
|
"""Test that shuffling with a seed is reproducible."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table", pa.table({"id": range(50), "value": range(50)})
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create two identical permutations with same shuffle seed
|
||||||
|
perm1 = permutation_builder(tbl, "perm1").shuffle(seed=42).execute()
|
||||||
|
|
||||||
|
perm2 = permutation_builder(tbl, "perm2").shuffle(seed=42).execute()
|
||||||
|
|
||||||
|
# Results should be identical due to same seed
|
||||||
|
data1 = perm1.search(None).to_arrow().to_pydict()
|
||||||
|
data2 = perm2.search(None).to_arrow().to_pydict()
|
||||||
|
|
||||||
|
assert data1["row_id"] == data2["row_id"]
|
||||||
|
assert data1["split_id"] == data2["split_id"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_shuffle_with_clump_size(mem_db):
|
||||||
|
"""Test shuffling with clump size."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table", pa.table({"id": range(100), "value": range(100)})
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create a permutation with shuffling using clumps
|
||||||
|
permutation_tbl = (
|
||||||
|
permutation_builder(tbl, "test_permutation")
|
||||||
|
.shuffle(clump_size=10) # 10-row clumps
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
assert permutation_tbl.count_rows() == 100
|
||||||
|
|
||||||
|
data = permutation_tbl.search(None).to_arrow().to_pydict()
|
||||||
|
row_ids = data["row_id"]
|
||||||
|
|
||||||
|
for i in range(10):
|
||||||
|
start = row_ids[i * 10]
|
||||||
|
assert row_ids[i * 10 : (i + 1) * 10] == list(range(start, start + 10))
|
||||||
|
|
||||||
|
|
||||||
|
def test_shuffle_different_seeds(mem_db):
|
||||||
|
"""Test that different seeds produce different shuffle orders."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table", pa.table({"id": range(50), "value": range(50)})
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create two permutations with different shuffle seeds
|
||||||
|
perm1 = (
|
||||||
|
permutation_builder(tbl, "perm1")
|
||||||
|
.split_random(fixed=2)
|
||||||
|
.shuffle(seed=42)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
perm2 = (
|
||||||
|
permutation_builder(tbl, "perm2")
|
||||||
|
.split_random(fixed=2)
|
||||||
|
.shuffle(seed=123)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Results should be different due to different seeds
|
||||||
|
data1 = perm1.search(None).to_arrow().to_pydict()
|
||||||
|
data2 = perm2.search(None).to_arrow().to_pydict()
|
||||||
|
|
||||||
|
# Row order should be different
|
||||||
|
assert data1["row_id"] != data2["row_id"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_shuffle_combined_with_splits(mem_db):
|
||||||
|
"""Test shuffling combined with different split strategies."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table",
|
||||||
|
pa.table(
|
||||||
|
{
|
||||||
|
"id": range(100),
|
||||||
|
"category": (["A", "B", "C"] * 34)[:100],
|
||||||
|
"value": range(100),
|
||||||
|
}
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Test shuffle with random splits
|
||||||
|
perm_random = (
|
||||||
|
permutation_builder(tbl, "perm_random")
|
||||||
|
.split_random(ratios=[0.6, 0.4], seed=42)
|
||||||
|
.shuffle(seed=123, clump_size=None)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Test shuffle with hash splits
|
||||||
|
perm_hash = (
|
||||||
|
permutation_builder(tbl, "perm_hash")
|
||||||
|
.split_hash(["category"], [1, 1], discard_weight=0)
|
||||||
|
.shuffle(seed=456, clump_size=5)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Test shuffle with sequential splits
|
||||||
|
perm_sequential = (
|
||||||
|
permutation_builder(tbl, "perm_sequential")
|
||||||
|
.split_sequential(counts=[40, 35])
|
||||||
|
.shuffle(seed=789, clump_size=None)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify all permutations work and have expected properties
|
||||||
|
assert perm_random.count_rows() == 100
|
||||||
|
assert perm_hash.count_rows() == 100
|
||||||
|
assert perm_sequential.count_rows() == 75
|
||||||
|
|
||||||
|
# Verify shuffle affected the order
|
||||||
|
data_random = perm_random.search(None).to_arrow().to_pydict()
|
||||||
|
data_sequential = perm_sequential.search(None).to_arrow().to_pydict()
|
||||||
|
|
||||||
|
assert data_random["row_id"] != list(range(100))
|
||||||
|
assert data_sequential["row_id"] != list(range(75))
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_shuffle_maintains_order(mem_db):
|
||||||
|
"""Test that not calling shuffle maintains the original order."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table", pa.table({"id": range(50), "value": range(50)})
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create permutation without shuffle (should maintain some order)
|
||||||
|
permutation_tbl = (
|
||||||
|
permutation_builder(tbl, "test_permutation")
|
||||||
|
.split_sequential(counts=[25, 25]) # Sequential maintains order
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
assert permutation_tbl.count_rows() == 50
|
||||||
|
|
||||||
|
data = permutation_tbl.search(None).to_arrow().to_pydict()
|
||||||
|
row_ids = data["row_id"]
|
||||||
|
|
||||||
|
# With sequential splits and no shuffle, should maintain order
|
||||||
|
assert row_ids == list(range(50))
|
||||||
|
|
||||||
|
|
||||||
|
def test_filter_basic(mem_db):
|
||||||
|
"""Test basic filtering functionality."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table", pa.table({"id": range(100), "value": range(100, 200)})
|
||||||
|
)
|
||||||
|
|
||||||
|
# Filter to only include rows where id < 50
|
||||||
|
permutation_tbl = (
|
||||||
|
permutation_builder(tbl, "test_permutation").filter("id < 50").execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
assert permutation_tbl.count_rows() == 50
|
||||||
|
|
||||||
|
data = permutation_tbl.search(None).to_arrow().to_pydict()
|
||||||
|
row_ids = data["row_id"]
|
||||||
|
|
||||||
|
# All row_ids should be less than 50
|
||||||
|
assert all(row_id < 50 for row_id in row_ids)
|
||||||
|
|
||||||
|
|
||||||
|
def test_filter_with_splits(mem_db):
|
||||||
|
"""Test filtering combined with split strategies."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table",
|
||||||
|
pa.table(
|
||||||
|
{
|
||||||
|
"id": range(100),
|
||||||
|
"category": (["A", "B", "C"] * 34)[:100],
|
||||||
|
"value": range(100),
|
||||||
|
}
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Filter to only category A and B, then split
|
||||||
|
permutation_tbl = (
|
||||||
|
permutation_builder(tbl, "test_permutation")
|
||||||
|
.filter("category IN ('A', 'B')")
|
||||||
|
.split_random(ratios=[0.5, 0.5])
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should have fewer than 100 rows due to filtering
|
||||||
|
row_count = permutation_tbl.count_rows()
|
||||||
|
assert row_count == 67
|
||||||
|
|
||||||
|
data = permutation_tbl.search(None).to_arrow().to_pydict()
|
||||||
|
categories = data["category"]
|
||||||
|
|
||||||
|
# All categories should be A or B
|
||||||
|
assert all(cat in ["A", "B"] for cat in categories)
|
||||||
|
|
||||||
|
|
||||||
|
def test_filter_with_shuffle(mem_db):
|
||||||
|
"""Test filtering combined with shuffling."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table",
|
||||||
|
pa.table(
|
||||||
|
{
|
||||||
|
"id": range(100),
|
||||||
|
"category": (["A", "B", "C", "D"] * 25)[:100],
|
||||||
|
"value": range(100),
|
||||||
|
}
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Filter and shuffle
|
||||||
|
permutation_tbl = (
|
||||||
|
permutation_builder(tbl, "test_permutation")
|
||||||
|
.filter("category IN ('A', 'C')")
|
||||||
|
.shuffle(seed=42)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
row_count = permutation_tbl.count_rows()
|
||||||
|
assert row_count == 50 # Should have 50 rows (A and C categories)
|
||||||
|
|
||||||
|
data = permutation_tbl.search(None).to_arrow().to_pydict()
|
||||||
|
row_ids = data["row_id"]
|
||||||
|
|
||||||
|
assert row_ids != sorted(row_ids)
|
||||||
|
|
||||||
|
|
||||||
|
def test_filter_empty_result(mem_db):
|
||||||
|
"""Test filtering that results in empty set."""
|
||||||
|
tbl = mem_db.create_table(
|
||||||
|
"test_table", pa.table({"id": range(10), "value": range(10)})
|
||||||
|
)
|
||||||
|
|
||||||
|
# Filter that matches nothing
|
||||||
|
permutation_tbl = (
|
||||||
|
permutation_builder(tbl, "test_permutation")
|
||||||
|
.filter("value > 100") # No values > 100 in our data
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
assert permutation_tbl.count_rows() == 0
|
||||||
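The tests above depend on a `mem_db` pytest fixture (and a `permutation_builder` helper) that are defined elsewhere in the test suite and are not part of this diff. A minimal stand-in sketch is shown below; the `memory://` URI is an assumption about how an in-memory database is opened, so the suite's conftest.py is the authoritative definition.

# Hypothetical stand-in for the `mem_db` fixture used by the tests above.
# The "memory://" URI is an assumption, not taken from this diff.
import lancedb
import pytest


@pytest.fixture
def mem_db():
    # Each test gets a fresh in-memory database connection.
    return lancedb.connect("memory://")
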
@@ -22,6 +22,7 @@ from lancedb.rerankers import (
     JinaReranker,
     AnswerdotaiRerankers,
     VoyageAIReranker,
+    MRRReranker,
 )
 from lancedb.table import LanceTable
 
@@ -46,6 +47,7 @@ def get_test_table(tmp_path, use_tantivy):
         db,
         "my_table",
         schema=MyTable,
+        mode="overwrite",
     )
 
     # Need to test with a bunch of phrases to make sure sorting is consistent
@@ -96,7 +98,7 @@ def get_test_table(tmp_path, use_tantivy):
     )
 
     # Create a fts index
-    table.create_fts_index("text", use_tantivy=use_tantivy)
+    table.create_fts_index("text", use_tantivy=use_tantivy, replace=True)
 
     return table, MyTable
 
@@ -320,6 +322,34 @@ def test_rrf_reranker(tmp_path, use_tantivy):
     _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
 
 
+@pytest.mark.parametrize("use_tantivy", [True, False])
+def test_mrr_reranker(tmp_path, use_tantivy):
+    reranker = MRRReranker()
+    _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
+
+    # Test multi-vector part
+    table, schema = get_test_table(tmp_path, use_tantivy)
+    query = "single player experience"
+    rs1 = table.search(query, vector_column_name="vector").limit(10).with_row_id(True)
+    rs2 = (
+        table.search(query, vector_column_name="meta_vector")
+        .limit(10)
+        .with_row_id(True)
+    )
+    result = reranker.rerank_multivector([rs1, rs2])
+    assert "_relevance_score" in result.column_names
+    assert len(result) <= 20
+
+    if len(result) > 1:
+        assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
+            "The _relevance_score should be descending."
+        )
+
+    # Test with duplicate results
+    result_deduped = reranker.rerank_multivector([rs1, rs2, rs1])
+    assert len(result_deduped) == len(result)
+
+
 def test_rrf_reranker_distance():
     data = pa.table(
         {
@@ -674,6 +674,45 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
         "vector", replace=True, config=expected_config, name=None, train=True
     )
 
+    # Test with target_partition_size
+    table.create_index(
+        metric="l2",
+        num_sub_vectors=96,
+        vector_column_name="vector",
+        replace=True,
+        index_cache_size=256,
+        num_bits=4,
+        target_partition_size=8192,
+    )
+    expected_config = IvfPq(
+        distance_type="l2",
+        num_sub_vectors=96,
+        num_bits=4,
+        target_partition_size=8192,
+    )
+    mock_create_index.assert_called_with(
+        "vector", replace=True, config=expected_config, name=None, train=True
+    )
+
+    # target_partition_size has a default value,
+    # so `num_partitions` and `target_partition_size` are not required
+    table.create_index(
+        metric="l2",
+        num_sub_vectors=96,
+        vector_column_name="vector",
+        replace=True,
+        index_cache_size=256,
+        num_bits=4,
+    )
+    expected_config = IvfPq(
+        distance_type="l2",
+        num_sub_vectors=96,
+        num_bits=4,
+    )
+    mock_create_index.assert_called_with(
+        "vector", replace=True, config=expected_config, name=None, train=True
+    )
+
     table.create_index(
         vector_column_name="my_vector",
         metric="dot",
@@ -4,7 +4,10 @@
 use std::{collections::HashMap, sync::Arc, time::Duration};
 
 use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::FromPyArrow};
-use lancedb::{connection::Connection as LanceConnection, database::CreateTableMode};
+use lancedb::{
+    connection::Connection as LanceConnection,
+    database::{CreateTableMode, ReadConsistency},
+};
 use pyo3::{
     exceptions::{PyRuntimeError, PyValueError},
     pyclass, pyfunction, pymethods, Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
@@ -23,7 +26,7 @@ impl Connection {
         Self { inner: Some(inner) }
     }
 
-    fn get_inner(&self) -> PyResult<&LanceConnection> {
+    pub(crate) fn get_inner(&self) -> PyResult<&LanceConnection> {
         self.inner
             .as_ref()
             .ok_or_else(|| PyRuntimeError::new_err("Connection is closed"))
@@ -63,6 +66,18 @@ impl Connection {
         self.get_inner().map(|inner| inner.uri().to_string())
     }
 
+    #[pyo3(signature = ())]
+    pub fn get_read_consistency_interval(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.get_inner()?.clone();
+        future_into_py(self_.py(), async move {
+            Ok(match inner.read_consistency().await.infer_error()? {
+                ReadConsistency::Manual => None,
+                ReadConsistency::Eventual(duration) => Some(duration.as_secs_f64()),
+                ReadConsistency::Strong => Some(0.0_f64),
+            })
+        })
+    }
+
     #[pyo3(signature = (namespace=vec![], start_after=None, limit=None))]
     pub fn table_names(
         self_: PyRef<'_, Self>,
@@ -163,6 +178,34 @@ impl Connection {
         })
     }
 
+    #[pyo3(signature = (target_table_name, source_uri, target_namespace=vec![], source_version=None, source_tag=None, is_shallow=true))]
+    pub fn clone_table(
+        self_: PyRef<'_, Self>,
+        target_table_name: String,
+        source_uri: String,
+        target_namespace: Vec<String>,
+        source_version: Option<u64>,
+        source_tag: Option<String>,
+        is_shallow: bool,
+    ) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.get_inner()?.clone();
+
+        let mut builder = inner.clone_table(target_table_name, source_uri);
+        builder = builder.target_namespace(target_namespace);
+        if let Some(version) = source_version {
+            builder = builder.source_version(version);
+        }
+        if let Some(tag) = source_tag {
+            builder = builder.source_tag(tag);
+        }
+        builder = builder.is_shallow(is_shallow);
+
+        future_into_py(self_.py(), async move {
+            let table = builder.execute().await.infer_error()?;
+            Ok(Table::new(table))
+        })
+    }
+
     #[pyo3(signature = (cur_name, new_name, cur_namespace=vec![], new_namespace=vec![]))]
     pub fn rename_table(
         self_: PyRef<'_, Self>,
@@ -255,7 +298,7 @@ impl Connection {
 #[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
 #[allow(clippy::too_many_arguments)]
 pub fn connect(
-    py: Python,
+    py: Python<'_>,
     uri: String,
     api_key: Option<String>,
     region: Option<String>,
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
 
-use lancedb::index::vector::IvfFlatIndexBuilder;
+use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder};
 use lancedb::index::{
     scalar::{BTreeIndexBuilder, FtsIndexBuilder},
     vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
@@ -63,6 +63,9 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
             if let Some(num_partitions) = params.num_partitions {
                 ivf_flat_builder = ivf_flat_builder.num_partitions(num_partitions);
             }
+            if let Some(target_partition_size) = params.target_partition_size {
+                ivf_flat_builder = ivf_flat_builder.target_partition_size(target_partition_size);
+            }
             Ok(LanceDbIndex::IvfFlat(ivf_flat_builder))
         },
         "IvfPq" => {
@@ -76,11 +79,30 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
             if let Some(num_partitions) = params.num_partitions {
                 ivf_pq_builder = ivf_pq_builder.num_partitions(num_partitions);
             }
+            if let Some(target_partition_size) = params.target_partition_size {
+                ivf_pq_builder = ivf_pq_builder.target_partition_size(target_partition_size);
+            }
             if let Some(num_sub_vectors) = params.num_sub_vectors {
                 ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
             }
             Ok(LanceDbIndex::IvfPq(ivf_pq_builder))
         },
+        "IvfRq" => {
+            let params = source.extract::<IvfRqParams>()?;
+            let distance_type = parse_distance_type(params.distance_type)?;
+            let mut ivf_rq_builder = IvfRqIndexBuilder::default()
+                .distance_type(distance_type)
+                .max_iterations(params.max_iterations)
+                .sample_rate(params.sample_rate)
+                .num_bits(params.num_bits);
+            if let Some(num_partitions) = params.num_partitions {
+                ivf_rq_builder = ivf_rq_builder.num_partitions(num_partitions);
+            }
+            if let Some(target_partition_size) = params.target_partition_size {
+                ivf_rq_builder = ivf_rq_builder.target_partition_size(target_partition_size);
+            }
+            Ok(LanceDbIndex::IvfRq(ivf_rq_builder))
+        },
         "HnswPq" => {
             let params = source.extract::<IvfHnswPqParams>()?;
             let distance_type = parse_distance_type(params.distance_type)?;
@@ -94,6 +116,9 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
             if let Some(num_partitions) = params.num_partitions {
                 hnsw_pq_builder = hnsw_pq_builder.num_partitions(num_partitions);
             }
+            if let Some(target_partition_size) = params.target_partition_size {
+                hnsw_pq_builder = hnsw_pq_builder.target_partition_size(target_partition_size);
+            }
             if let Some(num_sub_vectors) = params.num_sub_vectors {
                 hnsw_pq_builder = hnsw_pq_builder.num_sub_vectors(num_sub_vectors);
             }
@@ -111,6 +136,9 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
             if let Some(num_partitions) = params.num_partitions {
                 hnsw_sq_builder = hnsw_sq_builder.num_partitions(num_partitions);
             }
+            if let Some(target_partition_size) = params.target_partition_size {
+                hnsw_sq_builder = hnsw_sq_builder.target_partition_size(target_partition_size);
+            }
             Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
         },
         not_supported => Err(PyValueError::new_err(format!(
@@ -144,6 +172,7 @@ struct IvfFlatParams {
     num_partitions: Option<u32>,
     max_iterations: u32,
     sample_rate: u32,
+    target_partition_size: Option<u32>,
 }
 
 #[derive(FromPyObject)]
@@ -154,6 +183,17 @@ struct IvfPqParams {
     num_bits: u32,
     max_iterations: u32,
     sample_rate: u32,
+    target_partition_size: Option<u32>,
+}
+
+#[derive(FromPyObject)]
+struct IvfRqParams {
+    distance_type: String,
+    num_partitions: Option<u32>,
+    num_bits: u32,
+    max_iterations: u32,
+    sample_rate: u32,
+    target_partition_size: Option<u32>,
 }
 
 #[derive(FromPyObject)]
@@ -166,6 +206,7 @@ struct IvfHnswPqParams {
     sample_rate: u32,
     m: u32,
     ef_construction: u32,
+    target_partition_size: Option<u32>,
 }
 
 #[derive(FromPyObject)]
@@ -176,6 +217,7 @@ struct IvfHnswSqParams {
     sample_rate: u32,
     m: u32,
     ef_construction: u32,
+    target_partition_size: Option<u32>,
 }
 
 #[pyclass(get_all)]
@@ -5,6 +5,7 @@ use arrow::RecordBatchStream;
 use connection::{connect, Connection};
 use env_logger::Env;
 use index::IndexConfig;
+use permutation::PyAsyncPermutationBuilder;
 use pyo3::{
     pymodule,
     types::{PyModule, PyModuleMethods},
@@ -22,6 +23,7 @@ pub mod connection;
 pub mod error;
 pub mod header;
 pub mod index;
+pub mod permutation;
 pub mod query;
 pub mod session;
 pub mod table;
@@ -49,7 +51,9 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<DeleteResult>()?;
     m.add_class::<DropColumnsResult>()?;
     m.add_class::<UpdateResult>()?;
+    m.add_class::<PyAsyncPermutationBuilder>()?;
     m.add_function(wrap_pyfunction!(connect, m)?)?;
+    m.add_function(wrap_pyfunction!(permutation::async_permutation_builder, m)?)?;
     m.add_function(wrap_pyfunction!(util::validate_table_name, m)?)?;
     m.add("__version__", env!("CARGO_PKG_VERSION"))?;
     Ok(())
python/src/permutation.rs (new file, 177 lines):

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

use std::sync::{Arc, Mutex};

use crate::{error::PythonErrorExt, table::Table};
use lancedb::dataloader::{
    permutation::{PermutationBuilder as LancePermutationBuilder, ShuffleStrategy},
    split::{SplitSizes, SplitStrategy},
};
use pyo3::{
    exceptions::PyRuntimeError, pyclass, pymethods, types::PyAnyMethods, Bound, PyAny, PyRefMut,
    PyResult,
};
use pyo3_async_runtimes::tokio::future_into_py;

/// Create a permutation builder for the given table
#[pyo3::pyfunction]
pub fn async_permutation_builder(
    table: Bound<'_, PyAny>,
    dest_table_name: String,
) -> PyResult<PyAsyncPermutationBuilder> {
    let table = table.getattr("_inner")?.downcast_into::<Table>()?;
    let inner_table = table.borrow().inner_ref()?.clone();
    let inner_builder = LancePermutationBuilder::new(inner_table);

    Ok(PyAsyncPermutationBuilder {
        state: Arc::new(Mutex::new(PyAsyncPermutationBuilderState {
            builder: Some(inner_builder),
            dest_table_name,
        })),
    })
}

struct PyAsyncPermutationBuilderState {
    builder: Option<LancePermutationBuilder>,
    dest_table_name: String,
}

#[pyclass(name = "AsyncPermutationBuilder")]
pub struct PyAsyncPermutationBuilder {
    state: Arc<Mutex<PyAsyncPermutationBuilderState>>,
}

impl PyAsyncPermutationBuilder {
    fn modify(
        &self,
        func: impl FnOnce(LancePermutationBuilder) -> LancePermutationBuilder,
    ) -> PyResult<Self> {
        let mut state = self.state.lock().unwrap();
        let builder = state
            .builder
            .take()
            .ok_or_else(|| PyRuntimeError::new_err("Builder already consumed"))?;
        state.builder = Some(func(builder));
        Ok(Self {
            state: self.state.clone(),
        })
    }
}

#[pymethods]
impl PyAsyncPermutationBuilder {
    #[pyo3(signature = (*, ratios=None, counts=None, fixed=None, seed=None))]
    pub fn split_random(
        slf: PyRefMut<'_, Self>,
        ratios: Option<Vec<f64>>,
        counts: Option<Vec<u64>>,
        fixed: Option<u64>,
        seed: Option<u64>,
    ) -> PyResult<Self> {
        // Check that exactly one split type is provided
        let split_args_count = [ratios.is_some(), counts.is_some(), fixed.is_some()]
            .iter()
            .filter(|&&x| x)
            .count();

        if split_args_count != 1 {
            return Err(pyo3::exceptions::PyValueError::new_err(
                "Exactly one of 'ratios', 'counts', or 'fixed' must be provided",
            ));
        }

        let sizes = if let Some(ratios) = ratios {
            SplitSizes::Percentages(ratios)
        } else if let Some(counts) = counts {
            SplitSizes::Counts(counts)
        } else if let Some(fixed) = fixed {
            SplitSizes::Fixed(fixed)
        } else {
            unreachable!("One of the split arguments must be provided");
        };

        slf.modify(|builder| builder.with_split_strategy(SplitStrategy::Random { seed, sizes }))
    }

    #[pyo3(signature = (columns, split_weights, *, discard_weight=0))]
    pub fn split_hash(
        slf: PyRefMut<'_, Self>,
        columns: Vec<String>,
        split_weights: Vec<u64>,
        discard_weight: u64,
    ) -> PyResult<Self> {
        slf.modify(|builder| {
            builder.with_split_strategy(SplitStrategy::Hash {
                columns,
                split_weights,
                discard_weight,
            })
        })
    }

    #[pyo3(signature = (*, ratios=None, counts=None, fixed=None))]
    pub fn split_sequential(
        slf: PyRefMut<'_, Self>,
        ratios: Option<Vec<f64>>,
        counts: Option<Vec<u64>>,
        fixed: Option<u64>,
    ) -> PyResult<Self> {
        // Check that exactly one split type is provided
        let split_args_count = [ratios.is_some(), counts.is_some(), fixed.is_some()]
            .iter()
            .filter(|&&x| x)
            .count();

        if split_args_count != 1 {
            return Err(pyo3::exceptions::PyValueError::new_err(
                "Exactly one of 'ratios', 'counts', or 'fixed' must be provided",
            ));
        }

        let sizes = if let Some(ratios) = ratios {
            SplitSizes::Percentages(ratios)
        } else if let Some(counts) = counts {
            SplitSizes::Counts(counts)
        } else if let Some(fixed) = fixed {
            SplitSizes::Fixed(fixed)
        } else {
            unreachable!("One of the split arguments must be provided");
        };

        slf.modify(|builder| builder.with_split_strategy(SplitStrategy::Sequential { sizes }))
    }

    pub fn split_calculated(slf: PyRefMut<'_, Self>, calculation: String) -> PyResult<Self> {
        slf.modify(|builder| builder.with_split_strategy(SplitStrategy::Calculated { calculation }))
    }

    pub fn shuffle(
        slf: PyRefMut<'_, Self>,
        seed: Option<u64>,
        clump_size: Option<u64>,
    ) -> PyResult<Self> {
        slf.modify(|builder| {
            builder.with_shuffle_strategy(ShuffleStrategy::Random { seed, clump_size })
        })
    }

    pub fn filter(slf: PyRefMut<'_, Self>, filter: String) -> PyResult<Self> {
        slf.modify(|builder| builder.with_filter(filter))
    }

    pub fn execute(slf: PyRefMut<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
        let mut state = slf.state.lock().unwrap();
        let builder = state
            .builder
            .take()
            .ok_or_else(|| PyRuntimeError::new_err("Builder already consumed"))?;

        let dest_table_name = std::mem::take(&mut state.dest_table_name);

        future_into_py(slf.py(), async move {
            let table = builder.build(&dest_table_name).await.infer_error()?;
            Ok(Table::new(table))
        })
    }
}
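The `AsyncPermutationBuilder` above is what backs the fluent chain exercised by the Python tests earlier in this diff: each Python call (`filter`, `split_*`, `shuffle`, `execute`) maps onto the corresponding method here and, in turn, onto a `SplitStrategy` or `ShuffleStrategy` variant. A minimal sketch of how those methods compose is below; it assumes the same `permutation_builder` entry point used in the tests, whose import path is not shown in this diff.

# Sketch only: mirrors the builder methods defined in permutation.rs above.
# `db` is an open LanceDB connection and `permutation_builder` is the helper
# used by the tests earlier in this diff; both are passed in as assumptions.
import pyarrow as pa


def build_permutation(db, permutation_builder):
    tbl = db.create_table(
        "events", pa.table({"user": ["a", "b", "c", "d"] * 25, "value": range(100)})
    )
    return (
        permutation_builder(tbl, "events_permutation")
        .filter("value < 80")                            # -> with_filter
        .split_hash(["user"], [3, 1], discard_weight=0)  # -> SplitStrategy::Hash
        .shuffle(seed=42, clump_size=10)                 # -> ShuffleStrategy::Random
        .execute()                                       # builds the permutation table
    )
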
@@ -3,6 +3,7 @@
 use std::{collections::HashMap, sync::Arc};
 
 use crate::{
+    connection::Connection,
     error::PythonErrorExt,
     index::{extract_index_params, IndexConfig},
     query::{Query, TakeQuery},
@@ -249,7 +250,7 @@ impl Table {
 }
 
 impl Table {
-    fn inner_ref(&self) -> PyResult<&LanceDbTable> {
+    pub(crate) fn inner_ref(&self) -> PyResult<&LanceDbTable> {
         self.inner
             .as_ref()
             .ok_or_else(|| PyRuntimeError::new_err(format!("Table {} is closed", self.name)))
@@ -272,6 +273,13 @@ impl Table {
         self.inner.take();
     }
 
+    pub fn database(&self) -> PyResult<Connection> {
+        let inner = self.inner_ref()?.clone();
+        let inner_connection =
+            lancedb::Connection::new(inner.database().clone(), inner.embedding_registry().clone());
+        Ok(Connection::new(inner_connection))
+    }
+
     pub fn schema(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.inner_ref()?.clone();
         future_into_py(self_.py(), async move {
@@ -591,12 +599,11 @@ impl Table {
     }
 
     /// Optimize the on-disk data by compacting and pruning old data, for better performance.
-    #[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None, retrain=None))]
+    #[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None))]
     pub fn optimize(
         self_: PyRef<'_, Self>,
         cleanup_since_ms: Option<u64>,
         delete_unverified: Option<bool>,
-        retrain: Option<bool>,
     ) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.inner_ref()?.clone();
         let older_than = if let Some(ms) = cleanup_since_ms {
@@ -632,10 +639,9 @@ impl Table {
             .prune
             .unwrap();
         inner
-            .optimize(lancedb::table::OptimizeAction::Index(match retrain {
-                Some(true) => OptimizeOptions::retrain(),
-                _ => OptimizeOptions::default(),
-            }))
+            .optimize(lancedb::table::OptimizeAction::Index(
+                OptimizeOptions::default(),
+            ))
             .await
             .infer_error()?;
         Ok(OptimizeStats {
@@ -674,6 +680,9 @@ impl Table {
         if let Some(timeout) = parameters.timeout {
             builder.timeout(timeout);
         }
+        if let Some(use_index) = parameters.use_index {
+            builder.use_index(use_index);
+        }
 
         future_into_py(self_.py(), async move {
             let res = builder.execute(Box::new(batches)).await.infer_error()?;
@@ -833,6 +842,7 @@ pub struct MergeInsertParams {
     when_not_matched_by_source_delete: bool,
     when_not_matched_by_source_condition: Option<String>,
     timeout: Option<std::time::Duration>,
+    use_index: Option<bool>,
 }
 
 #[pyclass]
@@ -1,2 +1,2 @@
 [toolchain]
-channel = "1.86.0"
+channel = "1.90.0"
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.22.1-beta.0"
+version = "0.22.2"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -11,6 +11,7 @@ rust-version.workspace = true
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [dependencies]
+ahash = { workspace = true }
 arrow = { workspace = true }
 arrow-array = { workspace = true }
 arrow-data = { workspace = true }
@@ -24,18 +25,23 @@ datafusion-common.workspace = true
 datafusion-execution.workspace = true
 datafusion-expr.workspace = true
 datafusion-physical-plan.workspace = true
+datafusion.workspace = true
 object_store = { workspace = true }
 snafu = { workspace = true }
 half = { workspace = true }
 lazy_static.workspace = true
 lance = { workspace = true }
+lance-core = { workspace = true }
 lance-datafusion.workspace = true
+lance-datagen = { workspace = true }
+lance-file = { workspace = true }
 lance-io = { workspace = true }
 lance-index = { workspace = true }
 lance-table = { workspace = true }
 lance-linalg = { workspace = true }
 lance-testing = { workspace = true }
 lance-encoding = { workspace = true }
+lance-namespace = { workspace = true }
 moka = { workspace = true }
 pin-project = { workspace = true }
 tokio = { version = "1.23", features = ["rt-multi-thread"] }
@@ -45,11 +51,13 @@ bytes = "1"
 futures.workspace = true
 num-traits.workspace = true
 url.workspace = true
+rand.workspace = true
 regex.workspace = true
 serde = { version = "^1" }
 serde_json = { version = "1" }
 async-openai = { version = "0.20.0", optional = true }
 serde_with = { version = "3.8.1" }
+tempfile = "3.5.0"
 aws-sdk-bedrockruntime = { version = "1.27.0", optional = true }
 # For remote feature
 reqwest = { version = "0.12.0", default-features = false, features = [
@@ -60,9 +68,8 @@ reqwest = { version = "0.12.0", default-features = false, features = [
     "macos-system-configuration",
     "stream",
 ], optional = true }
-rand = { version = "0.9", features = ["small_rng"], optional = true }
 http = { version = "1", optional = true } # Matching what is in reqwest
-uuid = { version = "1.7.0", features = ["v4"], optional = true }
+uuid = { version = "1.7.0", features = ["v4"] }
 polars-arrow = { version = ">=0.37,<0.40.0", optional = true }
 polars = { version = ">=0.37,<0.40.0", optional = true }
 hf-hub = { version = "0.4.1", optional = true, default-features = false, features = [
@@ -81,19 +88,20 @@ crunchy.workspace = true
 bytemuck_derive.workspace = true
 
 [dev-dependencies]
+anyhow = "1"
 tempfile = "3.5.0"
-rand = { version = "0.9", features = ["small_rng"] }
 random_word = { version = "0.4.3", features = ["en"] }
 uuid = { version = "1.7.0", features = ["v4"] }
 walkdir = "2"
-aws-sdk-dynamodb = { version = "1.38.0" }
-aws-sdk-s3 = { version = "1.38.0" }
-aws-sdk-kms = { version = "1.37" }
-aws-config = { version = "1.0" }
-aws-smithy-runtime = { version = "1.3" }
+aws-sdk-dynamodb = { version = "1.55.0" }
+aws-sdk-s3 = { version = "1.55.0" }
+aws-sdk-kms = { version = "1.48.0" }
+aws-config = { version = "1.5.10" }
+aws-smithy-runtime = { version = "1.9.1" }
 datafusion.workspace = true
 http-body = "1" # Matching reqwest
 rstest = "0.23.0"
+test-log = "0.2"
 
 
 [features]
@@ -103,7 +111,7 @@ oss = ["lance/oss", "lance-io/oss"]
 gcs = ["lance/gcp", "lance-io/gcp"]
 azure = ["lance/azure", "lance-io/azure"]
 dynamodb = ["lance/dynamodb", "aws"]
-remote = ["dep:reqwest", "dep:http", "dep:rand", "dep:uuid"]
+remote = ["dep:reqwest", "dep:http"]
 fp16kernels = ["lance-linalg/fp16kernels"]
 s3-test = []
 bedrock = ["dep:aws-sdk-bedrockruntime"]
Some files were not shown because too many files have changed in this diff.