Bump version: 0.25.3-beta.6 → 0.25.3

Bump version: 0.25.3-beta.5 → 0.25.3-beta.6
feat: add Python Permutation class to mimic Hugging Face dataset and provide PyTorch dataloader (#2725)
2025-12-23 13:29:57 +00:00 · 2025-11-07 04:57:55 +00:00 · 2025-11-07 04:57:54 +00:00 · 2025-11-06 16:15:33 -08:00 · 2025-11-05 14:07:35 -08:00 · 2025-11-05 21:25:05 +08:00
295 changed files with 28130 additions and 17604 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.20.1-beta.2"
+current_version = "0.22.3-beta.5"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
@@ -50,11 +50,6 @@ pre_commit_hooks = [
 optional_value = "final"
 values = ["beta", "final"]

-[[tool.bumpversion.files]]
-filename = "node/package.json"
-replace = "\"version\": \"{new_version}\","
-search = "\"version\": \"{current_version}\","
-
 [[tool.bumpversion.files]]
 filename = "nodejs/package.json"
 replace = "\"version\": \"{new_version}\","
@@ -66,39 +61,8 @@ glob = "nodejs/npm/*/package.json"
 replace = "\"version\": \"{new_version}\","
 search = "\"version\": \"{current_version}\","

-# vectodb node binary packages
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-darwin-arm64\": \"{new_version}\""
-search = "\"@lancedb/vectordb-darwin-arm64\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-darwin-x64\": \"{new_version}\""
-search = "\"@lancedb/vectordb-darwin-x64\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{new_version}\""
-search = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-linux-x64-gnu\": \"{new_version}\""
-search = "\"@lancedb/vectordb-linux-x64-gnu\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-win32-x64-msvc\": \"{new_version}\""
-search = "\"@lancedb/vectordb-win32-x64-msvc\": \"{current_version}\""
-
 # Cargo files
 # ------------
-[[tool.bumpversion.files]]
-filename = "rust/ffi/node/Cargo.toml"
-replace = "\nversion = \"{new_version}\""
-search = "\nversion = \"{current_version}\""
-
 [[tool.bumpversion.files]]
 filename = "rust/lancedb/Cargo.toml"
 replace = "\nversion = \"{new_version}\""
--- a/.github/actions/create-failure-issue/action.yml
+++ b/.github/actions/create-failure-issue/action.yml
@@ -0,0 +1,45 @@
+name: Create Failure Issue
+description: Creates a GitHub issue if any jobs in the workflow failed
+
+inputs:
+  job-results:
+    description: 'JSON string of job results from needs context'
+    required: true
+  workflow-name:
+    description: 'Name of the workflow'
+    required: true
+
+runs:
+  using: composite
+  steps:
+    - name: Check for failures and create issue
+      shell: bash
+      env:
+        JOB_RESULTS: ${{ inputs.job-results }}
+        WORKFLOW_NAME: ${{ inputs.workflow-name }}
+        RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        GH_TOKEN: ${{ github.token }}
+      run: |
+        # Check if any job failed
+        if echo "$JOB_RESULTS" | jq -e 'to_entries | any(.value.result == "failure")' > /dev/null; then
+          echo "Detected job failures, creating issue..."
+
+          # Extract failed job names
+          FAILED_JOBS=$(echo "$JOB_RESULTS" | jq -r 'to_entries | map(select(.value.result == "failure")) | map(.key) | join(", ")')
+
+          # Create issue with workflow name, failed jobs, and run URL
+          gh issue create \
+            --title "$WORKFLOW_NAME Failed ($FAILED_JOBS)" \
+            --body "The workflow **$WORKFLOW_NAME** failed during execution.
+
+        **Failed jobs:** $FAILED_JOBS
+
+        **Run URL:** $RUN_URL
+
+        Please investigate the failed jobs and address any issues." \
+            --label "ci"
+
+          echo "Issue created successfully"
+        else
+          echo "No job failures detected, skipping issue creation"
+        fi
--- a/.github/workflows/cargo-publish.yml
+++ b/.github/workflows/cargo-publish.yml
@@ -5,8 +5,8 @@ on:
    tags-ignore:
      # We don't publish pre-releases for Rust. Crates.io is just a source
      # distribution, so we don't need to publish pre-releases.
-      - 'v*-beta*'
-      - '*-v*' # for example, python-vX.Y.Z
+      - "v*-beta*"
+      - "*-v*" # for example, python-vX.Y.Z

 env:
  # This env var is used by Swatinem/rust-cache@v2 for the cache
@@ -19,6 +19,8 @@ env:
 jobs:
  build:
    runs-on: ubuntu-22.04
+    permissions:
+      id-token: write
    timeout-minutes: 30
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
@@ -31,6 +33,22 @@ jobs:
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
+      - uses: rust-lang/crates-io-auth-action@v1
+        id: auth
      - name: Publish the package
        run: |
-          cargo publish -p lancedb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [build]
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}
--- a/.github/workflows/codex-update-lance-dependency.yml
+++ b/.github/workflows/codex-update-lance-dependency.yml
@@ -0,0 +1,107 @@
+name: Codex Update Lance Dependency
+
+on:
+  workflow_call:
+    inputs:
+      tag:
+        description: "Tag name from Lance"
+        required: true
+        type: string
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: "Tag name from Lance"
+        required: true
+        type: string
+
+permissions:
+  contents: write
+  pull-requests: write
+  actions: read
+
+jobs:
+  update:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Show inputs
+        run: |
+          echo "tag = ${{ inputs.tag }}"
+
+      - name: Checkout Repo LanceDB
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: true
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: 20
+
+      - name: Install Codex CLI
+        run: npm install -g @openai/codex
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          toolchain: stable
+          components: clippy, rustfmt
+
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y protobuf-compiler libssl-dev
+
+      - name: Install cargo-info
+        run: cargo install cargo-info
+
+      - name: Install Python dependencies
+        run: python3 -m pip install --upgrade pip packaging
+
+      - name: Configure git user
+        run: |
+          git config user.name "lancedb automation"
+          git config user.email "robot@lancedb.com"
+
+      - name: Configure Codex authentication
+        env:
+          CODEX_TOKEN_B64: ${{ secrets.CODEX_TOKEN }}
+        run: |
+          if [ -z "${CODEX_TOKEN_B64}" ]; then
+            echo "Repository secret CODEX_TOKEN is not defined; skipping Codex execution."
+            exit 1
+          fi
+          mkdir -p ~/.codex
+          echo "${CODEX_TOKEN_B64}" | base64 --decode > ~/.codex/auth.json
+
+      - name: Run Codex to update Lance dependency
+        env:
+          TAG: ${{ inputs.tag }}
+          GITHUB_TOKEN: ${{ secrets.ROBOT_TOKEN }}
+          GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
+        run: |
+          set -euo pipefail
+          VERSION="${TAG#refs/tags/}"
+          VERSION="${VERSION#v}"
+          BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
+          cat <<EOF >/tmp/codex-prompt.txt
+          You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.
+
+          Follow these steps exactly:
+          1. Use script "ci/set_lance_version.py" to update Lance dependencies. The script already refreshes Cargo metadata, so allow it to finish even if it takes time.
+          2. Run "cargo clippy --workspace --tests --all-features -- -D warnings". If diagnostics appear, fix them yourself and rerun clippy until it exits cleanly. Do not skip any warnings.
+          3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
+          4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
+          5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
+          6. Stage all relevant files with "git add -A". Commit using the message "chore: update lance dependency to v${VERSION}".
+          7. Push the branch to origin. If the branch already exists, force-push your changes.
+          8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
+          9. Create a pull request targeting "main" with title "chore: update lance dependency to v${VERSION}". In the body, summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}).
+          10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
+
+          Constraints:
+          - Use bash commands; avoid modifying GitHub workflow files other than through the scripted task above.
+          - Do not merge the PR.
+          - If any command fails, diagnose and fix the issue instead of aborting.
+          EOF
+          codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat /tmp/codex-prompt.txt)"
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -56,22 +56,12 @@ jobs:
        with:
          node-version: 20
          cache: 'npm'
-          cache-dependency-path: node/package-lock.json
+          cache-dependency-path: docs/package-lock.json
      - name: Install node dependencies
-        working-directory: node
+        working-directory: nodejs
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
-      - name: Build node
-        working-directory: node
-        run: |
-          npm ci
-          npm run build
-          npm run tsc
-      - name: Create markdown files
-        working-directory: node
-        run: |
-          npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
      - name: Build docs
        working-directory: docs
        run: |
--- a/.github/workflows/docs_test.yml
+++ b/.github/workflows/docs_test.yml
@@ -24,7 +24,8 @@ env:
 jobs:
  test-python:
    name: Test doc python code
-    runs-on: ubuntu-24.04
+    runs-on: warp-ubuntu-2204-x64-8x
+    timeout-minutes: 60
    steps:
    - name: Checkout
      uses: actions/checkout@v4
@@ -58,51 +59,3 @@ jobs:
      run: |
        cd docs/test/python
        for d in *; do cd "$d"; echo "$d".py; python "$d".py; cd ..; done
-  test-node:
-    name: Test doc nodejs code
-    runs-on: ubuntu-24.04
-    timeout-minutes: 60
-    strategy:
-      fail-fast: false
-    steps:
-    - name: Checkout
-      uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
-        lfs: true
-    - name: Print CPU capabilities
-      run: cat /proc/cpuinfo
-    - name: Set up Node
-      uses: actions/setup-node@v4
-      with:
-        node-version: 20
-    - name: Install protobuf
-      run: |
-        sudo apt update
-        sudo apt install -y protobuf-compiler
-    - name: Install dependecies needed for ubuntu
-      run: |
-        sudo apt install -y libssl-dev
-        rustup update && rustup default
-    - name: Rust cache
-      uses: swatinem/rust-cache@v2
-    - name: Install node dependencies
-      run: |
-        sudo swapoff -a
-        sudo fallocate -l 8G /swapfile
-        sudo chmod 600 /swapfile
-        sudo mkswap /swapfile
-        sudo swapon /swapfile
-        sudo swapon --show
-        cd node
-        npm ci
-        npm run build-release
-        cd ../docs
-        npm install
-    - name: Test
-      env:
-        LANCEDB_URI: ${{ secrets.LANCEDB_URI }}
-        LANCEDB_DEV_API_KEY: ${{ secrets.LANCEDB_DEV_API_KEY }}
-      run: |
-        cd docs
-        npm t
--- a/.github/workflows/java-publish.yml
+++ b/.github/workflows/java-publish.yml
@@ -43,7 +43,6 @@ jobs:
      - uses: Swatinem/rust-cache@v2
      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
-          toolchain: "1.81.0"
          cache-workspaces: "./java/core/lancedb-jni"
          # Disable full debug symbol generation to speed up CI build and keep memory down
          # "1" means line tables only, which is useful for panic tracebacks.
@@ -112,3 +111,17 @@ jobs:
        env:
          SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
          SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [linux-arm64, linux-x86, macos-arm64]
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}
--- a/.github/workflows/node.yml
+++ b/.github/workflows/node.yml
@@ -1,147 +0,0 @@
-name: Node
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-    paths:
-      - node/**
-      - rust/ffi/node/**
-      - .github/workflows/node.yml
-      - docker-compose.yml
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
-  cancel-in-progress: true
-
-env:
-  # Disable full debug symbol generation to speed up CI build and keep memory down
-  # "1" means line tables only, which is useful for panic tracebacks.
-  #
-  # Use native CPU to accelerate tests if possible, especially for f16
-  # target-cpu=haswell fixes failing ci build
-  RUSTFLAGS: "-C debuginfo=1 -C target-cpu=haswell -C target-feature=+f16c,+avx2,+fma"
-  RUST_BACKTRACE: "1"
-
-jobs:
-  linux:
-    name: Linux (Node ${{ matrix.node-version }})
-    timeout-minutes: 30
-    strategy:
-      matrix:
-        node-version: [ "18", "20" ]
-    runs-on: "ubuntu-22.04"
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
-        lfs: true
-    - uses: actions/setup-node@v3
-      with:
-        node-version: ${{ matrix.node-version }}
-        cache: 'npm'
-        cache-dependency-path: node/package-lock.json
-    - uses: Swatinem/rust-cache@v2
-    - name: Install dependencies
-      run: |
-        sudo apt update
-        sudo apt install -y protobuf-compiler libssl-dev
-    - name: Build
-      run: |
-        npm ci
-        npm run build
-        npm run pack-build
-        npm install --no-save ./dist/lancedb-vectordb-*.tgz
-        # Remove index.node to test with dependency installed
-        rm index.node
-    - name: Test
-      run: npm run test
-  macos:
-    timeout-minutes: 30
-    runs-on: "macos-13"
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
-        lfs: true
-    - uses: actions/setup-node@v3
-      with:
-        node-version: 20
-        cache: 'npm'
-        cache-dependency-path: node/package-lock.json
-    - uses: Swatinem/rust-cache@v2
-    - name: Install dependencies
-      run: brew install protobuf
-    - name: Build
-      run: |
-        npm ci
-        npm run build
-        npm run pack-build
-        npm install --no-save ./dist/lancedb-vectordb-*.tgz
-        # Remove index.node to test with dependency installed
-        rm index.node
-    - name: Test
-      run: |
-        npm run test
-  aws-integtest:
-    timeout-minutes: 45
-    runs-on: "ubuntu-22.04"
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    env:
-      AWS_ACCESS_KEY_ID: ACCESSKEY
-      AWS_SECRET_ACCESS_KEY: SECRETKEY
-      AWS_DEFAULT_REGION: us-west-2
-      # this one is for s3
-      AWS_ENDPOINT: http://localhost:4566
-      # this one is for dynamodb
-      DYNAMODB_ENDPOINT: http://localhost:4566
-      ALLOW_HTTP: true
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
-        lfs: true
-    - uses: actions/setup-node@v3
-      with:
-        node-version: 20
-        cache: 'npm'
-        cache-dependency-path: node/package-lock.json
-    - name: start local stack
-      run: docker compose -f ../docker-compose.yml up -d --wait
-    - name: create s3
-      run: aws s3 mb s3://lancedb-integtest --endpoint $AWS_ENDPOINT
-    - name: create ddb
-      run: |
-        aws dynamodb create-table \
-          --table-name lancedb-integtest \
-          --attribute-definitions '[{"AttributeName": "base_uri", "AttributeType": "S"}, {"AttributeName": "version", "AttributeType": "N"}]' \
-          --key-schema '[{"AttributeName": "base_uri", "KeyType": "HASH"}, {"AttributeName": "version", "KeyType": "RANGE"}]' \
-          --provisioned-throughput '{"ReadCapacityUnits": 10, "WriteCapacityUnits": 10}' \
-          --endpoint-url $DYNAMODB_ENDPOINT
-    - uses: Swatinem/rust-cache@v2
-    - name: Install dependencies
-      run: |
-        sudo apt update
-        sudo apt install -y protobuf-compiler libssl-dev
-    - name: Build
-      run: |
-        npm ci
-        npm run build
-        npm run pack-build
-        npm install --no-save ./dist/lancedb-vectordb-*.tgz
-        # Remove index.node to test with dependency installed
-        rm index.node
-    - name: Test
-      run: npm run integration-test
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -6,6 +6,7 @@ on:
      - main
  pull_request:
    paths:
+      - Cargo.toml
      - nodejs/**
      - .github/workflows/nodejs.yml
      - docker-compose.yml
@@ -79,7 +80,7 @@ jobs:
      with:
        node-version: ${{ matrix.node-version }}
        cache: 'npm'
-        cache-dependency-path: node/package-lock.json
+        cache-dependency-path: nodejs/package-lock.json
    - uses: Swatinem/rust-cache@v2
    - name: Install dependencies
      run: |
@@ -116,7 +117,7 @@ jobs:
        set -e
        npm ci
        npm run docs
-        if ! git diff --exit-code -- . ':(exclude)Cargo.lock'; then
+        if ! git diff --exit-code -- ../ ':(exclude)Cargo.lock'; then
          echo "Docs need to be updated"
          echo "Run 'npm run docs', fix any warnings, and commit the changes."
          exit 1
@@ -137,7 +138,7 @@ jobs:
      with:
        node-version: 20
        cache: 'npm'
-        cache-dependency-path: node/package-lock.json
+        cache-dependency-path: nodejs/package-lock.json
    - uses: Swatinem/rust-cache@v2
    - name: Install dependencies
      run: |
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -365,200 +365,17 @@ jobs:
            ARGS="$ARGS --tag preview"
          fi
          npm publish $ARGS
-
-
-  # ----------------------------------------------------------------------------
-  # vectordb release (legacy)
-  # ----------------------------------------------------------------------------
-  # TODO: delete this when we drop vectordb
-  node:
-    name: vectordb Typescript
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 20
-          cache: "npm"
-          cache-dependency-path: node/package-lock.json
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler libssl-dev
-      - name: Build
-        run: |
-          npm ci
-          npm run tsc
-          npm pack
-      - name: Upload Linux Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-package
-          path: |
-            node/vectordb-*.tgz
-
-  node-macos:
-    name: vectordb ${{ matrix.config.arch }}
-    strategy:
-      matrix:
-        config:
-          - arch: x86_64-apple-darwin
-            runner: macos-13
-          - arch: aarch64-apple-darwin
-            # xlarge is implicitly arm64.
-            runner: macos-14
-    runs-on: ${{ matrix.config.runner }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Install system dependencies
-        run: brew install protobuf
-      - name: Install npm dependencies
-        run: |
-          cd node
-          npm ci
-      - name: Build MacOS native node modules
-        run: bash ci/build_macos_artifacts.sh ${{ matrix.config.arch }}
-      - name: Upload Darwin Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-darwin-${{ matrix.config.arch }}
-          path: |
-            node/dist/lancedb-vectordb-darwin*.tgz
-
-  node-linux-gnu:
-    name: vectordb (${{ matrix.config.arch}}-unknown-linux-gnu)
-    runs-on: ${{ matrix.config.runner }}
-    strategy:
-      fail-fast: false
-      matrix:
-        config:
-          - arch: x86_64
-            runner: ubuntu-latest
-          - arch: aarch64
-            # For successful fat LTO builds, we need a large runner to avoid OOM errors.
-            runner: warp-ubuntu-latest-arm64-4x
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      # To avoid OOM errors on ARM, we create a swap file.
-      - name: Configure aarch64 build
-        if: ${{ matrix.config.arch == 'aarch64' }}
-        run: |
-          free -h
-          sudo fallocate -l 16G /swapfile
-          sudo chmod 600 /swapfile
-          sudo mkswap /swapfile
-          sudo swapon /swapfile
-          echo "/swapfile swap swap defaults 0 0" >> sudo /etc/fstab
-          # print info
-          swapon --show
-          free -h
-      - name: Build Linux Artifacts
-        run: |
-          bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-gnu
-      - name: Upload Linux Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-linux-${{ matrix.config.arch }}-gnu
-          path: |
-            node/dist/lancedb-vectordb-linux*.tgz
-
-  node-windows:
-    name: vectordb ${{ matrix.target }}
-    runs-on: windows-2022
-    strategy:
-      fail-fast: false
-      matrix:
-        target: [x86_64-pc-windows-msvc]
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Install Protoc v21.12
-        working-directory: C:\
-        run: |
-          New-Item -Path 'C:\protoc' -ItemType Directory
-          Set-Location C:\protoc
-          Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
-          7z x protoc.zip
-          Add-Content $env:GITHUB_PATH "C:\protoc\bin"
-        shell: powershell
-      - name: Install npm dependencies
-        run: |
-          cd node
-          npm ci
-      - name: Build Windows native node modules
-        run: .\ci\build_windows_artifacts.ps1 ${{ matrix.target }}
-      - name: Upload Windows Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-windows
-          path: |
-            node/dist/lancedb-vectordb-win32*.tgz
-
-  release:
-    name: vectordb NPM Publish
-    needs: [node, node-macos, node-linux-gnu, node-windows]
+  report-failure:
+    name: Report Workflow Failure
    runs-on: ubuntu-latest
+    needs: [build-lancedb, test-lancedb, publish]
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
    permissions:
-      contents: write
-    # Only runs on tags that matches the make-release action
-    if: startsWith(github.ref, 'refs/tags/v')
+      contents: read
+      issues: write
    steps:
-      - uses: actions/download-artifact@v4
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
        with:
-          pattern: node-*
-      - name: Display structure of downloaded files
-        run: ls -R
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 20
-          registry-url: "https://registry.npmjs.org"
-      - name: Publish to NPM
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
-        run: |
-          # Tag beta as "preview" instead of default "latest". See lancedb
-          # npm publish step for more info.
-          if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
-            PUBLISH_ARGS="--tag preview"
-          fi
-
-          mv */*.tgz .
-          for filename in *.tgz; do
-            npm publish $PUBLISH_ARGS $filename
-          done
-      - name: Deprecate
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
-        # We need to deprecate the old package to avoid confusion.
-        # Each time we publish a new version, it gets undeprecated.
-        run: npm deprecate vectordb "Use @lancedb/lancedb instead."
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          ref: main
-      - name: Update package-lock.json
-        run: |
-          git config user.name 'Lance Release'
-          git config user.email 'lance-dev@lancedb.com'
-          bash ci/update_lockfiles.sh
-      - name: Push new commit
-        uses: ad-m/github-push-action@master
-        with:
-          github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
-          branch: main
-      - name: Notify Slack Action
-        uses: ravsamhq/notify-slack-action@2.3.0
-        if: ${{ always() }}
-        with:
-          status: ${{ job.status }}
-          notify_when: "failure"
-          notification_title: "{workflow} is failing"
-        env:
-          SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -56,7 +56,7 @@ jobs:
          pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
          fury_token: ${{ secrets.FURY_TOKEN }}
  mac:
-    timeout-minutes: 60
+    timeout-minutes: 90
    runs-on: ${{ matrix.config.runner }}
    strategy:
      matrix:
@@ -64,7 +64,7 @@ jobs:
          - target: x86_64-apple-darwin
            runner: macos-13
          - target: aarch64-apple-darwin
-            runner: macos-14
+            runner: warp-macos-14-arm64-6x
    env:
      MACOSX_DEPLOYMENT_TARGET: 10.15
    steps:
@@ -173,3 +173,17 @@ jobs:
          generate_release_notes: false
          name: Python LanceDB v${{ steps.extract_version.outputs.version }}
          body: ${{ steps.python_release_notes.outputs.changelog }}
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [linux, mac, windows]
+    permissions:
+      contents: read
+      issues: write
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -6,6 +6,7 @@ on:
      - main
  pull_request:
    paths:
+      - Cargo.toml
      - python/**
      - .github/workflows/python.yml

--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -96,6 +96,7 @@ jobs:
      # Need up-to-date compilers for kernels
      CC: clang-18
      CXX: clang++-18
+      GH_TOKEN: ${{ secrets.SOPHON_READ_TOKEN }}
    steps:
      - uses: actions/checkout@v4
        with:
@@ -117,15 +118,17 @@ jobs:
          sudo chmod 600 /swapfile
          sudo mkswap /swapfile
          sudo swapon /swapfile
-      - name: Start S3 integration test environment
-        working-directory: .
-        run: docker compose up --detach --wait
      - name: Build
        run: cargo build --all-features --tests --locked --examples
-      - name: Run tests
-        run: cargo test --all-features --locked
+      - name: Run feature tests
+        run: make -C ./lancedb feature-tests
      - name: Run examples
        run: cargo run --example simple --locked
+      - name: Run remote tests
+        # Running this requires access to secrets, so skip if this is
+        # a PR from a fork.
+        if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
+        run: make -C ./lancedb remote-tests

  macos:
    timeout-minutes: 30
--- a/.github/workflows/trigger-vectordb-recipes.yml
+++ b/.github/workflows/trigger-vectordb-recipes.yml
@@ -1,26 +0,0 @@
-name: Trigger vectordb-recipers workflow
-on:
-  push:
-    branches: [ main ]
-  pull_request:
-    paths:
-      - .github/workflows/trigger-vectordb-recipes.yml
-  workflow_dispatch:
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Trigger vectordb-recipes workflow
-        uses: actions/github-script@v6
-        with:
-          github-token: ${{ secrets.VECTORDB_RECIPES_ACTION_TOKEN }}
-          script: |
-            const result = await github.rest.actions.createWorkflowDispatch({
-                owner: 'lancedb',
-                repo: 'vectordb-recipes',
-                workflow_id: 'examples-test.yml',
-                ref: 'main'
-            });
-            console.log(result);
--- a/.gitignore
+++ b/.gitignore
@@ -31,9 +31,6 @@ python/dist
 *.node
 **/node_modules
 **/.DS_Store
-node/dist
-node/examples/**/package-lock.json
-node/examples/**/dist
 nodejs/lancedb/native*
 dist

--- a/AGENTS.md
+++ b/AGENTS.md
@@ -0,0 +1,101 @@
+LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
+It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
+remote (against LanceDB Cloud).
+
+The core of LanceDB is written in Rust. There are bindings in Python, TypeScript, and Java.
+
+Project layout:
+
+* `rust/lancedb`: The LanceDB core Rust implementation.
+* `python`: The Python bindings, using PyO3.
+* `nodejs`: The TypeScript bindings, using napi-rs.
+* `java`: The Java bindings.
+
+Common commands:
+
+* Check for compiler errors: `cargo check --quiet --features remote --tests --examples`
+* Run tests: `cargo test --quiet --features remote --tests`
+* Run specific test: `cargo test --quiet --features remote -p <package_name> --test <test_name>`
+* Lint: `cargo clippy --quiet --features remote --tests --examples`
+* Format: `cargo fmt --all`
+
+Before committing changes, run formatting.
+
+## Coding tips
+
+* When writing Rust doctests for things that require a connection or table reference,
+  write them as a function instead of a fully executable test. This allows type checking
+  to run but avoids needing a full test environment. For example:
+    ```rust
+    /// ```
+    /// use lance_index::scalar::FullTextSearchQuery;
+    /// use lancedb::query::{QueryBase, ExecutableQuery};
+    ///
+    /// # use lancedb::Table;
+    /// # async fn query(table: &Table) -> Result<(), Box<dyn std::error::Error>> {
+    /// let results = table.query()
+    ///     .full_text_search(FullTextSearchQuery::new("hello world".into()))
+    ///     .execute()
+    ///     .await?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    ```
+
+## Example plan: adding a new method on Table
+
+Adding a new method involves first adding it to the Rust core, then exposing it
+in the Python and TypeScript bindings. There are both local and remote tables.
+Remote tables are implemented via an HTTP API and require the `remote` cargo
+feature flag to be enabled. Python has both sync and async methods.
+
+Rust core changes:
+
+1. Add method on `Table` struct in `rust/lancedb/src/table.rs` (calls `BaseTable` trait).
+2. Add method to `BaseTable` trait in `rust/lancedb/src/table.rs`.
+3. Implement new trait method on `NativeTable` in `rust/lancedb/src/table.rs`.
+    * Test with unit test in `rust/lancedb/src/table.rs`.
+4. Implement new trait method on `RemoteTable` in `rust/lancedb/src/remote/table.rs`.
+    * Test with unit test in `rust/lancedb/src/remote/table.rs` against mocked endpoint.
+
+Python bindings changes:
+
+1. Add PyO3 method binding in `python/src/table.rs`. Run `make develop` to compile bindings.
+2. Add types for PyO3 method in `python/python/lancedb/_lancedb.pyi`.
+3. Add method to `AsyncTable` class in `python/python/lancedb/table.py`.
+4. Add abstract method to `Table` abstract base class in `python/python/lancedb/table.py`.
+5. Add concrete sync method to `LanceTable` class in `python/python/lancedb/table.py`.
+    * Should use `LOOP.run()` to call the corresponding `AsyncTable` method.
+6. Add concrete sync method to `RemoteTable` class in `python/python/lancedb/remote/table.py`.
+7. Add unit test in `python/tests/test_table.py`.
+
+TypeScript bindings changes:
+
+1. Add napi-rs method binding on `Table` in `nodejs/src/table.rs`.
+2. Run `npm run build` to generate TypeScript definitions.
+3. Add TypeScript method on abstract class `Table` in `nodejs/src/table.ts`.
+4. Add concrete method on `LocalTable` class in `nodejs/src/native_table.ts`.
+    * Note: despite the name, this class is also used for remote tables.
+5. Add test in `nodejs/__test__/table.test.ts`.
+6. Run `npm run docs` to generate TypeScript documentation.
+
+## Review Guidelines
+
+Please consider the following when reviewing code contributions.
+
+### Rust API design
+* Design public APIs so they can be evolved easily in the future without breaking
+  changes. Often this means using builder patterns or options structs instead of
+  long argument lists.
+* For public APIs, prefer inputs that use `Into<T>` or `AsRef<T>` traits to allow
+  more flexible inputs. For example, use `name: impl Into<String>` instead of `name: String`,
+  so we don't have to write `func("my_string".to_string())`.
+
+### Testing
+* Ensure all new public APIs have documentation and examples.
+* Ensure that all bugfixes and features have corresponding tests. **We do not merge
+  code without tests.**
+
+### Documentation
+* New features must include updates to the rust documentation comments. Link to
+  relevant structs and methods to increase the value of documentation.
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -0,0 +1 @@
+AGENTS.md
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,11 +1,5 @@
 [workspace]
-members = [
-    "rust/ffi/node",
-    "rust/lancedb",
-    "nodejs",
-    "python",
-    "java/core/lancedb-jni",
-]
+members = ["rust/lancedb", "nodejs", "python", "java/core/lancedb-jni"]
 # Python package needs to be built by maturin.
 exclude = ["python"]
 resolver = "2"
@@ -21,52 +15,51 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.30.0", "features" = ["dynamodb"] }
-lance-io = "=0.30.0"
-lance-index = "=0.30.0"
-lance-linalg = "=0.30.0"
-lance-table = "=0.30.0"
-lance-testing = "=0.30.0"
-lance-datafusion = "=0.30.0"
-lance-encoding = "=0.30.0"
+lance = { "version" = "=0.39.0", default-features = false }
+lance-core = "=0.39.0"
+lance-datagen = "=0.39.0"
+lance-file = "=0.39.0"
+lance-io = { "version" = "=0.39.0", default-features = false }
+lance-index = "=0.39.0"
+lance-linalg = "=0.39.0"
+lance-namespace = "=0.39.0"
+lance-namespace-impls = { "version" = "=0.39.0", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"] }
+lance-table = "=0.39.0"
+lance-testing = "=0.39.0"
+lance-datafusion = "=0.39.0"
+lance-encoding = "=0.39.0"
+lance-arrow = "=0.39.0"
+ahash = "0.8"
 # Note that this one does not include pyarrow
-arrow = { version = "55.1", optional = false }
-arrow-array = "55.1"
-arrow-data = "55.1"
-arrow-ipc = "55.1"
-arrow-ord = "55.1"
-arrow-schema = "55.1"
-arrow-arith = "55.1"
-arrow-cast = "55.1"
+arrow = { version = "56.2", optional = false }
+arrow-array = "56.2"
+arrow-data = "56.2"
+arrow-ipc = "56.2"
+arrow-ord = "56.2"
+arrow-schema = "56.2"
+arrow-select = "56.2"
+arrow-cast = "56.2"
 async-trait = "0"
-datafusion = { version = "47.0", default-features = false }
-datafusion-catalog = "47.0"
-datafusion-common = { version = "47.0", default-features = false }
-datafusion-execution = "47.0"
-datafusion-expr = "47.0"
-datafusion-physical-plan = "47.0"
+datafusion = { version = "50.1", default-features = false }
+datafusion-catalog = "50.1"
+datafusion-common = { version = "50.1", default-features = false }
+datafusion-execution = "50.1"
+datafusion-expr = "50.1"
+datafusion-physical-plan = "50.1"
 env_logger = "0.11"
-half = { "version" = "=2.5.0", default-features = false, features = [
+half = { "version" = "2.6.0", default-features = false, features = [
    "num-traits",
 ] }
 futures = "0"
 log = "0.4"
 moka = { version = "0.12", features = ["future"] }
-object_store = "0.11.0"
+object_store = "0.12.0"
 pin-project = "1.0.7"
+rand = "0.9"
 snafu = "0.8"
 url = "2"
 num-traits = "0.2"
-rand = "0.9"
 regex = "1.10"
 lazy_static = "1"
 semver = "1.0.25"
-# Temporary pins to work around downstream issues
-# https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
-chrono = "=0.4.41"
-# https://github.com/RustCrypto/formats/issues/1684
-base64ct = "=1.6.0"
-# Workaround for: https://github.com/eira-fransham/crunchy/issues/13
-crunchy = "=0.2.2"
-# Workaround for: https://github.com/Lokathor/bytemuck/issues/306
-bytemuck_derive = ">=1.8.1, <1.9.0"
+chrono = "0.4"
--- a/ci/build_linux_artifacts.sh
+++ b/ci/build_linux_artifacts.sh
@@ -1,22 +0,0 @@
-#!/bin/bash
-set -e
-ARCH=${1:-x86_64}
-TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
-
-# We pass down the current user so that when we later mount the local files
-# into the container, the files are accessible by the current user.
-pushd ci/manylinux_node
-docker build \
-    -t lancedb-node-manylinux \
-    --build-arg="ARCH=$ARCH" \
-    --build-arg="DOCKER_USER=$(id -u)" \
-    --progress=plain \
-    .
-popd
-
-# We turn on memory swap to avoid OOM killer
-docker run \
-    -v $(pwd):/io -w /io \
-    --memory-swap=-1 \
-    lancedb-node-manylinux \
-    bash ci/manylinux_node/build_vectordb.sh $ARCH $TARGET_TRIPLE
--- a/ci/build_macos_artifacts.sh
+++ b/ci/build_macos_artifacts.sh
@@ -1,34 +0,0 @@
-# Builds the macOS artifacts (node binaries).
-# Usage: ./ci/build_macos_artifacts.sh [target]
-# Targets supported: x86_64-apple-darwin aarch64-apple-darwin
-set -e
-
-prebuild_rust() {
-    # Building here for the sake of easier debugging.
-    pushd rust/ffi/node
-    echo "Building rust library for $1"
-    export RUST_BACKTRACE=1
-    cargo build --release --target $1
-    popd
-}
-
-build_node_binaries() {
-    pushd node
-    echo "Building node library for $1"
-    npm run build-release -- --target $1
-    npm run pack-build -- --target $1
-    popd
-}
-
-if [ -n "$1" ]; then
-    targets=$1
-else
-    targets="x86_64-apple-darwin aarch64-apple-darwin"
-fi
-
-echo "Building artifacts for targets: $targets"
-for target in $targets
-    do
-    prebuild_rust $target
-    build_node_binaries $target
-done
--- a/ci/build_windows_artifacts.ps1
+++ b/ci/build_windows_artifacts.ps1
@@ -1,42 +0,0 @@
-# Builds the Windows artifacts (node binaries).
-# Usage:  .\ci\build_windows_artifacts.ps1 [target]
-# Targets supported:
-# - x86_64-pc-windows-msvc
-# - i686-pc-windows-msvc
-# - aarch64-pc-windows-msvc
-
-function Prebuild-Rust {
-    param (
-        [string]$target
-    )
-
-    # Building here for the sake of easier debugging.
-    Push-Location -Path "rust/ffi/node"
-    Write-Host "Building rust library for $target"
-    $env:RUST_BACKTRACE=1
-    cargo build --release --target $target
-    Pop-Location
-}
-
-function Build-NodeBinaries {
-    param (
-        [string]$target
-    )
-
-    Push-Location -Path "node"
-    Write-Host "Building node library for $target"
-    npm run build-release -- --target $target
-    npm run pack-build -- --target $target
-    Pop-Location
-}
-
-$targets = $args[0]
-if (-not $targets) {
-    $targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
-}
-
-Write-Host "Building artifacts for targets: $targets"
-foreach ($target in $targets) {
-    Prebuild-Rust $target
-    Build-NodeBinaries $target
-}
--- a/ci/build_windows_artifacts_nodejs.ps1
+++ b/ci/build_windows_artifacts_nodejs.ps1
@@ -1,42 +0,0 @@
-# Builds the Windows artifacts (nodejs binaries).
-# Usage:  .\ci\build_windows_artifacts_nodejs.ps1 [target]
-# Targets supported:
-# - x86_64-pc-windows-msvc
-# - i686-pc-windows-msvc
-# - aarch64-pc-windows-msvc
-
-function Prebuild-Rust {
-    param (
-        [string]$target
-    )
-
-    # Building here for the sake of easier debugging.
-    Push-Location -Path "rust/lancedb"
-    Write-Host "Building rust library for $target"
-    $env:RUST_BACKTRACE=1
-    cargo build --release --target $target
-    Pop-Location
-}
-
-function Build-NodeBinaries {
-    param (
-        [string]$target
-    )
-
-    Push-Location -Path "nodejs"
-    Write-Host "Building nodejs library for $target"
-    $env:RUST_TARGET=$target
-    npm run build-release
-    Pop-Location
-}
-
-$targets = $args[0]
-if (-not $targets) {
-    $targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
-}
-
-Write-Host "Building artifacts for targets: $targets"
-foreach ($target in $targets) {
-    Prebuild-Rust $target
-    Build-NodeBinaries $target
-}
--- a/ci/create_lancedb_test_connection.sh
+++ b/ci/create_lancedb_test_connection.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+
+export RUST_LOG=info
+exec ./lancedb server --port 0 --sql-port 0  --data-dir "${1}"
--- a/ci/manylinux_node/Dockerfile
+++ b/ci/manylinux_node/Dockerfile
@@ -1,27 +0,0 @@
-# Many linux dockerfile with Rust, Node, and Lance dependencies installed.
-# This container allows building the node modules native libraries in an
-# environment with a very old glibc, so that we are compatible with a wide
-# range of linux distributions.
-ARG ARCH=x86_64
-
-FROM quay.io/pypa/manylinux_2_28_${ARCH}
-
-ARG ARCH=x86_64
-ARG DOCKER_USER=default_user
-
-# Protobuf is also installed as root.
-COPY install_protobuf.sh install_protobuf.sh
-RUN ./install_protobuf.sh ${ARCH}
-
-ENV DOCKER_USER=${DOCKER_USER}
-# Create a group and user, but only if it doesn't exist
-RUN echo ${ARCH} && id -u ${DOCKER_USER} >/dev/null 2>&1 || adduser --user-group --create-home --uid ${DOCKER_USER} build_user
-
-# We switch to the user to install Rust and Node, since those like to be
-# installed at the user level.
-USER ${DOCKER_USER}
-
-COPY prepare_manylinux_node.sh prepare_manylinux_node.sh
-RUN cp /prepare_manylinux_node.sh $HOME/ && \
-    cd $HOME && \
-    ./prepare_manylinux_node.sh ${ARCH}
--- a/ci/manylinux_node/build_vectordb.sh
+++ b/ci/manylinux_node/build_vectordb.sh
@@ -1,13 +0,0 @@
-#!/bin/bash
-# Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
-set -e
-ARCH=${1:-x86_64}
-TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
-
-#Alpine doesn't have .bashrc
-FILE=$HOME/.bashrc && test -f $FILE && source $FILE
-
-cd node
-npm ci
-npm run build-release
-npm run pack-build -- -t $TARGET_TRIPLE
--- a/ci/manylinux_node/install_protobuf.sh
+++ b/ci/manylinux_node/install_protobuf.sh
@@ -1,15 +0,0 @@
-#!/bin/bash
-# Installs protobuf compiler. Should be run as root.
-set -e
-
-if [[ $1 == x86_64* ]]; then
-    ARCH=x86_64
-else
-    # gnu target
-    ARCH=aarch_64
-fi
-
-PB_REL=https://github.com/protocolbuffers/protobuf/releases
-PB_VERSION=23.1
-curl -LO $PB_REL/download/v$PB_VERSION/protoc-$PB_VERSION-linux-$ARCH.zip
-unzip protoc-$PB_VERSION-linux-$ARCH.zip -d /usr/local
--- a/ci/manylinux_node/prepare_manylinux_node.sh
+++ b/ci/manylinux_node/prepare_manylinux_node.sh
@@ -1,21 +0,0 @@
-#!/bin/bash
-set -e
-
-install_node() {
-    echo "Installing node..."
-
-    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
-
-    source "$HOME"/.bashrc
-
-    nvm install --no-progress 18
-}
-
-install_rust() {
-    echo "Installing rust..."
-    curl https://sh.rustup.rs -sSf | bash -s -- -y
-    export PATH="$PATH:/root/.cargo/bin"
-}
-
-install_node
-install_rust
--- a/ci/run_with_docker_compose.sh
+++ b/ci/run_with_docker_compose.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+#
+# A script for running the given command together with a docker compose environment.
+#
+
+# Bring down the docker setup once the command is done running.
+tear_down() {
+    docker compose -p fixture down
+}
+trap tear_down EXIT
+
+set +xe
+
+# Bring up the docker compose environment; abort if it fails to start.
+docker compose -p fixture up --detach --wait || exit 1
+
+"${@}"
--- a/ci/run_with_test_connection.sh
+++ b/ci/run_with_test_connection.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+
+#
+# A script for running the given command together with the lancedb cli.
+#
+
+die() {  # print the caller's message to stderr, then abort with status 1
+    echo "$*" >&2
+    exit 1
+}
+
+check_command_exists() {  # abort via die() unless the command named in $1 can be resolved
+    local cmd="${1}"
+    command -v "${cmd}" &> /dev/null || \
+        die "Unable to locate command: ${cmd}. Did you install it?"
+}
+
+if [[ ! -e ./lancedb ]]; then  # only fetch the CLI when it is not already present
+    if [[ -v SOPHON_READ_TOKEN ]]; then  # token set: fetch the encrypted linux-x64 asset
+        INPUT="lancedb-linux-x64"
+        gh release \
+            --repo lancedb/lancedb \
+            download ci-support-binaries \
+            --pattern "${INPUT}" \
+            || die "failed to fetch cli."
+        check_command_exists openssl
+        openssl enc -aes-256-cbc \
+            -d -pbkdf2 \
+            -pass "env:SOPHON_READ_TOKEN" \
+            -in "${INPUT}" \
+            -out ./lancedb-linux-x64.tar.gz \
+            || die "openssl failed"
+        TARGET="${INPUT}.tar.gz"
+    else  # no token: pick the release tarball matching this host's OS and CPU
+        ARCH="x64"
+        UNAME=$(uname -m)  # read once; both the darwin and linux branches compare against it
+        if [[ $OSTYPE == 'darwin'* ]]; then
+            if [[ $UNAME == 'arm64' ]]; then
+                ARCH='arm64'
+            fi
+            OSTYPE="macos"
+        elif [[ $OSTYPE == 'linux'* ]]; then
+            if [[ $UNAME == 'aarch64' ]]; then
+                ARCH='arm64'
+            fi
+            OSTYPE="linux"
+        else
+            die "unknown OSTYPE: $OSTYPE"
+        fi
+
+        check_command_exists gh
+        TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
+        gh release \
+            --repo lancedb/sophon \
+            download lancedb-cli-v0.0.3 \
+            --pattern "${TARGET}" \
+            || die "failed to fetch cli."
+    fi
+
+    check_command_exists tar
+    tar xvf "${TARGET}" || die "tar failed."
+    [[ -e ./lancedb ]] || die "failed to extract lancedb."  # the archive must contain ./lancedb
+fi
+
+SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
+export CREATE_LANCEDB_TEST_CONNECTION_SCRIPT="${SCRIPT_DIR}/create_lancedb_test_connection.sh"
+
+"${@}"
--- a/ci/set_lance_version.py
+++ b/ci/set_lance_version.py
@@ -1,4 +1,5 @@
 import argparse
+import re
 import sys
 import json

@@ -18,8 +19,12 @@ def run_command(command: str) -> str:

 def get_latest_stable_version() -> str:
    version_line = run_command("cargo info lance | grep '^version:'")
-    version = version_line.split(" ")[1].strip()
-    return version
+    # Example output: "version: 0.35.0 (latest 0.37.0)"
+    match = re.search(r'\(latest ([0-9.]+)\)', version_line)
+    if match:
+        return match.group(1)
+    # Fallback: use the first version after 'version:'
+    return version_line.split("version:")[1].split()[0].strip()


 def get_latest_preview_version() -> str:
@@ -47,13 +52,59 @@ def extract_features(line: str) -> list:
    """
    import re

-    match = re.search(r'"features"\s*=\s*\[(.*?)\]', line)
+    match = re.search(r'"features"\s*=\s*\[\s*(.*?)\s*\]', line, re.DOTALL)
    if match:
        features_str = match.group(1)
-        return [f.strip('"') for f in features_str.split(",")]
+        return [f.strip().strip('"') for f in features_str.split(",") if f.strip()]
    return []


+def extract_default_features(line: str) -> bool:
+    """
+    Report whether a Cargo.toml line disables a dependency's default features.
+    Example: 'lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"] }'
+    Returns: True when `default-features = false` appears in the line, False otherwise
+    """
+    import re
+
+    disabled_pattern = r'default-features\s*=\s*false'
+    return re.search(disabled_pattern, line) is not None
+
+
+def dict_to_toml_line(package_name: str, config: dict) -> str:
+    """
+    Render one Cargo.toml dependency entry from a configuration mapping.
+
+    Fields are emitted in the mapping's insertion order (guaranteed by
+    Python 3.7+), so callers decide the field order by how they build `config`.
+
+    Args:
+        package_name: Dependency name, e.g. "lance" or "lance-io".
+        config: Mapping with keys such as "version", "path", "git", "tag",
+                "features" and "default-features".
+
+    Returns:
+        The TOML line, terminated by a newline.
+    """
+
+    def render_field(name, setting) -> str:
+        # A falsy default-features is written bare (unquoted key, literal false).
+        if name == "default-features" and not setting:
+            return "default-features = false"
+        # Plain strings (other than "features") are quoted verbatim.
+        if name != "features" and isinstance(setting, str):
+            return f'"{name}" = "{setting}"'
+        # Feature lists and any other value are serialised as JSON.
+        return f'"{name}" = {json.dumps(setting)}'
+
+    # A lone version collapses to the short `name = "version"` form.
+    if len(config) == 1 and "version" in config:
+        return f'{package_name} = "{config["version"]}"\n'
+
+    fields = ", ".join(render_field(k, v) for k, v in config.items())
+    return f'{package_name} = {{ {fields} }}\n'
+
+
 def update_cargo_toml(line_updater):
    """
    Updates the Cargo.toml file by applying the line_updater function to each line.
@@ -63,10 +114,31 @@ def update_cargo_toml(line_updater):
        lines = f.readlines()

    new_lines = []
+    lance_line = ""
+    is_parsing_lance_line = False
    for line in lines:
        if line.startswith("lance"):
-            # Update the line using the provided function
+            # Check if this is a single-line or multi-line entry
+            # Single-line entries either:
+            # 1. End with } (complete inline table)
+            # 2. End with " (simple version string)
+            # Multi-line entries start with { but don't end with }
+            if line.strip().endswith("}") or line.strip().endswith('"'):
+                # Single-line entry - process immediately
                new_lines.append(line_updater(line))
+            elif "{" in line and not line.strip().endswith("}"):
+                # Multi-line entry - start accumulating
+                lance_line = line
+                is_parsing_lance_line = True
+            else:
+                # Single-line entry without quotes or braces (shouldn't happen but handle it)
+                new_lines.append(line_updater(line))
+        elif is_parsing_lance_line:
+            lance_line += line
+            if line.strip().endswith("}"):
+                new_lines.append(line_updater(lance_line))
+                lance_line = ""
+                is_parsing_lance_line = False
        else:
            # Keep the line unchanged
            new_lines.append(line)
@@ -78,18 +150,25 @@ def update_cargo_toml(line_updater):
 def set_stable_version(version: str):
    """
    Sets lines to
-    lance = { "version" = "=0.29.0", "features" = ["dynamodb"] }
-    lance-io = "=0.29.0"
+    lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"] }
+    lance-io = { "version" = "=0.29.0", default-features = false }
    ...
    """

    def line_updater(line: str) -> str:
        package_name = line.split("=", maxsplit=1)[0].strip()
+
+        # Build config in desired order: version, default-features, features
+        config = {"version": f"={version}"}
+
+        if extract_default_features(line):
+            config["default-features"] = False
+
        features = extract_features(line)
        if features:
-            return f'{package_name} = {{ "version" = "={version}", "features" = {json.dumps(features)} }}\n'
-        else:
-            return f'{package_name} = "={version}"\n'
+            config["features"] = features
+
+        return dict_to_toml_line(package_name, config)

    update_cargo_toml(line_updater)

@@ -97,19 +176,27 @@ def set_stable_version(version: str):
 def set_preview_version(version: str):
    """
    Sets lines to
-    lance = { "version" = "=0.29.0", "features" = ["dynamodb"], tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
-    lance-io = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
+    lance = { "version" = "=0.29.0-beta.2", default-features = false, "features" = ["dynamodb"], "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+    lance-io = { "version" = "=0.29.0-beta.2", default-features = false, "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
    ...
    """

    def line_updater(line: str) -> str:
        package_name = line.split("=", maxsplit=1)[0].strip()
+        # Build config in desired order: version, default-features, features, tag, git
+        config = {"version": f"={version}"}
+
+        if extract_default_features(line):
+            config["default-features"] = False
+
        features = extract_features(line)
-        base_version = version.split("-")[0]  # Get the base version without beta suffix
        if features:
-            return f'{package_name} = {{ "version" = "={base_version}", "features" = {json.dumps(features)}, "tag" = "v{version}", "git" = "https://github.com/lancedb/lance.git" }}\n'
-        else:
-            return f'{package_name} = {{ "version" = "={base_version}", "tag" = "v{version}", "git" = "https://github.com/lancedb/lance.git" }}\n'
+            config["features"] = features
+
+        config["tag"] = f"v{version}"
+        config["git"] = "https://github.com/lancedb/lance.git"
+
+        return dict_to_toml_line(package_name, config)

    update_cargo_toml(line_updater)

@@ -117,18 +204,25 @@ def set_preview_version(version: str):
 def set_local_version():
    """
    Sets lines to
-    lance = { path = "../lance/rust/lance", features = ["dynamodb"] }
-    lance-io = { path = "../lance/rust/lance-io" }
+    lance = { "path" = "../lance/rust/lance", default-features = false, "features" = ["dynamodb"] }
+    lance-io = { "path" = "../lance/rust/lance-io", default-features = false }
    ...
    """

    def line_updater(line: str) -> str:
        package_name = line.split("=", maxsplit=1)[0].strip()
+
+        # Build config in desired order: path, default-features, features
+        config = {"path": f"../lance/rust/{package_name}"}
+
+        if extract_default_features(line):
+            config["default-features"] = False
+
        features = extract_features(line)
        if features:
-            return f'{package_name} = {{ "path" = "../lance/rust/{package_name}", "features" = {json.dumps(features)} }}\n'
-        else:
-            return f'{package_name} = {{ "path" = "../lance/rust/{package_name}" }}\n'
+            config["features"] = features
+
+        return dict_to_toml_line(package_name, config)

    update_cargo_toml(line_updater)

--- a/ci/update_lockfiles.sh
+++ b/ci/update_lockfiles.sh
@@ -15,16 +15,13 @@ cargo metadata --quiet > /dev/null
 pushd nodejs || exit 1
 npm install --package-lock-only --silent
 popd
-pushd node || exit 1
-npm install --package-lock-only --silent
-popd

 if git diff --quiet --exit-code; then
  echo "No lockfile changes to commit; skipping amend."
 elif $AMEND; then
-  git add Cargo.lock nodejs/package-lock.json node/package-lock.json
+  git add Cargo.lock nodejs/package-lock.json
  git commit --amend --no-edit
 else
-  git add Cargo.lock nodejs/package-lock.json node/package-lock.json
+  git add Cargo.lock nodejs/package-lock.json
  git commit -m "Update lockfiles"
 fi
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -70,6 +70,23 @@ plugins:
  - mkdocs-jupyter
  - render_swagger:
      allow_arbitrary_locations: true
+  - redirects:
+      redirect_maps:
+        # Redirect the home page and other top-level markdown files. This enables maximum SEO benefit.
+        # Other sub-pages are handled by the injected JS in overrides/partials/header.html.
+        'index.md': 'https://lancedb.com/docs/'
+        'guides/tables.md': 'https://lancedb.com/docs/tables/'
+        'ann_indexes.md': 'https://lancedb.com/docs/indexing/'
+        'basic.md': 'https://lancedb.com/docs/quickstart/'
+        'faq.md': 'https://lancedb.com/docs/faq/'
+        'embeddings/understanding_embeddings.md': 'https://lancedb.com/docs/embedding/'
+        'integrations.md': 'https://lancedb.com/docs/integrations/'
+        'examples.md': 'https://lancedb.com/docs/tutorials/'
+        'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
+        'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'
+        'guides/storage.md': 'https://lancedb.com/docs/storage/integrations'
+
+

 markdown_extensions:
  - admonition
--- a/docs/overrides/partials/header.html
+++ b/docs/overrides/partials/header.html
@@ -19,7 +19,13 @@
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  IN THE SOFTWARE.
 -->
-
+<div id="deprecation-banner" style="background-color: #f8d7da; color: #721c24; padding: 1em; text-align: center;">                                                           
+    <p style="margin: 0; font-size: 1.1em;">                                                                                                           
+        <strong>This documentation site is deprecated.</strong>                                                                           
+        Please visit our new documentation site at <a href="https://lancedb.com/docs" style="color: #721c24; text-decoration: underline;">
+          lancedb.com/docs</a> for the latest information.                                                                                        
+    </p>                                                                                                                                               
+  </div>  
 {% set class = "md-header" %}
 {% if "navigation.tabs.sticky" in features %}
  {% set class = class ~ " md-header--shadow md-header--lifted" %}
@@ -174,3 +180,76 @@
    {% endif %}
  {% endif %}
 </header>
+
+<script>
+  (function() {
+    function checkPathAndRedirect() {
+      var banner = document.getElementById('deprecation-banner');
+
+      if (document.querySelector('meta[http-equiv="refresh"]')) {
+        return; // The redirects plugin is already handling this page.
+      }
+
+      var currentPath = window.location.pathname;
+
+      var cleanPath = currentPath.endsWith('/') && currentPath.length > 1
+        ? currentPath.slice(0, -1)
+        : currentPath;
+
+      // These are the ONLY paths that should remain on the old site
+      var apiPaths = [
+        '/lancedb/python', 
+        '/lancedb/javascript', 
+        '/lancedb/js',
+        '/lancedb/api_reference'
+      ];
+      
+      var isApiPage = apiPaths.some(function(apiPath) {
+        return cleanPath.startsWith(apiPath);
+      });
+
+      if (isApiPage) {
+        if (banner) {
+          banner.style.display = 'none';
+        }
+      } else {
+        if (banner) {
+          banner.style.display = 'block';
+        }
+        
+        // Add noindex meta tag to prevent indexing of old docs for seo
+        var noindexMeta = document.createElement('meta');
+        noindexMeta.setAttribute('name', 'robots');
+        noindexMeta.setAttribute('content', 'noindex, follow');
+        document.head.appendChild(noindexMeta);
+
+        // Add canonical link to point to the new docs to reward new site for seo
+        var canonicalLink = document.createElement('link');
+        canonicalLink.setAttribute('rel', 'canonical');
+        canonicalLink.setAttribute('href', 'https://lancedb.com/docs');
+        document.head.appendChild(canonicalLink);
+        
+        window.location.replace('https://lancedb.com/docs');
+      }
+    }
+
+    // Run the check only if doc is ready. This makes sure we catch the initial load
+    // and redirect.
+    if (document.readyState === 'loading') {
+      document.addEventListener('DOMContentLoaded', checkPathAndRedirect);
+    } else {
+      checkPathAndRedirect();
+    }
+
+    // Use an interval to handle subsequent navigation clicks.
+    var lastPath = window.location.pathname;
+    setInterval(function() {
+      if (window.location.pathname !== lastPath) {
+        lastPath = window.location.pathname;
+        checkPathAndRedirect();
+      }
+    }, 2000); // keep it at 2 seconds to make it easy for the user to understand
+              // what's happening
+
+  })();
+</script>
--- a/docs/package-lock.json
+++ b/docs/package-lock.json
@@ -19,7 +19,7 @@
    },
    "../node": {
      "name": "vectordb",
-      "version": "0.12.0",
+      "version": "0.21.2-beta.0",
      "cpu": [
        "x64",
        "arm64"
@@ -65,11 +65,11 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.12.0",
-        "@lancedb/vectordb-darwin-x64": "0.12.0",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.12.0",
-        "@lancedb/vectordb-linux-x64-gnu": "0.12.0",
-        "@lancedb/vectordb-win32-x64-msvc": "0.12.0"
+        "@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
+        "@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
+        "@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
+        "@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
      },
      "peerDependencies": {
        "@apache-arrow/ts": "^14.0.2",
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -5,3 +5,4 @@ mkdocstrings[python]==0.25.2
 griffe
 mkdocs-render-swagger-plugin
 pydantic
+mkdocs-redirects
--- a/docs/src/concepts/data_management.md
+++ b/docs/src/concepts/data_management.md
@@ -37,6 +37,10 @@ Depending on the use case and dataset, optimal compaction will have different re
 - It’s always better to use *batch* inserts rather than adding 1 row at a time (to avoid too small fragments). If single-row inserts are unavoidable, run compaction on a regular basis to merge them into larger fragments.
 - Keep the number of fragments under 100, which is suitable for most use cases (for *really* large datasets of >500M rows, more fragments might be needed)

+!!! note
+
+    LanceDB Cloud/Enterprise supports [auto-compaction](https://docs.lancedb.com/enterprise/architecture/architecture#write-path) which automatically optimizes fragments in the background as data changes.
+
 ## Deletion

 Although Lance allows you to delete rows from a dataset, it does not actually delete the data immediately. It simply marks the row as deleted in the `DataFile` that represents a fragment. For a given version of the dataset, each fragment can have up to one deletion file (if no rows were ever deleted from that fragment, it will not have a deletion file). This is important to keep in mind because it means that the data is still there, and can be recovered if needed, as long as that version still exists based on your backup policy.
@@ -50,13 +54,9 @@ Reindexing is the process of updating the index to account for new data, keeping

 Both LanceDB OSS and Cloud support reindexing, but the process (at least for now) is different for each, depending on the type of index.

-When a reindex job is triggered in the background, the entire data is reindexed, but in the interim as new queries come in, LanceDB will combine results from the existing index with exhaustive kNN search on the new data. This is done to ensure that you're still searching on all your data, but it does come at a performance cost. The more data that you add without reindexing, the impact on latency (due to exhaustive search) can be noticeable.
+In LanceDB OSS, re-indexing happens synchronously when you call either `create_index` or `optimize` on a table. In LanceDB Cloud, re-indexing happens asynchronously as you add and update data in your table.

-### Vector reindex
+By default, queries will search new data even if it has yet to be indexed. The un-indexed data is searched using brute-force methods, such as exhaustive kNN for vector search, and the results are combined with the fast index search results. This ensures that you're always searching over all your data, but it does come at a performance cost: without reindexing, adding more data to a table will make queries slower and more expensive. This behavior can be disabled by setting the [fast_search](https://lancedb.github.io/lancedb/python/python/#lancedb.query.AsyncQuery.fast_search) parameter, which instructs the query to ignore un-indexed data.

-* LanceDB Cloud supports incremental reindexing, where a background process will trigger a new index build for you automatically when new data is added to a dataset
+* LanceDB Cloud/Enterprise supports [automatic incremental reindexing](https://docs.lancedb.com/core#vector-index) for vector, scalar, and FTS indices, where a background process automatically triggers a new index build whenever data is added to or modified in a dataset
 * LanceDB OSS requires you to manually trigger a reindex operation -- we are working on adding incremental reindexing to LanceDB OSS as well
-
-### FTS reindex
-
-FTS reindexing is supported in both LanceDB OSS and Cloud, but requires that it's manually rebuilt once you have a significant enough amount of new data added that needs to be reindexed. We [updated](https://github.com/lancedb/lancedb/pull/762) Tantivy's default heap size from 128MB to 1GB in LanceDB to make it much faster to reindex, by up to 10x from the default settings.
--- a/docs/src/embeddings/available_embedding_models/multimodal_embedding_functions/voyageai_multimodal_embedding.md
+++ b/docs/src/embeddings/available_embedding_models/multimodal_embedding_functions/voyageai_multimodal_embedding.md
@@ -0,0 +1,97 @@
+# VoyageAI Embeddings : Multimodal
+
+VoyageAI embeddings can also be used to embed both text and image data. Only some of the models support image data; you can check the list
+under [https://docs.voyageai.com/docs/multimodal-embeddings](https://docs.voyageai.com/docs/multimodal-embeddings)
+
+Supported parameters (to be passed in `create` method) are:
+
+| Parameter | Type | Default Value           | Description                               |
+|---|---|-------------------------|-------------------------------------------|
+| `name` | `str` | `"voyage-multimodal-3"` | The model ID of the VoyageAI model to use |
+
+Usage Example:
+
+```python
+import base64
+import os
+from io import BytesIO
+
+import requests
+import lancedb
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.embeddings import get_registry
+import pandas as pd
+
+os.environ['VOYAGE_API_KEY'] = 'YOUR_VOYAGE_API_KEY'
+
+db = lancedb.connect(".lancedb")
+func = get_registry().get("voyageai").create(name="voyage-multimodal-3")
+
+
+def image_to_base64(image_bytes: bytes):
+    buffered = BytesIO(image_bytes)
+    img_str = base64.b64encode(buffered.getvalue())
+    return img_str.decode("utf-8")
+
+
+class Images(LanceModel):
+    label: str
+    image_uri: str = func.SourceField()  # image uri as the source
+    image_bytes: str = func.SourceField()  # image bytes base64 encoded as the source
+    vector: Vector(func.ndims()) = func.VectorField()  # vector column
+    vec_from_bytes: Vector(func.ndims()) = func.VectorField()  # Another vector column
+
+
+if "images" in db.table_names():
+    db.drop_table("images")
+table = db.create_table("images", schema=Images)
+labels = ["cat", "cat", "dog", "dog", "horse", "horse"]
+uris = [
+    "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
+    "http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
+    "http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
+    "http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
+    "http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
+    "http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
+]
+# get each uri as bytes
+images_bytes = [image_to_base64(requests.get(uri).content) for uri in uris]
+table.add(
+    pd.DataFrame({"label": labels, "image_uri": uris, "image_bytes": images_bytes})
+)
+```
+Now we can search using text from both the default vector column and the custom vector column
+```python
+
+# text search
+actual = table.search("man's best friend", "vec_from_bytes").limit(1).to_pydantic(Images)[0]
+print(actual.label) # prints "dog"
+
+frombytes = (
+    table.search("man's best friend", vector_column_name="vec_from_bytes")
+    .limit(1)
+    .to_pydantic(Images)[0]
+)
+print(frombytes.label)
+
+```
+
+Because we're using a multi-modal embedding function, we can also search using images
+
+```python
+from PIL import Image  # Pillow, used to decode the query image
+# image search
+query_image_uri = "http://farm1.staticflickr.com/200/467715466_ed4a31801f_z.jpg"
+image_bytes = requests.get(query_image_uri).content
+query_image = Image.open(BytesIO(image_bytes))
+actual = table.search(query_image, "vec_from_bytes").limit(1).to_pydantic(Images)[0]
+print(actual.label == "dog")
+
+# image search using a custom vector column
+other = (
+    table.search(query_image, vector_column_name="vec_from_bytes")
+    .limit(1)
+    .to_pydantic(Images)[0]
+)
+print(other.label)
+```
--- a/docs/src/guides/sql_querying.md
+++ b/docs/src/guides/sql_querying.md
@@ -1,7 +1,9 @@
+# SQL Querying
+
 You can use DuckDB and Apache Datafusion to query your LanceDB tables using SQL.
 This guide will show how to query Lance tables them using both.

-We will re-use the dataset [created previously](./pandas_and_pyarrow.md):
+We will re-use the dataset [created previously](./tables.md):

 ```python
 import lancedb
@@ -27,15 +29,10 @@ arrow_table = table.to_lance()
 duckdb.query("SELECT * FROM arrow_table")
 ```

-```
-┌─────────────┬─────────┬────────┐
-│   vector    │  item   │ price  │
-│   float[]   │ varchar │ double │
-├─────────────┼─────────┼────────┤
-│ [3.1, 4.1]  │ foo     │   10.0 │
-│ [5.9, 26.5] │ bar     │   20.0 │
-└─────────────┴─────────┴────────┘
-```
+| vector      | item | price |
+| ----------- | ---- | ----- |
+| [3.1, 4.1]  | foo  | 10.0  |
+| [5.9, 26.5] | bar  | 20.0  |

 ## Querying a LanceDB Table with Apache Datafusion

@@ -57,12 +54,7 @@ Register the table created with the Datafusion session context.
    --8<-- "python/python/tests/docs/test_guide_tables.py:lance_sql_basic"
    ```

-```
-┌─────────────┬─────────┬────────┐
-│   vector    │  item   │ price  │
-│   float[]   │ varchar │ double │
-├─────────────┼─────────┼────────┤
-│ [3.1, 4.1]  │ foo     │   10.0 │
-│ [5.9, 26.5] │ bar     │   20.0 │
-└─────────────┴─────────┴────────┘
-```
+| vector      | item | price |
+| ----------- | ---- | ----- |
+| [3.1, 4.1]  | foo  | 10.0  |
+| [5.9, 26.5] | bar  | 20.0  |
--- a/docs/src/guides/storage.md
+++ b/docs/src/guides/storage.md
@@ -397,117 +397,6 @@ For **read-only access**, LanceDB will need a policy such as:
 }
 ```

-#### DynamoDB Commit Store for concurrent writes
-
-By default, S3 does not support concurrent writes. Having two or more processes
-writing to the same table at the same time can lead to data corruption. This is
-because S3, unlike other object stores, does not have any atomic put or copy
-operation.
-
-To enable concurrent writes, you can configure LanceDB to use a DynamoDB table
-as a commit store. This table will be used to coordinate writes between
-different processes. To enable this feature, you must modify your connection
-URI to use the `s3+ddb` scheme and add a query parameter `ddbTableName` with the
-name of the table to use.
-
-=== "Python"
-
-    === "Sync API"
-
-        ```python
-        import lancedb
-        db = lancedb.connect(
-            "s3+ddb://bucket/path?ddbTableName=my-dynamodb-table",
-        )
-        ```
-    === "Async API"
-
-        ```python
-        import lancedb
-        async_db = await lancedb.connect_async(
-            "s3+ddb://bucket/path?ddbTableName=my-dynamodb-table",
-        )    
-        ```
-
-=== "JavaScript"
-
-    ```javascript
-    const lancedb = require("lancedb");
-
-    const db = await lancedb.connect(
-        "s3+ddb://bucket/path?ddbTableName=my-dynamodb-table",
-    );
-    ```
-
-The DynamoDB table must be created with the following schema:
-
- Hash key: `base_uri` (string)
- Range key: `version` (number)
-
-You can create this programmatically with:
-
-=== "Python"
-
-    <!-- skip-test -->
-    ```python
-    import boto3
-
-    dynamodb = boto3.client("dynamodb")
-    table = dynamodb.create_table(
-        TableName=table_name,
-        KeySchema=[
-            {"AttributeName": "base_uri", "KeyType": "HASH"},
-            {"AttributeName": "version", "KeyType": "RANGE"},
-        ],
-        AttributeDefinitions=[
-            {"AttributeName": "base_uri", "AttributeType": "S"},
-            {"AttributeName": "version", "AttributeType": "N"},
-        ],
-        ProvisionedThroughput={"ReadCapacityUnits": 1, "WriteCapacityUnits": 1},
-    )
-    ```
-
-=== "JavaScript"
-
-    <!-- skip-test -->
-    ```javascript
-    import {
-      CreateTableCommand,
-      DynamoDBClient,
-    } from "@aws-sdk/client-dynamodb";
-
-    const dynamodb = new DynamoDBClient({
-      region: CONFIG.awsRegion,
-      credentials: {
-        accessKeyId: CONFIG.awsAccessKeyId,
-        secretAccessKey: CONFIG.awsSecretAccessKey,
-      },
-      endpoint: CONFIG.awsEndpoint,
-    });
-    const command = new CreateTableCommand({
-      TableName: table_name,
-      AttributeDefinitions: [
-        {
-          AttributeName: "base_uri",
-          AttributeType: "S",
-        },
-        {
-          AttributeName: "version",
-          AttributeType: "N",
-        },
-      ],
-      KeySchema: [
-        { AttributeName: "base_uri", KeyType: "HASH" },
-        { AttributeName: "version", KeyType: "RANGE" },
-      ],
-      ProvisionedThroughput: {
-        ReadCapacityUnits: 1,
-        WriteCapacityUnits: 1,
-      },
-    });
-    await client.send(command);
-    ```
-

 #### S3-compatible stores

--- a/docs/src/js/classes/Connection.md
+++ b/docs/src/js/classes/Connection.md
@@ -25,6 +25,51 @@ the underlying connection has been closed.

 ## Methods

+### cloneTable()
+
+```ts
+abstract cloneTable(
+   targetTableName,
+   sourceUri,
+   options?): Promise<Table>
+```
+
+Clone a table from a source table.
+
+A shallow clone creates a new table that shares the underlying data files
+with the source table but has its own independent manifest. This allows
+both the source and cloned tables to evolve independently while initially
+sharing the same data, deletion, and index files.
+
+#### Parameters
+
+* **targetTableName**: `string`
+    The name of the target table to create.
+
+* **sourceUri**: `string`
+    The URI of the source table to clone from.
+
+* **options?**
+    Clone options.
+
+* **options.isShallow?**: `boolean`
+    Whether to perform a shallow clone (defaults to true).
+
+* **options.sourceTag?**: `string`
+    The tag of the source table to clone.
+
+* **options.sourceVersion?**: `number`
+    The version of the source table to clone.
+
+* **options.targetNamespace?**: `string`[]
+    The namespace for the target table (defaults to root namespace).
+
+#### Returns
+
+`Promise`&lt;[`Table`](Table.md)&gt;
+
+***
+
 ### close()

 ```ts
@@ -45,6 +90,8 @@ Any attempt to use the connection after it is closed will result in an error.

 ### createEmptyTable()

+#### createEmptyTable(name, schema, options)
+
 ```ts
 abstract createEmptyTable(
   name,
@@ -54,7 +101,7 @@ abstract createEmptyTable(

 Creates a new empty Table

-#### Parameters
+##### Parameters

 * **name**: `string`
    The name of the table.
@@ -63,8 +110,39 @@ Creates a new empty Table
    The schema of the table

 * **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
+    Additional options (backwards compatibility)

-#### Returns
+##### Returns
+
+`Promise`&lt;[`Table`](Table.md)&gt;
+
+#### createEmptyTable(name, schema, namespace, options)
+
+```ts
+abstract createEmptyTable(
+   name,
+   schema,
+   namespace?,
+   options?): Promise<Table>
+```
+
+Creates a new empty Table
+
+##### Parameters
+
+* **name**: `string`
+    The name of the table.
+
+* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
+    The schema of the table
+
+* **namespace?**: `string`[]
+    The namespace to create the table in (defaults to root namespace)
+
+* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
+    Additional options
+
+##### Returns

 `Promise`&lt;[`Table`](Table.md)&gt;

@@ -72,10 +150,10 @@ Creates a new empty Table

 ### createTable()

-#### createTable(options)
+#### createTable(options, namespace)

 ```ts
-abstract createTable(options): Promise<Table>
+abstract createTable(options, namespace?): Promise<Table>
 ```

 Creates a new Table and initialize it with new data.
@@ -85,6 +163,9 @@ Creates a new Table and initialize it with new data.
 * **options**: `object` & `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
    The options object.

+* **namespace?**: `string`[]
+    The namespace to create the table in (defaults to root namespace)
+
 ##### Returns

 `Promise`&lt;[`Table`](Table.md)&gt;
@@ -110,6 +191,38 @@ Creates a new Table and initialize it with new data.
    to be inserted into the table

 * **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
+    Additional options (backwards compatibility)
+
+##### Returns
+
+`Promise`&lt;[`Table`](Table.md)&gt;
+
+#### createTable(name, data, namespace, options)
+
+```ts
+abstract createTable(
+   name,
+   data,
+   namespace?,
+   options?): Promise<Table>
+```
+
+Creates a new Table and initialize it with new data.
+
+##### Parameters
+
+* **name**: `string`
+    The name of the table.
+
+* **data**: [`TableLike`](../type-aliases/TableLike.md) \| `Record`&lt;`string`, `unknown`&gt;[]
+    Non-empty Array of Records
+    to be inserted into the table
+
+* **namespace?**: `string`[]
+    The namespace to create the table in (defaults to root namespace)
+
+* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
+    Additional options

 ##### Returns

@@ -134,11 +247,16 @@ Return a brief description of the connection
 ### dropAllTables()

 ```ts
-abstract dropAllTables(): Promise<void>
+abstract dropAllTables(namespace?): Promise<void>
 ```

 Drop all tables in the database.

+#### Parameters
+
+* **namespace?**: `string`[]
+    The namespace to drop tables from (defaults to root namespace).
+
 #### Returns

 `Promise`&lt;`void`&gt;
@@ -148,7 +266,7 @@ Drop all tables in the database.
 ### dropTable()

 ```ts
-abstract dropTable(name): Promise<void>
+abstract dropTable(name, namespace?): Promise<void>
 ```

 Drop an existing table.
@@ -158,6 +276,9 @@ Drop an existing table.
 * **name**: `string`
    The name of the table to drop.

+* **namespace?**: `string`[]
+    The namespace of the table (defaults to root namespace).
+
 #### Returns

 `Promise`&lt;`void`&gt;
@@ -181,7 +302,10 @@ Return true if the connection has not been closed
 ### openTable()

 ```ts
-abstract openTable(name, options?): Promise<Table>
+abstract openTable(
+   name,
+   namespace?,
+   options?): Promise<Table>
 ```

 Open a table in the database.
@@ -191,7 +315,11 @@ Open a table in the database.
 * **name**: `string`
    The name of the table

+* **namespace?**: `string`[]
+    The namespace of the table (defaults to root namespace)
+
 * **options?**: `Partial`&lt;[`OpenTableOptions`](../interfaces/OpenTableOptions.md)&gt;
+    Additional options

 #### Returns

@@ -201,6 +329,8 @@ Open a table in the database.

 ### tableNames()

+#### tableNames(options)
+
 ```ts
 abstract tableNames(options?): Promise<string[]>
 ```
@@ -209,12 +339,35 @@ List all the table names in this database.

 Tables will be returned in lexicographical order.

-#### Parameters
+##### Parameters
+
+* **options?**: `Partial`&lt;[`TableNamesOptions`](../interfaces/TableNamesOptions.md)&gt;
+    options to control the
+    paging / start point (backwards compatibility)
+
+##### Returns
+
+`Promise`&lt;`string`[]&gt;
+
+#### tableNames(namespace, options)
+
+```ts
+abstract tableNames(namespace?, options?): Promise<string[]>
+```
+
+List all the table names in this database.
+
+Tables will be returned in lexicographical order.
+
+##### Parameters
+
+* **namespace?**: `string`[]
+    The namespace to list tables from (defaults to root namespace)

 * **options?**: `Partial`&lt;[`TableNamesOptions`](../interfaces/TableNamesOptions.md)&gt;
    options to control the
    paging / start point

-#### Returns
+##### Returns

 `Promise`&lt;`string`[]&gt;
--- a/docs/src/js/classes/HeaderProvider.md
+++ b/docs/src/js/classes/HeaderProvider.md
@@ -0,0 +1,85 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / HeaderProvider
+
+# Class: `abstract` HeaderProvider
+
+Abstract base class for providing custom headers for each request.
+
+Users can implement this interface to provide dynamic headers for various purposes
+such as authentication (OAuth tokens, API keys), request tracking (correlation IDs),
+custom metadata, or any other header-based requirements. The provider is called
+before each request to ensure fresh header values are always used.
+
+## Examples
+
+Simple JWT token provider:
+```typescript
+class JWTProvider extends HeaderProvider {
+  constructor(private token: string) {
+    super();
+  }
+
+  getHeaders(): Record<string, string> {
+    return { authorization: `Bearer ${this.token}` };
+  }
+}
+```
+
+Provider with request tracking:
+```typescript
+class RequestTrackingProvider extends HeaderProvider {
+  constructor(private sessionId: string) {
+    super();
+  }
+
+  getHeaders(): Record<string, string> {
+    return {
+      "X-Session-Id": this.sessionId,
+      "X-Request-Id": `req-${Date.now()}`
+    };
+  }
+}
+```
+
+## Extended by
+
+- [`StaticHeaderProvider`](StaticHeaderProvider.md)
+- [`OAuthHeaderProvider`](OAuthHeaderProvider.md)
+
+## Constructors
+
+### new HeaderProvider()
+
+```ts
+new HeaderProvider(): HeaderProvider
+```
+
+#### Returns
+
+[`HeaderProvider`](HeaderProvider.md)
+
+## Methods
+
+### getHeaders()
+
+```ts
+abstract getHeaders(): Record<string, string>
+```
+
+Get the latest headers to be added to requests.
+
+This method is called before each request to the remote LanceDB server.
+Implementations should return headers that will be merged with existing headers.
+
+#### Returns
+
+`Record`&lt;`string`, `string`&gt;
+
+Dictionary of header names to values to add to the request.
+
+#### Throws
+
+If unable to fetch headers, the exception will be propagated and the request will fail.
--- a/docs/src/js/classes/Index.md
+++ b/docs/src/js/classes/Index.md
@@ -194,6 +194,37 @@ currently is also a memory intensive operation.

 ***

+### ivfRq()
+
+```ts
+static ivfRq(options?): Index
+```
+
+Create an IvfRq index
+
+IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
+and organizes them into IVF partitions.
+
+The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
+The `num_bits` parameter controls this process, providing a tradeoff
+between index size (and thus search speed) and index accuracy.
+
+The partitioning process is called IVF and the `num_partitions` parameter controls how
+many groups to create.
+
+Note that training an IVF RQ index on a large dataset is a slow operation and
+currently is also a memory intensive operation.
+
+#### Parameters
+
+* **options?**: `Partial`&lt;[`IvfRqOptions`](../interfaces/IvfRqOptions.md)&gt;
+
+#### Returns
+
+[`Index`](Index.md)
+
+***
+
 ### labelList()

 ```ts
--- a/docs/src/js/classes/MatchQuery.md
+++ b/docs/src/js/classes/MatchQuery.md
@@ -41,6 +41,7 @@ Creates an instance of MatchQuery.
    - `fuzziness`: The fuzziness level for the query (default is 0).
    - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
    - `operator`: The logical operator to use for combining terms in the query (default is "OR").
+    - `prefixLength`: The number of leading characters that must match exactly (i.e. are left unchanged) during fuzzy matching.

 * **options.boost?**: `number`

@@ -50,6 +51,8 @@ Creates an instance of MatchQuery.

 * **options.operator?**: [`Operator`](../enumerations/Operator.md)

+* **options.prefixLength?**: `number`
+
 #### Returns

 [`MatchQuery`](MatchQuery.md)
--- a/docs/src/js/classes/MergeInsertBuilder.md
+++ b/docs/src/js/classes/MergeInsertBuilder.md
@@ -52,6 +52,30 @@ the merge result

 ***

+### useIndex()
+
+```ts
+useIndex(useIndex): MergeInsertBuilder
+```
+
+Controls whether to use indexes for the merge operation.
+
+When set to `true` (the default), the operation will use an index if available
+on the join key for improved performance. When set to `false`, it forces a full
+table scan even if an index exists. This can be useful for benchmarking or when
+the query optimizer chooses a suboptimal path.
+
+#### Parameters
+
+* **useIndex**: `boolean`
+    Whether to use indices for the merge operation. Defaults to `true`.
+
+#### Returns
+
+[`MergeInsertBuilder`](MergeInsertBuilder.md)
+
+***
+
 ### whenMatchedUpdateAll()

 ```ts
--- a/docs/src/js/classes/NativeJsHeaderProvider.md
+++ b/docs/src/js/classes/NativeJsHeaderProvider.md
@@ -0,0 +1,29 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / NativeJsHeaderProvider
+
+# Class: NativeJsHeaderProvider
+
+JavaScript HeaderProvider implementation that wraps a JavaScript callback.
+This is the only native header provider - all header provider implementations
+should provide a JavaScript function that returns headers.
+
+## Constructors
+
+### new NativeJsHeaderProvider()
+
+```ts
+new NativeJsHeaderProvider(getHeadersCallback): NativeJsHeaderProvider
+```
+
+Create a new NativeJsHeaderProvider from a JavaScript callback
+
+#### Parameters
+
+* **getHeadersCallback**
+
+#### Returns
+
+[`NativeJsHeaderProvider`](NativeJsHeaderProvider.md)
--- a/docs/src/js/classes/OAuthHeaderProvider.md
+++ b/docs/src/js/classes/OAuthHeaderProvider.md
@@ -0,0 +1,108 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / OAuthHeaderProvider
+
+# Class: OAuthHeaderProvider
+
+Example implementation: OAuth token provider with automatic refresh.
+
+This is an example implementation showing how to manage OAuth tokens
+with automatic refresh when they expire.
+
+## Example
+
+```typescript
+async function fetchToken(): Promise<TokenResponse> {
+  const response = await fetch("https://oauth.example.com/token", {
+    method: "POST",
+    body: JSON.stringify({
+      grant_type: "client_credentials",
+      client_id: "your-client-id",
+      client_secret: "your-client-secret"
+    }),
+    headers: { "Content-Type": "application/json" }
+  });
+  const data = await response.json();
+  return {
+    accessToken: data.access_token,
+    expiresIn: data.expires_in
+  };
+}
+
+const provider = new OAuthHeaderProvider(fetchToken);
+const headers = provider.getHeaders();
+// Returns: {"authorization": "Bearer <your-token>"}
+```
+
+## Extends
+
+- [`HeaderProvider`](HeaderProvider.md)
+
+## Constructors
+
+### new OAuthHeaderProvider()
+
+```ts
+new OAuthHeaderProvider(tokenFetcher, refreshBufferSeconds): OAuthHeaderProvider
+```
+
+Initialize the OAuth provider.
+
+#### Parameters
+
+* **tokenFetcher**
+    Function to fetch new tokens. Should return object with 'accessToken' and optionally 'expiresIn'.
+
+* **refreshBufferSeconds**: `number` = `300`
+    Seconds before expiry to refresh token. Default 300 (5 minutes).
+
+#### Returns
+
+[`OAuthHeaderProvider`](OAuthHeaderProvider.md)
+
+#### Overrides
+
+[`HeaderProvider`](HeaderProvider.md).[`constructor`](HeaderProvider.md#constructors)
+
+## Methods
+
+### getHeaders()
+
+```ts
+getHeaders(): Record<string, string>
+```
+
+Get OAuth headers, refreshing token if needed.
+Note: This is synchronous for now as the Rust implementation expects sync.
+In a real implementation, this would need to handle async properly.
+
+#### Returns
+
+`Record`&lt;`string`, `string`&gt;
+
+Headers with Bearer token authorization.
+
+#### Throws
+
+If unable to fetch or refresh token.
+
+#### Overrides
+
+[`HeaderProvider`](HeaderProvider.md).[`getHeaders`](HeaderProvider.md#getheaders)
+
+***
+
+### refreshToken()
+
+```ts
+refreshToken(): Promise<void>
+```
+
+Manually refresh the token.
+Call this before using getHeaders() to ensure token is available.
+
+#### Returns
+
+`Promise`&lt;`void`&gt;
--- a/docs/src/js/classes/PermutationBuilder.md
+++ b/docs/src/js/classes/PermutationBuilder.md
@@ -0,0 +1,250 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / PermutationBuilder
+
+# Class: PermutationBuilder
+
+A PermutationBuilder for creating data permutations with splits, shuffling, and filtering.
+
+This class provides a TypeScript wrapper around the native Rust PermutationBuilder,
+offering methods to configure data splits, shuffling, and filtering before executing
+the permutation to create a new table.
+
+## Methods
+
+### execute()
+
+```ts
+execute(): Promise<Table>
+```
+
+Execute the permutation and create the destination table.
+
+#### Returns
+
+`Promise`&lt;[`Table`](Table.md)&gt;
+
+A Promise that resolves to the new Table instance
+
+#### Example
+
+```ts
+const permutationTable = await builder.execute();
+console.log(`Created table: ${permutationTable.name}`);
+```
+
+***
+
+### filter()
+
+```ts
+filter(filter): PermutationBuilder
+```
+
+Configure filtering for the permutation.
+
+#### Parameters
+
+* **filter**: `string`
+    SQL filter expression
+
+#### Returns
+
+[`PermutationBuilder`](PermutationBuilder.md)
+
+A new PermutationBuilder instance
+
+#### Example
+
+```ts
+builder.filter("age > 18 AND status = 'active'");
+```
+
+***
+
+### persist()
+
+```ts
+persist(connection, tableName): PermutationBuilder
+```
+
+Configure the permutation to be persisted.
+
+#### Parameters
+
+* **connection**: [`Connection`](Connection.md)
+    The connection to persist the permutation to
+
+* **tableName**: `string`
+    The name of the table to create
+
+#### Returns
+
+[`PermutationBuilder`](PermutationBuilder.md)
+
+A new PermutationBuilder instance
+
+#### Example
+
+```ts
+builder.persist(connection, "permutation_table");
+```
+
+***
+
+### shuffle()
+
+```ts
+shuffle(options): PermutationBuilder
+```
+
+Configure shuffling for the permutation.
+
+#### Parameters
+
+* **options**: [`ShuffleOptions`](../interfaces/ShuffleOptions.md)
+    Configuration for shuffling
+
+#### Returns
+
+[`PermutationBuilder`](PermutationBuilder.md)
+
+A new PermutationBuilder instance
+
+#### Example
+
+```ts
+// Basic shuffle
+builder.shuffle({ seed: 42 });
+
+// Shuffle with clump size
+builder.shuffle({ seed: 42, clumpSize: 10 });
+```
+
+***
+
+### splitCalculated()
+
+```ts
+splitCalculated(options): PermutationBuilder
+```
+
+Configure calculated splits for the permutation.
+
+#### Parameters
+
+* **options**: [`SplitCalculatedOptions`](../interfaces/SplitCalculatedOptions.md)
+    Configuration for calculated splitting
+
+#### Returns
+
+[`PermutationBuilder`](PermutationBuilder.md)
+
+A new PermutationBuilder instance
+
+#### Example
+
+```ts
+builder.splitCalculated({ calculation: "user_id % 3" });
+```
+
+***
+
+### splitHash()
+
+```ts
+splitHash(options): PermutationBuilder
+```
+
+Configure hash-based splits for the permutation.
+
+#### Parameters
+
+* **options**: [`SplitHashOptions`](../interfaces/SplitHashOptions.md)
+    Configuration for hash-based splitting
+
+#### Returns
+
+[`PermutationBuilder`](PermutationBuilder.md)
+
+A new PermutationBuilder instance
+
+#### Example
+
+```ts
+builder.splitHash({
+  columns: ["user_id"],
+  splitWeights: [70, 30],
+  discardWeight: 0
+});
+```
+
+***
+
+### splitRandom()
+
+```ts
+splitRandom(options): PermutationBuilder
+```
+
+Configure random splits for the permutation.
+
+#### Parameters
+
+* **options**: [`SplitRandomOptions`](../interfaces/SplitRandomOptions.md)
+    Configuration for random splitting
+
+#### Returns
+
+[`PermutationBuilder`](PermutationBuilder.md)
+
+A new PermutationBuilder instance
+
+#### Example
+
+```ts
+// Split by ratios
+builder.splitRandom({ ratios: [0.7, 0.3], seed: 42 });
+
+// Split by counts
+builder.splitRandom({ counts: [1000, 500], seed: 42 });
+
+// Split with fixed size
+builder.splitRandom({ fixed: 100, seed: 42 });
+```
+
+***
+
+### splitSequential()
+
+```ts
+splitSequential(options): PermutationBuilder
+```
+
+Configure sequential splits for the permutation.
+
+#### Parameters
+
+* **options**: [`SplitSequentialOptions`](../interfaces/SplitSequentialOptions.md)
+    Configuration for sequential splitting
+
+#### Returns
+
+[`PermutationBuilder`](PermutationBuilder.md)
+
+A new PermutationBuilder instance
+
+#### Example
+
+```ts
+// Split by ratios
+builder.splitSequential({ ratios: [0.8, 0.2] });
+
+// Split by counts
+builder.splitSequential({ counts: [800, 200] });
+
+// Split with fixed size
+builder.splitSequential({ fixed: 1000 });
+```
--- a/docs/src/js/classes/Query.md
+++ b/docs/src/js/classes/Query.md
@@ -14,7 +14,7 @@ A builder for LanceDB queries.

 ## Extends

- [`QueryBase`](QueryBase.md)&lt;`NativeQuery`&gt;
+- `StandardQueryBase`&lt;`NativeQuery`&gt;

 ## Properties

@@ -26,7 +26,7 @@ protected inner: Query | Promise<Query>;

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)
+`StandardQueryBase.inner`

 ## Methods

@@ -73,14 +73,14 @@ AnalyzeExec verbose=true, metrics=[]

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
+`StandardQueryBase.analyzePlan`

 ***

 ### execute()

 ```ts
-protected execute(options?): RecordBatchIterator
+protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
 ```

 Execute the query and return the results as an
@@ -91,7 +91,7 @@ Execute the query and return the results as an

 #### Returns

-[`RecordBatchIterator`](RecordBatchIterator.md)
+`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;

 #### See

@@ -107,7 +107,7 @@ single query)

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)
+`StandardQueryBase.execute`

 ***

@@ -143,7 +143,7 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)
+`StandardQueryBase.explainPlan`

 ***

@@ -164,7 +164,7 @@ Use [Table#optimize](Table.md#optimize) to index all un-indexed data.

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`fastSearch`](QueryBase.md#fastsearch)
+`StandardQueryBase.fastSearch`

 ***

@@ -194,7 +194,7 @@ Use `where` instead

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`filter`](QueryBase.md#filter)
+`StandardQueryBase.filter`

 ***

@@ -216,7 +216,7 @@ fullTextSearch(query, options?): this

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`fullTextSearch`](QueryBase.md#fulltextsearch)
+`StandardQueryBase.fullTextSearch`

 ***

@@ -241,7 +241,7 @@ called then every valid row from the table will be returned.

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`limit`](QueryBase.md#limit)
+`StandardQueryBase.limit`

 ***

@@ -325,6 +325,10 @@ nearestToText(query, columns?): Query
 offset(offset): this
 ```

+Set the number of rows to skip before returning results.
+
+This is useful for pagination.
+
 #### Parameters

 * **offset**: `number`
@@ -335,7 +339,30 @@ offset(offset): this

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`offset`](QueryBase.md#offset)
+`StandardQueryBase.offset`
+
+***
+
+### outputSchema()
+
+```ts
+outputSchema(): Promise<Schema<any>>
+```
+
+Returns the schema of the output that will be returned by this query.
+
+This can be used to inspect the types and names of the columns that will be
+returned by the query before executing it.
+
+#### Returns
+
+`Promise`&lt;`Schema`&lt;`any`&gt;&gt;
+
+An Arrow Schema describing the output columns.
+
+#### Inherited from
+
+`StandardQueryBase.outputSchema`

 ***

@@ -388,7 +415,7 @@ object insertion order is easy to get wrong and `Map` is more foolproof.

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)
+`StandardQueryBase.select`

 ***

@@ -410,7 +437,7 @@ Collect the results as an array of objects.

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)
+`StandardQueryBase.toArray`

 ***

@@ -436,7 +463,7 @@ ArrowTable.

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)
+`StandardQueryBase.toArrow`

 ***

@@ -471,7 +498,7 @@ on the filter column(s).

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`where`](QueryBase.md#where)
+`StandardQueryBase.where`

 ***

@@ -493,4 +520,4 @@ order to perform hybrid search.

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
+`StandardQueryBase.withRowId`
--- a/docs/src/js/classes/QueryBase.md
+++ b/docs/src/js/classes/QueryBase.md
@@ -15,12 +15,11 @@ Common methods supported by all query types

 ## Extended by

- [`Query`](Query.md)
- [`VectorQuery`](VectorQuery.md)
+- [`TakeQuery`](TakeQuery.md)

 ## Type Parameters

-• **NativeQueryType** *extends* `NativeQuery` \| `NativeVectorQuery`
+• **NativeQueryType** *extends* `NativeQuery` \| `NativeVectorQuery` \| `NativeTakeQuery`

 ## Implements

@@ -82,7 +81,7 @@ AnalyzeExec verbose=true, metrics=[]
 ### execute()

 ```ts
-protected execute(options?): RecordBatchIterator
+protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
 ```

 Execute the query and return the results as an
@@ -93,7 +92,7 @@ Execute the query and return the results as an

 #### Returns

-[`RecordBatchIterator`](RecordBatchIterator.md)
+`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;

 #### See

@@ -141,101 +140,22 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();

 ***

-### fastSearch()
+### outputSchema()

 ```ts
-fastSearch(): this
+outputSchema(): Promise<Schema<any>>
 ```

-Skip searching un-indexed data. This can make search faster, but will miss
-any data that is not yet indexed.
+Returns the schema of the output that will be returned by this query.

-Use [Table#optimize](Table.md#optimize) to index all un-indexed data.
+This can be used to inspect the types and names of the columns that will be
+returned by the query before executing it.

 #### Returns

-`this`
+`Promise`&lt;`Schema`&lt;`any`&gt;&gt;

-***
-
-### ~~filter()~~
-
-```ts
-filter(predicate): this
-```
-
-A filter statement to be applied to this query.
-
-#### Parameters
-
-* **predicate**: `string`
-
-#### Returns
-
-`this`
-
-#### See
-
-where
-
-#### Deprecated
-
-Use `where` instead
-
-***
-
-### fullTextSearch()
-
-```ts
-fullTextSearch(query, options?): this
-```
-
-#### Parameters
-
-* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
-
-* **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;
-
-#### Returns
-
-`this`
-
-***
-
-### limit()
-
-```ts
-limit(limit): this
-```
-
-Set the maximum number of results to return.
-
-By default, a plain search has no limit.  If this method is not
-called then every valid row from the table will be returned.
-
-#### Parameters
-
-* **limit**: `number`
-
-#### Returns
-
-`this`
-
-***
-
-### offset()
-
-```ts
-offset(offset): this
-```
-
-#### Parameters
-
-* **offset**: `number`
-
-#### Returns
-
-`this`
+An Arrow Schema describing the output columns.

 ***

@@ -328,37 +248,6 @@ ArrowTable.

 ***

-### where()
-
-```ts
-where(predicate): this
-```
-
-A filter statement to be applied to this query.
-
-The filter should be supplied as an SQL query string.  For example:
-
-#### Parameters
-
-* **predicate**: `string`
-
-#### Returns
-
-`this`
-
-#### Example
-
-```ts
-x > 10
-y > 0 AND y < 100
-x > 5 OR y = 'test'
-
-Filtering performance can often be improved by creating a scalar index
-on the filter column(s).
-```
-
-***
-
 ### withRowId()

 ```ts
--- a/docs/src/js/classes/RecordBatchIterator.md
+++ b/docs/src/js/classes/RecordBatchIterator.md
@@ -1,43 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / RecordBatchIterator
-
-# Class: RecordBatchIterator
-
-## Implements
-
- `AsyncIterator`&lt;`RecordBatch`&gt;
-
-## Constructors
-
-### new RecordBatchIterator()
-
-```ts
-new RecordBatchIterator(promise?): RecordBatchIterator
-```
-
-#### Parameters
-
-* **promise?**: `Promise`&lt;`RecordBatchIterator`&gt;
-
-#### Returns
-
-[`RecordBatchIterator`](RecordBatchIterator.md)
-
-## Methods
-
-### next()
-
-```ts
-next(): Promise<IteratorResult<RecordBatch<any>, any>>
-```
-
-#### Returns
-
-`Promise`&lt;`IteratorResult`&lt;`RecordBatch`&lt;`any`&gt;, `any`&gt;&gt;
-
-#### Implementation of
-
-`AsyncIterator.next`
--- a/docs/src/js/classes/Session.md
+++ b/docs/src/js/classes/Session.md
@@ -0,0 +1,88 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / Session
+
+# Class: Session
+
+A session for managing caches and object stores across LanceDB operations.
+
+Sessions allow you to configure cache sizes for index and metadata caches,
+which can significantly impact memory use and performance. They can
+also be re-used across multiple connections to share the same cache state.
+
+## Constructors
+
+### new Session()
+
+```ts
+new Session(indexCacheSizeBytes?, metadataCacheSizeBytes?): Session
+```
+
+Create a new session with custom cache sizes.
+
+**Parameter details**
+
+- `indexCacheSizeBytes`: The size of the index cache in bytes.
+  Index data is stored in memory in this cache to speed up queries.
+  Defaults to 6GB if not specified.
+- `metadataCacheSizeBytes`: The size of the metadata cache in bytes.
+  The metadata cache stores file metadata and schema information in memory.
+  This cache improves scan and write performance.
+  Defaults to 1GB if not specified.
+
+#### Parameters
+
+* **indexCacheSizeBytes?**: `null` \| `bigint`
+
+* **metadataCacheSizeBytes?**: `null` \| `bigint`
+
+#### Returns
+
+[`Session`](Session.md)
+
+## Methods
+
+### approxNumItems()
+
+```ts
+approxNumItems(): number
+```
+
+Get the approximate number of items cached in the session.
+
+#### Returns
+
+`number`
+
+***
+
+### sizeBytes()
+
+```ts
+sizeBytes(): bigint
+```
+
+Get the current size of the session caches in bytes.
+
+#### Returns
+
+`bigint`
+
+***
+
+### default()
+
+```ts
+static default(): Session
+```
+
+Create a session with default cache sizes.
+
+This is equivalent to creating a session with 6GB index cache
+and 1GB metadata cache.
+
+#### Returns
+
+[`Session`](Session.md)
--- a/docs/src/js/classes/StaticHeaderProvider.md
+++ b/docs/src/js/classes/StaticHeaderProvider.md
@@ -0,0 +1,70 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / StaticHeaderProvider
+
+# Class: StaticHeaderProvider
+
+Example implementation: A simple header provider that returns static headers.
+
+This is an example implementation showing how to create a HeaderProvider
+for cases where headers don't change during the session.
+
+## Example
+
+```typescript
+const provider = new StaticHeaderProvider({
+  authorization: "Bearer my-token",
+  "X-Custom-Header": "custom-value"
+});
+const headers = provider.getHeaders();
+// Returns: {authorization: 'Bearer my-token', 'X-Custom-Header': 'custom-value'}
+```
+
+## Extends
+
+- [`HeaderProvider`](HeaderProvider.md)
+
+## Constructors
+
+### new StaticHeaderProvider()
+
+```ts
+new StaticHeaderProvider(headers): StaticHeaderProvider
+```
+
+Initialize with static headers.
+
+#### Parameters
+
+* **headers**: `Record`&lt;`string`, `string`&gt;
+    Headers to return for every request.
+
+#### Returns
+
+[`StaticHeaderProvider`](StaticHeaderProvider.md)
+
+#### Overrides
+
+[`HeaderProvider`](HeaderProvider.md).[`constructor`](HeaderProvider.md#constructors)
+
+## Methods
+
+### getHeaders()
+
+```ts
+getHeaders(): Record<string, string>
+```
+
+Return the static headers.
+
+#### Returns
+
+`Record`&lt;`string`, `string`&gt;
+
+Copy of the static headers.
+
+#### Overrides
+
+[`HeaderProvider`](HeaderProvider.md).[`getHeaders`](HeaderProvider.md#getheaders)
--- a/docs/src/js/classes/Table.md
+++ b/docs/src/js/classes/Table.md
@@ -612,7 +612,7 @@ of the given query

 #### Parameters

-* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
+* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
    the query, a vector or string

 * **queryType?**: `string`
@@ -674,6 +674,48 @@ console.log(tags); // { "v1": { version: 1, manifestSize: ... } }

 ***

+### takeOffsets()
+
+```ts
+abstract takeOffsets(offsets): TakeQuery
+```
+
+Create a query that returns a subset of the rows in the table.
+
+#### Parameters
+
+* **offsets**: `number`[]
+    The offsets of the rows to return.
+
+#### Returns
+
+[`TakeQuery`](TakeQuery.md)
+
+A builder that can be used to parameterize the query.
+
+***
+
+### takeRowIds()
+
+```ts
+abstract takeRowIds(rowIds): TakeQuery
+```
+
+Create a query that returns a subset of the rows in the table.
+
+#### Parameters
+
+* **rowIds**: `number`[]
+    The row ids of the rows to return.
+
+#### Returns
+
+[`TakeQuery`](TakeQuery.md)
+
+A builder that can be used to parameterize the query.
+
+***
+
 ### toArrow()

 ```ts
@@ -799,7 +841,7 @@ by `query`.

 #### Parameters

-* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)
+* **vector**: [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md)

 #### Returns

--- a/docs/src/js/classes/TakeQuery.md
+++ b/docs/src/js/classes/TakeQuery.md
@@ -0,0 +1,288 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / TakeQuery
+
+# Class: TakeQuery
+
+A query that returns a subset of the rows in the table.
+
+## Extends
+
+- [`QueryBase`](QueryBase.md)&lt;`NativeTakeQuery`&gt;
+
+## Properties
+
+### inner
+
+```ts
+protected inner: TakeQuery | Promise<TakeQuery>;
+```
+
+#### Inherited from
+
+[`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)
+
+## Methods
+
+### analyzePlan()
+
+```ts
+analyzePlan(): Promise<string>
+```
+
+Executes the query and returns the physical query plan annotated with runtime metrics.
+
+This is useful for debugging and performance analysis, as it shows how the query was executed
+and includes metrics such as elapsed time, rows processed, and I/O statistics.
+
+#### Returns
+
+`Promise`&lt;`string`&gt;
+
+A query execution plan with runtime metrics for each step.
+
+#### Example
+
+```ts
+import * as lancedb from "@lancedb/lancedb"
+
+const db = await lancedb.connect("./.lancedb");
+const table = await db.createTable("my_table", [
+  { vector: [1.1, 0.9], id: "1" },
+]);
+
+const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
+
+Example output (with runtime metrics inlined):
+AnalyzeExec verbose=true, metrics=[]
+ ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
+  Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
+   CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
+    GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
+     FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
+      SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
+       KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
+        LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
+```
+
+#### Inherited from
+
+[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
+
+***
+
+### execute()
+
+```ts
+protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
+```
+
+Execute the query and return the results as an `AsyncIterator` of `RecordBatch`.
+
+#### Parameters
+
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
+
+#### Returns
+
+`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;
+
+#### See
+
+ - AsyncIterator
+of
+ - RecordBatch.
+
+By default, LanceDB will use many threads to calculate results and, when
+the result set is large, multiple batches will be processed at one time.
+This readahead is limited however and backpressure will be applied if this
+stream is consumed slowly (this constrains the maximum memory used by a
+single query)
+
+#### Inherited from
+
+[`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)
+
+***
+
+### explainPlan()
+
+```ts
+explainPlan(verbose): Promise<string>
+```
+
+Generates an explanation of the query execution plan.
+
+#### Parameters
+
+* **verbose**: `boolean` = `false`
+    If true, provides a more detailed explanation. Defaults to false.
+
+#### Returns
+
+`Promise`&lt;`string`&gt;
+
+A Promise that resolves to a string containing the query execution plan explanation.
+
+#### Example
+
+```ts
+import * as lancedb from "@lancedb/lancedb"
+const db = await lancedb.connect("./.lancedb");
+const table = await db.createTable("my_table", [
+  { vector: [1.1, 0.9], id: "1" },
+]);
+const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
+```
+
+#### Inherited from
+
+[`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)
+
+***
+
+### outputSchema()
+
+```ts
+outputSchema(): Promise<Schema<any>>
+```
+
+Returns the schema of the output that will be returned by this query.
+
+This can be used to inspect the types and names of the columns that will be
+returned by the query before executing it.
+
+#### Returns
+
+`Promise`&lt;`Schema`&lt;`any`&gt;&gt;
+
+An Arrow Schema describing the output columns.
+
+#### Inherited from
+
+[`QueryBase`](QueryBase.md).[`outputSchema`](QueryBase.md#outputschema)
+
+***
+
+### select()
+
+```ts
+select(columns): this
+```
+
+Return only the specified columns.
+
+By default a query will return all columns from the table.  However, this can have
+a very significant impact on latency.  LanceDB stores data in a columnar fashion.  This
+means we can finely tune our I/O to select exactly the columns we need.
+
+As a best practice you should always limit queries to the columns that you need.  If you
+pass in an array of column names then only those columns will be returned.
+
+You can also use this method to create new "dynamic" columns based on your existing columns.
+For example, you may not care about "a" or "b" but instead simply want "a + b".  This is often
+seen in the SELECT clause of an SQL query (e.g. `SELECT a+b FROM my_table`).
+
+To create dynamic columns you can pass in a Map<string, string>.  A column will be returned
+for each entry in the map.  The key provides the name of the column.  The value is
+an SQL string used to specify how the column is calculated.
+
+For example, an SQL query might state `SELECT a + b AS combined, c`.  The equivalent
+input to this method would be:
+
+#### Parameters
+
+* **columns**: `string` \| `string`[] \| `Record`&lt;`string`, `string`&gt; \| `Map`&lt;`string`, `string`&gt;
+
+#### Returns
+
+`this`
+
+#### Example
+
+```ts
+new Map([["combined", "a + b"], ["c", "c"]])
+```
+
+Columns will always be returned in the order given, even if that order is different than
+the order used when adding the data.
+
+Note that you can pass in a `Record<string, string>` (e.g. an object literal). This method
+uses `Object.entries` which should preserve the insertion order of the object.  However,
+object insertion order is easy to get wrong and `Map` is more foolproof.
+
+#### Inherited from
+
+[`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)
+
+***
+
+### toArray()
+
+```ts
+toArray(options?): Promise<any[]>
+```
+
+Collect the results as an array of objects.
+
+#### Parameters
+
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
+
+#### Returns
+
+`Promise`&lt;`any`[]&gt;
+
+#### Inherited from
+
+[`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)
+
+***
+
+### toArrow()
+
+```ts
+toArrow(options?): Promise<Table<any>>
+```
+
+Collect the results as an Arrow `Table`.
+
+#### Parameters
+
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
+
+#### Returns
+
+`Promise`&lt;`Table`&lt;`any`&gt;&gt;
+
+#### See
+
+ArrowTable.
+
+#### Inherited from
+
+[`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)
+
+***
+
+### withRowId()
+
+```ts
+withRowId(): this
+```
+
+Whether to return the row id in the results.
+
+This column can be used to match results between different queries. For
+example, to match results from a full text search and a vector search in
+order to perform hybrid search.
+
+#### Returns
+
+`this`
+
+#### Inherited from
+
+[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
--- a/docs/src/js/classes/VectorQuery.md
+++ b/docs/src/js/classes/VectorQuery.md
@@ -16,7 +16,7 @@ This builder can be reused to execute the query many times.

 ## Extends

- [`QueryBase`](QueryBase.md)&lt;`NativeVectorQuery`&gt;
+- `StandardQueryBase`&lt;`NativeVectorQuery`&gt;

 ## Properties

@@ -28,7 +28,7 @@ protected inner: VectorQuery | Promise<VectorQuery>;

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)
+`StandardQueryBase.inner`

 ## Methods

@@ -91,7 +91,7 @@ AnalyzeExec verbose=true, metrics=[]

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
+`StandardQueryBase.analyzePlan`

 ***

@@ -221,7 +221,7 @@ also increase the latency of your query. The default value is 1.5*limit.
 ### execute()

 ```ts
-protected execute(options?): RecordBatchIterator
+protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
 ```

 Execute the query and return the results as an
@@ -232,7 +232,7 @@ Execute the query and return the results as an

 #### Returns

-[`RecordBatchIterator`](RecordBatchIterator.md)
+`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;

 #### See

@@ -248,7 +248,7 @@ single query)

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)
+`StandardQueryBase.execute`

 ***

@@ -284,7 +284,7 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)
+`StandardQueryBase.explainPlan`

 ***

@@ -305,7 +305,7 @@ Use [Table#optimize](Table.md#optimize) to index all un-indexed data.

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`fastSearch`](QueryBase.md#fastsearch)
+`StandardQueryBase.fastSearch`

 ***

@@ -335,7 +335,7 @@ Use `where` instead

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`filter`](QueryBase.md#filter)
+`StandardQueryBase.filter`

 ***

@@ -357,7 +357,7 @@ fullTextSearch(query, options?): this

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`fullTextSearch`](QueryBase.md#fulltextsearch)
+`StandardQueryBase.fullTextSearch`

 ***

@@ -382,7 +382,54 @@ called then every valid row from the table will be returned.

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`limit`](QueryBase.md#limit)
+`StandardQueryBase.limit`
+
+***
+
+### maximumNprobes()
+
+```ts
+maximumNprobes(maximumNprobes): VectorQuery
+```
+
+Set the maximum number of probes used.
+
+This controls the maximum number of partitions that will be searched.  If this
+number is greater than minimumNprobes then the excess partitions will _only_ be
+searched if we have not found enough results.  This can be useful when there is
+a narrow filter to allow these queries to spend more time searching and avoid
+potential false negatives.
+
+#### Parameters
+
+* **maximumNprobes**: `number`
+
+#### Returns
+
+[`VectorQuery`](VectorQuery.md)
+
+***
+
+### minimumNprobes()
+
+```ts
+minimumNprobes(minimumNprobes): VectorQuery
+```
+
+Set the minimum number of probes used.
+
+This controls the minimum number of partitions that will be searched.  This
+parameter will impact every query against a vector index, regardless of the
+filter.  See `nprobes` for more details.  Higher values will increase recall
+but will also increase latency.
+
+#### Parameters
+
+* **minimumNprobes**: `number`
+
+#### Returns
+
+[`VectorQuery`](VectorQuery.md)

 ***

@@ -413,6 +460,10 @@ For best results we recommend tuning this parameter with a benchmark against
 your actual data to find the smallest possible value that will still give
 you the desired recall.

+For more fine grained control over behavior when you have a very narrow filter
+you can use `minimumNprobes` and `maximumNprobes`.  This method sets both
+the minimum and maximum to the same value.
+
 #### Parameters

 * **nprobes**: `number`
@@ -429,6 +480,10 @@ you the desired recall.
 offset(offset): this
 ```

+Set the number of rows to skip before returning results.
+
+This is useful for pagination.
+
 #### Parameters

 * **offset**: `number`
@@ -439,7 +494,30 @@ offset(offset): this

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`offset`](QueryBase.md#offset)
+`StandardQueryBase.offset`
+
+***
+
+### outputSchema()
+
+```ts
+outputSchema(): Promise<Schema<any>>
+```
+
+Returns the schema of the output that will be returned by this query.
+
+This can be used to inspect the types and names of the columns that will be
+returned by the query before executing it.
+
+#### Returns
+
+`Promise`&lt;`Schema`&lt;`any`&gt;&gt;
+
+An Arrow Schema describing the output columns.
+
+#### Inherited from
+
+`StandardQueryBase.outputSchema`

 ***

@@ -586,7 +664,7 @@ object insertion order is easy to get wrong and `Map` is more foolproof.

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)
+`StandardQueryBase.select`

 ***

@@ -608,7 +686,7 @@ Collect the results as an array of objects.

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)
+`StandardQueryBase.toArray`

 ***

@@ -634,7 +712,7 @@ ArrowTable.

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)
+`StandardQueryBase.toArrow`

 ***

@@ -669,7 +747,7 @@ on the filter column(s).

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`where`](QueryBase.md#where)
+`StandardQueryBase.where`

 ***

@@ -691,4 +769,4 @@ order to perform hybrid search.

 #### Inherited from

-[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
+`StandardQueryBase.withRowId`
--- a/docs/src/js/enumerations/Occur.md
+++ b/docs/src/js/enumerations/Occur.md
@@ -10,6 +10,7 @@ Enum representing the occurrence of terms in full-text queries.

 - `Must`: The term must be present in the document.
 - `Should`: The term should contribute to the document score, but is not required.
+- `MustNot`: The term must not be present in the document.

 ## Enumeration Members

@@ -21,6 +22,14 @@ Must: "MUST";

 ***

+### MustNot
+
+```ts
+MustNot: "MUST_NOT";
+```
+
+***
+
 ### Should

 ```ts
--- a/docs/src/js/functions/RecordBatchIterator.md
+++ b/docs/src/js/functions/RecordBatchIterator.md
@@ -0,0 +1,19 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / RecordBatchIterator
+
+# Function: RecordBatchIterator()
+
+```ts
+function RecordBatchIterator(promisedInner): AsyncGenerator<RecordBatch<any>, void, unknown>
+```
+
+## Parameters
+
+* **promisedInner**: `Promise`&lt;`RecordBatchIterator`&gt;
+
+## Returns
+
+`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;
--- a/docs/src/js/functions/connect.md
+++ b/docs/src/js/functions/connect.md
@@ -6,10 +6,14 @@

 # Function: connect()

-## connect(uri, options)
+## connect(uri, options, session, headerProvider)

 ```ts
-function connect(uri, options?): Promise<Connection>
+function connect(
+   uri,
+   options?,
+   session?,
+   headerProvider?): Promise<Connection>
 ```

 Connect to a LanceDB instance at the given URI.
@@ -29,6 +33,10 @@ Accepted formats:
 * **options?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
    The options to use when connecting to the database

+* **session?**: [`Session`](../classes/Session.md)
+
+* **headerProvider?**: [`HeaderProvider`](../classes/HeaderProvider.md) \| () => `Record`&lt;`string`, `string`&gt; \| () => `Promise`&lt;`Record`&lt;`string`, `string`&gt;&gt;
+
 ### Returns

 `Promise`&lt;[`Connection`](../classes/Connection.md)&gt;
@@ -50,6 +58,18 @@ const conn = await connect(
 });
 ```

+Using with a header provider for per-request authentication:
+```ts
+const provider = new StaticHeaderProvider({ "X-API-Key": "my-key" });
+// The header provider is the fourth argument; `session` is left undefined.
+const conn = await connect(
+  "db://host:port",
+  options,
+  undefined,
+  provider
+);
+```
+
 ## connect(options)

 ```ts
@@ -77,7 +97,7 @@ Accepted formats:

 [ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.

-### Example
+### Examples

 ```ts
 const conn = await connect({
@@ -85,3 +105,11 @@ const conn = await connect({
  storageOptions: {timeout: "60s"}
 });
 ```
+
+```ts
+const session = Session.default();
+const conn = await connect({
+  uri: "/path/to/database",
+  session: session
+});
+```
--- a/docs/src/js/functions/makeArrowTable.md
+++ b/docs/src/js/functions/makeArrowTable.md
@@ -13,7 +13,7 @@ function makeArrowTable(
   metadata?): ArrowTable
 ```

-An enhanced version of the makeTable function from Apache Arrow
+An enhanced version of the apache-arrow `makeTable` function
 that supports nested fields and embeddings columns.

 (typically you do not need to call this function.  It will be called automatically
--- a/docs/src/js/functions/permutationBuilder.md
+++ b/docs/src/js/functions/permutationBuilder.md
@@ -0,0 +1,34 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / permutationBuilder
+
+# Function: permutationBuilder()
+
+```ts
+function permutationBuilder(table): PermutationBuilder
+```
+
+Create a permutation builder for the given table.
+
+## Parameters
+
+* **table**: [`Table`](../classes/Table.md)
+    The source table to create a permutation from
+
+## Returns
+
+[`PermutationBuilder`](../classes/PermutationBuilder.md)
+
+A PermutationBuilder instance
+
+## Example
+
+```ts
+const builder = permutationBuilder(sourceTable)
+  .splitRandom({ ratios: [0.8, 0.2], seed: 42 })
+  .shuffle({ seed: 123 });
+
+const trainingTable = await builder.execute();
+```
--- a/docs/src/js/globals.md
+++ b/docs/src/js/globals.md
@@ -20,18 +20,24 @@
 - [BooleanQuery](classes/BooleanQuery.md)
 - [BoostQuery](classes/BoostQuery.md)
 - [Connection](classes/Connection.md)
+- [HeaderProvider](classes/HeaderProvider.md)
 - [Index](classes/Index.md)
 - [MakeArrowTableOptions](classes/MakeArrowTableOptions.md)
 - [MatchQuery](classes/MatchQuery.md)
 - [MergeInsertBuilder](classes/MergeInsertBuilder.md)
 - [MultiMatchQuery](classes/MultiMatchQuery.md)
+- [NativeJsHeaderProvider](classes/NativeJsHeaderProvider.md)
+- [OAuthHeaderProvider](classes/OAuthHeaderProvider.md)
+- [PermutationBuilder](classes/PermutationBuilder.md)
 - [PhraseQuery](classes/PhraseQuery.md)
 - [Query](classes/Query.md)
 - [QueryBase](classes/QueryBase.md)
- [RecordBatchIterator](classes/RecordBatchIterator.md)
+- [Session](classes/Session.md)
+- [StaticHeaderProvider](classes/StaticHeaderProvider.md)
 - [Table](classes/Table.md)
 - [TagContents](classes/TagContents.md)
 - [Tags](classes/Tags.md)
+- [TakeQuery](classes/TakeQuery.md)
 - [VectorColumnOptions](classes/VectorColumnOptions.md)
 - [VectorQuery](classes/VectorQuery.md)

@@ -62,6 +68,7 @@
 - [IndexStatistics](interfaces/IndexStatistics.md)
 - [IvfFlatOptions](interfaces/IvfFlatOptions.md)
 - [IvfPqOptions](interfaces/IvfPqOptions.md)
+- [IvfRqOptions](interfaces/IvfRqOptions.md)
 - [MergeResult](interfaces/MergeResult.md)
 - [OpenTableOptions](interfaces/OpenTableOptions.md)
 - [OptimizeOptions](interfaces/OptimizeOptions.md)
@@ -69,9 +76,16 @@
 - [QueryExecutionOptions](interfaces/QueryExecutionOptions.md)
 - [RemovalStats](interfaces/RemovalStats.md)
 - [RetryConfig](interfaces/RetryConfig.md)
+- [ShuffleOptions](interfaces/ShuffleOptions.md)
+- [SplitCalculatedOptions](interfaces/SplitCalculatedOptions.md)
+- [SplitHashOptions](interfaces/SplitHashOptions.md)
+- [SplitRandomOptions](interfaces/SplitRandomOptions.md)
+- [SplitSequentialOptions](interfaces/SplitSequentialOptions.md)
 - [TableNamesOptions](interfaces/TableNamesOptions.md)
 - [TableStatistics](interfaces/TableStatistics.md)
 - [TimeoutConfig](interfaces/TimeoutConfig.md)
+- [TlsConfig](interfaces/TlsConfig.md)
+- [TokenResponse](interfaces/TokenResponse.md)
 - [UpdateOptions](interfaces/UpdateOptions.md)
 - [UpdateResult](interfaces/UpdateResult.md)
 - [Version](interfaces/Version.md)
@@ -84,12 +98,15 @@
 - [FieldLike](type-aliases/FieldLike.md)
 - [IntoSql](type-aliases/IntoSql.md)
 - [IntoVector](type-aliases/IntoVector.md)
+- [MultiVector](type-aliases/MultiVector.md)
 - [RecordBatchLike](type-aliases/RecordBatchLike.md)
 - [SchemaLike](type-aliases/SchemaLike.md)
 - [TableLike](type-aliases/TableLike.md)

 ## Functions

+- [RecordBatchIterator](functions/RecordBatchIterator.md)
 - [connect](functions/connect.md)
 - [makeArrowTable](functions/makeArrowTable.md)
 - [packBits](functions/packBits.md)
+- [permutationBuilder](functions/permutationBuilder.md)
--- a/docs/src/js/interfaces/ClientConfig.md
+++ b/docs/src/js/interfaces/ClientConfig.md
@@ -16,6 +16,14 @@ optional extraHeaders: Record<string, string>;

 ***

+### idDelimiter?
+
+```ts
+optional idDelimiter: string;
+```
+
+***
+
 ### retryConfig?

 ```ts
@@ -32,6 +40,14 @@ optional timeoutConfig: TimeoutConfig;

 ***

+### tlsConfig?
+
+```ts
+optional tlsConfig: TlsConfig;
+```
+
+***
+
 ### userAgent?

 ```ts
--- a/docs/src/js/interfaces/ConnectionOptions.md
+++ b/docs/src/js/interfaces/ConnectionOptions.md
@@ -70,6 +70,17 @@ Defaults to 'us-east-1'.

 ***

+### session?
+
+```ts
+optional session: Session;
+```
+
+(For LanceDB OSS only): the session to use for this connection. Holds
+shared caches and other session-specific state.
+
+***
+
 ### storageOptions?

 ```ts
--- a/docs/src/js/interfaces/FtsOptions.md
+++ b/docs/src/js/interfaces/FtsOptions.md
@@ -23,7 +23,7 @@ whether to remove punctuation
 ### baseTokenizer?

 ```ts
-optional baseTokenizer: "raw" | "simple" | "whitespace";
+optional baseTokenizer: "raw" | "simple" | "whitespace" | "ngram";
 ```

 The tokenizer to use when building the index.
@@ -71,6 +71,36 @@ tokens longer than this length will be ignored

 ***

+### ngramMaxLength?
+
+```ts
+optional ngramMaxLength: number;
+```
+
+ngram max length
+
+***
+
+### ngramMinLength?
+
+```ts
+optional ngramMinLength: number;
+```
+
+ngram min length
+
+***
+
+### prefixOnly?
+
+```ts
+optional prefixOnly: boolean;
+```
+
+whether to only index the prefix of the token for ngram tokenizer
+
+***
+
 ### removeStopWords?

 ```ts
--- a/docs/src/js/interfaces/IndexOptions.md
+++ b/docs/src/js/interfaces/IndexOptions.md
@@ -26,6 +26,18 @@ will be used to determine the most useful kind of index to create.

 ***

+### name?
+
+```ts
+optional name: string;
+```
+
+Optional custom name for the index.
+
+If not provided, a default name will be generated based on the column name.
+
+***
+
 ### replace?

 ```ts
@@ -42,8 +54,27 @@ The default is true

 ***

+### train?
+
+```ts
+optional train: boolean;
+```
+
+Whether to train the index with existing data.
+
+If true (default), the index will be trained with existing data in the table.
+If false, the index will be created empty and populated as new data is added.
+
+Note: This option is only supported for scalar indices. Vector indices always train.
+
+***
+
 ### waitTimeoutSeconds?

 ```ts
 optional waitTimeoutSeconds: number;
 ```
+
+Timeout in seconds to wait for index creation to complete.
+
+If not specified, the method will return immediately after starting the index creation.
--- a/docs/src/js/interfaces/IvfRqOptions.md
+++ b/docs/src/js/interfaces/IvfRqOptions.md
@@ -0,0 +1,101 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / IvfRqOptions
+
+# Interface: IvfRqOptions
+
+## Properties
+
+### distanceType?
+
+```ts
+optional distanceType: "l2" | "cosine" | "dot";
+```
+
+Distance type to use to build the index.
+
+Default value is "l2".
+
+This is used when training the index to calculate the IVF partitions
+(vectors are grouped in partitions with similar vectors according to this
+distance type) and during quantization.
+
+The distance type used to train an index MUST match the distance type used
+to search the index. Failure to do so will yield inaccurate results.
+
+The following distance types are available:
+
+"l2" - Euclidean distance.
+"cosine" - Cosine distance.
+"dot" - Dot product.
+
+***
+
+### maxIterations?
+
+```ts
+optional maxIterations: number;
+```
+
+Max iterations to train IVF kmeans.
+
+When training an IVF index we use kmeans to calculate the partitions. This parameter
+controls how many iterations of kmeans to run.
+
+The default value is 50.
+
+***
+
+### numBits?
+
+```ts
+optional numBits: number;
+```
+
+Number of bits per dimension for residual quantization.
+
+This value controls how much each residual component is compressed. The more
+bits, the more accurate the index will be but the slower the search. Typical values
+are small integers; the default is 1 bit per dimension.
+
+***
+
+### numPartitions?
+
+```ts
+optional numPartitions: number;
+```
+
+The number of IVF partitions to create.
+
+This value should generally scale with the number of rows in the dataset.
+By default the number of partitions is the square root of the number of
+rows.
+
+If this value is too large then the first part of the search (picking the
+right partition) will be slow. If this value is too small then the second
+part of the search (searching within a partition) will be slow.
+
+***
+
+### sampleRate?
+
+```ts
+optional sampleRate: number;
+```
+
+The number of vectors, per partition, to sample when training IVF kmeans.
+
+When an IVF index is trained, we need to calculate partitions. These are groups
+of vectors that are similar to each other. To do this we use an algorithm called kmeans.
+
+Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
+random sample of the data. This parameter controls the size of the sample. The total
+number of vectors used to train the index is `sampleRate * numPartitions`.
+
+Increasing this value might improve the quality of the index but in most cases the
+default should be sufficient.
+
+The default value is 256.
--- a/docs/src/js/interfaces/OpenTableOptions.md
+++ b/docs/src/js/interfaces/OpenTableOptions.md
@@ -8,7 +8,7 @@

 ## Properties

-### indexCacheSize?
+### ~~indexCacheSize?~~

 ```ts
 optional indexCacheSize: number;
@@ -16,6 +16,11 @@ optional indexCacheSize: number;

 Set the size of the index cache, specified as a number of entries

+#### Deprecated
+
+Use session-level cache configuration instead.
+Create a Session with custom cache sizes and pass it to the connect() function.
+
 The exact meaning of an "entry" will depend on the type of index:
 - IVF: there is one entry for each IVF partition
 - BTREE: there is one entry for the entire index
--- a/docs/src/js/interfaces/OptimizeOptions.md
+++ b/docs/src/js/interfaces/OptimizeOptions.md
@@ -24,10 +24,10 @@ The default is 7 days
 // Delete all versions older than 1 day
 const olderThan = new Date();
 olderThan.setDate(olderThan.getDate() - 1));
-tbl.cleanupOlderVersions(olderThan);
+tbl.optimize({cleanupOlderThan: olderThan});

 // Delete all versions except the current version
-tbl.cleanupOlderVersions(new Date());
+tbl.optimize({cleanupOlderThan: new Date()});
 ```

 ***
--- a/docs/src/js/interfaces/ShuffleOptions.md
+++ b/docs/src/js/interfaces/ShuffleOptions.md
@@ -0,0 +1,23 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / ShuffleOptions
+
+# Interface: ShuffleOptions
+
+## Properties
+
+### clumpSize?
+
+```ts
+optional clumpSize: number;
+```
+
+***
+
+### seed?
+
+```ts
+optional seed: number;
+```
--- a/docs/src/js/interfaces/SplitCalculatedOptions.md
+++ b/docs/src/js/interfaces/SplitCalculatedOptions.md
@@ -0,0 +1,23 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / SplitCalculatedOptions
+
+# Interface: SplitCalculatedOptions
+
+## Properties
+
+### calculation
+
+```ts
+calculation: string;
+```
+
+***
+
+### splitNames?
+
+```ts
+optional splitNames: string[];
+```
--- a/docs/src/js/interfaces/SplitHashOptions.md
+++ b/docs/src/js/interfaces/SplitHashOptions.md
@@ -0,0 +1,39 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / SplitHashOptions
+
+# Interface: SplitHashOptions
+
+## Properties
+
+### columns
+
+```ts
+columns: string[];
+```
+
+***
+
+### discardWeight?
+
+```ts
+optional discardWeight: number;
+```
+
+***
+
+### splitNames?
+
+```ts
+optional splitNames: string[];
+```
+
+***
+
+### splitWeights
+
+```ts
+splitWeights: number[];
+```
--- a/docs/src/js/interfaces/SplitRandomOptions.md
+++ b/docs/src/js/interfaces/SplitRandomOptions.md
@@ -0,0 +1,47 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / SplitRandomOptions
+
+# Interface: SplitRandomOptions
+
+## Properties
+
+### counts?
+
+```ts
+optional counts: number[];
+```
+
+***
+
+### fixed?
+
+```ts
+optional fixed: number;
+```
+
+***
+
+### ratios?
+
+```ts
+optional ratios: number[];
+```
+
+***
+
+### seed?
+
+```ts
+optional seed: number;
+```
+
+***
+
+### splitNames?
+
+```ts
+optional splitNames: string[];
+```
--- a/docs/src/js/interfaces/SplitSequentialOptions.md
+++ b/docs/src/js/interfaces/SplitSequentialOptions.md
@@ -0,0 +1,39 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / SplitSequentialOptions
+
+# Interface: SplitSequentialOptions
+
+## Properties
+
+### counts?
+
+```ts
+optional counts: number[];
+```
+
+***
+
+### fixed?
+
+```ts
+optional fixed: number;
+```
+
+***
+
+### ratios?
+
+```ts
+optional ratios: number[];
+```
+
+***
+
+### splitNames?
+
+```ts
+optional splitNames: string[];
+```
--- a/docs/src/js/interfaces/TimeoutConfig.md
+++ b/docs/src/js/interfaces/TimeoutConfig.md
@@ -44,3 +44,17 @@ optional readTimeout: number;
 The timeout for reading data from the server in seconds. Default is 300
 seconds (5 minutes). This can also be set via the environment variable
 `LANCE_CLIENT_READ_TIMEOUT`, as an integer number of seconds.
+
+***
+
+### timeout?
+
+```ts
+optional timeout: number;
+```
+
+The overall timeout for the entire request in seconds. This includes
+connection, send, and read time. If the entire request doesn't complete
+within this time, it will fail. Default is undefined (no overall timeout).
+This can also be set via the environment variable `LANCE_CLIENT_TIMEOUT`,
+as an integer number of seconds.
--- a/docs/src/js/interfaces/TlsConfig.md
+++ b/docs/src/js/interfaces/TlsConfig.md
@@ -0,0 +1,49 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / TlsConfig
+
+# Interface: TlsConfig
+
+TLS/mTLS configuration for the remote HTTP client.
+
+## Properties
+
+### assertHostname?
+
+```ts
+optional assertHostname: boolean;
+```
+
+Whether to verify the hostname in the server's certificate.
+
+***
+
+### certFile?
+
+```ts
+optional certFile: string;
+```
+
+Path to the client certificate file (PEM format) for mTLS authentication.
+
+***
+
+### keyFile?
+
+```ts
+optional keyFile: string;
+```
+
+Path to the client private key file (PEM format) for mTLS authentication.
+
+***
+
+### sslCaCert?
+
+```ts
+optional sslCaCert: string;
+```
+
+Path to the CA certificate file (PEM format) for server verification.
--- a/docs/src/js/interfaces/TokenResponse.md
+++ b/docs/src/js/interfaces/TokenResponse.md
@@ -0,0 +1,25 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / TokenResponse
+
+# Interface: TokenResponse
+
+Token response from OAuth provider.
+
+## Properties
+
+### accessToken
+
+```ts
+accessToken: string;
+```
+
+***
+
+### expiresIn?
+
+```ts
+optional expiresIn: number;
+```
--- a/docs/src/js/type-aliases/MultiVector.md
+++ b/docs/src/js/type-aliases/MultiVector.md
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / MultiVector
+
+# Type Alias: MultiVector
+
+```ts
+type MultiVector: IntoVector[];
+```
--- a/docs/src/notebooks/Multivector_on_LanceDB.ipynb
+++ b/docs/src/notebooks/Multivector_on_LanceDB.ipynb
@@ -428,7 +428,7 @@
        "\n",
        "**Why?**  \n",
        "Embedding the UFO dataset and ingesting it into LanceDB takes **~2 hours on a T4 GPU**. To save time:  \n",
-        "- **Use the pre-prepared table with index created ** (provided below) to proceed directly to step7: search.  \n",
+        "- **Use the pre-prepared table with index created** (provided below) to proceed directly to **Step 7**: search.  \n",
        "- **Step 5a** contains the full ingestion code for reference (run it only if necessary).  \n",
        "- **Step 6** contains the details on creating the index on the multivector column"
      ]
--- a/docs/test/md_testing.py
+++ b/docs/test/md_testing.py
@@ -30,7 +30,8 @@ excluded_globs = [
    "../src/rag/advanced_techniques/*.md",
    "../src/guides/scalar_index.md",
    "../src/guides/storage.md",
-    "../src/search.md"
+    "../src/search.md",
+    "../src/guides/sql_querying.md",
 ]

 python_prefix = "py"
--- a/java/.mvn/wrapper/maven-wrapper.properties
+++ b/java/.mvn/wrapper/maven-wrapper.properties
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+wrapperVersion=3.3.2
+distributionType=only-script
+distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
--- a/java/README.md
+++ b/java/README.md
@@ -0,0 +1,37 @@
+# LanceDB Java SDK
+
+## Configuration and Initialization
+
+### LanceDB Cloud
+
+For LanceDB Cloud, use the simplified builder API:
+
+```java
+import com.lancedb.lance.namespace.LanceRestNamespace;
+
+// If your DB url is db://example-db, then your database here is example-db
+LanceRestNamespace namespace = LanceDbRestNamespaces.builder()
+    .apiKey("your_lancedb_cloud_api_key")
+    .database("your_database_name")
+    .build();
+```
+
+### LanceDB Enterprise
+
+For Enterprise deployments, use your VPC endpoint:
+
+```java
+LanceRestNamespace namespace = LanceDbRestNamespaces.builder()
+    .apiKey("your_lancedb_enterprise_api_key")
+    .database("your-top-dir") // Your top level folder under your cloud bucket, e.g. s3://your-bucket/your-top-dir/
+    .hostOverride("http://<vpc_endpoint_dns_name>:80")
+    .build();
+```
+
+## Development
+
+Build:
+
+```shell
+./mvnw install
+```
--- a/java/core/lancedb-jni/Cargo.toml
+++ b/java/core/lancedb-jni/Cargo.toml
@@ -15,13 +15,16 @@ publish = false
 crate-type = ["cdylib"]

 [dependencies]
-lancedb = { path = "../../../rust/lancedb" }
+lancedb = { path = "../../../rust/lancedb", default-features = false }
 lance = { workspace = true }
 arrow = { workspace = true, features = ["ffi"] }
 arrow-schema.workspace = true
-tokio = "1.23"
+tokio = "1.46"
 jni = "0.21.1"
 snafu.workspace = true
 lazy_static.workspace = true
 serde = { version = "^1" }
 serde_json = { version = "1" }
+
+[features]
+default = ["lancedb/default"]
--- a/java/core/lancedb-jni/src/error.rs
+++ b/java/core/lancedb-jni/src/error.rs
@@ -51,8 +51,11 @@ pub enum Error {
    DatasetAlreadyExists { uri: String, location: Location },
    #[snafu(display("Table '{name}' already exists"))]
    TableAlreadyExists { name: String },
-    #[snafu(display("Table '{name}' was not found"))]
-    TableNotFound { name: String },
+    #[snafu(display("Table '{name}' was not found: {source}"))]
+    TableNotFound {
+        name: String,
+        source: Box<dyn std::error::Error + Send + Sync>,
+    },
    #[snafu(display("Invalid table name '{name}': {reason}"))]
    InvalidTableName { name: String, reason: String },
    #[snafu(display("Embedding function '{name}' was not found: {reason}, {location}"))]
@@ -191,7 +194,7 @@ impl From<lancedb::Error> for Error {
                message,
                location: std::panic::Location::caller().to_snafu_location(),
            },
-            lancedb::Error::TableNotFound { name } => Self::TableNotFound { name },
+            lancedb::Error::TableNotFound { name, source } => Self::TableNotFound { name, source },
            lancedb::Error::TableAlreadyExists { name } => Self::TableAlreadyExists { name },
            lancedb::Error::EmbeddingFunctionNotFound { name, reason } => {
                Self::EmbeddingFunctionNotFound {
--- a/java/core/lancedb-jni/src/ffi.rs
+++ b/java/core/lancedb-jni/src/ffi.rs
@@ -16,6 +16,7 @@ pub trait JNIEnvExt {
    fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>>;

    /// Get strings from Java List<String> object.
+    #[allow(dead_code)]
    fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>>;

    /// Get strings from Java String[] object.
--- a/java/core/lancedb-jni/src/traits.rs
+++ b/java/core/lancedb-jni/src/traits.rs
@@ -6,6 +6,7 @@ use jni::JNIEnv;

 use crate::Result;

+#[allow(dead_code)]
 pub trait FromJObject<T> {
    fn extract(&self) -> Result<T>;
 }
@@ -39,6 +40,7 @@ impl FromJObject<f64> for JObject<'_> {
    }
 }

+#[allow(dead_code)]
 pub trait FromJString {
    fn extract(&self, env: &mut JNIEnv) -> Result<String>;
 }
@@ -66,6 +68,7 @@ pub trait JMapExt {
    fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>>;
 }

+#[allow(dead_code)]
 fn get_map_value<T>(env: &mut JNIEnv, map: &JMap, key: &str) -> Result<Option<T>>
 where
    for<'a> JObject<'a>: FromJObject<T>,
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,18 +8,24 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.20.1-beta.2</version>
+        <version>0.22.3-beta.5</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

    <artifactId>lancedb-core</artifactId>
-    <name>LanceDB Core</name>
+    <name>${project.artifactId}</name>
+    <description>LanceDB Core</description>
    <packaging>jar</packaging>
    <properties>
        <rust.release.build>false</rust.release.build>
    </properties>

    <dependencies>
+        <dependency>
+            <groupId>com.lancedb</groupId>
+            <artifactId>lance-namespace-core</artifactId>
+            <version>0.0.1</version>
+        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-vector</artifactId>
--- a/java/lance-namespace/pom.xml
+++ b/java/lance-namespace/pom.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>com.lancedb</groupId>
+        <artifactId>lancedb-parent</artifactId>
+        <version>0.22.3-beta.5</version>
+        <relativePath>../pom.xml</relativePath>
+    </parent>
+
+    <artifactId>lancedb-lance-namespace</artifactId>
+    <name>${project.artifactId}</name>
+    <description>LanceDB Java Integration with Lance Namespace</description>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.lancedb</groupId>
+            <artifactId>lance-namespace-core</artifactId>
+        </dependency>
+    </dependencies>
+</project>
--- a/java/lance-namespace/src/main/java/com/lancedb/lancedb/LanceDbRestNamespaces.java
+++ b/java/lance-namespace/src/main/java/com/lancedb/lancedb/LanceDbRestNamespaces.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.lancedb.lancedb;
+
+import com.lancedb.lance.namespace.LanceRestNamespace;
+import com.lancedb.lance.namespace.client.apache.ApiClient;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+
+/** Builder-style utility for constructing a {@link LanceRestNamespace} that targets LanceDB. */
+public class LanceDbRestNamespaces {
+  private static final String DEFAULT_REGION = "us-east-1";
+  private static final String CLOUD_URL_PATTERN = "https://%s.%s.api.lancedb.com";
+
+  private String apiKey;
+  private String database;
+  private Optional<String> hostOverride = Optional.empty();
+  private Optional<String> region = Optional.empty();
+  private Map<String, String> additionalConfig = new HashMap<>();
+
+  private LanceDbRestNamespaces() {}
+
+  /**
+   * Create a new, empty builder instance.
+   *
+   * @return A new LanceDbRestNamespaces builder
+   */
+  public static LanceDbRestNamespaces builder() {
+    return new LanceDbRestNamespaces();
+  }
+
+  /**
+   * Set the API key (required).
+   *
+   * @param apiKey The LanceDB API key; null or blank values raise IllegalArgumentException
+   * @return This builder
+   */
+  public LanceDbRestNamespaces apiKey(String apiKey) {
+    if (apiKey == null || apiKey.trim().isEmpty()) {
+      throw new IllegalArgumentException("API key cannot be null or empty");
+    }
+    this.apiKey = apiKey;
+    return this;
+  }
+
+  /**
+   * Set the database name (required).
+   *
+   * @param database The database name; null or blank values raise IllegalArgumentException
+   * @return This builder
+   */
+  public LanceDbRestNamespaces database(String database) {
+    if (database == null || database.trim().isEmpty()) {
+      throw new IllegalArgumentException("Database cannot be null or empty");
+    }
+    this.database = database;
+    return this;
+  }
+
+  /**
+   * Set a custom host override (optional). When set, this overrides the default LanceDB Cloud URL
+   * construction. Use this for LanceDB Enterprise deployments.
+   *
+   * @param hostOverride The complete base URL (e.g., "http://your-vpc-endpoint:80")
+   * @return This builder
+   */
+  public LanceDbRestNamespaces hostOverride(String hostOverride) {
+    this.hostOverride = Optional.ofNullable(hostOverride);
+    return this;
+  }
+
+  /**
+   * Set the region for LanceDB Cloud (optional). Defaults to "us-east-1" if not specified. This is
+   * ignored when hostOverride is set.
+   *
+   * @param region The AWS region (e.g., "us-east-1", "eu-west-1")
+   * @return This builder
+   */
+  public LanceDbRestNamespaces region(String region) {
+    this.region = Optional.ofNullable(region);
+    return this;
+  }
+
+  /**
+   * Add an additional configuration parameter; a repeated key replaces the earlier value.
+   *
+   * @param key The configuration key
+   * @param value The configuration value
+   * @return This builder
+   */
+  public LanceDbRestNamespaces config(String key, String value) {
+    this.additionalConfig.put(key, value);
+    return this;
+  }
+
+  /**
+   * Build the LanceRestNamespace instance from the values collected on this builder.
+   *
+   * @return A configured LanceRestNamespace
+   * @throws IllegalStateException if the API key or the database has not been set
+   */
+  public LanceRestNamespace build() {
+    // Validate required fields
+    if (apiKey == null) {
+      throw new IllegalStateException("API key is required");
+    }
+    if (database == null) {
+      throw new IllegalStateException("Database is required");
+    }
+
+    // Caller-supplied entries first; the required headers below win on key clashes
+    Map<String, String> config = new HashMap<>(additionalConfig);
+    config.put("headers.x-lancedb-database", database);
+    config.put("headers.x-api-key", apiKey);
+
+    // Determine base URL: an explicit host override takes precedence over the Cloud URL pattern
+    String baseUrl;
+    if (hostOverride.isPresent()) {
+      baseUrl = hostOverride.get();
+      config.put("host_override", hostOverride.get());
+    } else {
+      String effectiveRegion = region.orElse(DEFAULT_REGION);
+      baseUrl = String.format(CLOUD_URL_PATTERN, database, effectiveRegion);
+      config.put("region", effectiveRegion);
+    }
+
+    // Point a fresh ApiClient at the resolved base URL
+    ApiClient apiClient = new ApiClient();
+    apiClient.setBasePath(baseUrl);
+
+    return new LanceRestNamespace(apiClient, config);
+  }
+}
--- a/java/mvnw
+++ b/java/mvnw
@@ -0,0 +1,259 @@
+#!/bin/sh
+# ----------------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# ----------------------------------------------------------------------------
+
+# ----------------------------------------------------------------------------
+# Apache Maven Wrapper startup batch script, version 3.3.2
+#
+# Optional ENV vars
+# -----------------
+#   JAVA_HOME - location of a JDK home dir, required when download maven via java source
+#   MVNW_REPOURL - repo url base for downloading maven distribution
+#   MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
+#   MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output
+# ----------------------------------------------------------------------------
+
+set -euf
+[ "${MVNW_VERBOSE-}" != debug ] || set -x
+
+# OS specific support.
+native_path() { printf %s\\n "$1"; }
+case "$(uname)" in
+CYGWIN* | MINGW*)
+  [ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")"
+  native_path() { cygpath --path --windows "$1"; }
+  ;;
+esac
+
+# set JAVACMD and JAVACCMD
+set_java_home() {
+  # For Cygwin and MinGW, ensure paths are in Unix format before anything is touched
+  if [ -n "${JAVA_HOME-}" ]; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ]; then
+      # IBM's JDK on AIX uses strange locations for the executables
+      JAVACMD="$JAVA_HOME/jre/sh/java"
+      JAVACCMD="$JAVA_HOME/jre/sh/javac"
+    else
+      JAVACMD="$JAVA_HOME/bin/java"
+      JAVACCMD="$JAVA_HOME/bin/javac"
+
+      if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then
+        echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2
+        echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2
+        return 1
+      fi
+    fi
+  else
+    JAVACMD="$(
+      'set' +e
+      'unset' -f command 2>/dev/null
+      'command' -v java
+    )" || :
+    JAVACCMD="$(
+      'set' +e
+      'unset' -f command 2>/dev/null
+      'command' -v javac
+    )" || :
+
+    if [ ! -x "${JAVACMD-}" ] || [ ! -x "${JAVACCMD-}" ]; then
+      echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2
+      return 1
+    fi
+  fi
+}
+
+# hash string like Java String::hashCode
+hash_string() {
+  str="${1:-}" h=0
+  while [ -n "$str" ]; do
+    char="${str%"${str#?}"}"
+    h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296))
+    str="${str#?}"
+  done
+  printf %x\\n $h
+}
+
+verbose() { :; }
+[ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; }
+
+die() {
+  printf %s\\n "$1" >&2
+  exit 1
+}
+
+trim() {
+  # MWRAPPER-139:
+  #   Trims trailing and leading whitespace, carriage returns, tabs, and linefeeds.
+  #   Needed for removing poorly interpreted newline sequences when running in more
+  #   exotic environments such as mingw bash on Windows.
+  printf "%s" "${1}" | tr -d '[:space:]'
+}
+
+# parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties
+while IFS="=" read -r key value; do
+  case "${key-}" in
+  distributionUrl) distributionUrl=$(trim "${value-}") ;;
+  distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;;
+  esac
+done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties"
+[ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties"
+
+case "${distributionUrl##*/}" in
+maven-mvnd-*bin.*)
+  MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/
+  case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in
+  *AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;;
+  :Darwin*x86_64) distributionPlatform=darwin-amd64 ;;
+  :Darwin*arm64) distributionPlatform=darwin-aarch64 ;;
+  :Linux*x86_64*) distributionPlatform=linux-amd64 ;;
+  *)
+    echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2
+    distributionPlatform=linux-amd64
+    ;;
+  esac
+  distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip"
+  ;;
+maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;;
+*) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;;
+esac
+
+# apply MVNW_REPOURL and calculate MAVEN_HOME
+# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash>
+[ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}"
+distributionUrlName="${distributionUrl##*/}"
+distributionUrlNameMain="${distributionUrlName%.*}"
+distributionUrlNameMain="${distributionUrlNameMain%-bin}"
+MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}"
+MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")"
+
+exec_maven() {
+  unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || :
+  exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD"
+}
+
+if [ -d "$MAVEN_HOME" ]; then
+  verbose "found existing MAVEN_HOME at $MAVEN_HOME"
+  exec_maven "$@"
+fi
+
+case "${distributionUrl-}" in
+*?-bin.zip | *?maven-mvnd-?*-?*.zip) ;;
+*) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;;
+esac
+
+# prepare tmp dir
+if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then
+  clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; }
+  trap clean HUP INT TERM EXIT
+else
+  die "cannot create temp dir"
+fi
+
+mkdir -p -- "${MAVEN_HOME%/*}"
+
+# Download and Install Apache Maven
+verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
+verbose "Downloading from: $distributionUrl"
+verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
+
+# select .zip or .tar.gz
+if ! command -v unzip >/dev/null; then
+  distributionUrl="${distributionUrl%.zip}.tar.gz"
+  distributionUrlName="${distributionUrl##*/}"
+fi
+
+# verbose opt
+__MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR=''
+[ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v
+
+# normalize http auth
+case "${MVNW_PASSWORD:+has-password}" in
+'') MVNW_USERNAME='' MVNW_PASSWORD='' ;;
+has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;;
+esac
+
+if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then
+  verbose "Found wget ... using wget"
+  wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl"
+elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then
+  verbose "Found curl ... using curl"
+  curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl"
+elif set_java_home; then
+  verbose "Falling back to use Java to download"
+  javaSource="$TMP_DOWNLOAD_DIR/Downloader.java"
+  targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName"
+  cat >"$javaSource" <<-END
+	public class Downloader extends java.net.Authenticator
+	{
+	  protected java.net.PasswordAuthentication getPasswordAuthentication()
+	  {
+	    return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() );
+	  }
+	  public static void main( String[] args ) throws Exception
+	  {
+	    setDefault( new Downloader() );
+	    java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() );
+	  }
+	}
+	END
+  # For Cygwin/MinGW, switch paths to Windows format before running javac and java
+  verbose " - Compiling Downloader.java ..."
+  "$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java"
+  verbose " - Running Downloader.java ..."
+  "$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")"
+fi
+
+# If specified, validate the SHA-256 sum of the Maven distribution zip file
+if [ -n "${distributionSha256Sum-}" ]; then
+  distributionSha256Result=false
+  if [ "$MVN_CMD" = mvnd.sh ]; then
+    echo "Checksum validation is not supported for maven-mvnd." >&2
+    echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
+    exit 1
+  elif command -v sha256sum >/dev/null; then
+    if echo "$distributionSha256Sum  $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then
+      distributionSha256Result=true
+    fi
+  elif command -v shasum >/dev/null; then
+    if echo "$distributionSha256Sum  $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then
+      distributionSha256Result=true
+    fi
+  else
+    echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2
+    echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
+    exit 1
+  fi
+  if [ $distributionSha256Result = false ]; then
+    echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2
+    echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." >&2
+    exit 1
+  fi
+fi
+
+# unzip and move
+if command -v unzip >/dev/null; then
+  unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip"
+else
+  tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar"
+fi
+printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url"
+mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME"
+
+clean || :
+exec_maven "$@"
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,11 +6,10 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.20.1-beta.2</version>
+    <version>0.22.3-beta.5</version>
    <packaging>pom</packaging>
-
-    <name>LanceDB Parent</name>
-    <description>LanceDB vector database Java API</description>
+    <name>${project.artifactId}</name>
+    <description>LanceDB Java SDK Parent POM</description>
    <url>http://lancedb.com/</url>

    <developers>
@@ -29,6 +28,7 @@
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <arrow.version>15.0.0</arrow.version>
+        <lance-namespace.verison>0.0.1</lance-namespace.verison>
        <spotless.skip>false</spotless.skip>
        <spotless.version>2.30.0</spotless.version>
        <spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
@@ -52,6 +52,7 @@

    <modules>
        <module>core</module>
+        <module>lance-namespace</module>
    </modules>

    <scm>
@@ -62,6 +63,11 @@

    <dependencyManagement>
        <dependencies>
+            <dependency>
+                <groupId>com.lancedb</groupId>
+                <artifactId>lance-namespace-core</artifactId>
+                <version>${lance-namespace.verison}</version>
+            </dependency>
            <dependency>
                <groupId>org.apache.arrow</groupId>
                <artifactId>arrow-vector</artifactId>
--- a/node/.eslintrc.js
+++ b/node/.eslintrc.js
@@ -1,22 +0,0 @@
-module.exports = {
-  env: {
-    browser: true,
-    es2021: true
-  },
-  extends: 'standard-with-typescript',
-  overrides: [
-  ],
-  parserOptions: {
-    project: './tsconfig.json',
-    ecmaVersion: 'latest',
-    sourceType: 'module'
-  },
-  rules: {
-    "@typescript-eslint/method-signature-style": "off",
-    "@typescript-eslint/quotes": "off",
-    "@typescript-eslint/semi": "off",
-    "@typescript-eslint/explicit-function-return-type": "off",
-    "@typescript-eslint/space-before-function-paren": "off",
-    "@typescript-eslint/indent": "off",
-  }
-}
--- a/node/.npmignore
+++ b/node/.npmignore
@@ -1,4 +0,0 @@
-gen_test_data.py
-index.node
-dist/lancedb*.tgz
-vectordb*.tgz
--- a/node/CHANGELOG.md
+++ b/node/CHANGELOG.md
@@ -1,64 +0,0 @@
-# Changelog
-
-All notable changes to this project will be documented in this file.
-
-The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
-and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-
-## [0.1.5] - 2023-06-00
-
-### Added
-
- Support for macOS X86
-
-## [0.1.4] - 2023-06-03
-
-### Added
-
- Select / Project query API
-
-### Changed
-
-  Deprecated created_index in favor of createIndex
-
-## [0.1.3] - 2023-06-01
-
-### Added
-
- Support S3 and Google Cloud Storage
- Embedding functions support
- OpenAI embedding function
-
-## [0.1.2] - 2023-05-27
-
-### Added
-
- Append records API
- Extra query params to to nodejs client
- Create_index API
- 
-### Fixed
-
- bugfix: string columns should be converted to Utf8Array (#94)
-
-## [0.1.1] - 2023-05-16
-
-### Added
-
- create_table API
- limit parameter for queries
- Typescript / JavaScript examples
- Linux support
-
-## [0.1.0] - 2023-05-16
-
-### Added
-
- Initial  JavaScript / Node.js library for LanceDB
- Read-only api to query LanceDB datasets
- Supports macOS arm only
-
-## [pre-0.1.0]
-
- Various prototypes / test builds
-
--- a/node/README.md
+++ b/node/README.md
@@ -1,66 +0,0 @@
-# LanceDB
-
-A JavaScript / Node.js library for [LanceDB](https://github.com/lancedb/lancedb).
-
-**DEPRECATED: This library is deprecated. Please use the new client,
-[@lancedb/lancedb](https://www.npmjs.com/package/@lancedb/lancedb).**
-
-## Installation
-
-```bash
-npm install vectordb
-```
-
-This will download the appropriate native library for your platform. We currently
-support:
-
-* Linux (x86_64 and aarch64)
-* MacOS (Intel and ARM/M1/M2)
-* Windows (x86_64 only)
-
-We do not yet support musl-based Linux (such as Alpine Linux) or aarch64 Windows.
-
-## Usage
-
-### Basic Example
-
-```javascript
-const lancedb = require('vectordb');
-const db = await lancedb.connect('data/sample-lancedb');
-const table = await db.createTable("my_table",
-      [{ id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 },
-      { id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 }])
-const results = await table.search([0.1, 0.3]).limit(20).execute();
-console.log(results);
-```
-
-The [examples](./examples) folder contains complete examples.
-
-## Development
-
-To build everything fresh:
-
-```bash
-npm install
-npm run build
-```
-
-Then you should be able to run the tests with:
-
-```bash
-npm test
-```
-
-### Fix lints
-
-To run the linter and have it automatically fix all errors
-
-```bash
-npm run lint -- --fix
-```
-
-To build documentation
-
-```bash
-npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
-```
--- a/node/examples/js-openai/index.js
+++ b/node/examples/js-openai/index.js
@@ -1,41 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-'use strict'
-
-async function example () {
-  const lancedb = require('vectordb')
-  // You need to provide an OpenAI API key, here we read it from the OPENAI_API_KEY environment variable
-  const apiKey = process.env.OPENAI_API_KEY
-  // The embedding function will create embeddings for the 'text' column(text in this case)
-  const embedding = new lancedb.OpenAIEmbeddingFunction('text', apiKey)
-
-  const db = await lancedb.connect('data/sample-lancedb')
-
-  const data = [
-    { id: 1, text: 'Black T-Shirt', price: 10 },
-    { id: 2, text: 'Leather Jacket', price: 50 }
-  ]
-
-  const table = await db.createTable('vectors', data, embedding)
-  console.log(await db.tableNames())
-
-  const results = await table
-    .search('keeps me warm')
-    .limit(1)
-    .execute()
-  console.log(results[0].text)
-}
-
-example().then(_ => { console.log('All done!') })
--- a/node/examples/js-openai/package.json
+++ b/node/examples/js-openai/package.json
@@ -1,15 +0,0 @@
-{
-  "name": "vectordb-example-js-openai",
-  "version": "1.0.0",
-  "description": "",
-  "main": "index.js",
-  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "dependencies": {
-    "vectordb": "file:../..",
-    "openai": "^3.2.1"
-  }
-}
--- a/node/examples/js-transformers/index.js
+++ b/node/examples/js-transformers/index.js
@@ -1,66 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-'use strict'
-
-
-async function example() {
-
-    const lancedb = require('vectordb')
-
-    // Import transformers and the all-MiniLM-L6-v2 model (https://huggingface.co/Xenova/all-MiniLM-L6-v2)
-    const { pipeline } = await import('@xenova/transformers')
-    const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
-
-
-    // Create embedding function from pipeline which returns a list of vectors from batch
-    // sourceColumn is the name of the column in the data to be embedded
-    //
-    // Output of pipe is a Tensor { data: Float32Array(384) }, so filter for the vector
-    const embed_fun = {}
-    embed_fun.sourceColumn = 'text'
-    embed_fun.embed = async function (batch) {
-        let result = []
-        for (let text of batch) {
-            const res = await pipe(text, { pooling: 'mean', normalize: true })
-            result.push(Array.from(res['data']))
-        }
-        return (result)
-    }
-
-    // Link a folder and create a table with data
-    const db = await lancedb.connect('data/sample-lancedb')
-
-    const data = [
-        { id: 1, text: 'Cherry', type: 'fruit' },
-        { id: 2, text: 'Carrot', type: 'vegetable' },
-        { id: 3, text: 'Potato', type: 'vegetable' },
-        { id: 4, text: 'Apple', type: 'fruit' },
-        { id: 5, text: 'Banana', type: 'fruit' }
-    ]
-
-    const table = await db.createTable('food_table', data, embed_fun)
-
-
-    // Query the table
-    const results = await table
-        .search("a sweet fruit to eat")
-        .metricType("cosine")
-        .limit(2)
-        .execute()
-    console.log(results.map(r => r.text))
-
-}
-
-example().then(_ => { console.log("Done!") })
--- a/node/examples/js-transformers/package.json
+++ b/node/examples/js-transformers/package.json
@@ -1,16 +0,0 @@
-{
-  "name": "vectordb-example-js-transformers",
-  "version": "1.0.0",
-  "description": "Example for using transformers.js with lancedb",
-  "main": "index.js",
-  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "dependencies": {
-    "@xenova/transformers": "^2.4.1",
-    "vectordb": "file:../.."
-  }
-
-}
--- a/node/examples/js-youtube-transcripts/index.js
+++ b/node/examples/js-youtube-transcripts/index.js
@@ -1,122 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-'use strict'
-
-const lancedb = require('vectordb')
-const fs = require('fs/promises')
-const readline = require('readline/promises')
-const { stdin: input, stdout: output } = require('process')
-const { Configuration, OpenAIApi } = require('openai')
-
-// Download file from XYZ
-const INPUT_FILE_NAME = 'data/youtube-transcriptions_sample.jsonl';
-
-(async () => {
-  // You need to provide an OpenAI API key, here we read it from the OPENAI_API_KEY environment variable
-  const apiKey = process.env.OPENAI_API_KEY
-  // The embedding function will create embeddings for the 'context' column
-  const embedFunction = new lancedb.OpenAIEmbeddingFunction('context', apiKey)
-
-  // Connects to LanceDB
-  const db = await lancedb.connect('data/youtube-lancedb')
-
-  // Open the vectors table or create one if it does not exist
-  let tbl
-  if ((await db.tableNames()).includes('vectors')) {
-    tbl = await db.openTable('vectors', embedFunction)
-  } else {
-    tbl = await createEmbeddingsTable(db, embedFunction)
-  }
-
-  // Use OpenAI Completion API to generate and answer based on the context that LanceDB provides
-  const configuration = new Configuration({ apiKey })
-  const openai = new OpenAIApi(configuration)
-  const rl = readline.createInterface({ input, output })
-  try {
-    while (true) {
-      const query = await rl.question('Prompt: ')
-      const results = await tbl
-        .search(query)
-        .select(['title', 'text', 'context'])
-        .limit(3)
-        .execute()
-
-      // console.table(results)
-
-      const response = await openai.createCompletion({
-        model: 'text-davinci-003',
-        prompt: createPrompt(query, results),
-        max_tokens: 400,
-        temperature: 0,
-        top_p: 1,
-        frequency_penalty: 0,
-        presence_penalty: 0
-      })
-      console.log(response.data.choices[0].text)
-    }
-  } catch (err) {
-    console.log('Error: ', err)
-  } finally {
-    rl.close()
-  }
-  process.exit(1)
-})()
-
-async function createEmbeddingsTable (db, embedFunction) {
-  console.log(`Creating embeddings from ${INPUT_FILE_NAME}`)
-  // read the input file into a JSON array, skipping empty lines
-  const lines = (await fs.readFile(INPUT_FILE_NAME, 'utf-8'))
-    .toString()
-    .split('\n')
-    .filter(line => line.length > 0)
-    .map(line => JSON.parse(line))
-
-  const data = contextualize(lines, 20, 'video_id')
-  return await db.createTable('vectors', data, embedFunction)
-}
-
-// Each transcript has a small text column, we include previous transcripts in order to
-// have more context information when creating embeddings
-function contextualize (rows, contextSize, groupColumn) {
-  const grouped = []
-  rows.forEach(row => {
-    if (!grouped[row[groupColumn]]) {
-      grouped[row[groupColumn]] = []
-    }
-    grouped[row[groupColumn]].push(row)
-  })
-
-  const data = []
-  Object.keys(grouped).forEach(key => {
-    for (let i = 0; i < grouped[key].length; i++) {
-      const start = i - contextSize > 0 ? i - contextSize : 0
-      grouped[key][i].context = grouped[key].slice(start, i + 1).map(r => r.text).join(' ')
-    }
-    data.push(...grouped[key])
-  })
-  return data
-}
-
-// Creates a prompt by aggregating all relevant contexts
-function createPrompt (query, context) {
-  let prompt =
-      'Answer the question based on the context below.\n\n' +
-      'Context:\n'
-
-  // need to make sure our prompt is not larger than max size
-  prompt = prompt + context.map(c => c.context).join('\n\n---\n\n').substring(0, 3750)
-  prompt = prompt + `\n\nQuestion: ${query}\nAnswer:`
-  return prompt
-}
--- a/Show More
+++ b/Show More