Bump version: 0.29.0-beta.0 → 0.29.0

Bump version: 0.28.0-beta.11 → 0.29.0-beta.0
Bump version: 0.32.0-beta.0 → 0.32.0
2026-05-23 06:50:40 +00:00 · 2026-05-13 16:32:11 +00:00 · 2026-05-13 16:32:06 +00:00 · 2026-05-13 16:31:47 +00:00 · 2026-05-13 16:31:45 +00:00 · 2026-05-13 13:19:20 +00:00
130 changed files with 7540 additions and 3488 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.27.2"
+current_version = "0.29.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/ISSUE_TEMPLATE/documentation.yml
+++ b/.github/ISSUE_TEMPLATE/documentation.yml
@@ -18,6 +18,6 @@ body:
      label: Link
      description: >
        Provide a link to the existing documentation, if applicable.
-      placeholder: ex. https://lancedb.com/docs/tables/...
+      placeholder: ex. https://docs.lancedb.com/tables/...
    validations:
      required: false
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -0,0 +1,18 @@
+version: 2
+
+# Scope: the root Cargo workspace, which produces the Rust binaries we
+# ship to users (the Node.js and Python native extensions). The
+# `rust/lancedb` library crate shares the same lockfile; its consumers
+# pick their own dependency versions, but bumping transitive deps here
+# keeps the binaries we ship current.
+updates:
+  - package-ecosystem: cargo
+    directory: /
+    schedule:
+      interval: weekly
+    open-pull-requests-limit: 10
+    groups:
+      rust-minor-patch:
+        update-types:
+          - minor
+          - patch
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -8,6 +8,9 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

+permissions:
+  contents: read
+
 jobs:
  labeler:
    permissions:
--- a/.github/workflows/java-publish.yml
+++ b/.github/workflows/java-publish.yml
@@ -19,6 +19,9 @@ on:
    paths:
      - .github/workflows/java-publish.yml

+permissions:
+  contents: read
+
 jobs:
  publish:
    name: Build and Publish
@@ -40,7 +43,7 @@ jobs:
          server-username: SONATYPE_USER
          server-password: SONATYPE_TOKEN
          gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
-          gpg-passphrase: ${{ secrets.GPG_PASSPHRASE }}
+          gpg-passphrase: MAVEN_GPG_PASSPHRASE
      - name: Set git config
        run: |
          git config --global user.email "dev+gha@lancedb.com"
@@ -55,10 +58,11 @@ jobs:
          echo "use-agent" >> ~/.gnupg/gpg.conf
          echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf
          export GPG_TTY=$(tty)
-          ./mvnw --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -pl lancedb-core -am -P deploy-to-ossrh
+          ./mvnw --batch-mode -DskipTests -DpushChanges=false deploy -pl lancedb-core -am -P deploy-to-ossrh
        env:
          SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
          SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
+          MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}

  report-failure:
    name: Report Workflow Failure
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -16,6 +16,7 @@ on:
  push:
    branches:
      - main
+      - release/**
    paths:
      - java/**
      - .github/workflows/java.yml
@@ -24,6 +25,9 @@ on:
      - java/**
      - .github/workflows/java.yml

+permissions:
+  contents: read
+
 jobs:
  build-java:
    runs-on: ubuntu-24.04
--- a/.github/workflows/license-header-check.yml
+++ b/.github/workflows/license-header-check.yml
@@ -3,6 +3,7 @@ on:
  push:
    branches:
      - main
+      - release/**
  pull_request:
    paths:
      - rust/**
@@ -10,6 +11,10 @@ on:
      - nodejs/**
      - java/**
      - .github/workflows/license-header-check.yml
+
+permissions:
+  contents: read
+
 jobs:
  check-licenses:
    runs-on: ubuntu-latest
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -4,16 +4,21 @@ on:
  push:
    branches:
      - main
+      - release/**
  pull_request:
    paths:
      - Cargo.toml
      - Cargo.lock
+      - rust-toolchain.toml
      - nodejs/**
      - rust/**
      - docs/src/js/**
      - .github/workflows/nodejs.yml
      - docker-compose.yml

+permissions:
+  contents: read
+
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -14,10 +14,16 @@ on:
 env:
  PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"

+permissions:
+  contents: read
+
 jobs:
  linux:
    name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }}
    timeout-minutes: 60
+    permissions:
+      id-token: write
+      contents: read
    strategy:
      matrix:
        config:
@@ -57,10 +63,12 @@ jobs:
      - uses: ./.github/workflows/upload_wheel
        if: startsWith(github.ref, 'refs/tags/python-v')
        with:
-          pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
          fury_token: ${{ secrets.FURY_TOKEN }}
  mac:
    timeout-minutes: 90
+    permissions:
+      id-token: write
+      contents: read
    runs-on: ${{ matrix.config.runner }}
    strategy:
      matrix:
@@ -85,10 +93,12 @@ jobs:
      - uses: ./.github/workflows/upload_wheel
        if: startsWith(github.ref, 'refs/tags/python-v')
        with:
-          pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
          fury_token: ${{ secrets.FURY_TOKEN }}
  windows:
    timeout-minutes: 60
+    permissions:
+      id-token: write
+      contents: read
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
@@ -107,7 +117,6 @@ jobs:
      - uses: ./.github/workflows/upload_wheel
        if: startsWith(github.ref, 'refs/tags/python-v')
        with:
-          pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
          fury_token: ${{ secrets.FURY_TOKEN }}
  gh-release:
    if: startsWith(github.ref, 'refs/tags/python-v')
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -4,10 +4,12 @@ on:
  push:
    branches:
      - main
+      - release/**
  pull_request:
    paths:
      - Cargo.toml
      - Cargo.lock
+      - rust-toolchain.toml
      - python/**
      - rust/**
      - .github/workflows/python.yml
@@ -16,6 +18,9 @@ on:
      - .github/workflows/build_windows_wheel/**
      - .github/workflows/run_tests/**

+permissions:
+  contents: read
+
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
@@ -107,7 +112,6 @@ jobs:
      - name: Install
        run: |
          pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
-          pip install tantivy
          pip install mlx
      - name: Doctest
        run: pytest --doctest-modules python/lancedb
@@ -226,6 +230,5 @@ jobs:
          pip install "pydantic<2"
          pip install pyarrow==16
          pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
-          pip install tantivy
      - name: Run tests
        run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -4,13 +4,21 @@ on:
  push:
    branches:
      - main
+      - release/**
  pull_request:
    paths:
      - Cargo.toml
      - Cargo.lock
+      - rust-toolchain.toml
+      - deny.toml
      - rust/**
+      - nodejs/Cargo.toml
+      - python/Cargo.toml
      - .github/workflows/rust.yml

+permissions:
+  contents: read
+
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
@@ -52,6 +60,17 @@ jobs:
      - name: Run clippy (without remote feature)
        run: cargo clippy --profile ci --workspace --tests -- -D warnings

+  deny:
+    # Supply-chain checks: advisories, licenses, banned crates, and source
+    # restrictions. Configuration lives in `deny.toml` at the workspace root.
+    timeout-minutes: 10
+    runs-on: ubuntu-24.04
+    steps:
+      - uses: actions/checkout@v4
+      - uses: EmbarkStudios/cargo-deny-action@v2
+        with:
+          command: check advisories bans licenses sources
+
  build-no-lock:
    runs-on: ubuntu-24.04
    timeout-minutes: 30
--- a/.github/workflows/update_package_lock_run.yml
+++ b/.github/workflows/update_package_lock_run.yml
@@ -3,6 +3,9 @@ name: Update package-lock.json
 on:
  workflow_dispatch:

+permissions:
+  contents: read
+
 jobs:
  publish:
    runs-on: ubuntu-latest
--- a/.github/workflows/update_package_lock_run_nodejs.yml
+++ b/.github/workflows/update_package_lock_run_nodejs.yml
@@ -3,6 +3,9 @@ name: Update NodeJs package-lock.json
 on:
  workflow_dispatch:

+permissions:
+  contents: read
+
 jobs:
  publish:
    runs-on: ubuntu-latest
--- a/.github/workflows/upload_wheel/action.yml
+++ b/.github/workflows/upload_wheel/action.yml
@@ -2,9 +2,6 @@ name: upload-wheel

 description: "Upload wheels to Pypi"
 inputs:
-  pypi_token:
-    required: true
-    description: "release token for the repo"
  fury_token:
    required: true
    description: "release token for the fury repo"
@@ -12,12 +9,6 @@ inputs:
 runs:
  using: "composite"
  steps:
-  - name: Install dependencies
-    shell: bash
-    run: |
-      python -m pip install --upgrade pip
-      pip install twine
-      python3 -m pip install --upgrade pkginfo
  - name: Choose repo
    shell: bash
    id: choose_repo
@@ -27,19 +18,17 @@ runs:
      else
        echo "repo=pypi" >> $GITHUB_OUTPUT
      fi
-  - name: Publish to PyPI
+  - name: Publish to Fury
+    if: steps.choose_repo.outputs.repo == 'fury'
    shell: bash
    env:
      FURY_TOKEN: ${{ inputs.fury_token }}
-      PYPI_TOKEN: ${{ inputs.pypi_token }}
    run: |
-      if [[ ${{ steps.choose_repo.outputs.repo }} == fury ]]; then
-        WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
-        echo "Uploading $WHEEL to Fury"
-        curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
-      else
-        twine upload --repository ${{ steps.choose_repo.outputs.repo }} \
-          --username __token__ \
-          --password $PYPI_TOKEN \
-          target/wheels/lancedb-*.whl
-      fi
+      WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
+      echo "Uploading $WHEEL to Fury"
+      curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
+  - name: Publish to PyPI
+    if: steps.choose_repo.outputs.repo == 'pypi'
+    uses: pypa/gh-action-pypi-publish@release/v1
+    with:
+      packages-dir: target/wheels/
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,5 @@
 [workspace]
 members = ["rust/lancedb", "nodejs", "python"]
-# Python package needs to be built by maturin.
-exclude = ["python"]
 resolver = "2"

 [workspace.package]
@@ -15,40 +13,40 @@ categories = ["database-implementations"]
 rust-version = "1.91.0"

 [workspace.dependencies]
-lance = { version = "=4.0.0", default-features = false }
-lance-core = { version = "=4.0.0" }
-lance-datagen = { version = "=4.0.0" }
-lance-file = { version = "=4.0.0" }
-lance-io = { version = "=4.0.0", default-features = false }
-lance-index = { version = "=4.0.0" }
-lance-linalg = { version = "=4.0.0" }
-lance-namespace = { version = "=4.0.0" }
-lance-namespace-impls = { version = "=4.0.0", default-features = false }
-lance-table = { version = "=4.0.0" }
-lance-testing = { version = "=4.0.0" }
-lance-datafusion = { version = "=4.0.0" }
-lance-encoding = { version = "=4.0.0" }
-lance-arrow = { version = "=4.0.0" }
+lance = { "version" = "=6.0.0", default-features = false }
+lance-core = "=6.0.0"
+lance-datagen = "=6.0.0"
+lance-file = "=6.0.0"
+lance-io = { "version" = "=6.0.0", default-features = false }
+lance-index = "=6.0.0"
+lance-linalg = "=6.0.0"
+lance-namespace = "=6.0.0"
+lance-namespace-impls = { "version" = "=6.0.0", default-features = false }
+lance-table = "=6.0.0"
+lance-testing = "=6.0.0"
+lance-datafusion = "=6.0.0"
+lance-encoding = "=6.0.0"
+lance-arrow = "=6.0.0"
 ahash = "0.8"
 # Note that this one does not include pyarrow
-arrow = { version = "57.2", optional = false }
-arrow-array = "57.2"
-arrow-data = "57.2"
-arrow-ipc = "57.2"
-arrow-ord = "57.2"
-arrow-schema = "57.2"
-arrow-select = "57.2"
-arrow-cast = "57.2"
+arrow = { version = "58.0.0", optional = false }
+arrow-array = "58.0.0"
+arrow-data = "58.0.0"
+arrow-ipc = "58.0.0"
+arrow-ord = "58.0.0"
+arrow-schema = "58.0.0"
+arrow-select = "58.0.0"
+arrow-cast = "58.0.0"
 async-trait = "0"
-datafusion = { version = "52.1", default-features = false }
-datafusion-catalog = "52.1"
-datafusion-common = { version = "52.1", default-features = false }
-datafusion-execution = "52.1"
-datafusion-expr = "52.1"
-datafusion-functions = "52.1"
-datafusion-physical-plan = "52.1"
-datafusion-physical-expr = "52.1"
-datafusion-sql = "52.1"
+datafusion = { version = "53.0.0", default-features = false }
+datafusion-catalog = "53.0.0"
+datafusion-common = { version = "53.0.0", default-features = false }
+datafusion-execution = "53.0.0"
+datafusion-expr = "53.0.0"
+datafusion-functions = "53.0.0"
+datafusion-physical-plan = "53.0.0"
+datafusion-physical-expr = "53.0.0"
+datafusion-sql = "53.0.0"
 env_logger = "0.11"
 half = { "version" = "2.7.1", default-features = false, features = [
    "num-traits",
--- a/README.md
+++ b/README.md
@@ -1,3 +1,9 @@
+<a href="https://cloud.lancedb.com" target="_blank">
+  <img src="https://github.com/user-attachments/assets/92dad0a2-2a37-4ce1-b783-0d1b4f30a00c" alt="LanceDB Cloud Public Beta" width="100%" style="max-width: 100%;">
+</a>
+<div align="center">
+
+[![LanceDB](docs/src/assets/hero-header.png)](https://lancedb.com)
 [![Website](https://img.shields.io/badge/-Website-100000?style=for-the-badge&labelColor=645cfb&color=645cfb)](https://lancedb.com/)
 [![Blog](https://img.shields.io/badge/Blog-100000?style=for-the-badge&labelColor=645cfb&color=645cfb)](https://blog.lancedb.com/)
 [![Discord](https://img.shields.io/badge/-Discord-100000?style=for-the-badge&logo=discord&logoColor=white&labelColor=645cfb&color=645cfb)](https://discord.gg/zMM32dvNtd)
@@ -9,7 +15,7 @@

 # **The Multimodal AI Lakehouse**

-[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://lancedb.com/docs) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦  [**Contributors**](#contributors) 
+[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://docs.lancedb.com) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦  [**Contributors**](#contributors) 

 **The ultimate multimodal data platform for AI/ML applications.** 

@@ -51,7 +57,7 @@ LanceDB is a central location where developers can build, train and analyze thei

 ## **How to Install**:

-Follow the [Quickstart](https://lancedb.com/docs/quickstart/) doc to set up LanceDB locally. 
+Follow the [Quickstart](https://docs.lancedb.com/quickstart) doc to set up LanceDB locally. 

 **API & SDK:** We also support Python, Typescript and Rust SDKs

--- a/deny.toml
+++ b/deny.toml
@@ -0,0 +1,196 @@
+# cargo-deny configuration for LanceDB.
+#
+# Run locally with `cargo deny check`. See
+# https://embarkstudios.github.io/cargo-deny/ for the full reference.
+
+# The set of target triples we care about. cargo-deny will only consider
+# dependencies that are used on at least one of these targets. Keeping this
+# explicit avoids noise from platform-specific crates (e.g. wasm, android,
+# ios) that we never actually ship.
+[graph]
+targets = [
+    "x86_64-unknown-linux-gnu",
+    "aarch64-unknown-linux-gnu",
+    "x86_64-apple-darwin",
+    "aarch64-apple-darwin",
+    "x86_64-pc-windows-msvc",
+    "aarch64-pc-windows-msvc",
+]
+all-features = true
+
+[output]
+feature-depth = 1
+
+# ---------------------------------------------------------------------------
+# Advisories: security vulnerabilities and yanked crates.
+# ---------------------------------------------------------------------------
+[advisories]
+version = 2
+# Fail the check if any crate in the lockfile has been yanked from crates.io.
+# Yanked crates are a signal the author retracted the release (often due to
+# bugs or security issues) and should not be depended on.
+yanked = "deny"
+# Advisory IDs we have explicitly reviewed and chosen to accept. Every
+# entry must include a rationale and, where possible, an upstream issue
+# pointing to a fix. Revisit this list whenever dependencies are updated.
+ignore = [
+    # rsa: Marvin Attack timing side-channel in PKCS#1 v1.5 decryption.
+    # Reached only through opendal → reqsign → rsa. We do not use RSA
+    # decryption in LanceDB ourselves; this is dormant in the signing path.
+    # No fixed release exists upstream as of this writing.
+    # https://rustsec.org/advisories/RUSTSEC-2023-0071
+    { id = "RUSTSEC-2023-0071", reason = "rsa crate via opendal/reqsign; no fixed upstream release" },
+
+    # instant: unmaintained. Pulled in via backoff → instant. Upstream
+    # recommends switching to `web-time`; fix has to come from backoff.
+    # https://rustsec.org/advisories/RUSTSEC-2024-0384
+    { id = "RUSTSEC-2024-0384", reason = "transitive via backoff; waiting on backoff replacement" },
+
+    # paste: unmaintained (author archived the repo). Used transitively by
+    # datafusion and the arrow ecosystem; widespread, no drop-in replacement.
+    # https://rustsec.org/advisories/RUSTSEC-2024-0436
+    { id = "RUSTSEC-2024-0436", reason = "transitive via datafusion; awaiting ecosystem migration" },
+
+    # encoding: unmaintained. Reached through lindera-dictionary, which is
+    # required by the native Lindera tokenizer path. Lindera has not migrated
+    # off this crate yet.
+    # https://rustsec.org/advisories/RUSTSEC-2021-0153
+    { id = "RUSTSEC-2021-0153", reason = "transitive via lindera-dictionary for native Lindera tokenizer" },
+
+    # fast-float: unsound and unmaintained. Reached only through polars-arrow
+    # from the optional Polars integration; replacement requires a Polars
+    # dependency upgrade.
+    # https://rustsec.org/advisories/RUSTSEC-2024-0379
+    { id = "RUSTSEC-2024-0379", reason = "transitive via polars-arrow; waiting on Polars migration" },
+
+    # tantivy: segfault on malformed input due to missing bounds check.
+    # Pulled in via lance for full-text search. We only feed tantivy
+    # documents we construct ourselves, not attacker-controlled bytes.
+    # Tracked for a lance dependency bump.
+    # https://rustsec.org/advisories/RUSTSEC-2025-0003
+    { id = "RUSTSEC-2025-0003", reason = "tantivy via lance; inputs are internally produced, not user-supplied bytes" },
+
+    # backoff: unmaintained. Reached only via async-openai. Replacement
+    # requires async-openai to migrate (or us to drop async-openai).
+    # https://rustsec.org/advisories/RUSTSEC-2025-0012
+    { id = "RUSTSEC-2025-0012", reason = "transitive via async-openai; waiting on upstream migration" },
+
+    # number_prefix: unmaintained. Transitive via indicatif → hf-hub.
+    # No security impact, just maintenance status.
+    # https://rustsec.org/advisories/RUSTSEC-2025-0119
+    { id = "RUSTSEC-2025-0119", reason = "transitive via hf-hub/indicatif; cosmetic formatting crate" },
+
+    # bincode: unmaintained. Reached through lindera and lindera-dictionary,
+    # which are required by the native Lindera tokenizer path. Lindera has not
+    # migrated to another serialization format yet.
+    # https://rustsec.org/advisories/RUSTSEC-2025-0141
+    { id = "RUSTSEC-2025-0141", reason = "transitive via lindera/lindera-dictionary for native Lindera tokenizer" },
+
+    # lru: soundness issue in IterMut. Reached only through aws-sdk-s3 in
+    # LanceDB's dev-dependency graph; LanceDB does not use that iterator
+    # directly. Clearing this requires the AWS SDK chain to update lru.
+    # https://rustsec.org/advisories/RUSTSEC-2026-0002
+    { id = "RUSTSEC-2026-0002", reason = "transitive via aws-sdk-s3 dev-dependency; waiting on AWS SDK lru upgrade" },
+
+    # rustls-webpki 0.101.7 (old major line): name-constraint checks for
+    # URI / wildcard names. Pulled in only via the legacy rustls 0.21 chain
+    # from aws-smithy-http-client. The 0.103 line we actively use is patched.
+    # Clearing the 0.101 copy requires the aws-sdk chain to migrate off
+    # rustls 0.21.
+    # https://rustsec.org/advisories/RUSTSEC-2026-0098
+    # https://rustsec.org/advisories/RUSTSEC-2026-0099
+    { id = "RUSTSEC-2026-0098", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
+    { id = "RUSTSEC-2026-0099", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
+
+    # rustls-webpki 0.101.7: reachable panic in CRL parsing. Same legacy
+    # rustls 0.21 chain from aws-smithy-http-client as above. The 0.103 line
+    # we actively use has been upgraded to 0.103.13, which contains the fix.
+    # https://rustsec.org/advisories/RUSTSEC-2026-0104
+    { id = "RUSTSEC-2026-0104", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
+
+    # rand 0.8.5: soundness issue only when ThreadRng reseeds inside a custom
+    # logger. Reached through several transitive chains. LanceDB does not use
+    # rand from a custom logger; upgrade once all pinned chains accept 0.8.6+.
+    # https://rustsec.org/advisories/RUSTSEC-2026-0097
+    { id = "RUSTSEC-2026-0097", reason = "transitive rand 0.8.5; LanceDB does not call ThreadRng from custom logging" },
+]
+
+# ---------------------------------------------------------------------------
+# Licenses: only allow licenses we've reviewed as compatible with Apache-2.0.
+# ---------------------------------------------------------------------------
+[licenses]
+version = 2
+# SPDX identifiers for licenses that are compatible with our Apache-2.0
+# distribution. Additions require legal review.
+allow = [
+    "Apache-2.0",
+    "Apache-2.0 WITH LLVM-exception",
+    "MIT",
+    "BSD-2-Clause",
+    "BSD-3-Clause",
+    "ISC",
+    "Unicode-3.0",
+    "Unicode-DFS-2016",
+    "Zlib",
+    "CC0-1.0",
+    "MPL-2.0",
+    "BSL-1.0",
+    "OpenSSL",
+    # 0BSD ("BSD Zero Clause") is effectively public domain — no attribution
+    # required. Pulled in by `mock_instant`.
+    "0BSD",
+    # bzip2-1.0.6 is the permissive upstream bzip2 license (BSD-like). Pulled
+    # in by `libbz2-rs-sys`, the pure-Rust bzip2 implementation.
+    "bzip2-1.0.6",
+    # CDLA-Permissive-2.0 is a permissive data license used by `webpki-roots`
+    # for the Mozilla CA root bundle. Data-only, distribution-compatible.
+    "CDLA-Permissive-2.0",
+]
+confidence-threshold = 0.8
+# Crates whose license cannot be determined from Cargo metadata but whose
+# license we've manually confirmed from upstream. Keep this list minimal.
+[[licenses.clarify]]
+# polars-arrow-format omits the `license` field in its Cargo.toml, but the
+# upstream repo (pola-rs/polars-arrow-format) is dual-licensed Apache-2.0 OR
+# MIT. See https://github.com/pola-rs/polars-arrow-format/blob/main/LICENSE
+crate = "polars-arrow-format"
+expression = "Apache-2.0 OR MIT"
+license-files = []
+
+# ---------------------------------------------------------------------------
+# Bans: disallow specific crates and flag dependency hygiene issues.
+# ---------------------------------------------------------------------------
+[bans]
+# Warn (not deny) on duplicate versions of the same crate. In a large
+# workspace like this one, duplicates are common and often unavoidable
+# transitively. We surface them to discourage growth, but don't fail CI.
+multiple-versions = "warn"
+# Wildcard version requirements (`foo = "*"`) are a footgun — they let any
+# future release in without review. Ban them outright.
+wildcards = "deny"
+# Internal workspace crates reference each other via `path = "..."`, which
+# cargo-deny sees as a wildcard version. That's fine for private workspace
+# members (not published to crates.io), so allow it specifically for paths.
+allow-wildcard-paths = true
+# Crates that are explicitly banned; any match in the graph fails the check.
+deny = []
+# Crates to skip when checking for duplicate versions.
+skip = []
+# Similar to `skip`, but also skips the entire transitive subtree.
+skip-tree = []
+
+# ---------------------------------------------------------------------------
+# Sources: restrict where crates can come from.
+# ---------------------------------------------------------------------------
+[sources]
+# Deny any registry other than the ones explicitly listed below.
+unknown-registry = "deny"
+# Deny any git dependency whose repository URL isn't in the allow-list below.
+# This prevents accidental pulls from arbitrary forks.
+unknown-git = "deny"
+allow-registry = ["https://github.com/rust-lang/crates.io-index"]
+# Lance is developed in a sibling repo and pulled as a git dependency until
+# releases are cut to crates.io. Allow that specific repository.
+allow-git = [
+    "https://github.com/lance-format/lance",
+]
--- a/dockerfiles/Dockerfile
+++ b/dockerfiles/Dockerfile
@@ -24,4 +24,4 @@ RUN python --version && \
  rustc --version && \
  protoc --version

-RUN pip install --no-cache-dir tantivy lancedb
+RUN pip install --no-cache-dir lancedb
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,6 +1,6 @@
 # LanceDB Documentation

-LanceDB docs are available at [lancedb.com/docs](https://lancedb.com/docs).
+LanceDB docs are available at [docs.lancedb.com](https://docs.lancedb.com).

 The SDK docs are built and deployed automatically by [Github Actions](../.github/workflows/docs.yml)
 whenever a commit is pushed to the `main` branch. So it is possible for the docs to show
--- a/docs/src/java/java.md
+++ b/docs/src/java/java.md
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
 <dependency>
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-core</artifactId>
-    <version>0.27.2</version>
+    <version>0.29.0</version>
 </dependency>
 ```

@@ -57,32 +57,32 @@ LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()

 ## Metadata Operations

-### Creating a Namespace
+### Creating a Namespace Path

-Namespaces organize tables hierarchically. Create a namespace before creating tables within it:
+Namespace paths organize tables hierarchically. Create the desired namespace path before creating tables within it:

 ```java
 import org.lance.namespace.model.CreateNamespaceRequest;
 import org.lance.namespace.model.CreateNamespaceResponse;

-// Create a child namespace
+// Create a child namespace path
 CreateNamespaceRequest request = new CreateNamespaceRequest();
 request.setId(Arrays.asList("my_namespace"));

 CreateNamespaceResponse response = namespaceClient.createNamespace(request);
 ```

-You can also create nested namespaces:
+You can also create nested namespace paths:

 ```java
-// Create a nested namespace: parent/child
+// Create a nested namespace path: parent/child
 CreateNamespaceRequest request = new CreateNamespaceRequest();
 request.setId(Arrays.asList("parent_namespace", "child_namespace"));

 CreateNamespaceResponse response = namespaceClient.createNamespace(request);
 ```

-### Describing a Namespace
+### Describing a Namespace Path

 ```java
 import org.lance.namespace.model.DescribeNamespaceRequest;
@@ -95,22 +95,22 @@ DescribeNamespaceResponse response = namespaceClient.describeNamespace(request);
 System.out.println("Namespace properties: " + response.getProperties());
 ```

-### Listing Namespaces
+### Listing Namespace Paths

 ```java
 import org.lance.namespace.model.ListNamespacesRequest;
 import org.lance.namespace.model.ListNamespacesResponse;

-// List all namespaces at root level
+// List all namespace paths at the root level
 ListNamespacesRequest request = new ListNamespacesRequest();
 request.setId(Arrays.asList());  // Empty for root

 ListNamespacesResponse response = namespaceClient.listNamespaces(request);
 for (String ns : response.getNamespaces()) {
-    System.out.println("Namespace: " + ns);
+    System.out.println("Namespace path: " + ns);
 }

-// List child namespaces under a parent
+// List child namespace paths under a parent path
 ListNamespacesRequest childRequest = new ListNamespacesRequest();
 childRequest.setId(Arrays.asList("parent_namespace"));

@@ -123,7 +123,7 @@ ListNamespacesResponse childResponse = namespaceClient.listNamespaces(childReque
 import org.lance.namespace.model.ListTablesRequest;
 import org.lance.namespace.model.ListTablesResponse;

-// List tables in a namespace
+// List tables in a namespace path
 ListTablesRequest request = new ListTablesRequest();
 request.setId(Arrays.asList("my_namespace"));

@@ -133,7 +133,7 @@ for (String table : response.getTables()) {
 }
 ```

-### Dropping a Namespace
+### Dropping a Namespace Path

 ```java
 import org.lance.namespace.model.DropNamespaceRequest;
@@ -175,7 +175,7 @@ DropTableResponse response = namespaceClient.dropTable(request);

 ### Creating a Table

-Tables are created within a namespace by providing data in Apache Arrow IPC format:
+Tables are created within a namespace path by providing data in Apache Arrow IPC format:

 ```java
 import org.lance.namespace.LanceNamespace;
@@ -242,7 +242,7 @@ try (BufferAllocator allocator = new RootAllocator();
    }
    byte[] tableData = out.toByteArray();

-    // Create table in a namespace
+    // Create a table in a namespace path
    CreateTableRequest request = new CreateTableRequest();
    request.setId(Arrays.asList("my_namespace", "my_table"));
    CreateTableResponse response = namespaceClient.createTable(request, tableData);
--- a/docs/src/js/README.md
+++ b/docs/src/js/README.md
@@ -34,7 +34,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
 console.log(results);
 ```

-The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
+The [quickstart](https://docs.lancedb.com/quickstart/) contains more complete examples.

 ## Development

--- a/docs/src/js/classes/Connection.md
+++ b/docs/src/js/classes/Connection.md
@@ -61,8 +61,8 @@ sharing the same data, deletion, and index files.
 * **options.sourceVersion?**: `number`
    The version of the source table to clone.

-* **options.targetNamespace?**: `string`[]
-    The namespace for the target table (defaults to root namespace).
+* **options.targetNamespacePath?**: `string`[]
+    The namespace path for the target table (defaults to root namespace).

 #### Returns

@@ -116,13 +116,13 @@ Creates a new empty Table

 `Promise`&lt;[`Table`](Table.md)&gt;

-#### createEmptyTable(name, schema, namespace, options)
+#### createEmptyTable(name, schema, namespacePath, options)

 ```ts
 abstract createEmptyTable(
   name,
   schema,
-   namespace?,
+   namespacePath?,
   options?): Promise<Table>
 ```

@@ -136,8 +136,8 @@ Creates a new empty Table
 * **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
    The schema of the table

-* **namespace?**: `string`[]
-    The namespace to create the table in (defaults to root namespace)
+* **namespacePath?**: `string`[]
+    The namespace path to create the table in (defaults to root namespace)

 * **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
    Additional options
@@ -150,10 +150,10 @@ Creates a new empty Table

 ### createTable()

-#### createTable(options, namespace)
+#### createTable(options, namespacePath)

 ```ts
-abstract createTable(options, namespace?): Promise<Table>
+abstract createTable(options, namespacePath?): Promise<Table>
 ```

 Creates a new Table and initialize it with new data.
@@ -163,8 +163,8 @@ Creates a new Table and initialize it with new data.
 * **options**: `object` & `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
    The options object.

-* **namespace?**: `string`[]
-    The namespace to create the table in (defaults to root namespace)
+* **namespacePath?**: `string`[]
+    The namespace path to create the table in (defaults to root namespace)

 ##### Returns

@@ -197,13 +197,13 @@ Creates a new Table and initialize it with new data.

 `Promise`&lt;[`Table`](Table.md)&gt;

-#### createTable(name, data, namespace, options)
+#### createTable(name, data, namespacePath, options)

 ```ts
 abstract createTable(
   name,
   data,
-   namespace?,
+   namespacePath?,
   options?): Promise<Table>
 ```

@@ -218,8 +218,8 @@ Creates a new Table and initialize it with new data.
    Non-empty Array of Records
    to be inserted into the table

-* **namespace?**: `string`[]
-    The namespace to create the table in (defaults to root namespace)
+* **namespacePath?**: `string`[]
+    The namespace path to create the table in (defaults to root namespace)

 * **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
    Additional options
@@ -247,15 +247,15 @@ Return a brief description of the connection
 ### dropAllTables()

 ```ts
-abstract dropAllTables(namespace?): Promise<void>
+abstract dropAllTables(namespacePath?): Promise<void>
 ```

 Drop all tables in the database.

 #### Parameters

-* **namespace?**: `string`[]
-    The namespace to drop tables from (defaults to root namespace).
+* **namespacePath?**: `string`[]
+    The namespace path to drop tables from (defaults to root namespace).

 #### Returns

@@ -266,7 +266,7 @@ Drop all tables in the database.
 ### dropTable()

 ```ts
-abstract dropTable(name, namespace?): Promise<void>
+abstract dropTable(name, namespacePath?): Promise<void>
 ```

 Drop an existing table.
@@ -276,8 +276,8 @@ Drop an existing table.
 * **name**: `string`
    The name of the table to drop.

-* **namespace?**: `string`[]
-    The namespace of the table (defaults to root namespace).
+* **namespacePath?**: `string`[]
+    The namespace path of the table (defaults to root namespace).

 #### Returns

@@ -304,7 +304,7 @@ Return true if the connection has not been closed
 ```ts
 abstract openTable(
   name,
-   namespace?,
+   namespacePath?,
   options?): Promise<Table>
 ```

@@ -315,8 +315,8 @@ Open a table in the database.
 * **name**: `string`
    The name of the table

-* **namespace?**: `string`[]
-    The namespace of the table (defaults to root namespace)
+* **namespacePath?**: `string`[]
+    The namespace path of the table (defaults to root namespace)

 * **options?**: `Partial`&lt;[`OpenTableOptions`](../interfaces/OpenTableOptions.md)&gt;
    Additional options
@@ -349,10 +349,10 @@ Tables will be returned in lexicographical order.

 `Promise`&lt;`string`[]&gt;

-#### tableNames(namespace, options)
+#### tableNames(namespacePath, options)

 ```ts
-abstract tableNames(namespace?, options?): Promise<string[]>
+abstract tableNames(namespacePath?, options?): Promise<string[]>
 ```

 List all the table names in this database.
@@ -361,8 +361,8 @@ Tables will be returned in lexicographical order.

 ##### Parameters

-* **namespace?**: `string`[]
-    The namespace to list tables from (defaults to root namespace)
+* **namespacePath?**: `string`[]
+    The namespace path to list tables from (defaults to root namespace)

 * **options?**: `Partial`&lt;[`TableNamesOptions`](../interfaces/TableNamesOptions.md)&gt;
    options to control the
--- a/docs/src/js/classes/Table.md
+++ b/docs/src/js/classes/Table.md
@@ -501,6 +501,34 @@ Modeled after ``VACUUM`` in PostgreSQL.

 ***

+### prewarmData()
+
+```ts
+abstract prewarmData(columns?): Promise<void>
+```
+
+Prewarm one or more columns of data in the table.
+
+#### Parameters
+
+* **columns?**: `string`[]
+    The columns to prewarm. If undefined, all columns are prewarmed.
+    This will load the column data into the page cache so that future queries that
+    read those columns avoid the initial cold-start latency.  This call initiates
+    prewarming and returns once the request is accepted; the warming itself may
+    continue in the background.  Calling it on already-prewarmed columns is a
+    no-op on the server.
+    Prewarming is generally useful for columns used in filters or projections.
+    Large columns (e.g. high-dimensional vectors or binary data) may not be
+    practical to prewarm.
+    This feature is currently only supported on remote tables.
+
+#### Returns
+
+`Promise`&lt;`void`&gt;
+
+***
+
 ### prewarmIndex()

 ```ts
--- a/docs/src/js/interfaces/ClientConfig.md
+++ b/docs/src/js/interfaces/ClientConfig.md
@@ -53,3 +53,18 @@ optional tlsConfig: TlsConfig;
 ```ts
 optional userAgent: string;
 ```
+
+***
+
+### userId?
+
+```ts
+optional userId: string;
+```
+
+User identifier for tracking purposes.
+
+This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
+It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
+Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
+variable that contains the user ID value.
--- a/docs/src/js/interfaces/ConnectionOptions.md
+++ b/docs/src/js/interfaces/ConnectionOptions.md
@@ -41,6 +41,29 @@ for testing purposes.

 ***

+### manifestEnabled?
+
+```ts
+optional manifestEnabled: boolean;
+```
+
+(For LanceDB OSS only): use directory namespace manifests as the source
+of truth for table metadata. Existing directory-listed root tables are
+migrated into the manifest on access.
+
+***
+
+### namespaceClientProperties?
+
+```ts
+optional namespaceClientProperties: Record<string, string>;
+```
+
+(For LanceDB OSS only): extra properties for the backing namespace
+client used by manifest-enabled native connections.
+
+***
+
 ### readConsistencyInterval?

 ```ts
@@ -89,4 +112,4 @@ optional storageOptions: Record<string, string>;

 (For LanceDB OSS only): configuration for object storage.

-The available options are described at https://lancedb.com/docs/storage/
+The available options are described at https://docs.lancedb.com/storage/
--- a/docs/src/js/interfaces/CreateTableOptions.md
+++ b/docs/src/js/interfaces/CreateTableOptions.md
@@ -97,4 +97,4 @@ Configuration for object storage.
 Options already set on the connection will be inherited by the table,
 but can be overridden here.

-The available options are described at https://lancedb.com/docs/storage/
+The available options are described at https://docs.lancedb.com/storage/
--- a/docs/src/js/interfaces/OpenTableOptions.md
+++ b/docs/src/js/interfaces/OpenTableOptions.md
@@ -42,4 +42,4 @@ Configuration for object storage.
 Options already set on the connection will be inherited by the table,
 but can be overridden here.

-The available options are described at https://lancedb.com/docs/storage/
+The available options are described at https://docs.lancedb.com/storage/
--- a/docs/src/python/python.md
+++ b/docs/src/python/python.md
@@ -94,11 +94,11 @@ of raw SQL strings with [where][lancedb.query.LanceQueryBuilder.where] and

 ## Full text search

-::: lancedb.fts.create_index
+Use [lancedb.table.Table.create_fts_index][] for the synchronous API or
+[lancedb.table.AsyncTable.create_index][] with [lancedb.index.FTS][] for the
+asynchronous API.

-::: lancedb.fts.populate_index
-
-::: lancedb.fts.search_index
+::: lancedb.index.FTS

 ## Utilities

--- a/java/lancedb-core/pom.xml
+++ b/java/lancedb-core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
      <groupId>com.lancedb</groupId>
      <artifactId>lancedb-parent</artifactId>
-      <version>0.27.2-final.0</version>
+      <version>0.29.0-final.0</version>
      <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.27.2-final.0</version>
+    <version>0.29.0-final.0</version>
    <packaging>pom</packaging>
    <name>${project.artifactId}</name>
    <description>LanceDB Java SDK Parent POM</description>
@@ -28,7 +28,7 @@
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <arrow.version>15.0.0</arrow.version>
-        <lance-core.version>3.0.1</lance-core.version>
+        <lance-core.version>6.0.0</lance-core.version>
        <spotless.skip>false</spotless.skip>
        <spotless.version>2.30.0</spotless.version>
        <spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,8 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.27.2"
+version = "0.29.0"
+publish = false
 license.workspace = true
 description.workspace = true
 repository.workspace = true
@@ -15,7 +16,7 @@ crate-type = ["cdylib"]
 async-trait.workspace = true
 arrow-ipc.workspace = true
 arrow-array.workspace = true
-arrow-buffer = "57.2"
+arrow-buffer = "58.0.0"
 half.workspace = true
 arrow-schema.workspace = true
 env_logger.workspace = true
@@ -31,8 +32,8 @@ lzma-sys = { version = "0.1", features = ["static"] }
 log.workspace = true

 # Pin to resolve build failures; update periodically for security patches.
-aws-lc-sys = "=0.38.0"
-aws-lc-rs = "=1.16.1"
+aws-lc-sys = "=0.40.0"
+aws-lc-rs = "=1.16.3"

 [build-dependencies]
 napi-build = "2.3.1"
--- a/nodejs/README.md
+++ b/nodejs/README.md
@@ -30,7 +30,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
 console.log(results);
 ```

-The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
+The [quickstart](https://docs.lancedb.com/quickstart/) contains more complete examples.

 ## Development

--- a/nodejs/test/rerankers.test.ts
+++ b/nodejs/test/rerankers.test.ts
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

+import { spawn } from "node:child_process";
+import * as path from "node:path";
 import { RecordBatch } from "apache-arrow";
 import * as tmp from "tmp";
 import { Connection, Index, Table, connect, makeArrowTable } from "../lancedb";
@@ -76,4 +78,91 @@ describe("rerankers", function () {

    expect(result).toHaveLength(2);
  });
+
+  it("does not keep process alive after rerank query", async function () {
+    const script = `
+import * as lancedb from "./dist/index.js";
+import * as os from "node:os";
+import * as path from "node:path";
+import * as fs from "node:fs/promises";
+
+const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lancedb-rerank-exit-"));
+const db = await lancedb.connect(dir);
+const table = await db.createTable("test", [{ text: "hello", vector: [1, 2, 3] }], {
+  mode: "overwrite",
+});
+await table.createIndex("text", { config: lancedb.Index.fts() });
+await table.waitForIndex(["text_idx"], 30);
+
+const reranker = await lancedb.rerankers.RRFReranker.create();
+await table
+  .query()
+  .nearestTo([1, 2, 3])
+  .fullTextSearch("hello")
+  .rerank(reranker)
+  .toArray();
+
+table.close();
+db.close();
+`;
+
+    await new Promise<void>((resolve, reject) => {
+      const child = spawn(
+        process.execPath,
+        ["--input-type=module", "-e", script],
+        {
+          cwd: path.resolve(__dirname, ".."),
+          stdio: ["ignore", "pipe", "pipe"],
+        },
+      );
+
+      let stdout = "";
+      let stderr = "";
+
+      child.stdout.on("data", (chunk) => {
+        stdout += chunk.toString();
+      });
+
+      child.stderr.on("data", (chunk) => {
+        stderr += chunk.toString();
+      });
+
+      const timeout = setTimeout(() => {
+        child.kill();
+        reject(
+          new Error(
+            `child process did not exit in time\nstdout:\n${stdout}\nstderr:\n${stderr}`,
+          ),
+        );
+      }, 20_000);
+
+      child.on("error", (err) => {
+        clearTimeout(timeout);
+        reject(err);
+      });
+
+      child.on("exit", (code, signal) => {
+        clearTimeout(timeout);
+        if (signal !== null) {
+          reject(
+            new Error(
+              `child process exited with signal ${signal}\nstdout:\n${stdout}\nstderr:\n${stderr}`,
+            ),
+          );
+          return;
+        }
+
+        if (code !== 0) {
+          reject(
+            new Error(
+              `child process exited with code ${code}\nstdout:\n${stdout}\nstderr:\n${stderr}`,
+            ),
+          );
+          return;
+        }
+
+        resolve();
+      });
+    });
+  });
 });
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -103,7 +103,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        },
        numIndices: 0,
        numRows: 3,
-        totalBytes: 24,
+        totalBytes: 44,
      });
    });

@@ -1870,6 +1870,25 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      expect(results.length).toBe(3);
    });

+    test("prewarmData errors on local tables", async () => {
+      const db = await connect(tmpDir.name);
+      const data = [
+        { text: "alpha", vector: [0.1, 0.2, 0.3] },
+        { text: "beta", vector: [0.4, 0.5, 0.6] },
+      ];
+      const table = await db.createTable("prewarm_data_test", data);
+
+      // prewarmData is only supported on remote tables. We verify the call
+      // is wired through napi and surfaces the expected error for both
+      // arg shapes (undefined and string[]).
+      await expect(table.prewarmData()).rejects.toThrow(
+        "prewarm_data is currently only supported on remote tables",
+      );
+      await expect(table.prewarmData(["text"])).rejects.toThrow(
+        "prewarm_data is currently only supported on remote tables",
+      );
+    });
+
    test("full text index on list", async () => {
      const db = await connect(tmpDir.name);
      const data = [
--- a/nodejs/lancedb/connection.ts
+++ b/nodejs/lancedb/connection.ts
@@ -42,7 +42,7 @@ export interface CreateTableOptions {
   * Options already set on the connection will be inherited by the table,
   * but can be overridden here.
   *
-   * The available options are described at https://lancedb.com/docs/storage/
+   * The available options are described at https://docs.lancedb.com/storage/
   */
  storageOptions?: Record<string, string>;

@@ -78,7 +78,7 @@ export interface OpenTableOptions {
   * Options already set on the connection will be inherited by the table,
   * but can be overridden here.
   *
-   * The available options are described at https://lancedb.com/docs/storage/
+   * The available options are described at https://docs.lancedb.com/storage/
   */
  storageOptions?: Record<string, string>;
  /**
@@ -166,25 +166,25 @@ export abstract class Connection {
   * List all the table names in this database.
   *
   * Tables will be returned in lexicographical order.
-   * @param {string[]} namespace - The namespace to list tables from (defaults to root namespace)
+   * @param {string[]} namespacePath - The namespace path to list tables from (defaults to root namespace)
   * @param {Partial<TableNamesOptions>} options - options to control the
   * paging / start point
   *
   */
  abstract tableNames(
-    namespace?: string[],
+    namespacePath?: string[],
    options?: Partial<TableNamesOptions>,
  ): Promise<string[]>;

  /**
   * Open a table in the database.
   * @param {string} name - The name of the table
-   * @param {string[]} namespace - The namespace of the table (defaults to root namespace)
+   * @param {string[]} namespacePath - The namespace path of the table (defaults to root namespace)
   * @param {Partial<OpenTableOptions>} options - Additional options
   */
  abstract openTable(
    name: string,
-    namespace?: string[],
+    namespacePath?: string[],
    options?: Partial<OpenTableOptions>,
  ): Promise<Table>;

@@ -193,7 +193,7 @@ export abstract class Connection {
   * @param {object} options - The options object.
   * @param {string} options.name - The name of the table.
   * @param {Data} options.data - Non-empty Array of Records to be inserted into the table
-   * @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
+   * @param {string[]} namespacePath - The namespace path to create the table in (defaults to root namespace)
   *
   */
  abstract createTable(
@@ -201,7 +201,7 @@ export abstract class Connection {
      name: string;
      data: Data;
    } & Partial<CreateTableOptions>,
-    namespace?: string[],
+    namespacePath?: string[],
  ): Promise<Table>;
  /**
   * Creates a new Table and initialize it with new data.
@@ -220,13 +220,13 @@ export abstract class Connection {
   * @param {string} name - The name of the table.
   * @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
   * to be inserted into the table
-   * @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
+   * @param {string[]} namespacePath - The namespace path to create the table in (defaults to root namespace)
   * @param {Partial<CreateTableOptions>} options - Additional options
   */
  abstract createTable(
    name: string,
    data: Record<string, unknown>[] | TableLike,
-    namespace?: string[],
+    namespacePath?: string[],
    options?: Partial<CreateTableOptions>,
  ): Promise<Table>;

@@ -245,28 +245,28 @@ export abstract class Connection {
   * Creates a new empty Table
   * @param {string} name - The name of the table.
   * @param {Schema} schema - The schema of the table
-   * @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
+   * @param {string[]} namespacePath - The namespace path to create the table in (defaults to root namespace)
   * @param {Partial<CreateTableOptions>} options - Additional options
   */
  abstract createEmptyTable(
    name: string,
    schema: import("./arrow").SchemaLike,
-    namespace?: string[],
+    namespacePath?: string[],
    options?: Partial<CreateTableOptions>,
  ): Promise<Table>;

  /**
   * Drop an existing table.
   * @param {string} name The name of the table to drop.
-   * @param {string[]} namespace The namespace of the table (defaults to root namespace).
+   * @param {string[]} namespacePath The namespace path of the table (defaults to root namespace).
   */
-  abstract dropTable(name: string, namespace?: string[]): Promise<void>;
+  abstract dropTable(name: string, namespacePath?: string[]): Promise<void>;

  /**
   * Drop all tables in the database.
-   * @param {string[]} namespace The namespace to drop tables from (defaults to root namespace).
+   * @param {string[]} namespacePath The namespace path to drop tables from (defaults to root namespace).
   */
-  abstract dropAllTables(namespace?: string[]): Promise<void>;
+  abstract dropAllTables(namespacePath?: string[]): Promise<void>;

  /**
   * Clone a table from a source table.
@@ -279,7 +279,7 @@ export abstract class Connection {
   * @param {string} targetTableName - The name of the target table to create.
   * @param {string} sourceUri - The URI of the source table to clone from.
   * @param {object} options - Clone options.
-   * @param {string[]} options.targetNamespace - The namespace for the target table (defaults to root namespace).
+   * @param {string[]} options.targetNamespacePath - The namespace path for the target table (defaults to root namespace).
   * @param {number} options.sourceVersion - The version of the source table to clone.
   * @param {string} options.sourceTag - The tag of the source table to clone.
   * @param {boolean} options.isShallow - Whether to perform a shallow clone (defaults to true).
@@ -288,7 +288,7 @@ export abstract class Connection {
    targetTableName: string,
    sourceUri: string,
    options?: {
-      targetNamespace?: string[];
+      targetNamespacePath?: string[];
      sourceVersion?: number;
      sourceTag?: string;
      isShallow?: boolean;
@@ -319,25 +319,25 @@ export class LocalConnection extends Connection {
  }

  async tableNames(
-    namespaceOrOptions?: string[] | Partial<TableNamesOptions>,
+    namespacePathOrOptions?: string[] | Partial<TableNamesOptions>,
    options?: Partial<TableNamesOptions>,
  ): Promise<string[]> {
-    // Detect if first argument is namespace array or options object
-    let namespace: string[] | undefined;
+    // Detect if first argument is namespacePath array or options object
+    let namespacePath: string[] | undefined;
    let tableNamesOptions: Partial<TableNamesOptions> | undefined;

-    if (Array.isArray(namespaceOrOptions)) {
-      // First argument is namespace array
-      namespace = namespaceOrOptions;
+    if (Array.isArray(namespacePathOrOptions)) {
+      // First argument is namespacePath array
+      namespacePath = namespacePathOrOptions;
      tableNamesOptions = options;
    } else {
      // First argument is options object (backwards compatibility)
-      namespace = undefined;
-      tableNamesOptions = namespaceOrOptions;
+      namespacePath = undefined;
+      tableNamesOptions = namespacePathOrOptions;
    }

    return this.inner.tableNames(
-      namespace ?? [],
+      namespacePath ?? [],
      tableNamesOptions?.startAfter,
      tableNamesOptions?.limit,
    );
@@ -345,12 +345,12 @@ export class LocalConnection extends Connection {

  async openTable(
    name: string,
-    namespace?: string[],
+    namespacePath?: string[],
    options?: Partial<OpenTableOptions>,
  ): Promise<Table> {
    const innerTable = await this.inner.openTable(
      name,
-      namespace ?? [],
+      namespacePath ?? [],
      cleanseStorageOptions(options?.storageOptions),
      options?.indexCacheSize,
    );
@@ -362,7 +362,7 @@ export class LocalConnection extends Connection {
    targetTableName: string,
    sourceUri: string,
    options?: {
-      targetNamespace?: string[];
+      targetNamespacePath?: string[];
      sourceVersion?: number;
      sourceTag?: string;
      isShallow?: boolean;
@@ -371,7 +371,7 @@ export class LocalConnection extends Connection {
    const innerTable = await this.inner.cloneTable(
      targetTableName,
      sourceUri,
-      options?.targetNamespace ?? [],
+      options?.targetNamespacePath ?? [],
      options?.sourceVersion ?? null,
      options?.sourceTag ?? null,
      options?.isShallow ?? true,
@@ -406,42 +406,42 @@ export class LocalConnection extends Connection {
    nameOrOptions:
      | string
      | ({ name: string; data: Data } & Partial<CreateTableOptions>),
-    dataOrNamespace?: Record<string, unknown>[] | TableLike | string[],
-    namespaceOrOptions?: string[] | Partial<CreateTableOptions>,
+    dataOrNamespacePath?: Record<string, unknown>[] | TableLike | string[],
+    namespacePathOrOptions?: string[] | Partial<CreateTableOptions>,
    options?: Partial<CreateTableOptions>,
  ): Promise<Table> {
    if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
-      // First overload: createTable(options, namespace?)
+      // First overload: createTable(options, namespacePath?)
      const { name, data, ...createOptions } = nameOrOptions;
-      const namespace = dataOrNamespace as string[] | undefined;
-      return this._createTableImpl(name, data, namespace, createOptions);
+      const namespacePath = dataOrNamespacePath as string[] | undefined;
+      return this._createTableImpl(name, data, namespacePath, createOptions);
    }

-    // Second overload: createTable(name, data, namespace?, options?)
+    // Second overload: createTable(name, data, namespacePath?, options?)
    const name = nameOrOptions;
-    const data = dataOrNamespace as Record<string, unknown>[] | TableLike;
+    const data = dataOrNamespacePath as Record<string, unknown>[] | TableLike;

-    // Detect if third argument is namespace array or options object
-    let namespace: string[] | undefined;
+    // Detect if third argument is namespacePath array or options object
+    let namespacePath: string[] | undefined;
    let createOptions: Partial<CreateTableOptions> | undefined;

-    if (Array.isArray(namespaceOrOptions)) {
-      // Third argument is namespace array
-      namespace = namespaceOrOptions;
+    if (Array.isArray(namespacePathOrOptions)) {
+      // Third argument is namespacePath array
+      namespacePath = namespacePathOrOptions;
      createOptions = options;
    } else {
      // Third argument is options object (backwards compatibility)
-      namespace = undefined;
-      createOptions = namespaceOrOptions;
+      namespacePath = undefined;
+      createOptions = namespacePathOrOptions;
    }

-    return this._createTableImpl(name, data, namespace, createOptions);
+    return this._createTableImpl(name, data, namespacePath, createOptions);
  }

  private async _createTableImpl(
    name: string,
    data: Data,
-    namespace?: string[],
+    namespacePath?: string[],
    options?: Partial<CreateTableOptions>,
  ): Promise<Table> {
    if (data === undefined) {
@@ -455,7 +455,7 @@ export class LocalConnection extends Connection {
      name,
      buf,
      mode,
-      namespace ?? [],
+      namespacePath ?? [],
      storageOptions,
    );

@@ -465,21 +465,21 @@ export class LocalConnection extends Connection {
  async createEmptyTable(
    name: string,
    schema: import("./arrow").SchemaLike,
-    namespaceOrOptions?: string[] | Partial<CreateTableOptions>,
+    namespacePathOrOptions?: string[] | Partial<CreateTableOptions>,
    options?: Partial<CreateTableOptions>,
  ): Promise<Table> {
-    // Detect if third argument is namespace array or options object
-    let namespace: string[] | undefined;
+    // Detect if third argument is namespacePath array or options object
+    let namespacePath: string[] | undefined;
    let createOptions: Partial<CreateTableOptions> | undefined;

-    if (Array.isArray(namespaceOrOptions)) {
-      // Third argument is namespace array
-      namespace = namespaceOrOptions;
+    if (Array.isArray(namespacePathOrOptions)) {
+      // Third argument is namespacePath array
+      namespacePath = namespacePathOrOptions;
      createOptions = options;
    } else {
      // Third argument is options object (backwards compatibility)
-      namespace = undefined;
-      createOptions = namespaceOrOptions;
+      namespacePath = undefined;
+      createOptions = namespacePathOrOptions;
    }

    let mode: string = createOptions?.mode ?? "create";
@@ -502,18 +502,18 @@ export class LocalConnection extends Connection {
      name,
      buf,
      mode,
-      namespace ?? [],
+      namespacePath ?? [],
      storageOptions,
    );
    return new LocalTable(innerTable);
  }

-  async dropTable(name: string, namespace?: string[]): Promise<void> {
-    return this.inner.dropTable(name, namespace ?? []);
+  async dropTable(name: string, namespacePath?: string[]): Promise<void> {
+    return this.inner.dropTable(name, namespacePath ?? []);
  }

-  async dropAllTables(namespace?: string[]): Promise<void> {
-    return this.inner.dropAllTables(namespace ?? []);
+  async dropAllTables(namespacePath?: string[]): Promise<void> {
+    return this.inner.dropAllTables(namespacePath ?? []);
  }
 }

--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -285,6 +285,25 @@ export abstract class Table {
   */
  abstract prewarmIndex(name: string): Promise<void>;

+  /**
+   * Prewarm one or more columns of data in the table.
+   *
+   * @param columns The columns to prewarm. If undefined, all columns are prewarmed.
+   *
+   * This will load the column data into the page cache so that future queries that
+   * read those columns avoid the initial cold-start latency.  This call initiates
+   * prewarming and returns once the request is accepted; the warming itself may
+   * continue in the background.  Calling it on already-prewarmed columns is a
+   * no-op on the server.
+   *
+   * Prewarming is generally useful for columns used in filters or projections.
+   * Large columns (e.g. high-dimensional vectors or binary data) may not be
+   * practical to prewarm.
+   *
+   * This feature is currently only supported on remote tables.
+   */
+  abstract prewarmData(columns?: string[]): Promise<void>;
+
  /**
   * Waits for asynchronous indexing to complete on the table.
   *
@@ -710,6 +729,10 @@ export class LocalTable extends Table {
    await this.inner.prewarmIndex(name);
  }

+  async prewarmData(columns?: string[]): Promise<void> {
+    await this.inner.prewarmData(columns);
+  }
+
  async waitForIndex(
    indexNames: string[],
    timeoutSeconds: number,
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.27.2",
+	"version": "0.29.0",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.27.2",
+	"version": "0.29.0",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.27.2",
+	"version": "0.29.0",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.27.2",
+	"version": "0.29.0",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.27.2",
+	"version": "0.29.0",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.27.2",
+  "version": "0.29.0",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.27.2",
+	"version": "0.29.0",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.27.2",
+  "version": "0.28.0-beta.11",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.27.2",
+      "version": "0.28.0-beta.11",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.27.2",
+  "version": "0.29.0",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
@@ -75,7 +75,6 @@
    "build:debug": "napi build --platform --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir lancedb",
    "postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
    "build:release": "napi build --platform --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir dist",
-    "postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
    "build": "npm run build:debug && npm run tsc",
    "build-release": "npm run build:release && npm run tsc",
    "tsc": "tsc -b",
--- a/nodejs/src/connection.rs
+++ b/nodejs/src/connection.rs
@@ -67,6 +67,12 @@ impl Connection {
                builder = builder.storage_option(key, value);
            }
        }
+        if let Some(manifest_enabled) = options.manifest_enabled {
+            builder = builder.manifest_enabled(manifest_enabled);
+        }
+        if let Some(namespace_client_properties) = options.namespace_client_properties {
+            builder = builder.namespace_client_properties(namespace_client_properties);
+        }

        // Create client config, optionally with header provider
        let client_config = options.client_config.unwrap_or_default();
@@ -119,12 +125,12 @@ impl Connection {
    #[napi(catch_unwind)]
    pub async fn table_names(
        &self,
-        namespace: Vec<String>,
+        namespace_path: Option<Vec<String>>,
        start_after: Option<String>,
        limit: Option<u32>,
    ) -> napi::Result<Vec<String>> {
        let mut op = self.get_inner()?.table_names();
-        op = op.namespace(namespace);
+        op = op.namespace(namespace_path.unwrap_or_default());
        if let Some(start_after) = start_after {
            op = op.start_after(start_after);
        }
@@ -146,7 +152,7 @@ impl Connection {
        name: String,
        buf: Buffer,
        mode: String,
-        namespace: Vec<String>,
+        namespace_path: Option<Vec<String>>,
        storage_options: Option<HashMap<String, String>>,
    ) -> napi::Result<Table> {
        let batches = ipc_file_to_batches(buf.to_vec())
@@ -154,7 +160,7 @@ impl Connection {
        let mode = Self::parse_create_mode_str(&mode)?;
        let mut builder = self.get_inner()?.create_table(&name, batches).mode(mode);

-        builder = builder.namespace(namespace);
+        builder = builder.namespace(namespace_path.unwrap_or_default());

        if let Some(storage_options) = storage_options {
            for (key, value) in storage_options {
@@ -171,7 +177,7 @@ impl Connection {
        name: String,
        schema_buf: Buffer,
        mode: String,
-        namespace: Vec<String>,
+        namespace_path: Option<Vec<String>>,
        storage_options: Option<HashMap<String, String>>,
    ) -> napi::Result<Table> {
        let schema = ipc_file_to_schema(schema_buf.to_vec()).map_err(|e| {
@@ -183,7 +189,7 @@ impl Connection {
            .create_empty_table(&name, schema)
            .mode(mode);

-        builder = builder.namespace(namespace);
+        builder = builder.namespace(namespace_path.unwrap_or_default());

        if let Some(storage_options) = storage_options {
            for (key, value) in storage_options {
@@ -198,13 +204,13 @@ impl Connection {
    pub async fn open_table(
        &self,
        name: String,
-        namespace: Vec<String>,
+        namespace_path: Option<Vec<String>>,
        storage_options: Option<HashMap<String, String>>,
        index_cache_size: Option<u32>,
    ) -> napi::Result<Table> {
        let mut builder = self.get_inner()?.open_table(&name);

-        builder = builder.namespace(namespace);
+        builder = builder.namespace(namespace_path.unwrap_or_default());

        if let Some(storage_options) = storage_options {
            for (key, value) in storage_options {
@@ -223,7 +229,7 @@ impl Connection {
        &self,
        target_table_name: String,
        source_uri: String,
-        target_namespace: Vec<String>,
+        target_namespace_path: Option<Vec<String>>,
        source_version: Option<i64>,
        source_tag: Option<String>,
        is_shallow: bool,
@@ -232,7 +238,7 @@ impl Connection {
            .get_inner()?
            .clone_table(&target_table_name, &source_uri);

-        builder = builder.target_namespace(target_namespace);
+        builder = builder.target_namespace(target_namespace_path.unwrap_or_default());

        if let Some(version) = source_version {
            builder = builder.source_version(version as u64);
@@ -250,18 +256,21 @@ impl Connection {

    /// Drop table with the name. Or raise an error if the table does not exist.
    #[napi(catch_unwind)]
-    pub async fn drop_table(&self, name: String, namespace: Vec<String>) -> napi::Result<()> {
+    pub async fn drop_table(
+        &self,
+        name: String,
+        namespace_path: Option<Vec<String>>,
+    ) -> napi::Result<()> {
+        let ns = namespace_path.unwrap_or_default();
        self.get_inner()?
-            .drop_table(&name, &namespace)
+            .drop_table(&name, &ns)
            .await
            .default_error()
    }

    #[napi(catch_unwind)]
-    pub async fn drop_all_tables(&self, namespace: Vec<String>) -> napi::Result<()> {
-        self.get_inner()?
-            .drop_all_tables(&namespace)
-            .await
-            .default_error()
+    pub async fn drop_all_tables(&self, namespace_path: Option<Vec<String>>) -> napi::Result<()> {
+        let ns = namespace_path.unwrap_or_default();
+        self.get_inner()?.drop_all_tables(&ns).await.default_error()
    }
 }
--- a/nodejs/src/lib.rs
+++ b/nodejs/src/lib.rs
@@ -35,8 +35,15 @@ pub struct ConnectionOptions {
    pub read_consistency_interval: Option<f64>,
    /// (For LanceDB OSS only): configuration for object storage.
    ///
-    /// The available options are described at https://lancedb.com/docs/storage/
+    /// The available options are described at https://docs.lancedb.com/storage/
    pub storage_options: Option<HashMap<String, String>>,
+    /// (For LanceDB OSS only): use directory namespace manifests as the source
+    /// of truth for table metadata. Existing directory-listed root tables are
+    /// migrated into the manifest on access.
+    pub manifest_enabled: Option<bool>,
+    /// (For LanceDB OSS only): extra properties for the backing namespace
+    /// client used by manifest-enabled native connections.
+    pub namespace_client_properties: Option<HashMap<String, String>>,
    /// (For LanceDB OSS only): the session to use for this connection. Holds
    /// shared caches and other session-specific state.
    pub session: Option<session::Session>,
--- a/nodejs/src/remote.rs
+++ b/nodejs/src/remote.rs
@@ -92,6 +92,13 @@ pub struct ClientConfig {
    pub extra_headers: Option<HashMap<String, String>>,
    pub id_delimiter: Option<String>,
    pub tls_config: Option<TlsConfig>,
+    /// User identifier for tracking purposes.
+    ///
+    /// This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
+    /// It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
+    /// Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
+    /// variable that contains the user ID value.
+    pub user_id: Option<String>,
 }

 impl From<TimeoutConfig> for lancedb::remote::TimeoutConfig {
@@ -145,6 +152,7 @@ impl From<ClientConfig> for lancedb::remote::ClientConfig {
            id_delimiter: config.id_delimiter,
            tls_config: config.tls_config.map(Into::into),
            header_provider: None, // the header provider is set separately later
+            user_id: config.user_id,
        }
    }
 }
--- a/nodejs/src/rerankers.rs
+++ b/nodejs/src/rerankers.rs
@@ -18,6 +18,7 @@ type RerankHybridFn = ThreadsafeFunction<
    RerankHybridCallbackArgs,
    Status,
    false,
+    true,
 >;

 /// Reranker implementation that "wraps" a NodeJS Reranker implementation.
@@ -32,7 +33,10 @@ impl Reranker {
    pub fn new(
        rerank_hybrid: Function<RerankHybridCallbackArgs, Promise<Buffer>>,
    ) -> napi::Result<Self> {
-        let rerank_hybrid = rerank_hybrid.build_threadsafe_function().build()?;
+        let rerank_hybrid = rerank_hybrid
+            .build_threadsafe_function()
+            .weak::<true>()
+            .build()?;
        Ok(Self { rerank_hybrid })
    }
 }
--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -159,6 +159,14 @@ impl Table {
            .default_error()
    }

+    #[napi(catch_unwind)]
+    pub async fn prewarm_data(&self, columns: Option<Vec<String>>) -> napi::Result<()> {
+        self.inner_ref()?
+            .prewarm_data(columns)
+            .await
+            .default_error()
+    }
+
    #[napi(catch_unwind)]
    pub async fn wait_for_index(&self, index_names: Vec<String>, timeout_s: i64) -> Result<()> {
        let timeout = std::time::Duration::from_secs(timeout_s.try_into().unwrap());
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.30.2"
+current_version = "0.32.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,7 @@
 [package]
 name = "lancedb-python"
-version = "0.30.2"
+version = "0.32.0"
+publish = false
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
@@ -14,7 +15,7 @@ name = "_lancedb"
 crate-type = ["cdylib"]

 [dependencies]
-arrow = { version = "57.2", features = ["pyarrow"] }
+arrow = { version = "58.0.0", features = ["pyarrow"] }
 async-trait = "0.1"
 bytes = "1"
 lancedb = { path = "../rust/lancedb", default-features = false }
@@ -24,8 +25,8 @@ lance-namespace-impls.workspace = true
 lance-io.workspace = true
 env_logger.workspace = true
 log.workspace = true
-pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
-pyo3-async-runtimes = { version = "0.26", features = [
+pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39"] }
+pyo3-async-runtimes = { version = "0.28", features = [
    "attributes",
    "tokio-runtime",
 ] }
@@ -34,10 +35,11 @@ futures.workspace = true
 serde = "1"
 serde_json = "1"
 snafu.workspace = true
-tokio = { version = "1.40", features = ["sync"] }
+tokio = { version = "1.40", features = ["sync", "rt-multi-thread"] }
+libc = "0.2"

 [build-dependencies]
-pyo3-build-config = { version = "0.26", features = [
+pyo3-build-config = { version = "0.28", features = [
    "extension-module",
    "abi3-py39",
 ] }
--- a/python/PYTHON_THIRD_PARTY_LICENSES.md
+++ b/python/PYTHON_THIRD_PARTY_LICENSES.md
@@ -183,7 +183,6 @@
 | stack-data                     | 0.6.3           | MIT License                                                                                      | http://github.com/alexmojaki/stack_data                                                           |
 | sympy                          | 1.14.0          | BSD License                                                                                      | https://sympy.org                                                                                 |
 | tabulate                       | 0.9.0           | MIT License                                                                                      | https://github.com/astanin/python-tabulate                                                        |
-| tantivy                        | 0.25.1          | UNKNOWN                                                                                          | UNKNOWN                                                                                           |
 | threadpoolctl                  | 3.6.0           | BSD License                                                                                      | https://github.com/joblib/threadpoolctl                                                           |
 | timm                           | 1.0.24          | Apache Software License                                                                          | https://github.com/huggingface/pytorch-image-models                                               |
 | tinycss2                       | 1.4.0           | BSD License                                                                                      | https://www.courtbouillon.org/tinycss2                                                            |
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -45,7 +45,7 @@ repository = "https://github.com/lancedb/lancedb"

 [project.optional-dependencies]
 pylance = [
-    "pylance>=4.0.0b7",
+    "pylance>=6.0.0",
 ]
 tests = [
    "aiohttp>=3.9.0",
@@ -57,9 +57,8 @@ tests = [
    "duckdb>=0.9.0",
    "pytz>=2023.3",
    "polars>=0.19, <=1.3.0",
-    "tantivy>=0.20.0",
    "pyarrow-stubs>=16.0",
-    "pylance>=4.0.0b7",
+    "pylance>=6.0.0",
    "requests>=2.31.0",
    "datafusion>=52,<53",
 ]
@@ -83,7 +82,7 @@ embeddings = [
    "colpali-engine>=0.3.10",
    "huggingface_hub>=0.19.0",
    "InstructorEmbedding>=1.0.1",
-    "google.generativeai>=0.3.0",
+    "google-genai>=1.0.0",
    "boto3>=1.28.57",
    "awscli>=1.44.38",
    "botocore>=1.31.57",
--- a/python/python/lancedb/__init__.py
+++ b/python/python/lancedb/__init__.py
@@ -6,8 +6,7 @@ import importlib.metadata
 import os
 from concurrent.futures import ThreadPoolExecutor
 from datetime import timedelta
-from typing import Dict, Optional, Union, Any
-import warnings
+from typing import Dict, Optional, Union, Any, List

 __version__ = importlib.metadata.version("lancedb")

@@ -15,7 +14,6 @@ from ._lancedb import connect as lancedb_connect
 from .common import URI, sanitize_uri
 from urllib.parse import urlparse
 from .db import AsyncConnection, DBConnection, LanceDBConnection
-from .io import StorageOptionsProvider
 from .remote import ClientConfig
 from .remote.db import RemoteDBConnection
 from .expr import Expr, col, lit, func
@@ -64,7 +62,7 @@ def _check_s3_bucket_with_dots(


 def connect(
-    uri: URI,
+    uri: Optional[URI] = None,
    *,
    api_key: Optional[str] = None,
    region: str = "us-east-1",
@@ -74,14 +72,19 @@ def connect(
    client_config: Union[ClientConfig, Dict[str, Any], None] = None,
    storage_options: Optional[Dict[str, str]] = None,
    session: Optional[Session] = None,
+    manifest_enabled: bool = False,
+    namespace_client_impl: Optional[str] = None,
+    namespace_client_properties: Optional[Dict[str, str]] = None,
+    namespace_client_pushdown_operations: Optional[List[str]] = None,
    **kwargs: Any,
 ) -> DBConnection:
    """Connect to a LanceDB database.

    Parameters
    ----------
-    uri: str or Path
-        The uri of the database.
+    uri: str or Path, optional
+        The uri of the database. When ``namespace_client_impl`` is provided you may
+        omit ``uri`` and connect through a namespace client instead.
    api_key: str, optional
        If presented, connect to LanceDB cloud.
        Otherwise, connect to a database on file system or cloud storage.
@@ -107,13 +110,29 @@ def connect(
        default configuration is used.
    storage_options: dict, optional
        Additional options for the storage backend. See available options at
-        <https://lancedb.com/docs/storage/>
+        <https://docs.lancedb.com/storage/>
+    manifest_enabled : bool, default False
+        When true for local/native connections, use directory namespace
+        manifests as the source of truth for table metadata. Existing
+        directory-listed root tables are migrated into the manifest on access.
    session: Session, optional
        (For LanceDB OSS only)
        A session to use for this connection. Sessions allow you to configure
        cache sizes for index and metadata caches, which can significantly
        impact memory use and performance. They can also be re-used across
        multiple connections to share the same cache state.
+    namespace_client_impl : str, optional
+        When provided along with ``namespace_client_properties``, ``connect``
+        returns a namespace-backed connection by delegating to
+        :func:`connect_namespace`. The value identifies which namespace
+        implementation to load (e.g., ``"dir"`` or ``"rest"``).
+    namespace_client_properties : dict, optional
+        Configuration to pass to the namespace client implementation. Required
+        when ``namespace_client_impl`` is set.
+    namespace_client_pushdown_operations : list[str], optional
+        Only used when ``namespace_client_impl`` is provided. Forwards to
+        :func:`connect_namespace` to control which operations are executed on the
+        namespace service (e.g., ``["QueryTable", "CreateTable"]``).

    Examples
    --------
@@ -133,11 +152,48 @@ def connect(
    >>> db = lancedb.connect("db://my_database", api_key="ldb_...",
    ...                      client_config={"retry_config": {"retries": 5}})

+    Connect to a namespace-backed database:
+
+    >>> db = lancedb.connect(namespace_client_impl="dir",
+    ...                      namespace_client_properties={"root": "/tmp/ns"})
+
    Returns
    -------
    conn : DBConnection
        A connection to a LanceDB database.
    """
+    if namespace_client_impl is not None:
+        if namespace_client_properties is None:
+            raise ValueError(
+                "namespace_client_properties must be provided when "
+                "namespace_client_impl is set"
+            )
+        if kwargs:
+            raise ValueError(f"Unknown keyword arguments: {kwargs}")
+        return connect_namespace(
+            namespace_client_impl,
+            namespace_client_properties,
+            read_consistency_interval=read_consistency_interval,
+            storage_options=storage_options,
+            session=session,
+            namespace_client_pushdown_operations=namespace_client_pushdown_operations,
+        )
+
+    if namespace_client_properties is not None and not manifest_enabled:
+        raise ValueError(
+            "namespace_client_impl must be provided when using "
+            "namespace_client_properties unless manifest_enabled=True"
+        )
+
+    if namespace_client_pushdown_operations is not None:
+        raise ValueError(
+            "namespace_client_pushdown_operations is only valid when "
+            "connecting through a namespace"
+        )
+    if uri is None:
+        raise ValueError(
+            "uri is required when not connecting through a namespace client"
+        )
    if isinstance(uri, str) and uri.startswith("db://"):
        if api_key is None:
            api_key = os.environ.get("LANCEDB_API_KEY")
@@ -166,9 +222,92 @@ def connect(
        read_consistency_interval=read_consistency_interval,
        storage_options=storage_options,
        session=session,
+        manifest_enabled=manifest_enabled,
+        namespace_client_properties=namespace_client_properties,
    )


+WORKER_PROPERTY_PREFIX = "_lancedb_worker_"
+
+
+def _apply_worker_overrides(props: dict[str, str]) -> dict[str, str]:
+    """Apply worker property overrides.
+
+    Any key starting with ``_lancedb_worker_`` is extracted, the prefix
+    is stripped, and the resulting key-value pair is put back into the
+    map (overriding the existing value if present).  The original
+    prefixed key is removed.
+    """
+    worker_keys = [k for k in props if k.startswith(WORKER_PROPERTY_PREFIX)]
+    if not worker_keys:
+        return props
+    result = dict(props)
+    for key in worker_keys:
+        value = result.pop(key)
+        real_key = key[len(WORKER_PROPERTY_PREFIX) :]
+        result[real_key] = value
+    return result
+
+
+def deserialize_conn(
+    data: str,
+    *,
+    for_worker: bool = False,
+) -> DBConnection:
+    """Reconstruct a DBConnection from a serialized string.
+
+    The string must have been produced by
+    :meth:`DBConnection.serialize`.
+
+    Parameters
+    ----------
+    data : str
+        String produced by ``serialize()``.
+    for_worker : bool, default False
+        When ``True``, any namespace client property whose key starts
+        with ``_lancedb_worker_`` has that prefix stripped and the
+        value overrides the corresponding property.  For example,
+        ``_lancedb_worker_uri`` replaces ``uri``.
+
+    Returns
+    -------
+    DBConnection
+        A new connection matching the serialized state.
+    """
+    import json
+
+    parsed = json.loads(data)
+    connection_type = parsed.get("connection_type")
+
+    rci_secs = parsed.get("read_consistency_interval_seconds")
+    rci = timedelta(seconds=rci_secs) if rci_secs is not None else None
+    storage_options = parsed.get("storage_options")
+
+    if connection_type == "namespace":
+        props = dict(parsed.get("namespace_client_properties") or {})
+        if for_worker:
+            props = _apply_worker_overrides(props)
+        return connect_namespace(
+            namespace_client_impl=parsed["namespace_client_impl"],
+            namespace_client_properties=props,
+            read_consistency_interval=rci,
+            storage_options=storage_options,
+            namespace_client_pushdown_operations=parsed.get(
+                "namespace_client_pushdown_operations"
+            ),
+        )
+    elif connection_type == "local":
+        return LanceDBConnection(
+            parsed["uri"],
+            read_consistency_interval=rci,
+            storage_options=storage_options,
+            manifest_enabled=parsed.get("manifest_enabled", False),
+            namespace_client_properties=parsed.get("namespace_client_properties"),
+        )
+    else:
+        raise ValueError(f"Unknown connection_type: {connection_type}")
+
+
 async def connect_async(
    uri: URI,
    *,
@@ -179,6 +318,8 @@ async def connect_async(
    client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
    storage_options: Optional[Dict[str, str]] = None,
    session: Optional[Session] = None,
+    manifest_enabled: bool = False,
+    namespace_client_properties: Optional[Dict[str, str]] = None,
 ) -> AsyncConnection:
    """Connect to a LanceDB database.

@@ -211,13 +352,20 @@ async def connect_async(
        default configuration is used.
    storage_options: dict, optional
        Additional options for the storage backend. See available options at
-        <https://lancedb.com/docs/storage/>
+        <https://docs.lancedb.com/storage/>
    session: Session, optional
        (For LanceDB OSS only)
        A session to use for this connection. Sessions allow you to configure
        cache sizes for index and metadata caches, which can significantly
        impact memory use and performance. They can also be re-used across
        multiple connections to share the same cache state.
+    manifest_enabled : bool, default False
+        When true for local/native connections, use directory namespace
+        manifests as the source of truth for table metadata. Existing
+        directory-listed root tables are migrated into the manifest on access.
+    namespace_client_properties : dict, optional
+        Additional directory namespace client properties to use with
+        ``manifest_enabled=True``.

    Examples
    --------
@@ -260,6 +408,8 @@ async def connect_async(
            client_config,
            storage_options,
            session,
+            manifest_enabled,
+            namespace_client_properties,
        )
    )

@@ -284,17 +434,6 @@ __all__ = [
    "LanceNamespaceDBConnection",
    "RemoteDBConnection",
    "Session",
-    "StorageOptionsProvider",
    "Table",
    "__version__",
 ]
-
-
-def __warn_on_fork():
-    warnings.warn(
-        "lance is not fork-safe. If you are using multiprocessing, use spawn instead.",
-    )
-
-
-if hasattr(os, "register_at_fork"):
-    os.register_at_fork(before=__warn_on_fork)  # type: ignore[attr-defined]
--- a/python/python/lancedb/_lancedb.pyi
+++ b/python/python/lancedb/_lancedb.pyi
@@ -12,9 +12,9 @@ from .index import (
    LabelList,
    HnswPq,
    HnswSq,
+    HnswFlat,
    FTS,
 )
-from .io import StorageOptionsProvider
 from lance_namespace import (
    ListNamespacesResponse,
    CreateNamespaceResponse,
@@ -26,6 +26,7 @@ from .remote import ClientConfig

 IvfHnswPq: type[HnswPq] = HnswPq
 IvfHnswSq: type[HnswSq] = HnswSq
+IvfHnswFlat: type[HnswFlat] = HnswFlat

 class PyExpr:
    """A type-safe DataFusion expression node (Rust-side handle)."""
@@ -72,35 +73,35 @@ class Connection(object):
    async def close(self): ...
    async def list_namespaces(
        self,
-        namespace: Optional[List[str]] = None,
+        namespace_path: Optional[List[str]] = None,
        page_token: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> ListNamespacesResponse: ...
    async def create_namespace(
        self,
-        namespace: List[str],
+        namespace_path: List[str],
        mode: Optional[str] = None,
        properties: Optional[Dict[str, str]] = None,
    ) -> CreateNamespaceResponse: ...
    async def drop_namespace(
        self,
-        namespace: List[str],
+        namespace_path: List[str],
        mode: Optional[str] = None,
        behavior: Optional[str] = None,
    ) -> DropNamespaceResponse: ...
    async def describe_namespace(
        self,
-        namespace: List[str],
+        namespace_path: List[str],
    ) -> DescribeNamespaceResponse: ...
    async def list_tables(
        self,
-        namespace: Optional[List[str]] = None,
+        namespace_path: Optional[List[str]] = None,
        page_token: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> ListTablesResponse: ...
    async def table_names(
        self,
-        namespace: Optional[List[str]],
+        namespace_path: Optional[List[str]],
        start_after: Optional[str],
        limit: Optional[int],
    ) -> list[str]: ...  # Deprecated: Use list_tables instead
@@ -109,9 +110,8 @@ class Connection(object):
        name: str,
        mode: str,
        data: pa.RecordBatchReader,
-        namespace: Optional[List[str]] = None,
+        namespace_path: Optional[List[str]] = None,
        storage_options: Optional[Dict[str, str]] = None,
-        storage_options_provider: Optional[StorageOptionsProvider] = None,
        location: Optional[str] = None,
    ) -> Table: ...
    async def create_empty_table(
@@ -119,17 +119,15 @@ class Connection(object):
        name: str,
        mode: str,
        schema: pa.Schema,
-        namespace: Optional[List[str]] = None,
+        namespace_path: Optional[List[str]] = None,
        storage_options: Optional[Dict[str, str]] = None,
-        storage_options_provider: Optional[StorageOptionsProvider] = None,
        location: Optional[str] = None,
    ) -> Table: ...
    async def open_table(
        self,
        name: str,
-        namespace: Optional[List[str]] = None,
+        namespace_path: Optional[List[str]] = None,
        storage_options: Optional[Dict[str, str]] = None,
-        storage_options_provider: Optional[StorageOptionsProvider] = None,
        index_cache_size: Optional[int] = None,
        location: Optional[str] = None,
    ) -> Table: ...
@@ -137,7 +135,7 @@ class Connection(object):
        self,
        target_table_name: str,
        source_uri: str,
-        target_namespace: Optional[List[str]] = None,
+        target_namespace_path: Optional[List[str]] = None,
        source_version: Optional[int] = None,
        source_tag: Optional[str] = None,
        is_shallow: bool = True,
@@ -146,13 +144,18 @@ class Connection(object):
        self,
        cur_name: str,
        new_name: str,
-        cur_namespace: Optional[List[str]] = None,
-        new_namespace: Optional[List[str]] = None,
+        cur_namespace_path: Optional[List[str]] = None,
+        new_namespace_path: Optional[List[str]] = None,
    ) -> None: ...
    async def drop_table(
-        self, name: str, namespace: Optional[List[str]] = None
+        self, name: str, namespace_path: Optional[List[str]] = None
    ) -> None: ...
-    async def drop_all_tables(self, namespace: Optional[List[str]] = None) -> None: ...
+    async def drop_all_tables(
+        self, namespace_path: Optional[List[str]] = None
+    ) -> None: ...
+    async def namespace_client_config(
+        self,
+    ) -> Dict[str, Any]: ...

 class Table:
    def name(self) -> str: ...
@@ -179,6 +182,7 @@ class Table:
            IvfPq,
            HnswPq,
            HnswSq,
+            HnswFlat,
            BTree,
            Bitmap,
            LabelList,
@@ -241,6 +245,8 @@ async def connect(
    client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
    storage_options: Optional[Dict[str, str]],
    session: Optional[Session],
+    manifest_enabled: bool = False,
+    namespace_client_properties: Optional[Dict[str, str]] = None,
 ) -> Connection: ...

 class RecordBatchStream:
@@ -439,7 +445,7 @@ class AsyncPermutationBuilder:
    async def execute(self) -> Table: ...

 def async_permutation_builder(
-    table: Table, dest_table_name: str
+    table: Table,
 ) -> AsyncPermutationBuilder: ...
 def fts_query_to_json(query: Any) -> str: ...

--- a/python/python/lancedb/background_loop.py
+++ b/python/python/lancedb/background_loop.py
@@ -2,7 +2,9 @@
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import asyncio
+import os
 import threading
+import warnings


 class BackgroundEventLoop:
@@ -13,6 +15,9 @@ class BackgroundEventLoop:
    """

    def __init__(self):
+        self._start()
+
+    def _start(self):
        self.loop = asyncio.new_event_loop()
        self.thread = threading.Thread(
            target=self.loop.run_forever,
@@ -31,3 +36,30 @@ class BackgroundEventLoop:


 LOOP = BackgroundEventLoop()
+
+_FORK_WARNED = False
+
+
+def _reset_after_fork():
+    # Threads do not survive fork(), so the asyncio loop in LOOP.thread is
+    # dead in the child. Re-initialize the singleton in place so existing
+    # `from .background_loop import LOOP` references in other modules see
+    # the new state. The Rust-side tokio runtime is reset analogously by a
+    # pthread_atfork hook installed in the _lancedb extension.
+    LOOP._start()
+    global _FORK_WARNED
+    if not _FORK_WARNED:
+        _FORK_WARNED = True
+        warnings.warn(
+            "lancedb fork support is experimental: the internal async "
+            "runtime has been reset in the forked child, but a small chance "
+            "of deadlock remains if other state was mid-operation at fork "
+            "time. The 'forkserver' or 'spawn' multiprocessing start method "
+            "is likely a safer alternative.",
+            RuntimeWarning,
+            stacklevel=2,
+        )
+
+
+if hasattr(os, "register_at_fork"):
+    os.register_at_fork(after_in_child=_reset_after_fork)
--- a/python/python/lancedb/common.py
+++ b/python/python/lancedb/common.py
@@ -96,7 +96,7 @@ def data_to_reader(
            f"Unknown data type {type(data)}. "
            "Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
            "pyarrow Table/RecordBatch, or Pydantic models. "
-            "See https://lancedb.com/docs/tables/ for examples."
+            "See https://docs.lancedb.com/tables/ for examples."
        )


--- a/python/python/lancedb/db.py
+++ b/python/python/lancedb/db.py
--- a/python/python/lancedb/embeddings/gemini_text.py
+++ b/python/python/lancedb/embeddings/gemini_text.py
@@ -19,10 +19,10 @@ from .utils import TEXT, api_key_not_found_help
@register("gemini-text")
 class GeminiText(TextEmbeddingFunction):
    """
-    An embedding function that uses the Google's Gemini API. Requires GOOGLE_API_KEY to
+    An embedding function that uses Google's Gemini API. Requires GOOGLE_API_KEY to
    be set.

-    https://ai.google.dev/docs/embeddings_guide
+    https://ai.google.dev/gemini-api/docs/embeddings

    Supports various tasks types:
    | Task Type               | Description                                            |
@@ -46,9 +46,12 @@ class GeminiText(TextEmbeddingFunction):

    Parameters
    ----------
-    name: str, default "models/embedding-001"
-        The name of the model to use. See the Gemini documentation for a list of
-        available models.
+    name: str, default "gemini-embedding-001"
+        The name of the model to use. Supported models include:
+        - "gemini-embedding-001" (768 dimensions)
+
+        Note: The legacy "models/embedding-001" format is also supported but
+        "gemini-embedding-001" is recommended.

    query_task_type: str, default "retrieval_query"
        Sets the task type for the queries.
@@ -77,7 +80,7 @@ class GeminiText(TextEmbeddingFunction):

    """

-    name: str = "models/embedding-001"
+    name: str = "gemini-embedding-001"
    query_task_type: str = "retrieval_query"
    source_task_type: str = "retrieval_document"

@@ -114,23 +117,48 @@ class GeminiText(TextEmbeddingFunction):
        texts: list[str] or np.ndarray (of str)
            The texts to embed
        """
-        if (
-            kwargs.get("task_type") == "retrieval_document"
-        ):  # Provide a title to use existing API design
-            title = "Embedding of a document"
-            kwargs["title"] = title
+        from google.genai import types

-        return [
-            self.client.embed_content(model=self.name, content=text, **kwargs)[
-                "embedding"
-            ]
-            for text in texts
-        ]
+        task_type = kwargs.get("task_type")
+
+        # Build content objects for embed_content
+        contents = []
+        for text in texts:
+            if task_type == "retrieval_document":
+                # Provide a title for retrieval_document task
+                contents.append(
+                    {"parts": [{"text": "Embedding of a document"}, {"text": text}]}
+                )
+            else:
+                contents.append({"parts": [{"text": text}]})
+
+        # Build config
+        config_kwargs = {}
+        if task_type:
+            config_kwargs["task_type"] = task_type.upper()  # API expects uppercase
+
+        # Call embed_content for each content
+        embeddings = []
+        for content in contents:
+            config = (
+                types.EmbedContentConfig(**config_kwargs) if config_kwargs else None
+            )
+            response = self.client.models.embed_content(
+                model=self.name,
+                contents=content,
+                config=config,
+            )
+            embeddings.append(response.embeddings[0].values)
+
+        return embeddings

    @cached_property
    def client(self):
-        genai = attempt_import_or_raise("google.generativeai", "google.generativeai")
+        attempt_import_or_raise("google.genai", "google-genai")

        if not os.environ.get("GOOGLE_API_KEY"):
            api_key_not_found_help("google")
-        return genai
+
+        from google import genai as genai_module
+
+        return genai_module.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
--- a/python/python/lancedb/fts.py
+++ b/python/python/lancedb/fts.py
@@ -1,201 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-"""Full text search index using tantivy-py"""
-
-import os
-from typing import List, Tuple, Optional
-
-import pyarrow as pa
-
-try:
-    import tantivy
-except ImportError:
-    raise ImportError(
-        "Please install tantivy-py `pip install tantivy` to use the full text search feature."  # noqa: E501
-    )
-
-from .table import LanceTable
-
-
-def create_index(
-    index_path: str,
-    text_fields: List[str],
-    ordering_fields: Optional[List[str]] = None,
-    tokenizer_name: str = "default",
-) -> tantivy.Index:
-    """
-    Create a new Index (not populated)
-
-    Parameters
-    ----------
-    index_path : str
-        Path to the index directory
-    text_fields : List[str]
-        List of text fields to index
-    ordering_fields: List[str]
-        List of unsigned type fields to order by at search time
-    tokenizer_name : str, default "default"
-        The tokenizer to use
-
-    Returns
-    -------
-    index : tantivy.Index
-        The index object (not yet populated)
-    """
-    if ordering_fields is None:
-        ordering_fields = []
-    # Declaring our schema.
-    schema_builder = tantivy.SchemaBuilder()
-    # special field that we'll populate with row_id
-    schema_builder.add_integer_field("doc_id", stored=True)
-    # data fields
-    for name in text_fields:
-        schema_builder.add_text_field(name, stored=True, tokenizer_name=tokenizer_name)
-    if ordering_fields:
-        for name in ordering_fields:
-            schema_builder.add_unsigned_field(name, fast=True)
-    schema = schema_builder.build()
-    os.makedirs(index_path, exist_ok=True)
-    index = tantivy.Index(schema, path=index_path)
-    return index
-
-
-def populate_index(
-    index: tantivy.Index,
-    table: LanceTable,
-    fields: List[str],
-    writer_heap_size: Optional[int] = None,
-    ordering_fields: Optional[List[str]] = None,
-) -> int:
-    """
-    Populate an index with data from a LanceTable
-
-    Parameters
-    ----------
-    index : tantivy.Index
-        The index object
-    table : LanceTable
-        The table to index
-    fields : List[str]
-        List of fields to index
-    writer_heap_size : int
-        The writer heap size in bytes, defaults to 1GB
-
-    Returns
-    -------
-    int
-        The number of rows indexed
-    """
-    if ordering_fields is None:
-        ordering_fields = []
-    writer_heap_size = writer_heap_size or 1024 * 1024 * 1024
-    # first check the fields exist and are string or large string type
-    nested = []
-
-    for name in fields:
-        try:
-            f = table.schema.field(name)  # raises KeyError if not found
-        except KeyError:
-            f = resolve_path(table.schema, name)
-            nested.append(name)
-
-        if not pa.types.is_string(f.type) and not pa.types.is_large_string(f.type):
-            raise TypeError(f"Field {name} is not a string type")
-
-    # create a tantivy writer
-    writer = index.writer(heap_size=writer_heap_size)
-    # write data into index
-    dataset = table.to_lance()
-    row_id = 0
-
-    max_nested_level = 0
-    if len(nested) > 0:
-        max_nested_level = max([len(name.split(".")) for name in nested])
-
-    for b in dataset.to_batches(columns=fields + ordering_fields):
-        if max_nested_level > 0:
-            b = pa.Table.from_batches([b])
-            for _ in range(max_nested_level - 1):
-                b = b.flatten()
-        for i in range(b.num_rows):
-            doc = tantivy.Document()
-            for name in fields:
-                value = b[name][i].as_py()
-                if value is not None:
-                    doc.add_text(name, value)
-            for name in ordering_fields:
-                value = b[name][i].as_py()
-                if value is not None:
-                    doc.add_unsigned(name, value)
-            if not doc.is_empty:
-                doc.add_integer("doc_id", row_id)
-                writer.add_document(doc)
-            row_id += 1
-    # commit changes
-    writer.commit()
-    return row_id
-
-
-def resolve_path(schema, field_name: str) -> pa.Field:
-    """
-    Resolve a nested field path to a list of field names
-
-    Parameters
-    ----------
-    field_name : str
-        The field name to resolve
-
-    Returns
-    -------
-    List[str]
-        The resolved path
-    """
-    path = field_name.split(".")
-    field = schema.field(path.pop(0))
-    for segment in path:
-        if pa.types.is_struct(field.type):
-            field = field.type.field(segment)
-        else:
-            raise KeyError(f"field {field_name} not found in schema {schema}")
-    return field
-
-
-def search_index(
-    index: tantivy.Index, query: str, limit: int = 10, ordering_field=None
-) -> Tuple[Tuple[int], Tuple[float]]:
-    """
-    Search an index for a query
-
-    Parameters
-    ----------
-    index : tantivy.Index
-        The index object
-    query : str
-        The query string
-    limit : int
-        The maximum number of results to return
-
-    Returns
-    -------
-    ids_and_score: list[tuple[int], tuple[float]]
-        A tuple of two tuples, the first containing the document ids
-        and the second containing the scores
-    """
-    searcher = index.searcher()
-    query = index.parse_query(query)
-    # get top results
-    if ordering_field:
-        results = searcher.search(query, limit, order_by_field=ordering_field)
-    else:
-        results = searcher.search(query, limit)
-    if results.count == 0:
-        return tuple(), tuple()
-    return tuple(
-        zip(
-            *[
-                (searcher.doc(doc_address)["doc_id"][0], score)
-                for score, doc_address in results.hits
-            ]
-        )
-    )
--- a/python/python/lancedb/index.py
+++ b/python/python/lancedb/index.py
@@ -7,6 +7,7 @@ from typing import Literal, Optional
 from ._lancedb import (
    IndexConfig,
 )
+from .types import BaseTokenizerType

 lang_mapping = {
    "ar": "Arabic",
@@ -111,8 +112,12 @@ class FTS:
        - "simple": Splits text by whitespace and punctuation.
        - "whitespace": Split text by whitespace, but not punctuation.
        - "raw": No tokenization. The entire text is treated as a single token.
+        - "ngram": N-gram tokenizer for substring-style matching.
+        - "jieba/*": Jieba tokenizer loaded from Lance's language model home.
+        - "lindera/*": Lindera tokenizer loaded from Lance's language model home.
    language : str, default "English"
-        The language to use for tokenization.
+        The language to use for stemming and stop-word removal. This is not the
+        primary way to enable CJK tokenization.
    max_token_length : int, default 40
        The maximum token length to index. Tokens longer than this length will be
        ignored.
@@ -127,10 +132,17 @@ class FTS:
    ascii_folding : bool, default True
        Whether to fold ASCII characters. This converts accented characters to
        their ASCII equivalent. For example, "café" would be converted to "cafe".
+
+    Notes
+    -----
+    Model-backed tokenizers such as ``jieba/default`` and ``lindera/ipadic``
+    require tokenizer models in Lance's language model home. Set
+    ``LANCE_LANGUAGE_MODEL_HOME`` to override the default platform data
+    directory under ``lance/language_models``.
    """

    with_position: bool = False
-    base_tokenizer: Literal["simple", "raw", "whitespace"] = "simple"
+    base_tokenizer: BaseTokenizerType = "simple"
    language: str = "English"
    max_token_length: Optional[int] = 40
    lower_case: bool = True
@@ -376,9 +388,98 @@ class HnswSq:
    target_partition_size: Optional[int] = None


+@dataclass
+class HnswFlat:
+    """Describe a HNSW-FLAT index configuration.
+
+    HNSW-FLAT stands for Hierarchical Navigable Small World without quantization.
+    It stores raw vectors in the HNSW graph, providing the highest recall among
+    the IVF_HNSW family at the cost of more memory and disk space compared to
+    :class:`HnswSq` or :class:`HnswPq`.
+
+    Parameters
+    ----------
+
+    distance_type: str, default "l2"
+
+        The distance metric used to train the index.
+
+        The following distance types are available:
+
+        "l2" - Euclidean distance. This is a very common distance metric that
+        accounts for both magnitude and direction when determining the distance
+        between vectors. l2 distance has a range of [0, ∞).
+
+        "cosine" - Cosine distance.  Cosine distance is a distance metric
+        calculated from the cosine similarity between two vectors. Cosine
+        similarity is a measure of similarity between two non-zero vectors of an
+        inner product space. It is defined to equal the cosine of the angle
+        between them.  Unlike l2, the cosine distance is not affected by the
+        magnitude of the vectors.  Cosine distance has a range of [0, 2].
+
+        "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
+        distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
+        l2 norm is 1), then dot distance is equivalent to the cosine distance.
+
+    num_partitions, default sqrt(num_rows)
+
+        The number of IVF partitions to create.
+
+        For HNSW, we recommend a small number of partitions. Setting this to 1
+        works well for most tables. For very large tables, training just one HNSW
+        graph will require too much memory. Each partition becomes its own HNSW
+        graph, so setting this value higher reduces the peak memory use of
+        training.
+
+    max_iterations, default 50
+
+        Max iterations to train kmeans.
+
+        When training an IVF index we use kmeans to calculate the partitions.
+        This parameter controls how many iterations of kmeans to run.
+
+    sample_rate, default 256
+
+        The rate used to calculate the number of training vectors for kmeans.
+
+    m, default 20
+
+        The number of neighbors to select for each vector in the HNSW graph.
+
+        This value controls the tradeoff between search speed and accuracy.
+        The higher the value the more accurate the search but the slower it
+        will be.
+
+    ef_construction, default 300
+
+        The number of candidates to evaluate during the construction of the HNSW
+        graph.
+
+        This value controls the tradeoff between build speed and accuracy.
+        The higher the value the more accurate the build but the slower it will
+        be.  150 to 300 is the typical range. 100 is a minimum for good quality
+        search results. In most cases, there is no benefit to setting this higher
+        than 500.  This value should be set to a value that is not less than `ef`
+        in the search phase.
+
+    target_partition_size, default is 1,048,576
+
+        The target size of each partition.
+    """
+
+    distance_type: Literal["l2", "cosine", "dot"] = "l2"
+    num_partitions: Optional[int] = None
+    max_iterations: int = 50
+    sample_rate: int = 256
+    m: int = 20
+    ef_construction: int = 300
+    target_partition_size: Optional[int] = None
+
+
 # Backwards-compatible aliases
 IvfHnswPq = HnswPq
 IvfHnswSq = HnswSq
+IvfHnswFlat = HnswFlat


@dataclass
@@ -698,11 +799,13 @@ __all__ = [
    "IvfPq",
    "IvfHnswPq",
    "IvfHnswSq",
+    "IvfHnswFlat",
    "IvfSq",
    "IvfRq",
    "IvfFlat",
    "HnswPq",
    "HnswSq",
+    "HnswFlat",
    "IndexConfig",
    "FTS",
    "Bitmap",
--- a/python/python/lancedb/io.py
+++ b/python/python/lancedb/io.py
@@ -2,70 +2,3 @@
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

 """I/O utilities and interfaces for LanceDB."""
-
-from abc import ABC, abstractmethod
-from typing import Dict
-
-
-class StorageOptionsProvider(ABC):
-    """Abstract base class for providing storage options to LanceDB tables.
-
-    Storage options providers enable automatic credential refresh for cloud
-    storage backends (e.g., AWS S3, Azure Blob Storage, GCS). When credentials
-    have an expiration time, the provider's fetch_storage_options() method will
-    be called periodically to get fresh credentials before they expire.
-
-    Example
-    -------
-    >>> class MyProvider(StorageOptionsProvider):
-    ...     def fetch_storage_options(self) -> Dict[str, str]:
-    ...         # Fetch fresh credentials from your credential manager
-    ...         return {
-    ...             "aws_access_key_id": "...",
-    ...             "aws_secret_access_key": "...",
-    ...             "expires_at_millis": "1234567890000"  # Optional
-    ...         }
-    """
-
-    @abstractmethod
-    def fetch_storage_options(self) -> Dict[str, str]:
-        """Fetch fresh storage credentials.
-
-        This method is called by LanceDB when credentials need to be refreshed.
-        If the returned dictionary contains an "expires_at_millis" key with a
-        Unix timestamp in milliseconds, LanceDB will automatically refresh the
-        credentials before that time. If the key is not present, credentials
-        are assumed to not expire.
-
-        Returns
-        -------
-        Dict[str, str]
-            Dictionary containing cloud storage credentials and optionally an
-            expiration time:
-            - "expires_at_millis" (optional): Unix timestamp in milliseconds when
-              credentials expire
-            - Provider-specific credential keys (e.g., aws_access_key_id,
-              aws_secret_access_key, etc.)
-
-        Raises
-        ------
-        RuntimeError
-            If credentials cannot be fetched or are invalid
-        """
-        pass
-
-    def provider_id(self) -> str:
-        """Return a human-readable unique identifier for this provider instance.
-
-        This identifier is used for caching and equality comparison. Two providers
-        with the same ID will share the same cached object store connection.
-
-        The default implementation uses the class name and string representation.
-        Override this method if you need custom identification logic.
-
-        Returns
-        -------
-        str
-            A unique identifier for this provider instance
-        """
-        return f"{self.__class__.__name__} {{ repr: {str(self)!r} }}"
--- a/python/python/lancedb/namespace.py
+++ b/python/python/lancedb/namespace.py
--- a/python/python/lancedb/permutation.py
+++ b/python/python/lancedb/permutation.py
@@ -1,11 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

-from deprecation import deprecated
-from lancedb import AsyncConnection, DBConnection
-import pyarrow as pa
+import copy
 import json

+from deprecation import deprecated
+import pyarrow as pa
+
 from ._lancedb import async_permutation_builder, PermutationReader
 from .table import LanceTable
 from .background_loop import LOOP
@@ -36,10 +37,7 @@ class PermutationBuilder:
    be referenced by name in the future.  If names are not provided then they can only
    be referenced by their ordinal index.  There is no requirement to name every split.

-    By default, the permutation will be stored in memory and will be lost when the
-    program exits.  To persist the permutation (for very large datasets or to share
-    the permutation across multiple workers) use the [persist](#persist) method to
-    create a permanent table.
+    The permutation is stored in memory and will be lost when the program exits.
    """

    def __init__(self, table: LanceTable):
@@ -51,15 +49,6 @@ class PermutationBuilder:
        """
        self._async = async_permutation_builder(table)

-    def persist(
-        self, database: Union[DBConnection, AsyncConnection], table_name: str
-    ) -> "PermutationBuilder":
-        """
-        Persist the permutation to the given database.
-        """
-        self._async.persist(database, table_name)
-        return self
-
    def split_random(
        self,
        *,
@@ -284,9 +273,8 @@ class Permutations:
        self.permutation_table = permutation_table

        if permutation_table.schema.metadata is not None:
-            split_names = permutation_table.schema.metadata.get(
-                b"split_names", None
-            ).decode("utf-8")
+            raw = permutation_table.schema.metadata.get(b"split_names")
+            split_names = raw.decode("utf-8") if raw is not None else None
            if split_names is not None:
                self.split_names = json.loads(split_names)
                self.split_dict = {
@@ -381,20 +369,44 @@ class Permutation:

    def __init__(
        self,
-        reader: PermutationReader,
+        base_table: LanceTable,
+        permutation_table: Optional[LanceTable],
+        split: int,
        selection: dict[str, str],
        batch_size: int,
        transform_fn: Callable[pa.RecordBatch, Any],
+        offset: Optional[int] = None,
+        limit: Optional[int] = None,
+        connection_factory: Optional[Callable[[str], LanceTable]] = None,
+        _reader: Optional[PermutationReader] = None,
    ):
        """
        Internal constructor.  Use [from_tables](#from_tables) instead.
        """
-        assert reader is not None, "reader is required"
+        assert base_table is not None, "base_table is required"
        assert selection is not None, "selection is required"
-        self.reader = reader
+        self.base_table = base_table
+        self.permutation_table = permutation_table
+        self.split = split
        self.selection = selection
        self.transform_fn = transform_fn
        self.batch_size = batch_size
+        self.offset = offset
+        self.limit = limit
+        self.connection_factory = connection_factory
+        if _reader is None:
+            _reader = LOOP.run(self._build_reader())
+        self.reader: PermutationReader = _reader
+
+    async def _build_reader(self) -> PermutationReader:
+        reader = await PermutationReader.from_tables(
+            self.base_table, self.permutation_table, self.split
+        )
+        if self.offset is not None:
+            reader = await reader.with_offset(self.offset)
+        if self.limit is not None:
+            reader = await reader.with_limit(self.limit)
+        return reader

    def _with_selection(self, selection: dict[str, str]) -> "Permutation":
        """
@@ -403,21 +415,97 @@ class Permutation:
        Does not validation of the selection and it replaces it entirely.  This is not
        intended for public use.
        """
-        return Permutation(self.reader, selection, self.batch_size, self.transform_fn)
-
-    def _with_reader(self, reader: PermutationReader) -> "Permutation":
-        """
-        Creates a new permutation with the given reader
-
-        This is an internal method and should not be used directly.
-        """
-        return Permutation(reader, self.selection, self.batch_size, self.transform_fn)
+        new = copy.copy(self)
+        new.selection = selection
+        return new

    def with_batch_size(self, batch_size: int) -> "Permutation":
        """
        Creates a new permutation with the given batch size
        """
-        return Permutation(self.reader, self.selection, batch_size, self.transform_fn)
+        new = copy.copy(self)
+        new.batch_size = batch_size
+        return new
+
+    def with_connection_factory(
+        self, connection_factory: Callable[[str], LanceTable]
+    ) -> "Permutation":
+        """
+        Creates a new permutation that will use ``connection_factory`` to reopen
+        the base table when this permutation is unpickled in a worker process.
+
+        The factory is a callable that takes a single argument — the base table
+        name — and returns a [LanceTable]. It must be picklable; the worker
+        will unpickle it via standard ``pickle`` and call it to recover the base
+        table. In practice, picklable callables are top-level (module-level)
+        functions, ``functools.partial`` of such functions, or instances of
+        picklable classes implementing ``__call__``. Lambdas and closures over
+        local variables don't pickle with the default protocol.
+
+        Setting a factory is necessary when the URI alone is not enough to
+        re-open the connection — most importantly for LanceDB Cloud (``db://``)
+        connections, where ``api_key`` and ``region`` aren't recoverable from
+        the connection object after construction.
+
+        For local file or cloud-storage paths the factory is optional: if not
+        set, ``__getstate__`` falls back to capturing
+        ``(uri, storage_options, namespace_path)`` and re-opening via
+        ``lancedb.connect(uri, storage_options=...)``.
+
+        Examples
+        --------
+        Basic native (file-system path), parameterized via ``functools.partial``::
+
+            import functools, lancedb
+            from lancedb.permutation import Permutation
+
+            def open_native_table(uri: str, table_name: str):
+                return lancedb.connect(uri).open_table(table_name)
+
+            factory = functools.partial(open_native_table, "/data/lance_db")
+            permutation = Permutation.identity(
+                factory("training")
+            ).with_connection_factory(factory)
+
+        Native via :func:`lancedb.connect_namespace` (e.g. a directory- or
+        REST-backed namespace client). The factory takes the
+        implementation name and properties dict as partial-bound args so
+        the worker can rebuild the same namespace connection::
+
+            def open_via_namespace(
+                impl: str, properties: dict[str, str], table_name: str,
+            ):
+                return lancedb.connect_namespace(impl, properties).open_table(
+                    table_name,
+                )
+
+            factory = functools.partial(
+                open_via_namespace,
+                "dir",
+                {"root": "/data/lance_db"},
+            )
+
+        LanceDB Cloud, reading credentials from env vars at worker startup
+        so secrets aren't pickled into the dataset::
+
+            import os, lancedb
+
+            def open_remote_table(table_name: str):
+                db = lancedb.connect(
+                    "db://my-database",
+                    api_key=os.environ["LANCEDB_API_KEY"],
+                    region=os.environ.get("LANCEDB_REGION", "us-east-1"),
+                )
+                return db.open_table(table_name)
+
+            permutation = Permutation.identity(
+                open_remote_table("training")
+            ).with_connection_factory(open_remote_table)
+        """
+        assert connection_factory is not None, "connection_factory is required"
+        new = copy.copy(self)
+        new.connection_factory = connection_factory
+        return new

    @classmethod
    def identity(cls, table: LanceTable) -> "Permutation":
@@ -460,9 +548,8 @@ class Permutation:
                        f"Cannot create a permutation on split `{split}`"
                        " because no split names are defined in the permutation table"
                    )
-                split_names = permutation_table.schema.metadata.get(
-                    b"split_names", None
-                ).decode("utf-8")
+                raw = permutation_table.schema.metadata.get(b"split_names")
+                split_names = raw.decode("utf-8") if raw is not None else None
                if split_names is None:
                    raise ValueError(
                        f"Cannot create a permutation on split `{split}`"
@@ -491,11 +578,126 @@ class Permutation:
            schema = await reader.output_schema(None)
            initial_selection = {name: name for name in schema.names}
            return cls(
-                reader, initial_selection, DEFAULT_BATCH_SIZE, Transforms.arrow2python
+                base_table,
+                permutation_table,
+                split,
+                initial_selection,
+                DEFAULT_BATCH_SIZE,
+                Transforms.arrow2python,
+                _reader=reader,
            )

        return LOOP.run(do_from_tables())

+    def __getstate__(self) -> dict[str, Any]:
+        """Build a picklable state dict for this permutation.
+
+        The base table is captured either via a user-supplied
+        ``connection_factory`` (see [with_connection_factory]) or, as a
+        fallback, by introspecting ``(uri, storage_options, namespace_path)``
+        on the connection. The permutation table — always an in-memory
+        LanceDB table — is captured as a pyarrow Table (which pickles via
+        Arrow IPC natively). The reader is dropped from the wire format;
+        ``__setstate__`` rebuilds it from the restored tables.
+        """
+        permutation_data: Optional[pa.Table] = None
+        if self.permutation_table is not None:
+            permutation_data = self.permutation_table.to_arrow()
+
+        common = {
+            "base_table_name": self.base_table.name,
+            "permutation_data": permutation_data,
+            "split": self.split,
+            "selection": self.selection,
+            "batch_size": self.batch_size,
+            "transform_fn": self.transform_fn,
+            "offset": self.offset,
+            "limit": self.limit,
+            "connection_factory": self.connection_factory,
+        }
+
+        if self.connection_factory is not None:
+            # The factory carries enough state to recover the base table on
+            # its own; we don't need to capture the URI / storage options /
+            # namespace from the existing connection.
+            return common
+
+        # URI-introspection fallback: only viable for native (OSS) connections
+        # where (uri, storage_options) is enough to reopen. Remote / cloud
+        # connections don't expose recoverable api_key / region — those users
+        # must call with_connection_factory().
+        try:
+            base_uri = self.base_table._conn.uri
+            storage_options = self.base_table._conn.storage_options
+        except AttributeError as e:
+            raise ValueError(
+                "Cannot pickle this Permutation: the base table's connection "
+                "does not expose a uri/storage_options, which usually means it "
+                "is a remote (LanceDB Cloud) connection. Call "
+                "Permutation.with_connection_factory(...) first to provide a "
+                "picklable callable that re-opens the base table from a worker "
+                "process."
+            ) from e
+
+        if base_uri.startswith("memory://"):
+            # In-memory base tables don't exist in any worker process by
+            # default, so dump the entire base table into the pickle. This
+            # can be expensive for large datasets — users with large
+            # in-memory base tables should either persist them or set a
+            # connection_factory.
+            return {
+                **common,
+                "base_table_data": self.base_table.to_arrow(),
+            }
+
+        return {
+            **common,
+            "base_table_uri": base_uri,
+            "base_table_namespace": self.base_table._namespace_path,
+            "base_table_storage_options": storage_options,
+        }
+
+    def __setstate__(self, state: dict[str, Any]) -> None:
+        from . import connect
+
+        connection_factory = state["connection_factory"]
+        if connection_factory is not None:
+            base_table = connection_factory(state["base_table_name"])
+        elif "base_table_data" in state:
+            # In-memory base table inlined into the pickle; rebuild the same
+            # way we rebuild the in-memory permutation table.
+            mem_db = connect("memory://")
+            base_table = mem_db.create_table(
+                state["base_table_name"], state["base_table_data"]
+            )
+        else:
+            base_db = connect(
+                state["base_table_uri"],
+                storage_options=state["base_table_storage_options"],
+            )
+            base_table = base_db.open_table(
+                state["base_table_name"],
+                namespace_path=state["base_table_namespace"] or None,
+            )
+
+        permutation_table: Optional[LanceTable] = None
+        if state["permutation_data"] is not None:
+            mem_db = connect("memory://")
+            permutation_table = mem_db.create_table(
+                "permutation", state["permutation_data"]
+            )
+
+        self.base_table = base_table
+        self.permutation_table = permutation_table
+        self.split = state["split"]
+        self.selection = state["selection"]
+        self.batch_size = state["batch_size"]
+        self.transform_fn = state["transform_fn"]
+        self.offset = state["offset"]
+        self.limit = state["limit"]
+        self.connection_factory = connection_factory
+        self.reader = LOOP.run(self._build_reader())
+
    @property
    def schema(self) -> pa.Schema:
        async def do_output_schema():
@@ -762,7 +964,9 @@ class Permutation:
        for expensive operations such as image decoding.
        """
        assert transform is not None, "transform is required"
-        return Permutation(self.reader, self.selection, self.batch_size, transform)
+        new = copy.copy(self)
+        new.transform_fn = transform
+        return new

    def __getitem__(self, index: int) -> Any:
        """
@@ -797,12 +1001,10 @@ class Permutation:
        """
        Skip the first `skip` rows of the permutation
        """
-
-        async def do_with_skip():
-            reader = await self.reader.with_offset(skip)
-            return self._with_reader(reader)
-
-        return LOOP.run(do_with_skip())
+        new = copy.copy(self)
+        new.offset = skip
+        new.reader = LOOP.run(new._build_reader())
+        return new

    @deprecated(details="Use with_take instead")
    def take(self, limit: int) -> "Permutation":
@@ -820,12 +1022,10 @@ class Permutation:
        """
        Limit the permutation to `limit` rows (following any `skip`)
        """
-
-        async def do_with_take():
-            reader = await self.reader.with_limit(limit)
-            return self._with_reader(reader)
-
-        return LOOP.run(do_with_take())
+        new = copy.copy(self)
+        new.limit = limit
+        new.reader = LOOP.run(new._build_reader())
+        return new

    @deprecated(details="Use with_repeat instead")
    def repeat(self, times: int) -> "Permutation":
--- a/python/python/lancedb/pydantic.py
+++ b/python/python/lancedb/pydantic.py
@@ -10,6 +10,7 @@ import sys
 import types
 from abc import ABC, abstractmethod
 from datetime import date, datetime
+from enum import Enum
 from typing import (
    TYPE_CHECKING,
    Any,
@@ -314,6 +315,19 @@ def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
                return pa.list_(pa.list_(tp.value_arrow_type(), tp.dim()))
            # For regular Vector
            return pa.list_(tp.value_arrow_type(), tp.dim())
+        if _safe_issubclass(tp, Enum):
+            # Map Enum to the Arrow type of its value.
+            # For string-valued enums, use dictionary encoding for efficiency.
+            # For integer enums, use the native type.
+            # Fall back to utf8 for mixed-type or empty enums.
+            value_types = {type(m.value) for m in tp}
+            if len(value_types) == 1:
+                value_type = value_types.pop()
+                if value_type is str:
+                    # Use dictionary encoding for string enums
+                    return pa.dictionary(pa.int32(), pa.utf8())
+                return _py_type_to_arrow_type(value_type, field)
+            return pa.utf8()
    return _py_type_to_arrow_type(tp, field)


--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -25,7 +25,6 @@ import deprecation
 import numpy as np
 import pyarrow as pa
 import pyarrow.compute as pc
-import pyarrow.fs as pa_fs
 import pydantic

 from lancedb.pydantic import PYDANTIC_VERSION
@@ -1526,9 +1525,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
        return self._table._output_schema(self.to_query_object())

    def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
-        path, fs, exist = self._table._get_fts_index_path()
-        if exist:
-            return self.tantivy_to_arrow()
+        self._table._ensure_no_legacy_fts_index()

        query = self._query
        if self._phrase_query:
@@ -1552,90 +1549,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
    ):
        raise NotImplementedError("to_batches on an FTS query")

-    def tantivy_to_arrow(self) -> pa.Table:
-        try:
-            import tantivy
-        except ImportError:
-            raise ImportError(
-                "Please install tantivy-py `pip install tantivy` to use the full text search feature."  # noqa: E501
-            )
-
-        from .fts import search_index
-
-        # get the index path
-        path, fs, exist = self._table._get_fts_index_path()
-
-        # check if the index exist
-        if not exist:
-            raise FileNotFoundError(
-                "Fts index does not exist. "
-                "Please first call table.create_fts_index(['<field_names>']) to "
-                "create the fts index."
-            )
-
-        # Check that we are on local filesystem
-        if not isinstance(fs, pa_fs.LocalFileSystem):
-            raise NotImplementedError(
-                "Tantivy-based full text search "
-                "is only supported on the local filesystem"
-            )
-        # open the index
-        index = tantivy.Index.open(path)
-        # get the scores and doc ids
-        query = self._query
-        if self._phrase_query:
-            query = query.replace('"', "'")
-            query = f'"{query}"'
-        limit = self._limit if self._limit is not None else 10
-        row_ids, scores = search_index(
-            index, query, limit, ordering_field=self.ordering_field_name
-        )
-        if len(row_ids) == 0:
-            empty_schema = pa.schema([pa.field("_score", pa.float32())])
-            return pa.Table.from_batches([], schema=empty_schema)
-        scores = pa.array(scores)
-        output_tbl = self._table.to_lance().take(row_ids, columns=self._columns)
-        output_tbl = output_tbl.append_column("_score", scores)
-        # this needs to match vector search results which are uint64
-        row_ids = pa.array(row_ids, type=pa.uint64())
-
-        if self._where is not None:
-            tmp_name = "__lancedb__duckdb__indexer__"
-            output_tbl = output_tbl.append_column(
-                tmp_name, pa.array(range(len(output_tbl)))
-            )
-            try:
-                # TODO would be great to have Substrait generate pyarrow compute
-                # expressions or conversely have pyarrow support SQL expressions
-                # using Substrait
-                import duckdb
-
-                indexer = duckdb.sql(
-                    f"SELECT {tmp_name} FROM output_tbl WHERE {self._where}"
-                ).to_arrow_table()[tmp_name]
-                output_tbl = output_tbl.take(indexer).drop([tmp_name])
-                row_ids = row_ids.take(indexer)
-
-            except ImportError:
-                import tempfile
-
-                import lance
-
-                # TODO Use "memory://" instead once that's supported
-                with tempfile.TemporaryDirectory() as tmp:
-                    ds = lance.write_dataset(output_tbl, tmp)
-                    output_tbl = ds.to_table(filter=self._where)
-                    indexer = output_tbl[tmp_name]
-                    row_ids = row_ids.take(indexer)
-                    output_tbl = output_tbl.drop([tmp_name])
-
-        if self._with_row_id:
-            output_tbl = output_tbl.append_column("_rowid", row_ids)
-
-        if self._reranker is not None:
-            output_tbl = self._reranker.rerank_fts(self._query, output_tbl)
-        return output_tbl
-
    def rerank(self, reranker: Reranker) -> LanceFtsQueryBuilder:
        """Rerank the results using the specified reranker.

@@ -1730,7 +1643,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
    def _validate_query(self, query, vector=None, text=None):
        if query is not None and (vector is not None or text is not None):
            raise ValueError(
-                "You can either provide a string query in search() method"
+                "You can either provide a string query in search() method "
                "or set `vector()` and `text()` explicitly for hybrid search."
                "But not both."
            )
--- a/python/python/lancedb/remote/init.py
+++ b/python/python/lancedb/remote/init.py
@@ -145,6 +145,33 @@ class TlsConfig:

@dataclass
 class ClientConfig:
+    """Configuration for the LanceDB Cloud HTTP client.
+
+    Attributes
+    ----------
+    user_agent: str
+        User agent string sent with requests.
+    retry_config: RetryConfig
+        Configuration for retrying failed requests.
+    timeout_config: Optional[TimeoutConfig]
+        Configuration for request timeouts.
+    extra_headers: Optional[dict]
+        Additional headers to include in requests.
+    id_delimiter: Optional[str]
+        The delimiter to use when constructing object identifiers.
+    tls_config: Optional[TlsConfig]
+        TLS/mTLS configuration for secure connections.
+    header_provider: Optional[HeaderProvider]
+        Provider for dynamic headers to be added to each request.
+    user_id: Optional[str]
+        User identifier for tracking purposes. This is sent as the
+        `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
+
+        This can also be set via the `LANCEDB_USER_ID` environment variable.
+        Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another
+        environment variable that contains the user ID value.
+    """
+
    user_agent: str = f"LanceDB-Python-Client/{__version__}"
    retry_config: RetryConfig = field(default_factory=RetryConfig)
    timeout_config: Optional[TimeoutConfig] = field(default_factory=TimeoutConfig)
@@ -152,6 +179,7 @@ class ClientConfig:
    id_delimiter: Optional[str] = None
    tls_config: Optional[TlsConfig] = None
    header_provider: Optional["HeaderProvider"] = None
+    user_id: Optional[str] = None

    def __post_init__(self):
        if isinstance(self.retry_config, dict):
--- a/python/python/lancedb/remote/db.py
+++ b/python/python/lancedb/remote/db.py
@@ -24,6 +24,7 @@ from ..common import DATA
 from ..db import DBConnection, LOOP
 from ..embeddings import EmbeddingFunctionConfig
 from lance_namespace import (
+    LanceNamespace,
    CreateNamespaceResponse,
    DescribeNamespaceResponse,
    DropNamespaceResponse,
@@ -111,7 +112,7 @@ class RemoteDBConnection(DBConnection):
    @override
    def list_namespaces(
        self,
-        namespace: Optional[List[str]] = None,
+        namespace_path: Optional[List[str]] = None,
        page_token: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> ListNamespacesResponse:
@@ -119,7 +120,7 @@ class RemoteDBConnection(DBConnection):

        Parameters
        ----------
-        namespace: List[str], optional
+        namespace_path: List[str], optional
            The parent namespace to list namespaces in.
            None or empty list represents root namespace.
        page_token: str, optional
@@ -133,18 +134,18 @@ class RemoteDBConnection(DBConnection):
        ListNamespacesResponse
            Response containing namespace names and optional page_token for pagination.
        """
-        if namespace is None:
-            namespace = []
+        if namespace_path is None:
+            namespace_path = []
        return LOOP.run(
            self._conn.list_namespaces(
-                namespace=namespace, page_token=page_token, limit=limit
+                namespace_path=namespace_path, page_token=page_token, limit=limit
            )
        )

    @override
    def create_namespace(
        self,
-        namespace: List[str],
+        namespace_path: List[str],
        mode: Optional[str] = None,
        properties: Optional[Dict[str, str]] = None,
    ) -> CreateNamespaceResponse:
@@ -152,7 +153,7 @@ class RemoteDBConnection(DBConnection):

        Parameters
        ----------
-        namespace: List[str]
+        namespace_path: List[str]
            The namespace identifier to create.
        mode: str, optional
            Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
@@ -167,14 +168,14 @@ class RemoteDBConnection(DBConnection):
        """
        return LOOP.run(
            self._conn.create_namespace(
-                namespace=namespace, mode=mode, properties=properties
+                namespace_path=namespace_path, mode=mode, properties=properties
            )
        )

    @override
    def drop_namespace(
        self,
-        namespace: List[str],
+        namespace_path: List[str],
        mode: Optional[str] = None,
        behavior: Optional[str] = None,
    ) -> DropNamespaceResponse:
@@ -182,7 +183,7 @@ class RemoteDBConnection(DBConnection):

        Parameters
        ----------
-        namespace: List[str]
+        namespace_path: List[str]
            The namespace identifier to drop.
        mode: str, optional
            Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
@@ -196,16 +197,20 @@ class RemoteDBConnection(DBConnection):
            Response containing properties and transaction_id if applicable.
        """
        return LOOP.run(
-            self._conn.drop_namespace(namespace=namespace, mode=mode, behavior=behavior)
+            self._conn.drop_namespace(
+                namespace_path=namespace_path, mode=mode, behavior=behavior
+            )
        )

    @override
-    def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
+    def describe_namespace(
+        self, namespace_path: List[str]
+    ) -> DescribeNamespaceResponse:
        """Describe a namespace.

        Parameters
        ----------
-        namespace: List[str]
+        namespace_path: List[str]
            The namespace identifier to describe.

        Returns
@@ -213,12 +218,12 @@ class RemoteDBConnection(DBConnection):
        DescribeNamespaceResponse
            Response containing the namespace properties.
        """
-        return LOOP.run(self._conn.describe_namespace(namespace=namespace))
+        return LOOP.run(self._conn.describe_namespace(namespace_path=namespace_path))

    @override
    def list_tables(
        self,
-        namespace: Optional[List[str]] = None,
+        namespace_path: Optional[List[str]] = None,
        page_token: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> ListTablesResponse:
@@ -226,7 +231,7 @@ class RemoteDBConnection(DBConnection):

        Parameters
        ----------
-        namespace: List[str], optional
+        namespace_path: List[str], optional
            The namespace to list tables in.
            None or empty list represents root namespace.
        page_token: str, optional
@@ -240,11 +245,11 @@ class RemoteDBConnection(DBConnection):
        ListTablesResponse
            Response containing table names and optional page_token for pagination.
        """
-        if namespace is None:
-            namespace = []
+        if namespace_path is None:
+            namespace_path = []
        return LOOP.run(
            self._conn.list_tables(
-                namespace=namespace, page_token=page_token, limit=limit
+                namespace_path=namespace_path, page_token=page_token, limit=limit
            )
        )

@@ -254,7 +259,7 @@ class RemoteDBConnection(DBConnection):
        page_token: Optional[str] = None,
        limit: int = 10,
        *,
-        namespace: Optional[List[str]] = None,
+        namespace_path: Optional[List[str]] = None,
    ) -> Iterable[str]:
        """List the names of all tables in the database.

@@ -263,7 +268,7 @@ class RemoteDBConnection(DBConnection):

        Parameters
        ----------
-        namespace: List[str], default []
+        namespace_path: List[str], default []
            The namespace to list tables in.
            Empty list represents root namespace.
        page_token: str
@@ -282,11 +287,11 @@ class RemoteDBConnection(DBConnection):
            DeprecationWarning,
            stacklevel=2,
        )
-        if namespace is None:
-            namespace = []
+        if namespace_path is None:
+            namespace_path = []
        return LOOP.run(
            self._conn.table_names(
-                namespace=namespace, start_after=page_token, limit=limit
+                namespace_path=namespace_path, start_after=page_token, limit=limit
            )
        )

@@ -295,7 +300,7 @@ class RemoteDBConnection(DBConnection):
        self,
        name: str,
        *,
-        namespace: Optional[List[str]] = None,
+        namespace_path: Optional[List[str]] = None,
        storage_options: Optional[Dict[str, str]] = None,
        index_cache_size: Optional[int] = None,
    ) -> Table:
@@ -305,7 +310,7 @@ class RemoteDBConnection(DBConnection):
        ----------
        name: str
            The name of the table.
-        namespace: List[str], optional
+        namespace_path: List[str], optional
            The namespace to open the table from.
            None or empty list represents root namespace.

@@ -315,15 +320,15 @@ class RemoteDBConnection(DBConnection):
        """
        from .table import RemoteTable

-        if namespace is None:
-            namespace = []
+        if namespace_path is None:
+            namespace_path = []
        if index_cache_size is not None:
            logging.info(
                "index_cache_size is ignored in LanceDb Cloud"
                " (there is no local cache to configure)"
            )

-        table = LOOP.run(self._conn.open_table(name, namespace=namespace))
+        table = LOOP.run(self._conn.open_table(name, namespace_path=namespace_path))
        return RemoteTable(table, self.db_name)

    def clone_table(
@@ -331,7 +336,7 @@ class RemoteDBConnection(DBConnection):
        target_table_name: str,
        source_uri: str,
        *,
-        target_namespace: Optional[List[str]] = None,
+        target_namespace_path: Optional[List[str]] = None,
        source_version: Optional[int] = None,
        source_tag: Optional[str] = None,
        is_shallow: bool = True,
@@ -344,7 +349,7 @@ class RemoteDBConnection(DBConnection):
            The name of the target table to create.
        source_uri: str
            The URI of the source table to clone from.
-        target_namespace: List[str], optional
+        target_namespace_path: List[str], optional
            The namespace for the target table.
            None or empty list represents root namespace.
        source_version: int, optional
@@ -361,13 +366,13 @@ class RemoteDBConnection(DBConnection):
        """
        from .table import RemoteTable

-        if target_namespace is None:
-            target_namespace = []
+        if target_namespace_path is None:
+            target_namespace_path = []
        table = LOOP.run(
            self._conn.clone_table(
                target_table_name,
                source_uri,
-                target_namespace=target_namespace,
+                target_namespace_path=target_namespace_path,
                source_version=source_version,
                source_tag=source_tag,
                is_shallow=is_shallow,
@@ -387,7 +392,7 @@ class RemoteDBConnection(DBConnection):
        exist_ok: bool = False,
        embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
        *,
-        namespace: Optional[List[str]] = None,
+        namespace_path: Optional[List[str]] = None,
    ) -> Table:
        """Create a [Table][lancedb.table.Table] in the database.

@@ -395,7 +400,7 @@ class RemoteDBConnection(DBConnection):
        ----------
        name: str
            The name of the table.
-        namespace: List[str], optional
+        namespace_path: List[str], optional
            The namespace to create the table in.
            None or empty list represents root namespace.
        data: The data to initialize the table, *optional*
@@ -495,8 +500,8 @@ class RemoteDBConnection(DBConnection):
                mode = "exist_ok"
            elif not mode:
                mode = "exist_ok"
-        if namespace is None:
-            namespace = []
+        if namespace_path is None:
+            namespace_path = []
        validate_table_name(name)
        if embedding_functions is not None:
            logging.warning(
@@ -511,7 +516,7 @@ class RemoteDBConnection(DBConnection):
            self._conn.create_table(
                name,
                data,
-                namespace=namespace,
+                namespace_path=namespace_path,
                mode=mode,
                schema=schema,
                on_bad_vectors=on_bad_vectors,
@@ -521,28 +526,28 @@ class RemoteDBConnection(DBConnection):
        return RemoteTable(table, self.db_name)

    @override
-    def drop_table(self, name: str, namespace: Optional[List[str]] = None):
+    def drop_table(self, name: str, namespace_path: Optional[List[str]] = None):
        """Drop a table from the database.

        Parameters
        ----------
        name: str
            The name of the table.
-        namespace: List[str], optional
+        namespace_path: List[str], optional
            The namespace to drop the table from.
            None or empty list represents root namespace.
        """
-        if namespace is None:
-            namespace = []
-        LOOP.run(self._conn.drop_table(name, namespace=namespace))
+        if namespace_path is None:
+            namespace_path = []
+        LOOP.run(self._conn.drop_table(name, namespace_path=namespace_path))

    @override
    def rename_table(
        self,
        cur_name: str,
        new_name: str,
-        cur_namespace: Optional[List[str]] = None,
-        new_namespace: Optional[List[str]] = None,
+        cur_namespace_path: Optional[List[str]] = None,
+        new_namespace_path: Optional[List[str]] = None,
    ):
        """Rename a table in the database.

@@ -553,19 +558,32 @@ class RemoteDBConnection(DBConnection):
        new_name: str
            The new name of the table.
        """
-        if cur_namespace is None:
-            cur_namespace = []
-        if new_namespace is None:
-            new_namespace = []
+        if cur_namespace_path is None:
+            cur_namespace_path = []
+        if new_namespace_path is None:
+            new_namespace_path = []
        LOOP.run(
            self._conn.rename_table(
                cur_name,
                new_name,
-                cur_namespace=cur_namespace,
-                new_namespace=new_namespace,
+                cur_namespace_path=cur_namespace_path,
+                new_namespace_path=new_namespace_path,
            )
        )

+    @override
+    def namespace_client(self) -> LanceNamespace:
+        """Get the equivalent namespace client for this connection.
+
+        Returns a RestNamespace with the same URI and authentication headers.
+
+        Returns
+        -------
+        LanceNamespace
+            The namespace client for this connection.
+        """
+        # Synchronous wrapper: the call on the underlying connection is handed
+        # to LOOP.run and whatever it produces is returned unchanged.
+        return LOOP.run(self._conn.namespace_client())
+
    async def close(self):
        """Close the connection to the database."""
        self._conn.close()
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -22,6 +22,7 @@ from lancedb.index import (
    FTS,
    BTree,
    Bitmap,
+    HnswFlat,
    HnswSq,
    IvfFlat,
    IvfPq,
@@ -39,6 +40,7 @@ from lancedb.table import _normalize_progress

 from ..query import LanceVectorQueryBuilder, LanceQueryBuilder, LanceTakeQueryBuilder
 from ..table import AsyncTable, IndexStatistics, Query, Table, Tags
+from ..types import BaseTokenizerType


 class RemoteTable(Table):
@@ -167,7 +169,7 @@ class RemoteTable(Table):
        wait_timeout: Optional[timedelta] = None,
        with_position: bool = False,
        # tokenizer configs:
-        base_tokenizer: str = "simple",
+        base_tokenizer: BaseTokenizerType = "simple",
        language: str = "English",
        max_token_length: Optional[int] = 40,
        lower_case: bool = True,
@@ -284,13 +286,15 @@ class RemoteTable(Table):
            )
        elif index_type == "IVF_HNSW_SQ":
            config = HnswSq(distance_type=metric, num_partitions=num_partitions)
+        elif index_type == "IVF_HNSW_FLAT":
+            config = HnswFlat(distance_type=metric, num_partitions=num_partitions)
        elif index_type == "IVF_FLAT":
            config = IvfFlat(distance_type=metric, num_partitions=num_partitions)
        else:
            raise ValueError(
                f"Unknown vector index type: {index_type}. Valid options are"
                " 'IVF_FLAT', 'IVF_PQ', 'IVF_RQ', 'IVF_SQ',"
-                " 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
+                " 'IVF_HNSW_PQ', 'IVF_HNSW_SQ', 'IVF_HNSW_FLAT'"
            )

        LOOP.run(
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -57,6 +57,7 @@ from .index import (
    LabelList,
    HnswPq,
    HnswSq,
+    HnswFlat,
    FTS,
 )
 from .merge import LanceMergeInsertBuilder
@@ -86,10 +87,62 @@ from .util import (
 )
 from .index import lang_mapping

+# Base tokenizer names that are equal to one of these, or that start with one
+# of these followed by "/", are treated as model-backed by
+# _is_model_backed_tokenizer.
+_MODEL_BACKED_TOKENIZER_PREFIXES = ("jieba", "lindera")
+# Substrings that _maybe_add_fts_error_note looks for in an exception's message
+# before attaching the language-model-home hint.
+_MODEL_BACKED_TOKENIZER_ERRORS = (
+    "unknown base tokenizer",
+    "Invalid directory path:",
+    "Failed to load Jieba",
+    "Failed to load tokenizer config",
+    "Failed to initialize default tokenizer",
+)
+
+
+def _add_unique_note(exception: BaseException, note: str) -> None:
+    """Attach ``note`` to ``exception`` unless the text is already there.
+
+    The note is skipped when it is already one of the exception's
+    ``__notes__`` entries, or when it occurs as a substring of the exception's
+    first positional argument (only considered if that argument is a string).
+
+    Parameters
+    ----------
+    exception: BaseException
+        The exception to annotate in place.
+    note: str
+        The note text to add.
+    """
+    # ``__notes__`` may be missing or falsy; normalise both to an empty tuple.
+    existing_notes = getattr(exception, "__notes__", ()) or ()
+    # Only a string first argument is used as the message; anything else is
+    # treated as an empty message.
+    message = (
+        exception.args[0]
+        if exception.args and isinstance(exception.args[0], str)
+        else ""
+    )
+    if note not in existing_notes and note not in message:
+        add_note(exception, note)
+
+
+def _is_model_backed_tokenizer(base_tokenizer: str) -> bool:
+    """Return True if ``base_tokenizer`` names a model-backed tokenizer.
+
+    A name matches when it equals one of
+    ``_MODEL_BACKED_TOKENIZER_PREFIXES`` exactly or starts with one of them
+    followed by ``/``. For example ``"jieba"`` and ``"lindera/ipadic"`` match,
+    while ``"simple"`` and ``"jieba2"`` do not.
+    """
+    return any(
+        base_tokenizer == prefix or base_tokenizer.startswith(f"{prefix}/")
+        for prefix in _MODEL_BACKED_TOKENIZER_PREFIXES
+    )
+
+
+def _maybe_add_fts_error_note(
+    exception: BaseException, *, base_tokenizer: str, language: Optional[str] = None
+) -> None:
+    """Add at most one explanatory note to ``exception``, based on its message.
+
+    Two cases are recognised, checked in this order:
+
+    1. ``language`` is not None and the message contains
+       ``"not support the requested language"``: a note listing the values of
+       ``lang_mapping`` is added and the function returns.
+    2. ``base_tokenizer`` is model-backed (see ``_is_model_backed_tokenizer``)
+       and the message contains one of ``_MODEL_BACKED_TOKENIZER_ERRORS``: a
+       note about the tokenizer model directory is added.
+
+    In every other situation the exception is left untouched. Notes are added
+    through ``_add_unique_note``.
+
+    Parameters
+    ----------
+    exception: BaseException
+        The exception to annotate in place.
+    base_tokenizer: str
+        The base tokenizer name that was requested.
+    language: Optional[str]
+        The requested language, or None to skip the language check.
+    """
+    message = str(exception)
+    # The language check applies regardless of which tokenizer was requested.
+    if language is not None and "not support the requested language" in message:
+        supported_langs = ", ".join(lang_mapping.values())
+        _add_unique_note(exception, f"Supported languages: {supported_langs}")
+        return
+
+    # The model-home hint is only relevant for model-backed tokenizers...
+    if not _is_model_backed_tokenizer(base_tokenizer):
+        return
+
+    # ...and only when the message contains one of the known markers.
+    if not any(marker in message for marker in _MODEL_BACKED_TOKENIZER_ERRORS):
+        return
+
+    _add_unique_note(
+        exception,
+        "Model-backed tokenizers such as 'jieba/default' and 'lindera/ipadic' "
+        "require tokenizer models in Lance's language model home. Set "
+        "LANCE_LANGUAGE_MODEL_HOME to override the default platform data "
+        "directory under 'lance/language_models'. Expected layouts include "
+        "'<model-home>/jieba/default/...' and "
+        "'<model-home>/lindera/ipadic/...'.",
+    )
+

 if TYPE_CHECKING:
    from .db import LanceDBConnection
-    from .io import StorageOptionsProvider
    from ._lancedb import (
        Table as LanceDBTable,
        OptimizeStats,
@@ -192,7 +245,7 @@ def _into_pyarrow_reader(
            f"Unknown data type {type(data)}. "
            "Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
            "pyarrow Table/RecordBatch, or Pydantic models. "
-            "See https://lancedb.com/docs/tables/ for examples."
+            "See https://docs.lancedb.com/tables/ for examples."
        )


@@ -271,15 +324,17 @@ def _sanitize_data(
        reader,
        on_bad_vectors=on_bad_vectors,
        fill_value=fill_value,
+        target_schema=target_schema,
+        metadata=metadata,
    )

    if target_schema is None:
        target_schema, reader = _infer_target_schema(reader)

    if metadata:
-        new_metadata = target_schema.metadata or {}
-        new_metadata.update(metadata)
-        target_schema = target_schema.with_metadata(new_metadata)
+        target_schema = target_schema.with_metadata(
+            _merge_metadata(target_schema.metadata, metadata)
+        )

    _validate_schema(target_schema)
    reader = _cast_to_target_schema(reader, target_schema, allow_subschema)
@@ -295,7 +350,7 @@ def _cast_to_target_schema(
    # pa.Table.cast expects field order not to be changed.
    # Lance doesn't care about field order, so we don't need to rearrange fields
    # to match the target schema. We just need to correctly cast the fields.
-    if reader.schema == target_schema:
+    if reader.schema.equals(target_schema, check_metadata=True):
        # Fast path when the schemas are already the same
        return reader

@@ -315,7 +370,13 @@ def _cast_to_target_schema(
    def gen():
        for batch in reader:
            # Table but not RecordBatch has cast.
-            yield pa.Table.from_batches([batch]).cast(reordered_schema).to_batches()[0]
+            cast_batches = (
+                pa.Table.from_batches([batch]).cast(reordered_schema).to_batches()
+            )
+            if cast_batches:
+                yield pa.RecordBatch.from_arrays(
+                    cast_batches[0].columns, schema=reordered_schema
+                )

    return pa.RecordBatchReader.from_batches(reordered_schema, gen())

@@ -333,37 +394,51 @@ def _align_field_types(
        if target_field is None:
            raise ValueError(f"Field '{field.name}' not found in target schema")
        if pa.types.is_struct(target_field.type):
-            new_type = pa.struct(
-                _align_field_types(
-                    field.type.fields,
-                    target_field.type.fields,
+            if pa.types.is_struct(field.type):
+                new_type = pa.struct(
+                    _align_field_types(
+                        field.type.fields,
+                        target_field.type.fields,
+                    )
                )
-            )
+            else:
+                new_type = target_field.type
        elif pa.types.is_list(target_field.type):
-            new_type = pa.list_(
-                _align_field_types(
-                    [field.type.value_field],
-                    [target_field.type.value_field],
-                )[0]
-            )
+            if _is_list_like(field.type):
+                new_type = pa.list_(
+                    _align_field_types(
+                        [field.type.value_field],
+                        [target_field.type.value_field],
+                    )[0]
+                )
+            else:
+                new_type = target_field.type
        elif pa.types.is_large_list(target_field.type):
-            new_type = pa.large_list(
-                _align_field_types(
-                    [field.type.value_field],
-                    [target_field.type.value_field],
-                )[0]
-            )
+            if _is_list_like(field.type):
+                new_type = pa.large_list(
+                    _align_field_types(
+                        [field.type.value_field],
+                        [target_field.type.value_field],
+                    )[0]
+                )
+            else:
+                new_type = target_field.type
        elif pa.types.is_fixed_size_list(target_field.type):
-            new_type = pa.list_(
-                _align_field_types(
-                    [field.type.value_field],
-                    [target_field.type.value_field],
-                )[0],
-                target_field.type.list_size,
-            )
+            if _is_list_like(field.type):
+                new_type = pa.list_(
+                    _align_field_types(
+                        [field.type.value_field],
+                        [target_field.type.value_field],
+                    )[0],
+                    target_field.type.list_size,
+                )
+            else:
+                new_type = target_field.type
        else:
            new_type = target_field.type
-        new_fields.append(pa.field(field.name, new_type, field.nullable))
+        new_fields.append(
+            pa.field(field.name, new_type, field.nullable, target_field.metadata)
+        )
    return new_fields


@@ -441,6 +516,7 @@ def sanitize_create_table(
            schema = data.schema

    if metadata:
+        metadata = _merge_metadata(schema.metadata, metadata)
        schema = schema.with_metadata(metadata)
        # Need to apply metadata to the data as well
        if isinstance(data, pa.Table):
@@ -493,9 +569,9 @@ def _append_vector_columns(
    vector columns to the table.
    """
    if schema is None:
-        metadata = metadata or {}
+        metadata = _merge_metadata(metadata)
    else:
-        metadata = schema.metadata or metadata or {}
+        metadata = _merge_metadata(schema.metadata, metadata)
    functions = EmbeddingFunctionRegistry.get_instance().parse_functions(metadata)

    if not functions:
@@ -921,29 +997,29 @@ class Table(ABC):
        Parameters
        ----------
        field_names: str or list of str
-            The name(s) of the field to index.
-            If ``use_tantivy`` is False (default), only a single field name
-            (str) is supported. To index multiple fields, create a separate
-            FTS index for each field.
+            The name of the field to index. Native FTS indexes can only be
+            created on a single field at a time. To search over multiple text
+            fields, create a separate FTS index for each field.
        replace: bool, default False
            If True, replace the existing index if it exists. Note that this is
            not yet an atomic operation; the index will be temporarily
            unavailable while the new index is being created.
        writer_heap_size: int, default 1GB
-            Only available with use_tantivy=True
+            Deprecated legacy Tantivy parameter. Any value other than the
+            default raises an error.
        ordering_field_names:
-            A list of unsigned type fields to index to optionally order
-            results on at search time.
-            only available with use_tantivy=True
+            Deprecated legacy Tantivy parameter. Setting this raises an error.
        tokenizer_name: str, default "default"
-            The tokenizer to use for the index. Can be "raw", "default" or the 2 letter
-            language code followed by "_stem". So for english it would be "en_stem".
-            For available languages see: https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html
+            A legacy compatibility alias for native tokenizer configs. Can be
+            "raw", "default", or a two-letter language code followed by
+            "_stem" (for example, "en_stem" for English). For new native FTS
+            indexes, use ``base_tokenizer`` directly: ``tokenizer_name`` does
+            not expose model-backed tokenizer names such as ``jieba/default``
+            or ``lindera/ipadic``.
        use_tantivy: bool, default False
-            If True, use the legacy full-text search implementation based on tantivy.
-            If False, use the new full-text search implementation based on lance-index.
+            Deprecated legacy Tantivy parameter. Setting this to True raises an
+            error.
        with_position: bool, default False
-            Only available with use_tantivy=False
            If False, do not store the positions of the terms in the text.
            This can reduce the size of the index and improve indexing speed.
            But it will raise an exception for phrase queries.
@@ -953,8 +1029,11 @@ class Table(ABC):
            - "whitespace": Split text by whitespace, but not punctuation.
            - "raw": No tokenization. The entire text is treated as a single token.
            - "ngram": N-Gram tokenizer.
+            - "jieba/*": Jieba tokenizer loaded from Lance's language model home.
+            - "lindera/*": Lindera tokenizer loaded from Lance's language model home.
        language : str, default "English"
-            The language to use for tokenization.
+            The language to use for stemming and stop-word removal. This is not
+            the primary way to enable CJK tokenization.
        max_token_length : int, default 40
            The maximum token length to index. Tokens longer than this length will be
            ignored.
@@ -980,6 +1059,13 @@ class Table(ABC):
            The timeout to wait if indexing is asynchronous.
        name: str, optional
            The name of the index. If not provided, a default name will be generated.
+
+        Notes
+        -----
+        Model-backed tokenizers such as ``jieba/default`` and ``lindera/ipadic``
+        require tokenizer models in Lance's language model home. Set
+        ``LANCE_LANGUAGE_MODEL_HOME`` to override the default platform data
+        directory under ``lance/language_models``.
        """
        raise NotImplementedError

@@ -1724,6 +1810,16 @@ class Table(ABC):
        index_exists = fs.get_file_info(path).type != pa_fs.FileType.NotFound
        return (path, fs, index_exists)

+    def _ensure_no_legacy_fts_index(self):  # raises if a legacy index is present
+        path, _, exists = self._get_fts_index_path()  # (path, fs, index_exists)
+        if exists:
+            raise ValueError(
+                "Legacy Tantivy FTS index detected at "
+                f"{path}. Tantivy-based FTS has been removed. "
+                "Delete the legacy index and recreate it with "
+                "table.create_fts_index(...)."
+            )
+
    @abstractmethod
    def uses_v2_manifest_paths(self) -> bool:
        """
@@ -1776,30 +1872,30 @@ class LanceTable(Table):
        connection: "LanceDBConnection",
        name: str,
        *,
-        namespace: Optional[List[str]] = None,
+        namespace_path: Optional[List[str]] = None,
        storage_options: Optional[Dict[str, str]] = None,
-        storage_options_provider: Optional["StorageOptionsProvider"] = None,
        index_cache_size: Optional[int] = None,
        location: Optional[str] = None,
        namespace_client: Optional[Any] = None,
        managed_versioning: Optional[bool] = None,
+        pushdown_operations: Optional[set] = None,
        _async: AsyncTable = None,
    ):
-        if namespace is None:
-            namespace = []
+        if namespace_path is None:
+            namespace_path = []
        self._conn = connection
-        self._namespace = namespace
+        self._namespace_path = namespace_path
        self._location = location  # Store location for use in _dataset_path
        self._namespace_client = namespace_client
+        self._pushdown_operations = pushdown_operations or set()
        if _async is not None:
            self._table = _async
        else:
            self._table = LOOP.run(
                connection._conn.open_table(
                    name,
-                    namespace=namespace,
+                    namespace_path=namespace_path,
                    storage_options=storage_options,
-                    storage_options_provider=storage_options_provider,
                    index_cache_size=index_cache_size,
                    location=location,
                    namespace_client=namespace_client,
@@ -1814,13 +1910,13 @@ class LanceTable(Table):
    @property
    def namespace(self) -> List[str]:
        """Return the namespace path of the table."""
-        return self._namespace
+        return self._namespace_path

    @property
    def id(self) -> str:
        """Return the full identifier of the table (namespace$name)."""
-        if self._namespace:
-            return "$".join(self._namespace + [self.name])
+        if self._namespace_path:
+            return "$".join(self._namespace_path + [self.name])
        return self.name

    @classmethod
@@ -1841,26 +1937,26 @@ class LanceTable(Table):
        db,
        name,
        *,
-        namespace: Optional[List[str]] = None,
+        namespace_path: Optional[List[str]] = None,
        storage_options: Optional[Dict[str, str]] = None,
-        storage_options_provider: Optional["StorageOptionsProvider"] = None,
        index_cache_size: Optional[int] = None,
        location: Optional[str] = None,
        namespace_client: Optional[Any] = None,
        managed_versioning: Optional[bool] = None,
+        pushdown_operations: Optional[set] = None,
    ):
-        if namespace is None:
-            namespace = []
+        if namespace_path is None:
+            namespace_path = []
        tbl = cls(
            db,
            name,
-            namespace=namespace,
+            namespace_path=namespace_path,
            storage_options=storage_options,
-            storage_options_provider=storage_options_provider,
            index_cache_size=index_cache_size,
            location=location,
            namespace_client=namespace_client,
            managed_versioning=managed_versioning,
+            pushdown_operations=pushdown_operations,
        )

        # check the dataset exists
@@ -1893,11 +1989,11 @@ class LanceTable(Table):
            )

        if self._namespace_client is not None:
-            table_id = self._namespace + [self.name]
+            table_id = self._namespace_path + [self.name]
            return lance.dataset(
                version=self.version,
                storage_options=self._conn.storage_options,
-                namespace=self._namespace_client,
+                namespace_client=self._namespace_client,
                table_id=table_id,
                **kwargs,
            )
@@ -2141,7 +2237,13 @@ class LanceTable(Table):
        index_cache_size: Optional[int] = None,
        num_bits: int = 8,
        index_type: Literal[
-            "IVF_FLAT", "IVF_SQ", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
+            "IVF_FLAT",
+            "IVF_SQ",
+            "IVF_PQ",
+            "IVF_RQ",
+            "IVF_HNSW_SQ",
+            "IVF_HNSW_PQ",
+            "IVF_HNSW_FLAT",
        ] = "IVF_PQ",
        max_iterations: int = 50,
        sample_rate: int = 256,
@@ -2228,6 +2330,16 @@ class LanceTable(Table):
                ef_construction=ef_construction,
                target_partition_size=target_partition_size,
            )
+        elif index_type == "IVF_HNSW_FLAT":
+            config = HnswFlat(
+                distance_type=metric,
+                num_partitions=num_partitions,
+                max_iterations=max_iterations,
+                sample_rate=sample_rate,
+                m=m,
+                ef_construction=ef_construction,
+                target_partition_size=target_partition_size,
+            )
        else:
            raise ValueError(f"Unknown index type {index_type}")

@@ -2383,41 +2495,57 @@ class LanceTable(Table):
        prefix_only: bool = False,
        name: Optional[str] = None,
    ):
-        if not use_tantivy:
-            if not isinstance(field_names, str):
-                raise ValueError(
-                    "Native FTS indexes can only be created on a single field "
-                    "at a time. To search over multiple text fields, create a "
-                    "separate FTS index for each field."
-                )
+        self._ensure_no_legacy_fts_index()

-            if tokenizer_name is None:
-                tokenizer_configs = {
-                    "base_tokenizer": base_tokenizer,
-                    "language": language,
-                    "with_position": with_position,
-                    "max_token_length": max_token_length,
-                    "lower_case": lower_case,
-                    "stem": stem,
-                    "remove_stop_words": remove_stop_words,
-                    "ascii_folding": ascii_folding,
-                    "ngram_min_length": ngram_min_length,
-                    "ngram_max_length": ngram_max_length,
-                    "prefix_only": prefix_only,
-                }
-            else:
-                tokenizer_configs = self.infer_tokenizer_configs(tokenizer_name)
-
-            config = FTS(
-                **tokenizer_configs,
+        if use_tantivy:
+            raise ValueError(
+                "Tantivy-based FTS has been removed. "
+                "Remove use_tantivy and recreate the index with native FTS."
+            )
+        if ordering_field_names is not None:
+            raise ValueError(
+                "ordering_field_names was only supported by the removed "
+                "Tantivy-based FTS implementation."
+            )
+        if writer_heap_size != 1024 * 1024 * 1024:
+            raise ValueError(
+                "writer_heap_size was only supported by the removed "
+                "Tantivy-based FTS implementation."
+            )
+        if not isinstance(field_names, str):
+            raise ValueError(
+                "Native FTS indexes can only be created on a single field "
+                "at a time. To search over multiple text fields, create a "
+                "separate FTS index for each field."
+            )
+        if "." in field_names:
+            raise ValueError(
+                "Native FTS indexes can only be created on top-level fields. "
+                f"Received nested field path: {field_names!r}."
            )

-            # delete the existing legacy index if it exists
-            if replace:
-                path, fs, exist = self._get_fts_index_path()
-                if exist:
-                    fs.delete_dir(path)
+        if tokenizer_name is None:
+            tokenizer_configs = {
+                "base_tokenizer": base_tokenizer,
+                "language": language,
+                "with_position": with_position,
+                "max_token_length": max_token_length,
+                "lower_case": lower_case,
+                "stem": stem,
+                "remove_stop_words": remove_stop_words,
+                "ascii_folding": ascii_folding,
+                "ngram_min_length": ngram_min_length,
+                "ngram_max_length": ngram_max_length,
+                "prefix_only": prefix_only,
+            }
+        else:
+            tokenizer_configs = self.infer_tokenizer_configs(tokenizer_name)

+        config = FTS(
+            **tokenizer_configs,
+        )
+
+        try:
            LOOP.run(
                self._table.create_index(
                    field_names,
@@ -2426,42 +2554,13 @@ class LanceTable(Table):
                    name=name,
                )
            )
-            return
-
-        from .fts import create_index, populate_index
-
-        if isinstance(field_names, str):
-            field_names = [field_names]
-
-        if isinstance(ordering_field_names, str):
-            ordering_field_names = [ordering_field_names]
-
-        path, fs, exist = self._get_fts_index_path()
-        if exist:
-            if not replace:
-                raise ValueError("Index already exists. Use replace=True to overwrite.")
-            fs.delete_dir(path)
-
-        if not isinstance(fs, pa_fs.LocalFileSystem):
-            raise NotImplementedError(
-                "Full-text search is only supported on the local filesystem"
+        except (ValueError, RuntimeError) as e:
+            _maybe_add_fts_error_note(
+                e,
+                base_tokenizer=config.base_tokenizer,
+                language=config.language,
            )
-
-        if tokenizer_name is None:
-            tokenizer_name = "default"
-        index = create_index(
-            path,
-            field_names,
-            ordering_fields=ordering_field_names,
-            tokenizer_name=tokenizer_name,
-        )
-        populate_index(
-            index,
-            self,
-            field_names,
-            ordering_fields=ordering_field_names,
-            writer_heap_size=writer_heap_size,
-        )
+            raise e

    @staticmethod
    def infer_tokenizer_configs(tokenizer_name: str) -> dict:
@@ -2803,13 +2902,13 @@ class LanceTable(Table):
        fill_value: float = 0.0,
        embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
        *,
-        namespace: Optional[List[str]] = None,
+        namespace_path: Optional[List[str]] = None,
        storage_options: Optional[Dict[str, str | bool]] = None,
-        storage_options_provider: Optional["StorageOptionsProvider"] = None,
        data_storage_version: Optional[str] = None,
        enable_v2_manifest_paths: Optional[bool] = None,
        location: Optional[str] = None,
        namespace_client: Optional[Any] = None,
+        pushdown_operations: Optional[set] = None,
    ):
        """
        Create a new table.
@@ -2864,13 +2963,14 @@ class LanceTable(Table):
            Deprecated.  Set `storage_options` when connecting to the database and set
            `new_table_enable_v2_manifest_paths` in the options.
        """
-        if namespace is None:
-            namespace = []
+        if namespace_path is None:
+            namespace_path = []
        self = cls.__new__(cls)
        self._conn = db
-        self._namespace = namespace
+        self._namespace_path = namespace_path
        self._location = location
        self._namespace_client = namespace_client
+        self._pushdown_operations = pushdown_operations or set()

        if data_storage_version is not None:
            warnings.warn(
@@ -2903,10 +3003,10 @@ class LanceTable(Table):
                on_bad_vectors=on_bad_vectors,
                fill_value=fill_value,
                embedding_functions=embedding_functions,
-                namespace=namespace,
+                namespace_path=namespace_path,
                storage_options=storage_options,
-                storage_options_provider=storage_options_provider,
                location=location,
+                namespace_client=namespace_client,
            )
        )
        return self
@@ -2974,6 +3074,15 @@ class LanceTable(Table):
        batch_size: Optional[int] = None,
        timeout: Optional[timedelta] = None,
    ) -> pa.RecordBatchReader:
+        if (
+            "QueryTable" in self._pushdown_operations
+            and self._namespace_client is not None
+        ):
+            from lancedb.namespace import _execute_server_side_query
+
+            table_id = self._namespace_path + [self.name]
+            return _execute_server_side_query(self._namespace_client, table_id, query)
+
        async_iter = LOOP.run(
            self._table._execute_query(query, batch_size=batch_size, timeout=timeout)
        )
@@ -3203,43 +3312,157 @@ def _handle_bad_vectors(
    reader: pa.RecordBatchReader,
    on_bad_vectors: Literal["error", "drop", "fill", "null"] = "error",
    fill_value: float = 0.0,
+    target_schema: Optional[pa.Schema] = None,
+    metadata: Optional[dict] = None,
 ) -> pa.RecordBatchReader:
-    vector_columns = []
+    vector_columns = _find_vector_columns(reader.schema, target_schema, metadata)
+    if not vector_columns:
+        return reader

-    for field in reader.schema:
-        # They can provide a 'vector' column that isn't yet a FSL
-        named_vector_col = (
-            (
-                pa.types.is_list(field.type)
-                or pa.types.is_large_list(field.type)
-                or pa.types.is_fixed_size_list(field.type)
-            )
-            and pa.types.is_floating(field.type.value_type)
-            and field.name == VECTOR_COLUMN_NAME
-        )
-        # TODO: we're making an assumption that fixed size list of 10 or more
-        # is a vector column. This is definitely a bit hacky.
-        likely_vector_col = (
-            pa.types.is_fixed_size_list(field.type)
-            and pa.types.is_floating(field.type.value_type)
-            and (field.type.list_size >= 10)
-        )
-
-        if named_vector_col or likely_vector_col:
-            vector_columns.append(field.name)
+    output_schema = _vector_output_schema(reader.schema, vector_columns)

    def gen():
        for batch in reader:
-            for name in vector_columns:
+            pending_dims = []
+            for vector_column in vector_columns:
+                dim = vector_column["expected_dim"]
+                if target_schema is not None and dim is None:
+                    dim = _infer_vector_dim(batch[vector_column["name"]])
+                    pending_dims.append(vector_column)
                batch = _handle_bad_vector_column(
                    batch,
-                    vector_column_name=name,
+                    vector_column_name=vector_column["name"],
                    on_bad_vectors=on_bad_vectors,
                    fill_value=fill_value,
+                    expected_dim=dim,
+                    expected_value_type=vector_column["expected_value_type"],
                )
-            yield batch
+            for vector_column in pending_dims:
+                if vector_column["expected_dim"] is None:
+                    vector_column["expected_dim"] = _infer_vector_dim(
+                        batch[vector_column["name"]]
+                    )
+            if batch.schema.equals(output_schema, check_metadata=True):
+                yield batch
+                continue

-    return pa.RecordBatchReader.from_batches(reader.schema, gen())
+            cast_batches = (
+                pa.Table.from_batches([batch]).cast(output_schema).to_batches()
+            )
+            if cast_batches:
+                yield pa.RecordBatch.from_arrays(
+                    cast_batches[0].columns,
+                    schema=output_schema,
+                )
+
+    return pa.RecordBatchReader.from_batches(output_schema, gen())
+
+
+def _find_vector_columns(
+    reader_schema: pa.Schema,
+    target_schema: Optional[pa.Schema],
+    metadata: Optional[dict],
+) -> List[dict]:  # one dict per column: name, expected_dim, expected_value_type
+    if target_schema is None:  # no target: detect from the reader schema alone
+        vector_columns = []
+        for field in reader_schema:
+            named_vector_col = (  # float list-like named VECTOR_COLUMN_NAME
+                _is_list_like(field.type)
+                and pa.types.is_floating(field.type.value_type)
+                and field.name == VECTOR_COLUMN_NAME
+            )
+            likely_vector_col = (  # float fixed-size list with >= 10 items
+                pa.types.is_fixed_size_list(field.type)
+                and pa.types.is_floating(field.type.value_type)
+                and (field.type.list_size >= 10)
+            )
+            if named_vector_col or likely_vector_col:
+                vector_columns.append(
+                    {
+                        "name": field.name,
+                        "expected_dim": None,
+                        "expected_value_type": None,
+                    }
+                )
+        return vector_columns

+    reader_column_names = set(reader_schema.names)
+    active_metadata = _merge_metadata(target_schema.metadata, metadata)
+    embedding_function_columns = set(
+        EmbeddingFunctionRegistry.get_instance().parse_functions(active_metadata).keys()
+    )
+    vector_columns = []
+    for field in target_schema:
+        if field.name not in reader_column_names:
+            continue  # target field is not present in the reader
+        if not _is_list_like(field.type) or not pa.types.is_floating(
+            field.type.value_type
+        ):
+            continue  # target type is not a list of floats

+        reader_field = reader_schema.field(field.name)
+        named_vector_col = (  # selected by column name
+            field.name in embedding_function_columns
+            or field.name == VECTOR_COLUMN_NAME
+            or (field.name == "embedding" and pa.types.is_fixed_size_list(field.type))
+        )
+        typed_fixed_vector_col = (  # reader field: float fixed-size list, >= 10
+            pa.types.is_fixed_size_list(reader_field.type)
+            and pa.types.is_floating(reader_field.type.value_type)
+            and reader_field.type.list_size >= 10
+        )

+        if named_vector_col or typed_fixed_vector_col:
+            vector_columns.append(
+                {
+                    "name": field.name,
+                    "expected_dim": (  # known only for a fixed-size target type
+                        field.type.list_size
+                        if pa.types.is_fixed_size_list(field.type)
+                        else None
+                    ),
+                    "expected_value_type": field.type.value_type,
+                }
+            )

+    return vector_columns
+
+
+def _vector_output_schema(
+    reader_schema: pa.Schema,
+    vector_columns: List[dict],
+) -> pa.Schema:
+    """Rebuild ``reader_schema`` with output types for the vector columns."""
+    specs = {spec["name"]: spec for spec in vector_columns}
+    out_fields = []
+    for src in reader_schema:
+        spec = specs.get(src.name)
+        # Columns without a vector spec keep their original type.
+        new_type = src.type if spec is None else _vector_output_type(src, spec)
+        out_fields.append(pa.field(src.name, new_type, src.nullable, src.metadata))
+    # Schema-level metadata is carried over unchanged.
+    return pa.schema(out_fields, metadata=reader_schema.metadata)
+
+
+def _vector_output_type(field: pa.Field, vector_column: dict) -> pa.DataType:
+    if not _is_list_like(field.type):  # non-list columns are left untouched
+        return field.type

+    # Null/integer values are re-typed; is_integer() also matches unsigned ints.
+    if vector_column["expected_value_type"] is not None and (
+        pa.types.is_null(field.type.value_type)
+        or pa.types.is_integer(field.type.value_type)
+    ):
+        return pa.list_(vector_column["expected_value_type"])

+    if (
+        vector_column["expected_dim"] is not None
+        and pa.types.is_fixed_size_list(field.type)
+        and field.type.list_size != vector_column["expected_dim"]
+    ):
+        return pa.list_(field.type.value_type)  # wrong fixed size: variable list

+    return field.type


 def _handle_bad_vector_column(
@@ -3247,6 +3470,8 @@ def _handle_bad_vector_column(
    vector_column_name: str,
    on_bad_vectors: str = "error",
    fill_value: float = 0.0,
+    expected_dim: Optional[int] = None,
+    expected_value_type: Optional[pa.DataType] = None,
 ) -> pa.RecordBatch:
    """
    Ensure that the vector column exists and has type fixed_size_list(float)
@@ -3263,14 +3488,39 @@ def _handle_bad_vector_column(
    fill_value: float, default 0.0
        The value to use when filling vectors. Only used if on_bad_vectors="fill".
    """
+    position = data.column_names.index(vector_column_name)
    vec_arr = data[vector_column_name]
+    if not _is_list_like(vec_arr.type):
+        return data

-    has_nan = has_nan_values(vec_arr)
+    if (
+        expected_dim is not None
+        and pa.types.is_fixed_size_list(vec_arr.type)
+        and vec_arr.type.list_size != expected_dim
+    ):
+        vec_arr = pa.array(vec_arr.to_pylist(), type=pa.list_(vec_arr.type.value_type))
+        data = data.set_column(position, vector_column_name, vec_arr)

-    if pa.types.is_fixed_size_list(vec_arr.type):
+    if expected_value_type is not None and (
+        pa.types.is_integer(vec_arr.type.value_type)
+        or pa.types.is_unsigned_integer(vec_arr.type.value_type)
+    ):
+        vec_arr = pa.array(vec_arr.to_pylist(), type=pa.list_(expected_value_type))
+        data = data.set_column(position, vector_column_name, vec_arr)
+
+    if pa.types.is_floating(vec_arr.type.value_type):
+        has_nan = has_nan_values(vec_arr)
+    else:
+        has_nan = pa.array([False] * len(vec_arr))
+
+    if expected_dim is not None:
+        dim = expected_dim
+    elif pa.types.is_fixed_size_list(vec_arr.type):
        dim = vec_arr.type.list_size
    else:
-        dim = _modal_list_size(vec_arr)
+        dim = _infer_vector_dim(vec_arr)
+        if dim is None:
+            return data
    has_wrong_dim = pc.not_equal(pc.list_value_length(vec_arr), dim)

    has_bad_vectors = pc.any(has_nan).as_py() or pc.any(has_wrong_dim).as_py()
@@ -3308,13 +3558,12 @@ def _handle_bad_vector_column(
                )
            vec_arr = pc.if_else(
                is_bad,
-                pa.scalar([fill_value] * dim),
+                pa.scalar([fill_value] * dim, type=vec_arr.type),
                vec_arr,
            )
        else:
            raise ValueError(f"Invalid value for on_bad_vectors: {on_bad_vectors}")

-    position = data.column_names.index(vector_column_name)
    return data.set_column(position, vector_column_name, vec_arr)


@@ -3335,6 +3584,28 @@ def has_nan_values(arr: Union[pa.ListArray, pa.ChunkedArray]) -> pa.BooleanArray
    return pc.is_in(indices, has_nan_indices)


+def _is_list_like(data_type: pa.DataType) -> bool:
+    """Return True for Arrow list, large_list and fixed_size_list types."""
+    type_checks = (
+        pa.types.is_list, pa.types.is_large_list, pa.types.is_fixed_size_list
+    )
+    return any(is_kind(data_type) for is_kind in type_checks)
+
+
+def _merge_metadata(*metadata_dicts: Optional[dict]) -> dict:
+    """Merge dicts in order (later wins), encoding str keys/values as UTF-8."""
+
+    def _as_bytes(item):
+        return item.encode("utf-8") if isinstance(item, str) else item
+
+    combined = {}
+    for source in metadata_dicts:
+        if source is not None:
+            for key, value in source.items():
+                combined[_as_bytes(key)] = _as_bytes(value)
+    return combined
+
+
 def _name_suggests_vector_column(field_name: str) -> bool:
    """Check if a field name indicates a vector column."""
    name_lower = field_name.lower()
@@ -3402,6 +3673,16 @@ def _modal_list_size(arr: Union[pa.ListArray, pa.ChunkedArray]) -> int:
    return pc.mode(pc.list_value_length(arr))[0].as_py()["mode"]


+def _infer_vector_dim(arr: Union[pa.Array, pa.ChunkedArray]) -> Optional[int]:
+    """Return the modal non-zero list length of ``arr``, or None if unavailable."""
+    if not _is_list_like(arr.type):
+        return None
+    sizes = pc.list_value_length(arr)
+    non_empty = pc.filter(sizes, pc.greater(sizes, 0))
+    # Without any list of length > 0 there is nothing to infer a dimension from.
+    return pc.mode(non_empty)[0].as_py()["mode"] if len(non_empty) else None
+
+
 def _validate_schema(schema: pa.Schema):
    """
    Make sure the metadata is valid utf8
@@ -3609,7 +3890,18 @@ class AsyncTable:
        *,
        replace: Optional[bool] = None,
        config: Optional[
-            Union[IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
+            Union[
+                IvfFlat,
+                IvfPq,
+                IvfRq,
+                HnswPq,
+                HnswSq,
+                HnswFlat,
+                BTree,
+                Bitmap,
+                LabelList,
+                FTS,
+            ]
        ] = None,
        wait_timeout: Optional[timedelta] = None,
        name: Optional[str] = None,
@@ -3656,6 +3948,7 @@ class AsyncTable:
                    IvfRq,
                    HnswPq,
                    HnswSq,
+                    HnswFlat,
                    BTree,
                    Bitmap,
                    LabelList,
@@ -3675,11 +3968,13 @@ class AsyncTable:
                name=name,
                train=train,
            )
-        except ValueError as e:
-            if "not support the requested language" in str(e):
-                supported_langs = ", ".join(lang_mapping.values())
-                help_msg = f"Supported languages: {supported_langs}"
-                add_note(e, help_msg)
+        except (ValueError, RuntimeError) as e:
+            if isinstance(config, FTS):
+                _maybe_add_fts_error_note(
+                    e,
+                    base_tokenizer=config.base_tokenizer,
+                    language=config.language,
+                )
            raise e

    async def drop_index(self, name: str) -> None:
@@ -4824,6 +5119,7 @@ class IndexStatistics:
        "IVF_RQ",
        "IVF_HNSW_SQ",
        "IVF_HNSW_PQ",
+        "IVF_HNSW_FLAT",
        "FTS",
        "BTREE",
        "BITMAP",
--- a/python/python/lancedb/types.py
+++ b/python/python/lancedb/types.py
@@ -24,6 +24,7 @@ VectorIndexType = Literal[
    "IVF_PQ",
    "IVF_HNSW_SQ",
    "IVF_HNSW_PQ",
+    "IVF_HNSW_FLAT",
    "IVF_RQ",
 ]
 ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"]
@@ -31,6 +32,7 @@ IndexType = Literal[
    "IVF_PQ",
    "IVF_HNSW_PQ",
    "IVF_HNSW_SQ",
+    "IVF_HNSW_FLAT",
    "IVF_SQ",
    "FTS",
    "BTREE",
@@ -40,4 +42,5 @@ IndexType = Literal[
 ]

 # Tokenizer literals
-BaseTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
+BuiltinTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
+BaseTokenizerType = BuiltinTokenizerType | str
--- a/python/python/tests/docs/test_search.py
+++ b/python/python/tests/docs/test_search.py
@@ -180,7 +180,7 @@ def test_fts_fuzzy_query():
        ),
        mode="overwrite",
    )
-    table.create_fts_index("text", use_tantivy=False, replace=True)
+    table.create_fts_index("text", replace=True)

    results = table.search(MatchQuery("foo", "text", fuzziness=1)).to_pandas()
    assert len(results) == 4
@@ -230,7 +230,7 @@ def test_fts_boost_query():
        ),
        mode="overwrite",
    )
-    table.create_fts_index("desc", use_tantivy=False, replace=True)
+    table.create_fts_index("desc", replace=True)

    results = table.search(
        BoostQuery(
@@ -265,7 +265,7 @@ def test_fts_boolean_query(tmp_path):
        ],
        mode="overwrite",
    )
-    table.create_fts_index("text", use_tantivy=False, replace=True)
+    table.create_fts_index("text", replace=True)

    # SHOULD
    results = table.search(
@@ -319,9 +319,7 @@ def test_fts_native():
        ],
    )

-    # passing `use_tantivy=False` to use lance FTS index
-    # `use_tantivy=True` by default
-    table.create_fts_index("text", use_tantivy=False)
+    table.create_fts_index("text")
    table.search("puppy").limit(10).select(["text"]).to_list()
    # [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]
    # ...
@@ -332,7 +330,6 @@ def test_fts_native():
    # --8<-- [start:fts_config_folding]
    table.create_fts_index(
        "text",
-        use_tantivy=False,
        language="French",
        stem=True,
        ascii_folding=True,
@@ -346,7 +343,7 @@ def test_fts_native():
    table.search("puppy").limit(10).where("text='foo'", prefilter=False).to_list()
    # --8<-- [end:fts_postfiltering]
    # --8<-- [start:fts_with_position]
-    table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True)
+    table.create_fts_index("text", with_position=True, replace=True)
    # --8<-- [end:fts_with_position]
    # --8<-- [start:fts_incremental_index]
    table.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}])
--- a/python/python/tests/models/jieba/default/dict.txt
+++ b/python/python/tests/models/jieba/default/dict.txt
@@ -0,0 +1,8 @@
+我们 98740 r
+都 202780 d
+有 423765 v
+光明 1219 n
+的 318825 uj
+前途 1263 n
+前 62779 f
+途 857 n
--- a/python/python/tests/models/lindera/ipadic/config.yml
+++ b/python/python/tests/models/lindera/ipadic/config.yml
@@ -0,0 +1,4 @@
+segmenter:
+  mode: "normal"
+  dictionary:
+    path: "./python/tests/models/lindera/ipadic/main"
--- a/python/python/tests/models/lindera/ipadic/main.zip
+++ b/python/python/tests/models/lindera/ipadic/main.zip
--- a/python/python/tests/test_db.py
+++ b/python/python/tests/test_db.py
@@ -3,6 +3,7 @@


 import re
+import sys
 from datetime import timedelta
 import os

@@ -14,8 +15,7 @@ import pytest
 from lancedb.pydantic import LanceModel, Vector


-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_basic(tmp_path, use_tantivy):
+def test_basic(tmp_path):
    db = lancedb.connect(tmp_path)

    assert db.uri == str(tmp_path)
@@ -48,7 +48,7 @@ def test_basic(tmp_path, use_tantivy):
    assert len(rs) == 1
    assert rs["item"].iloc[0] == "foo"

-    table.create_fts_index("item", use_tantivy=use_tantivy)
+    table.create_fts_index("item")
    rs = table.search("bar", query_type="fts").to_pandas()
    assert len(rs) == 1
    assert rs["item"].iloc[0] == "bar"
@@ -183,8 +183,8 @@ def test_table_names(tmp_db: lancedb.DBConnection):
    result = list(tmp_db.table_names("test2", limit=2))
    assert result == ["test3"], f"Expected ['test3'], got {result}"

-    # Test that namespace parameter can be passed as keyword
-    result = list(tmp_db.table_names(namespace=[]))
+    # Test that namespace_path parameter can be passed as keyword
+    result = list(tmp_db.table_names(namespace_path=[]))
    assert len(result) == 3


@@ -896,42 +896,22 @@ def test_bypass_vector_index_sync(tmp_db: lancedb.DBConnection):


 def test_local_namespace_operations(tmp_path):
-    """Test that local mode namespace operations behave as expected."""
-    # Create a local database connection
+    """Test that local mode namespace operations work via directory namespace."""
    db = lancedb.connect(tmp_path)

-    # Test list_namespaces returns empty list for root namespace
-    namespaces = db.list_namespaces().namespaces
-    assert namespaces == []
+    # Root namespace starts empty
+    assert db.list_namespaces().namespaces == []

-    # Test list_namespaces with non-empty namespace raises NotImplementedError
-    with pytest.raises(
-        NotImplementedError,
-        match="Namespace operations are not supported for listing database",
-    ):
-        db.list_namespaces(namespace=["test"])
+    # Create and list child namespace
+    db.create_namespace(["child"])
+    assert "child" in db.list_namespaces().namespaces

+    # List namespaces under child
+    assert db.list_namespaces(namespace_path=["child"]).namespaces == []

-def test_local_create_namespace_not_supported(tmp_path):
-    """Test that create_namespace is not supported in local mode."""
-    db = lancedb.connect(tmp_path)
-
-    with pytest.raises(
-        NotImplementedError,
-        match="Namespace operations are not supported for listing database",
-    ):
-        db.create_namespace(["test_namespace"])
-
-
-def test_local_drop_namespace_not_supported(tmp_path):
-    """Test that drop_namespace is not supported in local mode."""
-    db = lancedb.connect(tmp_path)
-
-    with pytest.raises(
-        NotImplementedError,
-        match="Namespace operations are not supported for listing database",
-    ):
-        db.drop_namespace(["test_namespace"])
+    # Drop namespace
+    db.drop_namespace(["child"])
+    assert db.list_namespaces().namespaces == []


 def test_clone_table_latest_version(tmp_path):
@@ -1048,3 +1028,59 @@ def test_clone_table_deep_clone_fails(tmp_path):
    source_uri = os.path.join(tmp_path, "source.lance")
    with pytest.raises(Exception, match="Deep clone is not yet implemented"):
        db.clone_table("cloned", source_uri, is_shallow=False)
+
+
+@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
+def test_namespace_client_native_storage(tmp_path):
+    """Test namespace_client() returns DirectoryNamespace for native storage."""
+    from lance.namespace import DirectoryNamespace
+
+    db = lancedb.connect(tmp_path)
+    ns_client = db.namespace_client()
+
+    assert isinstance(ns_client, DirectoryNamespace)
+    assert str(tmp_path) in ns_client.namespace_id()
+
+
+@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
+def test_namespace_client_with_storage_options(tmp_path):
+    """Test namespace_client() works when the connection has storage options."""
+    from lance.namespace import DirectoryNamespace
+
+    storage_options = {"timeout": "10s"}
+    db = lancedb.connect(tmp_path, storage_options=storage_options)
+    ns_client = db.namespace_client()
+
+    assert isinstance(ns_client, DirectoryNamespace)
+
+
+@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
+def test_namespace_client_operations(tmp_path):
+    """Test that namespace_client() returns a functional namespace client."""
+    db = lancedb.connect(tmp_path)
+    ns_client = db.namespace_client()
+
+    # Create a table through the main db connection
+    data = [{"id": 1, "text": "hello", "vector": [1.0, 2.0]}]
+    db.create_table("test_table", data=data)
+
+    # Verify the namespace client can see the table
+    from lance_namespace import ListTablesRequest
+
+    # id=[] means root namespace
+    response = ns_client.list_tables(ListTablesRequest(id=[]))
+    # Tables can be strings or objects with a name attribute
+    table_names = [t.name if hasattr(t, "name") else t for t in response.tables]
+    assert "test_table" in table_names
+
+
+@pytest.mark.skipif(sys.platform == "win32", reason="Namespace client issues")
+def test_namespace_client_namespace_connection(tmp_path):
+    """Test namespace_client() returns the backing client for namespace connections."""
+    from lance.namespace import DirectoryNamespace
+
+    db = lancedb.connect_namespace("dir", {"root": str(tmp_path)})
+    ns_client = db.namespace_client()
+
+    assert isinstance(ns_client, DirectoryNamespace)
+    assert str(tmp_path) in ns_client.namespace_id()
--- a/python/python/tests/test_fts.py
+++ b/python/python/tests/test_fts.py
@@ -15,7 +15,10 @@
 #  limitations under the License.
 import os
 import random
+import shutil
 from unittest import mock
+from pathlib import Path
+import zipfile

 import lancedb as ldb
 from lancedb.db import DBConnection
@@ -36,8 +39,7 @@ import pytest
 import pytest_asyncio
 from utils import exception_output

-pytest.importorskip("lancedb.fts")
-tantivy = pytest.importorskip("tantivy")
+TEST_LANGUAGE_MODEL_HOME = Path(__file__).parent / "models"


@pytest.fixture
@@ -92,6 +94,40 @@ def table(tmp_path) -> ldb.table.LanceTable:
    return table


+@pytest.fixture
+def language_model_home(monkeypatch, tmp_path):
+    model_home = tmp_path / "language-models"
+    shutil.copytree(TEST_LANGUAGE_MODEL_HOME, model_home)
+    monkeypatch.setenv("LANCE_LANGUAGE_MODEL_HOME", str(model_home))
+    return model_home
+
+
+@pytest.fixture
+def lindera_ipadic(language_model_home):
+    model_path = language_model_home / "lindera" / "ipadic"
+    extracted_model = model_path / "main"
+    config_path = model_path / "config.yml"
+
+    if extracted_model.exists():
+        shutil.rmtree(extracted_model)
+
+    with zipfile.ZipFile(model_path / "main.zip", "r") as zip_ref:
+        zip_ref.extractall(model_path)
+    config_path.write_text(
+        "segmenter:\n"
+        '  mode: "normal"\n'
+        "  dictionary:\n"
+        f'    path: "{extracted_model.resolve().as_posix()}"\n',
+        encoding="utf-8",
+    )
+
+    try:
+        yield
+    finally:
+        if extracted_model.exists():
+            shutil.rmtree(extracted_model)
+
+
@pytest_asyncio.fixture
 async def async_table(tmp_path) -> ldb.table.AsyncTable:
    # Use local random state to avoid affecting other tests
@@ -144,58 +180,53 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
    return table


-def test_create_index(tmp_path):
-    index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
-    assert isinstance(index, tantivy.Index)
-    assert os.path.exists(str(tmp_path / "index"))
+@pytest.mark.parametrize(
+    ("kwargs", "match"),
+    [
+        (
+            {"use_tantivy": True},
+            "Tantivy-based FTS has been removed",
+        ),
+        (
+            {"ordering_field_names": ["count"]},
+            "ordering_field_names was only supported",
+        ),
+        (
+            {"writer_heap_size": 128},
+            "writer_heap_size was only supported",
+        ),
+    ],
+)
+def test_reject_removed_tantivy_parameters(table, kwargs, match):
+    with pytest.raises(ValueError, match=match):
+        table.create_fts_index("text", **kwargs)


-def test_create_index_with_stemming(tmp_path, table):
-    index = ldb.fts.create_index(
-        str(tmp_path / "index"), ["text"], tokenizer_name="en_stem"
-    )
-    assert isinstance(index, tantivy.Index)
-    assert os.path.exists(str(tmp_path / "index"))
+def test_reject_legacy_tantivy_index(table):
+    path, _, _ = table._get_fts_index_path()
+    os.makedirs(path, exist_ok=True)

-    # Check stemming by running tokenizer on non empty table
-    table.create_fts_index("text", tokenizer_name="en_stem", use_tantivy=True)
+    with pytest.raises(ValueError, match="Legacy Tantivy FTS index detected"):
+        table.search("puppy").limit(5).to_list()
+
+    with pytest.raises(ValueError, match="Legacy Tantivy FTS index detected"):
+        table.create_fts_index("text")


-@pytest.mark.parametrize("use_tantivy", [True, False])
@pytest.mark.parametrize("with_position", [True, False])
-def test_create_inverted_index(table, use_tantivy, with_position):
-    if use_tantivy and not with_position:
-        pytest.skip("we don't support building a tantivy index without position")
+def test_create_inverted_index(table, with_position):
    table.create_fts_index(
        "text",
-        use_tantivy=use_tantivy,
        with_position=with_position,
        name="custom_fts_index",
    )
-    if not use_tantivy:
-        indices = table.list_indices()
-        fts_indices = [i for i in indices if i.index_type == "FTS"]
-        assert any(i.name == "custom_fts_index" for i in fts_indices)
+    indices = table.list_indices()
+    fts_indices = [i for i in indices if i.index_type == "FTS"]
+    assert any(i.name == "custom_fts_index" for i in fts_indices)


-def test_populate_index(tmp_path, table):
-    index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
-    assert ldb.fts.populate_index(index, table, ["text"]) == len(table)
-
-
-def test_search_index(tmp_path, table):
-    index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
-    ldb.fts.populate_index(index, table, ["text"])
-    index.reload()
-    results = ldb.fts.search_index(index, query="puppy", limit=5)
-    assert len(results) == 2
-    assert len(results[0]) == 5  # row_ids
-    assert len(results[1]) == 5  # _score
-
-
-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_search_fts(table, use_tantivy):
-    table.create_fts_index("text", use_tantivy=use_tantivy)
+def test_search_fts(table):
+    table.create_fts_index("text")
    results = table.search("puppy").select(["id", "text"]).limit(5).to_list()
    assert len(results) == 5
    assert len(results[0]) == 3  # id, text, _score
@@ -204,53 +235,52 @@ def test_search_fts(table, use_tantivy):
    results = table.search("puppy").select(["id", "text"]).to_list()
    assert len(results) == 10

-    if not use_tantivy:
-        # Test with a query
-        results = (
-            table.search(MatchQuery("puppy", "text"))
-            .select(["id", "text"])
-            .limit(5)
-            .to_list()
-        )
-        assert len(results) == 5
+    # Test with a query
+    results = (
+        table.search(MatchQuery("puppy", "text"))
+        .select(["id", "text"])
+        .limit(5)
+        .to_list()
+    )
+    assert len(results) == 5

-        # Test boost query
-        results = (
-            table.search(
-                BoostQuery(
-                    MatchQuery("puppy", "text"),
-                    MatchQuery("runs", "text"),
-                )
+    # Test boost query
+    results = (
+        table.search(
+            BoostQuery(
+                MatchQuery("puppy", "text"),
+                MatchQuery("runs", "text"),
            )
-            .select(["id", "text"])
-            .limit(5)
-            .to_list()
        )
-        assert len(results) == 5
+        .select(["id", "text"])
+        .limit(5)
+        .to_list()
+    )
+    assert len(results) == 5

-        # Test multi match query
-        table.create_fts_index("text2", use_tantivy=use_tantivy)
-        results = (
-            table.search(MultiMatchQuery("puppy", ["text", "text2"]))
-            .select(["id", "text"])
-            .limit(5)
-            .to_list()
-        )
-        assert len(results) == 5
-        assert len(results[0]) == 3  # id, text, _score
+    # Test multi match query
+    table.create_fts_index("text2")
+    results = (
+        table.search(MultiMatchQuery("puppy", ["text", "text2"]))
+        .select(["id", "text"])
+        .limit(5)
+        .to_list()
+    )
+    assert len(results) == 5
+    assert len(results[0]) == 3  # id, text, _score

-        # Test boolean query
-        results = (
-            table.search(MatchQuery("puppy", "text") & MatchQuery("runs", "text"))
-            .select(["id", "text"])
-            .limit(5)
-            .to_list()
-        )
-        assert len(results) == 5
-        assert len(results[0]) == 3  # id, text, _score
-        for r in results:
-            assert "puppy" in r["text"]
-            assert "runs" in r["text"]
+    # Test boolean query
+    results = (
+        table.search(MatchQuery("puppy", "text") & MatchQuery("runs", "text"))
+        .select(["id", "text"])
+        .limit(5)
+        .to_list()
+    )
+    assert len(results) == 5
+    assert len(results[0]) == 3  # id, text, _score
+    for r in results:
+        assert "puppy" in r["text"]
+        assert "runs" in r["text"]


@pytest.mark.asyncio
@@ -318,13 +348,13 @@ async def test_fts_select_async(async_table):


 def test_search_fts_phrase_query(table):
-    table.create_fts_index("text", use_tantivy=False, with_position=False)
+    table.create_fts_index("text", with_position=False)
    try:
        phrase_results = table.search('"puppy runs"').limit(100).to_list()
        assert False
    except Exception:
        pass
-    table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True)
+    table.create_fts_index("text", with_position=True, replace=True)
    results = table.search("puppy").limit(100).to_list()

    # Test with quotation marks
@@ -375,8 +405,8 @@ async def test_search_fts_phrase_query_async(async_table):


 def test_search_fts_specify_column(table):
-    table.create_fts_index("text", use_tantivy=False)
-    table.create_fts_index("text2", use_tantivy=False)
+    table.create_fts_index("text")
+    table.create_fts_index("text2")

    results = table.search("puppy", fts_columns="text").limit(5).to_list()
    assert len(results) == 5
@@ -470,42 +500,8 @@ async def test_search_fts_specify_column_async(async_table):
        pass


-def test_search_ordering_field_index_table(tmp_path, table):
-    table.create_fts_index("text", ordering_field_names=["count"], use_tantivy=True)
-    rows = (
-        table.search("puppy", ordering_field_name="count")
-        .limit(20)
-        .select(["text", "count"])
-        .to_list()
-    )
-    for r in rows:
-        assert "puppy" in r["text"]
-    assert sorted(rows, key=lambda x: x["count"], reverse=True) == rows
-
-
-def test_search_ordering_field_index(tmp_path, table):
-    index = ldb.fts.create_index(
-        str(tmp_path / "index"), ["text"], ordering_fields=["count"]
-    )
-
-    ldb.fts.populate_index(index, table, ["text"], ordering_fields=["count"])
-    index.reload()
-    results = ldb.fts.search_index(
-        index, query="puppy", limit=5, ordering_field="count"
-    )
-    assert len(results) == 2
-    assert len(results[0]) == 5  # row_ids
-    assert len(results[1]) == 5  # _distance
-    rows = table.to_lance().take(results[0]).to_pylist()
-
-    for r in rows:
-        assert "puppy" in r["text"]
-    assert sorted(rows, key=lambda x: x["count"], reverse=True) == rows
-
-
-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_create_index_from_table(tmp_path, table, use_tantivy):
-    table.create_fts_index("text", use_tantivy=use_tantivy)
+def test_create_index_from_table(tmp_path, table):
+    table.create_fts_index("text")
    df = table.search("puppy").limit(5).select(["text"]).to_pandas()
    assert len(df) <= 5
    assert "text" in df.columns
@@ -525,36 +521,24 @@ def test_create_index_from_table(tmp_path, table, use_tantivy):
    )

    with pytest.raises(Exception, match="already exists"):
-        table.create_fts_index("text", use_tantivy=use_tantivy)
+        table.create_fts_index("text")

-    table.create_fts_index("text", replace=True, use_tantivy=use_tantivy)
+    table.create_fts_index("text", replace=True)
    assert len(table.search("gorilla").limit(1).to_pandas()) == 1


 def test_create_index_multiple_columns(tmp_path, table):
-    table.create_fts_index(["text", "text2"], use_tantivy=True)
-    df = table.search("puppy").limit(5).to_pandas()
-    assert len(df) == 5
-    assert "text" in df.columns
-    assert "text2" in df.columns
-
-
-def test_empty_rs(tmp_path, table, mocker):
-    table.create_fts_index(["text", "text2"], use_tantivy=True)
-    mocker.patch("lancedb.fts.search_index", return_value=([], []))
-    df = table.search("puppy").limit(5).to_pandas()
-    assert len(df) == 0
+    with pytest.raises(ValueError, match="Native FTS indexes can only be created"):
+        table.create_fts_index(["text", "text2"])


 def test_nested_schema(tmp_path, table):
-    table.create_fts_index("nested.text", use_tantivy=True)
-    rs = table.search("puppy").limit(5).to_list()
-    assert len(rs) == 5
+    with pytest.raises(ValueError, match="top-level fields"):
+        table.create_fts_index("nested.text")


-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_search_index_with_filter(table, use_tantivy):
-    table.create_fts_index("text", use_tantivy=use_tantivy)
+def test_search_index_with_filter(table):
+    table.create_fts_index("text")
    orig_import = __import__

    def import_mock(name, *args):
@@ -584,8 +568,7 @@ def test_search_index_with_filter(table, use_tantivy):
        assert r["_rowid"] is not None


-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_null_input(table, use_tantivy):
+def test_null_input(table):
    table.add(
        [
            {
@@ -598,14 +581,13 @@ def test_null_input(table, use_tantivy):
            }
        ]
    )
-    table.create_fts_index("text", use_tantivy=use_tantivy)
+    table.create_fts_index("text")


 def test_syntax(table):
    # https://github.com/lancedb/lancedb/issues/769
-    table.create_fts_index("text", use_tantivy=True)
-    with pytest.raises(ValueError, match="Syntax Error"):
-        table.search("they could have been dogs OR").limit(10).to_list()
+    table.create_fts_index("text")
+    table.search("they could have been dogs OR").limit(10).to_list()

    # these should work

@@ -616,6 +598,7 @@ def test_syntax(table):
    ).to_list()

    # phrase queries
+    table.create_fts_index("text", with_position=True, replace=True)
    table.search("they could have been dogs OR cats").phrase_query().limit(10).to_list()
    table.search('"they could have been dogs OR cats"').limit(10).to_list()
    table.search('''"the cats OR dogs were not really 'pets' at all"''').limit(
@@ -639,7 +622,7 @@ def test_language(mem_db: DBConnection):
    table = mem_db.create_table("test", data=data)

    with pytest.raises(ValueError) as e:
-        table.create_fts_index("text", use_tantivy=False, language="klingon")
+        table.create_fts_index("text", language="klingon")

    assert exception_output(e) == (
        "ValueError: LanceDB does not support the requested language: 'klingon'\n"
@@ -650,7 +633,6 @@ def test_language(mem_db: DBConnection):

    table.create_fts_index(
        "text",
-        use_tantivy=False,
        language="French",
        stem=True,
        ascii_folding=True,
@@ -690,7 +672,7 @@ def test_fts_on_list(mem_db: DBConnection):
        }
    )
    table = mem_db.create_table("test", data=data)
-    table.create_fts_index("text", use_tantivy=False, with_position=True)
+    table.create_fts_index("text", with_position=True)

    res = table.search("lance").limit(5).to_list()
    assert len(res) == 3
@@ -702,7 +684,7 @@ def test_fts_on_list(mem_db: DBConnection):
 def test_fts_ngram(mem_db: DBConnection):
    data = pa.table({"text": ["hello world", "lance database", "lance is cool"]})
    table = mem_db.create_table("test", data=data)
-    table.create_fts_index("text", use_tantivy=False, base_tokenizer="ngram")
+    table.create_fts_index("text", base_tokenizer="ngram")

    results = table.search("lan", query_type="fts").limit(10).to_list()
    assert len(results) == 2
@@ -721,7 +703,6 @@ def test_fts_ngram(mem_db: DBConnection):
    # test setting min_ngram_length and prefix_only
    table.create_fts_index(
        "text",
-        use_tantivy=False,
        base_tokenizer="ngram",
        replace=True,
        ngram_min_length=2,
@@ -742,6 +723,90 @@ def test_fts_ngram(mem_db: DBConnection):
    assert set(r["text"] for r in results) == {"lance database", "lance is cool"}


+def test_fts_jieba_tokenizer(mem_db: DBConnection, language_model_home):
+    data = pa.table({"text": ["我们都有光明的前途", "光明的前途"]})
+    table = mem_db.create_table("test_jieba", data=data)
+    table.create_fts_index(
+        "text",
+        base_tokenizer="jieba/default",
+        stem=False,
+        remove_stop_words=False,
+        ascii_folding=False,
+    )
+
+    results = table.search("我们", query_type="fts").limit(10).to_list()
+    assert [row["text"] for row in results] == ["我们都有光明的前途"]
+
+
+def test_fts_jieba_missing_language_model_note(
+    mem_db: DBConnection, monkeypatch, tmp_path
+):
+    missing_root = tmp_path / "missing-language-models"
+    monkeypatch.setenv("LANCE_LANGUAGE_MODEL_HOME", str(missing_root))
+    table = mem_db.create_table(
+        "test_missing_jieba_model",
+        data=pa.table({"text": ["我们都有光明的前途"]}),
+    )
+
+    with pytest.raises((ValueError, RuntimeError)) as e:
+        table.create_fts_index(
+            "text",
+            base_tokenizer="jieba/default",
+            stem=False,
+            remove_stop_words=False,
+            ascii_folding=False,
+        )
+
+    output = exception_output(e)
+    assert "Invalid directory path:" in output
+    assert "LANCE_LANGUAGE_MODEL_HOME" in output
+    assert "jieba/default" in output
+
+
+@pytest.mark.asyncio
+async def test_fts_jieba_missing_language_model_note_async(monkeypatch, tmp_path):
+    missing_root = tmp_path / "missing-language-models"
+    monkeypatch.setenv("LANCE_LANGUAGE_MODEL_HOME", str(missing_root))
+    db = await ldb.connect_async(tmp_path / "async-db")
+    table = await db.create_table(
+        "test_missing_jieba_model_async",
+        data=pa.table({"text": ["我们都有光明的前途"]}),
+    )
+
+    with pytest.raises((ValueError, RuntimeError)) as e:
+        await table.create_index(
+            "text",
+            config=FTS(
+                base_tokenizer="jieba/default",
+                stem=False,
+                remove_stop_words=False,
+                ascii_folding=False,
+            ),
+        )
+
+    output = exception_output(e)
+    assert "Invalid directory path:" in output
+    assert "LANCE_LANGUAGE_MODEL_HOME" in output
+    assert "jieba/default" in output
+
+
+def test_fts_lindera_tokenizer(
+    mem_db: DBConnection, language_model_home, lindera_ipadic
+):
+    data = pa.table({"text": ["成田国際空港", "東京国際空港", "羽田空港"]})
+    table = mem_db.create_table("test_lindera", data=data)
+    table.create_fts_index(
+        "text",
+        base_tokenizer="lindera/ipadic",
+        stem=False,
+        remove_stop_words=False,
+        ascii_folding=False,
+    )
+
+    results = table.search("成田", query_type="fts").limit(10).to_list()
+    assert [row["text"] for row in results] == ["成田国際空港"]
+
+
 def test_fts_query_to_json():
    """Test that FTS query to_json() produces valid JSON strings with exact format."""

@@ -886,7 +951,7 @@ def test_fts_query_to_json():


 def test_fts_fast_search(table):
-    table.create_fts_index("text", use_tantivy=False)
+    table.create_fts_index("text")

    # Insert some unindexed data
    table.add(
--- a/python/python/tests/test_hybrid_query.py
+++ b/python/python/tests/test_hybrid_query.py
@@ -28,7 +28,7 @@ def sync_table(tmpdir_factory) -> Table:
        }
    )
    table = db.create_table("test", data)
-    table.create_fts_index("text", with_position=False, use_tantivy=False)
+    table.create_fts_index("text", with_position=False)
    return table


@@ -192,7 +192,7 @@ def table_with_id(tmpdir_factory) -> Table:
        }
    )
    table = db.create_table("test_with_id", data)
-    table.create_fts_index("text", with_position=False, use_tantivy=False)
+    table.create_fts_index("text", with_position=False)
    return table


--- a/python/python/tests/test_index.py
+++ b/python/python/tests/test_index.py
@@ -16,11 +16,13 @@ from lancedb.index import (
    IvfSq,
    IvfHnswPq,
    IvfHnswSq,
+    IvfHnswFlat,
    IvfRq,
    Bitmap,
    LabelList,
    HnswPq,
    HnswSq,
+    HnswFlat,
    FTS,
 )
 from lancedb.table import IndexStatistics
@@ -250,6 +252,21 @@ async def test_create_hnswpq_alias_index(some_table: AsyncTable):
    assert indices[0].index_type in {"HnswPq", "IvfHnswPq"}


+@pytest.mark.asyncio
+async def test_create_hnswflat_index(some_table: AsyncTable):
+    await some_table.create_index("vector", config=HnswFlat(num_partitions=10))
+    indices = await some_table.list_indices()
+    assert len(indices) == 1
+
+
+@pytest.mark.asyncio
+async def test_create_hnswflat_alias_index(some_table: AsyncTable):
+    await some_table.create_index("vector", config=IvfHnswFlat(num_partitions=5))
+    indices = await some_table.list_indices()
+    assert len(indices) == 1
+    assert indices[0].index_type in {"HnswFlat", "IvfHnswFlat"}
+
+
@pytest.mark.asyncio
 async def test_create_ivfsq_index(some_table: AsyncTable):
    await some_table.create_index("vector", config=IvfSq(num_partitions=10))
@@ -295,6 +312,7 @@ def test_index_statistics_index_type_lists_all_supported_values():
        "IVF_RQ",
        "IVF_HNSW_SQ",
        "IVF_HNSW_PQ",
+        "IVF_HNSW_FLAT",
        "FTS",
        "BTREE",
        "BITMAP",
--- a/python/python/tests/test_namespace.py
+++ b/python/python/tests/test_namespace.py
@@ -33,6 +33,16 @@ class TestNamespaceConnection:
        # Initially no tables in root
        assert len(list(db.table_names())) == 0

+    def test_connect_via_connect_helper(self):
+        """Connecting via lancedb.connect should delegate to a namespace connection."""
+        db = lancedb.connect(
+            namespace_client_impl="dir",
+            namespace_client_properties={"root": self.temp_dir},
+        )
+
+        assert isinstance(db, lancedb.LanceNamespaceDBConnection)
+        assert len(list(db.table_names())) == 0
+
    def test_create_table_through_namespace(self):
        """Test creating a table through namespace."""
        db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
@@ -50,14 +60,14 @@ class TestNamespaceConnection:
        )

        # Create empty table in child namespace
-        table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
+        table = db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
        assert table is not None
        assert table.name == "test_table"
        assert table.namespace == ["test_ns"]
        assert table.id == "test_ns$test_table"

        # Table should appear in child namespace
-        table_names = list(db.table_names(namespace=["test_ns"]))
+        table_names = list(db.table_names(namespace_path=["test_ns"]))
        assert "test_table" in table_names
        assert len(table_names) == 1

@@ -80,10 +90,10 @@ class TestNamespaceConnection:
                pa.field("vector", pa.list_(pa.float32(), 2)),
            ]
        )
-        db.create_table("test_table", schema=schema, namespace=["test_ns"])
+        db.create_table("test_table", schema=schema, namespace_path=["test_ns"])

        # Open the table
-        table = db.open_table("test_table", namespace=["test_ns"])
+        table = db.open_table("test_table", namespace_path=["test_ns"])
        assert table is not None
        assert table.name == "test_table"
        assert table.namespace == ["test_ns"]
@@ -108,31 +118,31 @@ class TestNamespaceConnection:
                pa.field("vector", pa.list_(pa.float32(), 2)),
            ]
        )
-        db.create_table("table1", schema=schema, namespace=["test_ns"])
-        db.create_table("table2", schema=schema, namespace=["test_ns"])
+        db.create_table("table1", schema=schema, namespace_path=["test_ns"])
+        db.create_table("table2", schema=schema, namespace_path=["test_ns"])

        # Verify both tables exist in child namespace
-        table_names = list(db.table_names(namespace=["test_ns"]))
+        table_names = list(db.table_names(namespace_path=["test_ns"]))
        assert "table1" in table_names
        assert "table2" in table_names
        assert len(table_names) == 2

        # Drop one table
-        db.drop_table("table1", namespace=["test_ns"])
+        db.drop_table("table1", namespace_path=["test_ns"])

        # Verify only table2 remains
-        table_names = list(db.table_names(namespace=["test_ns"]))
+        table_names = list(db.table_names(namespace_path=["test_ns"]))
        assert "table1" not in table_names
        assert "table2" in table_names
        assert len(table_names) == 1

        # Drop the second table
-        db.drop_table("table2", namespace=["test_ns"])
-        assert len(list(db.table_names(namespace=["test_ns"]))) == 0
+        db.drop_table("table2", namespace_path=["test_ns"])
+        assert len(list(db.table_names(namespace_path=["test_ns"]))) == 0

        # Should not be able to open dropped table
        with pytest.raises(TableNotFoundError):
-            db.open_table("table1", namespace=["test_ns"])
+            db.open_table("table1", namespace_path=["test_ns"])

    def test_create_table_with_schema(self):
        """Test creating a table with explicit schema through namespace."""
@@ -151,7 +161,7 @@ class TestNamespaceConnection:
        )

        # Create table with schema in child namespace
-        table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
+        table = db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
        assert table is not None
        assert table.namespace == ["test_ns"]

@@ -175,7 +185,7 @@ class TestNamespaceConnection:
                pa.field("vector", pa.list_(pa.float32(), 2)),
            ]
        )
-        db.create_table("old_name", schema=schema, namespace=["test_ns"])
+        db.create_table("old_name", schema=schema, namespace_path=["test_ns"])

        # Rename should raise NotImplementedError
        with pytest.raises(NotImplementedError, match="rename_table is not supported"):
@@ -196,20 +206,20 @@ class TestNamespaceConnection:
            ]
        )
        for i in range(3):
-            db.create_table(f"table{i}", schema=schema, namespace=["test_ns"])
+            db.create_table(f"table{i}", schema=schema, namespace_path=["test_ns"])

        # Verify tables exist in child namespace
-        assert len(list(db.table_names(namespace=["test_ns"]))) == 3
+        assert len(list(db.table_names(namespace_path=["test_ns"]))) == 3

        # Drop all tables in child namespace
-        db.drop_all_tables(namespace=["test_ns"])
+        db.drop_all_tables(namespace_path=["test_ns"])

        # Verify all tables are gone from child namespace
-        assert len(list(db.table_names(namespace=["test_ns"]))) == 0
+        assert len(list(db.table_names(namespace_path=["test_ns"]))) == 0

        # Test that table_names works with keyword-only namespace parameter
-        db.create_table("test_table", schema=schema, namespace=["test_ns"])
-        result = list(db.table_names(namespace=["test_ns"]))
+        db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
+        result = list(db.table_names(namespace_path=["test_ns"]))
        assert "test_table" in result

    def test_table_operations(self):
@@ -227,7 +237,7 @@ class TestNamespaceConnection:
                pa.field("text", pa.string()),
            ]
        )
-        table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
+        table = db.create_table("test_table", schema=schema, namespace_path=["test_ns"])

        # Verify empty table was created
        result = table.to_pandas()
@@ -298,25 +308,25 @@ class TestNamespaceConnection:
            ]
        )
        table = db.create_table(
-            "test_table", schema=schema, namespace=["test_namespace"]
+            "test_table", schema=schema, namespace_path=["test_namespace"]
        )
        assert table is not None

        # Verify table exists in namespace
-        tables_in_namespace = list(db.table_names(namespace=["test_namespace"]))
+        tables_in_namespace = list(db.table_names(namespace_path=["test_namespace"]))
        assert "test_table" in tables_in_namespace
        assert len(tables_in_namespace) == 1

        # Open table from namespace
-        table = db.open_table("test_table", namespace=["test_namespace"])
+        table = db.open_table("test_table", namespace_path=["test_namespace"])
        assert table is not None
        assert table.name == "test_table"

        # Drop table from namespace
-        db.drop_table("test_table", namespace=["test_namespace"])
+        db.drop_table("test_table", namespace_path=["test_namespace"])

        # Verify table no longer exists in namespace
-        tables_in_namespace = list(db.table_names(namespace=["test_namespace"]))
+        tables_in_namespace = list(db.table_names(namespace_path=["test_namespace"]))
        assert len(tables_in_namespace) == 0

        # Drop namespace
@@ -338,14 +348,14 @@ class TestNamespaceConnection:
                pa.field("vector", pa.list_(pa.float32(), 2)),
            ]
        )
-        db.create_table("test_table", schema=schema, namespace=["test_namespace"])
+        db.create_table("test_table", schema=schema, namespace_path=["test_namespace"])

        # Try to drop namespace with tables - should fail
        with pytest.raises(NamespaceNotEmptyError):
            db.drop_namespace(["test_namespace"])

        # Drop table first
-        db.drop_table("test_table", namespace=["test_namespace"])
+        db.drop_table("test_table", namespace_path=["test_namespace"])

        # Now dropping namespace should work
        db.drop_namespace(["test_namespace"])
@@ -368,10 +378,10 @@ class TestNamespaceConnection:

        # Create table with same name in both namespaces
        table_a = db.create_table(
-            "same_name_table", schema=schema, namespace=["namespace_a"]
+            "same_name_table", schema=schema, namespace_path=["namespace_a"]
        )
        table_b = db.create_table(
-            "same_name_table", schema=schema, namespace=["namespace_b"]
+            "same_name_table", schema=schema, namespace_path=["namespace_b"]
        )

        # Add different data to each table
@@ -389,7 +399,9 @@ class TestNamespaceConnection:
        table_b.add(data_b)

        # Verify data in namespace_a table
-        opened_table_a = db.open_table("same_name_table", namespace=["namespace_a"])
+        opened_table_a = db.open_table(
+            "same_name_table", namespace_path=["namespace_a"]
+        )
        result_a = opened_table_a.to_pandas().sort_values("id").reset_index(drop=True)
        assert len(result_a) == 2
        assert result_a["id"].tolist() == [1, 2]
@@ -400,7 +412,9 @@ class TestNamespaceConnection:
        assert [v.tolist() for v in result_a["vector"]] == [[1.0, 2.0], [3.0, 4.0]]

        # Verify data in namespace_b table
-        opened_table_b = db.open_table("same_name_table", namespace=["namespace_b"])
+        opened_table_b = db.open_table(
+            "same_name_table", namespace_path=["namespace_b"]
+        )
        result_b = opened_table_b.to_pandas().sort_values("id").reset_index(drop=True)
        assert len(result_b) == 3
        assert result_b["id"].tolist() == [10, 20, 30]
@@ -420,8 +434,8 @@ class TestNamespaceConnection:
        assert "same_name_table" not in root_tables

        # Clean up
-        db.drop_table("same_name_table", namespace=["namespace_a"])
-        db.drop_table("same_name_table", namespace=["namespace_b"])
+        db.drop_table("same_name_table", namespace_path=["namespace_a"])
+        db.drop_table("same_name_table", namespace_path=["namespace_b"])
        db.drop_namespace(["namespace_a"])
        db.drop_namespace(["namespace_b"])

@@ -449,6 +463,8 @@ class TestAsyncNamespaceConnection:
        table_names = await db.table_names()
        assert len(list(table_names)) == 0

+    # Async connect via namespace helper is not enabled yet.
+
    async def test_create_table_async(self):
        """Test creating a table asynchronously through namespace."""
        db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
@@ -467,13 +483,13 @@ class TestAsyncNamespaceConnection:

        # Create empty table in child namespace
        table = await db.create_table(
-            "test_table", schema=schema, namespace=["test_ns"]
+            "test_table", schema=schema, namespace_path=["test_ns"]
        )
        assert table is not None
        assert isinstance(table, lancedb.AsyncTable)

        # Table should appear in child namespace
-        table_names = await db.table_names(namespace=["test_ns"])
+        table_names = await db.table_names(namespace_path=["test_ns"])
        assert "test_table" in list(table_names)

    async def test_open_table_async(self):
@@ -490,10 +506,10 @@ class TestAsyncNamespaceConnection:
                pa.field("vector", pa.list_(pa.float32(), 2)),
            ]
        )
-        await db.create_table("test_table", schema=schema, namespace=["test_ns"])
+        await db.create_table("test_table", schema=schema, namespace_path=["test_ns"])

        # Open the table
-        table = await db.open_table("test_table", namespace=["test_ns"])
+        table = await db.open_table("test_table", namespace_path=["test_ns"])
        assert table is not None
        assert isinstance(table, lancedb.AsyncTable)

@@ -547,20 +563,20 @@ class TestAsyncNamespaceConnection:
                pa.field("vector", pa.list_(pa.float32(), 2)),
            ]
        )
-        await db.create_table("table1", schema=schema, namespace=["test_ns"])
-        await db.create_table("table2", schema=schema, namespace=["test_ns"])
+        await db.create_table("table1", schema=schema, namespace_path=["test_ns"])
+        await db.create_table("table2", schema=schema, namespace_path=["test_ns"])

        # Verify both tables exist in child namespace
-        table_names = list(await db.table_names(namespace=["test_ns"]))
+        table_names = list(await db.table_names(namespace_path=["test_ns"]))
        assert "table1" in table_names
        assert "table2" in table_names
        assert len(table_names) == 2

        # Drop one table
-        await db.drop_table("table1", namespace=["test_ns"])
+        await db.drop_table("table1", namespace_path=["test_ns"])

        # Verify only table2 remains
-        table_names = list(await db.table_names(namespace=["test_ns"]))
+        table_names = list(await db.table_names(namespace_path=["test_ns"]))
        assert "table1" not in table_names
        assert "table2" in table_names
        assert len(table_names) == 1
@@ -589,20 +605,24 @@ class TestAsyncNamespaceConnection:
            ]
        )
        table = await db.create_table(
-            "test_table", schema=schema, namespace=["test_namespace"]
+            "test_table", schema=schema, namespace_path=["test_namespace"]
        )
        assert table is not None

        # Verify table exists in namespace
-        tables_in_namespace = list(await db.table_names(namespace=["test_namespace"]))
+        tables_in_namespace = list(
+            await db.table_names(namespace_path=["test_namespace"])
+        )
        assert "test_table" in tables_in_namespace
        assert len(tables_in_namespace) == 1

        # Drop table from namespace
-        await db.drop_table("test_table", namespace=["test_namespace"])
+        await db.drop_table("test_table", namespace_path=["test_namespace"])

        # Verify table no longer exists in namespace
-        tables_in_namespace = list(await db.table_names(namespace=["test_namespace"]))
+        tables_in_namespace = list(
+            await db.table_names(namespace_path=["test_namespace"])
+        )
        assert len(tables_in_namespace) == 0

        # Drop namespace
@@ -627,15 +647,98 @@ class TestAsyncNamespaceConnection:
            ]
        )
        for i in range(3):
-            await db.create_table(f"table{i}", schema=schema, namespace=["test_ns"])
+            await db.create_table(
+                f"table{i}", schema=schema, namespace_path=["test_ns"]
+            )

        # Verify tables exist in child namespace
-        table_names = await db.table_names(namespace=["test_ns"])
+        table_names = await db.table_names(namespace_path=["test_ns"])
        assert len(list(table_names)) == 3

        # Drop all tables in child namespace
-        await db.drop_all_tables(namespace=["test_ns"])
+        await db.drop_all_tables(namespace_path=["test_ns"])

        # Verify all tables are gone from child namespace
-        table_names = await db.table_names(namespace=["test_ns"])
+        table_names = await db.table_names(namespace_path=["test_ns"])
        assert len(list(table_names)) == 0
+
+
+class TestPushdownOperations:
+    """Test pushdown operations on namespace connections."""
+
+    def setup_method(self):
+        """Set up test fixtures."""
+        self.temp_dir = tempfile.mkdtemp()
+
+    def teardown_method(self):
+        """Clean up test fixtures."""
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_query_table_pushdown_stored(self):
+        """Test that QueryTable pushdown is stored on sync connection."""
+        db = lancedb.connect_namespace(
+            "dir",
+            {"root": self.temp_dir},
+            namespace_client_pushdown_operations=["QueryTable"],
+        )
+        assert "QueryTable" in db._namespace_client_pushdown_operations
+
+    def test_create_table_pushdown_stored(self):
+        """Test that CreateTable pushdown is stored on sync connection."""
+        db = lancedb.connect_namespace(
+            "dir",
+            {"root": self.temp_dir},
+            namespace_client_pushdown_operations=["CreateTable"],
+        )
+        assert "CreateTable" in db._namespace_client_pushdown_operations
+
+    def test_both_pushdowns_stored(self):
+        """Test that both pushdown operations can be set together."""
+        db = lancedb.connect_namespace(
+            "dir",
+            {"root": self.temp_dir},
+            namespace_client_pushdown_operations=["QueryTable", "CreateTable"],
+        )
+        assert "QueryTable" in db._namespace_client_pushdown_operations
+        assert "CreateTable" in db._namespace_client_pushdown_operations
+
+    def test_pushdown_defaults_to_empty(self):
+        """Test that pushdown operations default to empty."""
+        db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
+        assert len(db._namespace_client_pushdown_operations) == 0
+
+
+@pytest.mark.asyncio
+class TestAsyncPushdownOperations:
+    """Test pushdown operations on async namespace connections."""
+
+    def setup_method(self):
+        """Set up test fixtures."""
+        self.temp_dir = tempfile.mkdtemp()
+
+    def teardown_method(self):
+        """Clean up test fixtures."""
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    async def test_async_query_table_pushdown_stored(self):
+        """Test that QueryTable pushdown is stored on async connection."""
+        db = lancedb.connect_namespace_async(
+            "dir",
+            {"root": self.temp_dir},
+            namespace_client_pushdown_operations=["QueryTable"],
+        )
+        assert "QueryTable" in db._namespace_client_pushdown_operations
+
+    async def test_async_create_table_pushdown_stored(self):
+        """Test that CreateTable pushdown is stored on async connection."""
+        db = lancedb.connect_namespace_async(
+            "dir",
+            {"root": self.temp_dir},
+            namespace_client_pushdown_operations=["CreateTable"],
+        )
+        assert "CreateTable" in db._namespace_client_pushdown_operations
+
+    async def test_async_pushdown_defaults_to_empty(self):
+        """Test that pushdown operations default to empty on async connection."""
+        db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
+        assert len(db._namespace_client_pushdown_operations) == 0
--- a/python/python/tests/test_namespace_integration.py
+++ b/python/python/tests/test_namespace_integration.py
@@ -4,9 +4,11 @@
 """
 Integration tests for LanceDB Namespace with S3 and credential refresh.

-This test simulates a namespace server that returns incrementing credentials
-and verifies that the credential refresh mechanism works correctly for both
-create_table and open_table operations.
+This test uses DirectoryNamespace with native ops_metrics and vend_input_storage_options
+features to track API calls and test credential refresh mechanisms.
+
+Tests are parameterized to run with both DirectoryNamespace and a CustomNamespace
+wrapper to verify Python-Rust binding works correctly for custom implementations.

 Tests verify:
 - Storage options provider is auto-created and used
@@ -16,24 +18,141 @@ Tests verify:
 """

 import copy
+import shutil
+import sys
+import tempfile
 import time
 import uuid
-from threading import Lock
-from typing import Dict
+from typing import Dict, Optional

 import pyarrow as pa
 import pytest
-from lance_namespace import (
-    CreateEmptyTableRequest,
-    CreateEmptyTableResponse,
+from lance.namespace import (
    DeclareTableRequest,
    DeclareTableResponse,
    DescribeTableRequest,
    DescribeTableResponse,
+    DirectoryNamespace,
    LanceNamespace,
 )
+from lance_namespace import (
+    CreateNamespaceRequest,
+    CreateNamespaceResponse,
+    CreateTableRequest,
+    CreateTableResponse,
+    CreateTableVersionRequest,
+    CreateTableVersionResponse,
+    DeregisterTableRequest,
+    DeregisterTableResponse,
+    DescribeNamespaceRequest,
+    DescribeNamespaceResponse,
+    DescribeTableVersionRequest,
+    DescribeTableVersionResponse,
+    DropNamespaceRequest,
+    DropNamespaceResponse,
+    DropTableRequest,
+    DropTableResponse,
+    ListNamespacesRequest,
+    ListNamespacesResponse,
+    ListTablesRequest,
+    ListTablesResponse,
+    ListTableVersionsRequest,
+    ListTableVersionsResponse,
+    NamespaceExistsRequest,
+    RegisterTableRequest,
+    RegisterTableResponse,
+    TableExistsRequest,
+)
 from lancedb.namespace import LanceNamespaceDBConnection

+
+class CustomNamespace(LanceNamespace):
+    """A custom namespace wrapper that delegates to DirectoryNamespace.
+
+    This class verifies that the Python-Rust binding works correctly for
+    custom namespace implementations that wrap the native DirectoryNamespace.
+    All methods simply delegate to the underlying DirectoryNamespace instance.
+    """
+
+    def __init__(self, inner: DirectoryNamespace):
+        self._inner = inner
+
+    def namespace_id(self) -> str:
+        return f"CustomNamespace[{self._inner.namespace_id()}]"
+
+    def create_namespace(
+        self, request: CreateNamespaceRequest
+    ) -> CreateNamespaceResponse:
+        return self._inner.create_namespace(request)
+
+    def describe_namespace(
+        self, request: DescribeNamespaceRequest
+    ) -> DescribeNamespaceResponse:
+        return self._inner.describe_namespace(request)
+
+    def namespace_exists(self, request: NamespaceExistsRequest) -> None:
+        return self._inner.namespace_exists(request)
+
+    def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse:
+        return self._inner.drop_namespace(request)
+
+    def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse:
+        return self._inner.list_namespaces(request)
+
+    def create_table(
+        self, request: CreateTableRequest, data: bytes
+    ) -> CreateTableResponse:
+        return self._inner.create_table(request, data)
+
+    def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
+        return self._inner.declare_table(request)
+
+    def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
+        return self._inner.describe_table(request)
+
+    def table_exists(self, request: TableExistsRequest) -> None:
+        return self._inner.table_exists(request)
+
+    def drop_table(self, request: DropTableRequest) -> DropTableResponse:
+        return self._inner.drop_table(request)
+
+    def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
+        return self._inner.list_tables(request)
+
+    def register_table(self, request: RegisterTableRequest) -> RegisterTableResponse:
+        return self._inner.register_table(request)
+
+    def deregister_table(
+        self, request: DeregisterTableRequest
+    ) -> DeregisterTableResponse:
+        return self._inner.deregister_table(request)
+
+    def list_table_versions(
+        self, request: ListTableVersionsRequest
+    ) -> ListTableVersionsResponse:
+        return self._inner.list_table_versions(request)
+
+    def describe_table_version(
+        self, request: DescribeTableVersionRequest
+    ) -> DescribeTableVersionResponse:
+        return self._inner.describe_table_version(request)
+
+    def create_table_version(
+        self, request: CreateTableVersionRequest
+    ) -> CreateTableVersionResponse:
+        return self._inner.create_table_version(request)
+
+    def retrieve_ops_metrics(self) -> Optional[Dict[str, int]]:
+        return self._inner.retrieve_ops_metrics()
+
+
+def _wrap_if_custom(ns_client: DirectoryNamespace, use_custom: bool):
+    """Wrap namespace client in CustomNamespace if use_custom is True."""
+    if use_custom:
+        return CustomNamespace(ns_client)
+    return ns_client
+
+
 # LocalStack S3 configuration
 CONFIG = {
    "allow_http": "true",
@@ -89,162 +208,88 @@ def delete_bucket(s3, bucket_name):
        pass


-class TrackingNamespace(LanceNamespace):
+def create_tracking_namespace(
+    bucket_name: str,
+    storage_options: dict,
+    credential_expires_in_seconds: int = 60,
+    use_custom: bool = False,
+):
+    """Create a DirectoryNamespace with ops metrics and credential vending enabled.
+
+    Uses native DirectoryNamespace features:
+    - ops_metrics_enabled=true: Tracks API call counts via retrieve_ops_metrics()
+    - vend_input_storage_options=true: Returns input storage options in responses
+    - vend_input_storage_options_refresh_interval_millis: Adds expires_at_millis
+
+    Args:
+        bucket_name: S3 bucket name or local path
+        storage_options: Storage options to pass through (credentials, endpoint, etc.)
+        credential_expires_in_seconds: Interval in seconds for credential expiration
+        use_custom: If True, wrap in CustomNamespace for testing custom implementations
+
+    Returns:
+        Tuple of (namespace_client, inner_namespace_client) where inner is always
+        the DirectoryNamespace (used for metrics retrieval)
    """
-    Mock namespace that wraps DirectoryNamespace and tracks API calls.
+    # Add refresh_offset_millis to storage options so that credentials are not
+    # considered expired immediately. Set to 1 second (1000ms) so that refresh
+    # checks work correctly with short-lived credentials in tests.
+    storage_options_with_refresh = dict(storage_options)
+    storage_options_with_refresh["refresh_offset_millis"] = "1000"

-    This namespace returns incrementing credentials with each API call to simulate
-    credential rotation. It also tracks the number of times each API is called
-    to verify caching behavior.
-    """
+    dir_props = {f"storage.{k}": v for k, v in storage_options_with_refresh.items()}

-    def __init__(
-        self,
-        bucket_name: str,
-        storage_options: Dict[str, str],
-        credential_expires_in_seconds: int = 60,
-    ):
-        from lance.namespace import DirectoryNamespace
+    if bucket_name.startswith("/") or bucket_name.startswith("file://"):
+        dir_props["root"] = f"{bucket_name}/namespace_root"
+    else:
+        dir_props["root"] = f"s3://{bucket_name}/namespace_root"

-        self.bucket_name = bucket_name
-        self.base_storage_options = storage_options
-        self.credential_expires_in_seconds = credential_expires_in_seconds
-        self.describe_call_count = 0
-        self.create_call_count = 0
-        self.lock = Lock()
+    # Enable ops metrics tracking
+    dir_props["ops_metrics_enabled"] = "true"
+    # Enable storage options vending
+    dir_props["vend_input_storage_options"] = "true"
+    # Set refresh interval in milliseconds
+    dir_props["vend_input_storage_options_refresh_interval_millis"] = str(
+        credential_expires_in_seconds * 1000
+    )

-        # Create underlying DirectoryNamespace with storage options
-        dir_props = {f"storage.{k}": v for k, v in storage_options.items()}
+    inner_ns_client = DirectoryNamespace(**dir_props)
+    ns_client = _wrap_if_custom(inner_ns_client, use_custom)
+    return ns_client, inner_ns_client

-        # Use S3 path for bucket name, local path for file paths
-        if bucket_name.startswith("/") or bucket_name.startswith("file://"):
-            dir_props["root"] = f"{bucket_name}/namespace_root"
-        else:
-            dir_props["root"] = f"s3://{bucket_name}/namespace_root"

-        self.inner = DirectoryNamespace(**dir_props)
+def get_describe_call_count(namespace_client) -> int:
+    """Get the number of describe_table calls made to the namespace client."""
+    return namespace_client.retrieve_ops_metrics().get("describe_table", 0)

-    def get_describe_call_count(self) -> int:
-        """Thread-safe getter for describe call count."""
-        with self.lock:
-            return self.describe_call_count

-    def get_create_call_count(self) -> int:
-        """Thread-safe getter for create call count."""
-        with self.lock:
-            return self.create_call_count
-
-    def namespace_id(self) -> str:
-        """Return namespace identifier."""
-        return f"TrackingNamespace {{ inner: {self.inner.namespace_id()} }}"
-
-    def _modify_storage_options(
-        self, storage_options: Dict[str, str], count: int
-    ) -> Dict[str, str]:
-        """
-        Add incrementing credentials with expiration timestamp.
-
-        This simulates a credential rotation system where each call returns
-        new credentials that expire after credential_expires_in_seconds.
-        """
-        # Start from base storage options (endpoint, region, allow_http, etc.)
-        # because DirectoryNamespace returns None for storage_options from
-        # describe_table/declare_table when no credential vendor is configured.
-        modified = copy.deepcopy(self.base_storage_options)
-        if storage_options:
-            modified.update(storage_options)
-
-        # Increment credentials to simulate rotation
-        modified["aws_access_key_id"] = f"AKID_{count}"
-        modified["aws_secret_access_key"] = f"SECRET_{count}"
-        modified["aws_session_token"] = f"TOKEN_{count}"
-
-        # Set expiration time
-        expires_at_millis = int(
-            (time.time() + self.credential_expires_in_seconds) * 1000
-        )
-        modified["expires_at_millis"] = str(expires_at_millis)
-
-        return modified
-
-    def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
-        """Track declare_table calls and inject rotating credentials."""
-        with self.lock:
-            self.create_call_count += 1
-            count = self.create_call_count
-
-        response = self.inner.declare_table(request)
-        response.storage_options = self._modify_storage_options(
-            response.storage_options, count
-        )
-
-        return response
-
-    def create_empty_table(
-        self, request: CreateEmptyTableRequest
-    ) -> CreateEmptyTableResponse:
-        """Track create_empty_table calls and inject rotating credentials."""
-        with self.lock:
-            self.create_call_count += 1
-            count = self.create_call_count
-
-        response = self.inner.create_empty_table(request)
-        response.storage_options = self._modify_storage_options(
-            response.storage_options, count
-        )
-
-        return response
-
-    def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
-        """Track describe_table calls and inject rotating credentials."""
-        with self.lock:
-            self.describe_call_count += 1
-            count = self.describe_call_count
-
-        response = self.inner.describe_table(request)
-        response.storage_options = self._modify_storage_options(
-            response.storage_options, count
-        )
-
-        return response
-
-    # Pass through other methods to inner namespace
-    def list_tables(self, request):
-        return self.inner.list_tables(request)
-
-    def drop_table(self, request):
-        return self.inner.drop_table(request)
-
-    def list_namespaces(self, request):
-        return self.inner.list_namespaces(request)
-
-    def create_namespace(self, request):
-        return self.inner.create_namespace(request)
-
-    def drop_namespace(self, request):
-        return self.inner.drop_namespace(request)
+def get_declare_call_count(namespace_client) -> int:
+    """Get the number of declare_table calls made to the namespace client."""
+    return namespace_client.retrieve_ops_metrics().get("declare_table", 0)


@pytest.mark.s3_test
-def test_namespace_create_table_with_provider(s3_bucket: str):
+@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
+def test_namespace_create_table_with_provider(s3_bucket: str, use_custom: bool):
    """
    Test creating a table through namespace with storage options provider.

    Verifies:
-    - create_empty_table is called once to reserve location
+    - declare_table is called once to reserve location
    - Storage options provider is auto-created
    - Table can be written successfully
    - Credentials are cached during write operations
    """
    storage_options = copy.deepcopy(CONFIG)

-    namespace = TrackingNamespace(
+    ns_client, inner_ns_client = create_tracking_namespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3600,  # 1 hour
+        use_custom=use_custom,
    )

-    db = LanceNamespaceDBConnection(namespace)
+    db = LanceNamespaceDBConnection(ns_client)

    # Create unique namespace for this test
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -254,8 +299,8 @@ def test_namespace_create_table_with_provider(s3_bucket: str):
    namespace_path = [namespace_name]

    # Verify initial state
-    assert namespace.get_create_call_count() == 0
-    assert namespace.get_describe_call_count() == 0
+    assert get_declare_call_count(inner_ns_client) == 0
+    assert get_describe_call_count(inner_ns_client) == 0

    # Create table with data
    data = pa.table(
@@ -266,12 +311,12 @@ def test_namespace_create_table_with_provider(s3_bucket: str):
        }
    )

-    table = db.create_table(table_name, data, namespace=namespace_path)
+    table = db.create_table(table_name, data, namespace_path=namespace_path)

-    # Verify create_empty_table was called exactly once
-    assert namespace.get_create_call_count() == 1
+    # Verify declare_table was called exactly once
+    assert get_declare_call_count(inner_ns_client) == 1
    # describe_table should NOT be called during create in create mode
-    assert namespace.get_describe_call_count() == 0
+    assert get_describe_call_count(inner_ns_client) == 0

    # Verify table was created successfully
    assert table.name == table_name
@@ -281,7 +326,8 @@ def test_namespace_create_table_with_provider(s3_bucket: str):


@pytest.mark.s3_test
-def test_namespace_open_table_with_provider(s3_bucket: str):
+@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
+def test_namespace_open_table_with_provider(s3_bucket: str, use_custom: bool):
    """
    Test opening a table through namespace with storage options provider.

@@ -293,13 +339,14 @@ def test_namespace_open_table_with_provider(s3_bucket: str):
    """
    storage_options = copy.deepcopy(CONFIG)

-    namespace = TrackingNamespace(
+    ns_client, inner_ns_client = create_tracking_namespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3600,
+        use_custom=use_custom,
    )

-    db = LanceNamespaceDBConnection(namespace)
+    db = LanceNamespaceDBConnection(ns_client)

    # Create unique namespace for this test
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -317,21 +364,21 @@ def test_namespace_open_table_with_provider(s3_bucket: str):
        }
    )

-    db.create_table(table_name, data, namespace=namespace_path)
+    db.create_table(table_name, data, namespace_path=namespace_path)

-    initial_create_count = namespace.get_create_call_count()
-    assert initial_create_count == 1
+    initial_declare_count = get_declare_call_count(inner_ns_client)
+    assert initial_declare_count == 1

    # Open the table
-    opened_table = db.open_table(table_name, namespace=namespace_path)
+    opened_table = db.open_table(table_name, namespace_path=namespace_path)

    # Verify describe_table was called exactly once
-    assert namespace.get_describe_call_count() == 1
-    # create_empty_table should not be called again
-    assert namespace.get_create_call_count() == initial_create_count
+    assert get_describe_call_count(inner_ns_client) == 1
+    # declare_table should not be called again
+    assert get_declare_call_count(inner_ns_client) == initial_declare_count

    # Perform multiple read operations
-    describe_count_after_open = namespace.get_describe_call_count()
+    describe_count_after_open = get_describe_call_count(inner_ns_client)

    for _ in range(3):
        result = opened_table.to_pandas()
@@ -340,11 +387,72 @@ def test_namespace_open_table_with_provider(s3_bucket: str):
        assert count == 5

    # Verify credentials were cached (no additional describe_table calls)
-    assert namespace.get_describe_call_count() == describe_count_after_open
+    assert get_describe_call_count(inner_ns_client) == describe_count_after_open
+
+
+@pytest.mark.skipif(
+    sys.platform == "win32",
+    reason="TODO: fix schema-only namespace metrics test on Windows",
+)
+@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
+def test_namespace_create_schema_only_with_provider(use_custom: bool):
+    """
+    Test creating a schema-only table through namespace with storage options provider.
+
+    Verifies:
+    - declare_table is called once to reserve the location
+    - describe_table is not called during create in create mode
+    - the table can be reopened successfully afterward
+    - opening the table triggers exactly one describe_table call
+    """
+    temp_dir = tempfile.mkdtemp()
+    try:
+        ns_client, inner_ns_client = create_tracking_namespace(
+            bucket_name=temp_dir,
+            storage_options={},
+            credential_expires_in_seconds=3600,
+            use_custom=use_custom,
+        )
+
+        db = LanceNamespaceDBConnection(ns_client)
+
+        namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
+        db.create_namespace([namespace_name])
+
+        table_name = f"test_table_{uuid.uuid4().hex}"
+        namespace_path = [namespace_name]
+        schema = pa.schema(
+            [
+                pa.field("id", pa.int64()),
+                pa.field("vector", pa.list_(pa.float32(), 2)),
+                pa.field("text", pa.string()),
+            ]
+        )
+
+        assert get_declare_call_count(inner_ns_client) == 0
+        assert get_describe_call_count(inner_ns_client) == 0
+
+        table = db.create_table(
+            table_name, schema=schema, namespace_path=namespace_path
+        )
+
+        assert table.name == table_name
+        assert table.namespace == namespace_path
+        assert get_declare_call_count(inner_ns_client) == 1
+        assert get_describe_call_count(inner_ns_client) == 0
+
+        reopened_table = db.open_table(table_name, namespace_path=namespace_path)
+
+        assert reopened_table.schema == schema
+        assert get_declare_call_count(inner_ns_client) == 1
+        assert get_describe_call_count(inner_ns_client) == 1
+    finally:
+        shutil.rmtree(temp_dir, ignore_errors=True)


@pytest.mark.s3_test
-def test_namespace_credential_refresh_on_read(s3_bucket: str):
+@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
+def test_namespace_credential_refresh_on_read(s3_bucket: str, use_custom: bool):
    """
    Test credential refresh when credentials expire during read operations.

@@ -355,13 +463,14 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str):
    """
    storage_options = copy.deepcopy(CONFIG)

-    namespace = TrackingNamespace(
+    ns_client, inner_ns_client = create_tracking_namespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3,  # Short expiration for testing
+        use_custom=use_custom,
    )

-    db = LanceNamespaceDBConnection(namespace)
+    db = LanceNamespaceDBConnection(ns_client)

    # Create unique namespace for this test
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -378,16 +487,16 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str):
        }
    )

-    db.create_table(table_name, data, namespace=namespace_path)
+    db.create_table(table_name, data, namespace_path=namespace_path)

    # Open table (triggers describe_table)
-    opened_table = db.open_table(table_name, namespace=namespace_path)
+    opened_table = db.open_table(table_name, namespace_path=namespace_path)

    # Perform an immediate read (should use credentials from open)
    result = opened_table.to_pandas()
    assert len(result) == 3

-    describe_count_after_first_read = namespace.get_describe_call_count()
+    describe_count_after_first_read = get_describe_call_count(inner_ns_client)

    # Wait for credentials to expire (3 seconds + buffer)
    time.sleep(5)
@@ -396,7 +505,7 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str):
    result = opened_table.to_pandas()
    assert len(result) == 3

-    describe_count_after_refresh = namespace.get_describe_call_count()
+    describe_count_after_refresh = get_describe_call_count(inner_ns_client)
    # Verify describe_table was called again (credential refresh)
    refresh_delta = describe_count_after_refresh - describe_count_after_first_read

@@ -409,7 +518,8 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str):


@pytest.mark.s3_test
-def test_namespace_credential_refresh_on_write(s3_bucket: str):
+@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
+def test_namespace_credential_refresh_on_write(s3_bucket: str, use_custom: bool):
    """
    Test credential refresh when credentials expire during write operations.

@@ -420,13 +530,14 @@ def test_namespace_credential_refresh_on_write(s3_bucket: str):
    """
    storage_options = copy.deepcopy(CONFIG)

-    namespace = TrackingNamespace(
+    ns_client, inner_ns_client = create_tracking_namespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3,  # Short expiration
+        use_custom=use_custom,
    )

-    db = LanceNamespaceDBConnection(namespace)
+    db = LanceNamespaceDBConnection(ns_client)

    # Create unique namespace for this test
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -443,7 +554,7 @@ def test_namespace_credential_refresh_on_write(s3_bucket: str):
        }
    )

-    table = db.create_table(table_name, initial_data, namespace=namespace_path)
+    table = db.create_table(table_name, initial_data, namespace_path=namespace_path)

    # Add more data (should use cached credentials)
    new_data = pa.table(
@@ -471,24 +582,26 @@ def test_namespace_credential_refresh_on_write(s3_bucket: str):


@pytest.mark.s3_test
-def test_namespace_overwrite_mode(s3_bucket: str):
+@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
+def test_namespace_overwrite_mode(s3_bucket: str, use_custom: bool):
    """
    Test creating table in overwrite mode with credential tracking.

    Verifies:
-    - First create calls create_empty_table exactly once
+    - First create calls declare_table exactly once
    - Overwrite mode calls describe_table exactly once to check existence
    - Storage options provider works in overwrite mode
    """
    storage_options = copy.deepcopy(CONFIG)

-    namespace = TrackingNamespace(
+    ns_client, inner_ns_client = create_tracking_namespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3600,
+        use_custom=use_custom,
    )

-    db = LanceNamespaceDBConnection(namespace)
+    db = LanceNamespaceDBConnection(ns_client)

    # Create unique namespace for this test
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -505,11 +618,11 @@ def test_namespace_overwrite_mode(s3_bucket: str):
        }
    )

-    table = db.create_table(table_name, data1, namespace=namespace_path)
-    # Exactly one create_empty_table call for initial create
-    assert namespace.get_create_call_count() == 1
+    table = db.create_table(table_name, data1, namespace_path=namespace_path)
+    # Exactly one declare_table call for initial create
+    assert get_declare_call_count(inner_ns_client) == 1
    # No describe_table calls in create mode
-    assert namespace.get_describe_call_count() == 0
+    assert get_describe_call_count(inner_ns_client) == 0
    assert table.count_rows() == 2

    # Overwrite the table
@@ -521,14 +634,14 @@ def test_namespace_overwrite_mode(s3_bucket: str):
    )

    table2 = db.create_table(
-        table_name, data2, namespace=namespace_path, mode="overwrite"
+        table_name, data2, namespace_path=namespace_path, mode="overwrite"
    )

-    # Should still have only 1 create_empty_table call
+    # Should still have only 1 declare_table call
    # (overwrite reuses location from describe_table)
-    assert namespace.get_create_call_count() == 1
+    assert get_declare_call_count(inner_ns_client) == 1
    # Should have called describe_table exactly once to get existing table location
-    assert namespace.get_describe_call_count() == 1
+    assert get_describe_call_count(inner_ns_client) == 1

    # Verify new data
    assert table2.count_rows() == 3
@@ -537,7 +650,8 @@ def test_namespace_overwrite_mode(s3_bucket: str):


@pytest.mark.s3_test
-def test_namespace_multiple_tables(s3_bucket: str):
+@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
+def test_namespace_multiple_tables(s3_bucket: str, use_custom: bool):
    """
    Test creating and opening multiple tables in the same namespace.

@@ -548,13 +662,14 @@ def test_namespace_multiple_tables(s3_bucket: str):
    """
    storage_options = copy.deepcopy(CONFIG)

-    namespace = TrackingNamespace(
+    ns_client, inner_ns_client = create_tracking_namespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3600,
+        use_custom=use_custom,
    )

-    db = LanceNamespaceDBConnection(namespace)
+    db = LanceNamespaceDBConnection(ns_client)

    # Create unique namespace for this test
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -564,22 +679,22 @@ def test_namespace_multiple_tables(s3_bucket: str):
    # Create first table
    table1_name = f"table1_{uuid.uuid4().hex}"
    data1 = pa.table({"id": [1, 2], "value": [10, 20]})
-    db.create_table(table1_name, data1, namespace=namespace_path)
+    db.create_table(table1_name, data1, namespace_path=namespace_path)

    # Create second table
    table2_name = f"table2_{uuid.uuid4().hex}"
    data2 = pa.table({"id": [3, 4], "value": [30, 40]})
-    db.create_table(table2_name, data2, namespace=namespace_path)
+    db.create_table(table2_name, data2, namespace_path=namespace_path)

-    # Should have 2 create calls (one per table)
-    assert namespace.get_create_call_count() == 2
+    # Should have 2 declare calls (one per table)
+    assert get_declare_call_count(inner_ns_client) == 2

    # Open both tables
-    opened1 = db.open_table(table1_name, namespace=namespace_path)
-    opened2 = db.open_table(table2_name, namespace=namespace_path)
+    opened1 = db.open_table(table1_name, namespace_path=namespace_path)
+    opened2 = db.open_table(table2_name, namespace_path=namespace_path)

    # Should have 2 describe calls (one per open)
-    assert namespace.get_describe_call_count() == 2
+    assert get_describe_call_count(inner_ns_client) == 2

    # Verify both tables work independently
    assert opened1.count_rows() == 2
@@ -593,7 +708,8 @@ def test_namespace_multiple_tables(s3_bucket: str):


@pytest.mark.s3_test
-def test_namespace_with_schema_only(s3_bucket: str):
+@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
+def test_namespace_with_schema_only(s3_bucket: str, use_custom: bool):
    """
    Test creating empty table with schema only (no data).

@@ -604,13 +720,14 @@ def test_namespace_with_schema_only(s3_bucket: str):
    """
    storage_options = copy.deepcopy(CONFIG)

-    namespace = TrackingNamespace(
+    ns_client, inner_ns_client = create_tracking_namespace(
        bucket_name=s3_bucket,
        storage_options=storage_options,
        credential_expires_in_seconds=3600,
+        use_custom=use_custom,
    )

-    db = LanceNamespaceDBConnection(namespace)
+    db = LanceNamespaceDBConnection(ns_client)

    # Create unique namespace for this test
    namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
@@ -628,12 +745,12 @@ def test_namespace_with_schema_only(s3_bucket: str):
        ]
    )

-    table = db.create_table(table_name, schema=schema, namespace=namespace_path)
+    table = db.create_table(table_name, schema=schema, namespace_path=namespace_path)

-    # Should have called create_empty_table once
-    assert namespace.get_create_call_count() == 1
+    # Should have called declare_table once
+    assert get_declare_call_count(inner_ns_client) == 1
    # Should NOT have called describe_table in create mode
-    assert namespace.get_describe_call_count() == 0
+    assert get_describe_call_count(inner_ns_client) == 0

    # Verify empty table
    assert table.count_rows() == 0
--- a/python/python/tests/test_permutation.py
+++ b/python/python/tests/test_permutation.py
@@ -9,21 +9,6 @@ from lancedb import DBConnection, Table, connect
 from lancedb.permutation import Permutation, Permutations, permutation_builder


-def test_permutation_persistence(tmp_path):
-    db = connect(tmp_path)
-    tbl = db.create_table("test_table", pa.table({"x": range(100), "y": range(100)}))
-
-    permutation_tbl = (
-        permutation_builder(tbl).shuffle().persist(db, "test_permutation").execute()
-    )
-    assert permutation_tbl.count_rows() == 100
-
-    re_open = db.open_table("test_permutation")
-    assert re_open.count_rows() == 100
-
-    assert permutation_tbl.to_arrow() == re_open.to_arrow()
-
-
 def test_split_random_ratios(mem_db):
    """Test random splitting with ratios."""
    tbl = mem_db.create_table(
@@ -522,6 +507,50 @@ def test_no_split_names(some_table: Table):
    assert permutations[1].num_rows == 500


+def test_permutations_metadata_without_split_names_key(mem_db: DBConnection):
+    """Regression: schema metadata present but missing split_names key must not crash.
+
+    Previously, `.get(b"split_names", None).decode()` was called unconditionally,
+    so any permutation table whose metadata dict had other keys but no split_names
+    raised AttributeError: 'NoneType' object has no attribute 'decode'.
+    """
+    base = mem_db.create_table("base_nosplit", pa.table({"x": range(10)}))
+
+    # Build a permutation-like table that carries some metadata but NOT split_names.
+    raw = pa.table(
+        {
+            "row_id": pa.array(range(10), type=pa.uint64()),
+            "split_id": pa.array([0] * 10, type=pa.uint32()),
+        }
+    ).replace_schema_metadata({b"other_key": b"other_value"})
+    perm_tbl = mem_db.create_table("perm_nosplit", raw)
+
+    permutations = Permutations(base, perm_tbl)
+    assert permutations.split_names == []
+    assert permutations.split_dict == {}
+
+
+def test_from_tables_string_split_missing_names_key(mem_db: DBConnection):
+    """Regression: from_tables() with a string split must raise ValueError, not
+    AttributeError.
+
+    Previously, `.get(b"split_names", None).decode()` crashed with AttributeError
+    when the metadata dict existed but had no split_names key.
+    """
+    base = mem_db.create_table("base_strsplit", pa.table({"x": range(10)}))
+
+    raw = pa.table(
+        {
+            "row_id": pa.array(range(10), type=pa.uint64()),
+            "split_id": pa.array([0] * 10, type=pa.uint32()),
+        }
+    ).replace_schema_metadata({b"other_key": b"other_value"})
+    perm_tbl = mem_db.create_table("perm_strsplit", raw)
+
+    with pytest.raises(ValueError, match="no split names are defined"):
+        Permutation.from_tables(base, perm_tbl, split="train")
+
+
@pytest.fixture
 def some_perm_table(some_table: Table) -> Table:
    return (
--- a/python/python/tests/test_pydantic.py
+++ b/python/python/tests/test_pydantic.py
@@ -3,6 +3,7 @@

 import json
 from datetime import date, datetime
+from enum import Enum
 from typing import List, Optional, Tuple

 import pyarrow as pa
@@ -673,3 +674,29 @@ async def test_aliases_in_lance_model_async(mem_db_async):
    assert hasattr(model, "name")
    assert hasattr(model, "distance")
    assert model.distance < 0.01
+
+
+def test_enum_types():
+    """Str enums map to dictionary-encoded utf8; int enums to int64 (issue #1846)."""
+
+    class StrStatus(str, Enum):
+        PENDING = "pending"
+        RUNNING = "running"
+        DONE = "done"
+
+    class IntPriority(int, Enum):
+        LOW = 1
+        MEDIUM = 2
+        HIGH = 3
+
+    class TestModel(pydantic.BaseModel):
+        status: StrStatus
+        priority: IntPriority
+        opt_status: Optional[StrStatus] = None
+
+    schema = pydantic_to_schema(TestModel)
+
+    assert schema.field("status").type == pa.dictionary(pa.int32(), pa.utf8())
+    assert schema.field("priority").type == pa.int64()
+    assert schema.field("opt_status").type == pa.dictionary(pa.int32(), pa.utf8())
+    assert schema.field("opt_status").nullable
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -1385,7 +1385,7 @@ def test_query_timeout(tmp_path):
        }
    )
    table = db.create_table("test", data)
-    table.create_fts_index("text", use_tantivy=False)
+    table.create_fts_index("text")

    with pytest.raises(Exception, match="Query timeout"):
        table.search().where("text = 'a'").to_list(timeout=timedelta(0))
--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -6,6 +6,8 @@ import contextlib
 from datetime import timedelta
 import http.server
 import json
+import multiprocessing as mp
+import sys
 import threading
 import time
 from unittest.mock import MagicMock, patch
@@ -1230,3 +1232,82 @@ def test_background_loop_cancellation(exception):
        with pytest.raises(exception):
            loop.run(None)
        mock_future.cancel.assert_called_once()
+
+
+def _remote_fork_child(port: int, queue) -> None:
+    # Build a fresh Connection in the child so we exercise the at-fork-child
+    # tokio runtime reset rather than relying on an inherited reqwest client.
+    db = lancedb.connect(
+        "db://dev",
+        api_key="fake",
+        host_override=f"http://localhost:{port}",
+        client_config={
+            "retry_config": {"retries": 0},
+            "timeout_config": {"connect_timeout": 2, "read_timeout": 2},
+        },
+    )
+    queue.put(db.table_names())
+
+
+@pytest.mark.skipif(
+    sys.platform != "linux",
+    reason=(
+        "fork() is unavailable on Windows and unsafe on macOS "
+        "(Apple frameworks/TLS are not fork-safe)"
+    ),
+)
+def test_remote_connection_after_fork():
+    """A freshly-built remote Connection in a forked child should not hang.
+
+    The pyo3-async-runtimes tokio runtime would otherwise be inherited from
+    the parent with dead worker threads; the at-fork-child handler in our
+    runtime module rebuilds it on first use in the child.
+    """
+
+    def handler(request):
+        request.send_response(200)
+        request.send_header("Content-Type", "application/json")
+        request.end_headers()
+        request.wfile.write(b'{"tables": []}')
+
+    server = http.server.HTTPServer(("localhost", 0), make_mock_http_handler(handler))
+    port = server.server_address[1]
+    server_thread = threading.Thread(target=server.serve_forever)
+    server_thread.start()
+    try:
+        # Hit the server in the parent first so the runtime + LOOP are warm
+        # before fork; a fresh child must still succeed.
+        parent_db = lancedb.connect(
+            "db://dev",
+            api_key="fake",
+            host_override=f"http://localhost:{port}",
+            client_config={
+                "retry_config": {"retries": 0},
+                "timeout_config": {"connect_timeout": 2, "read_timeout": 2},
+            },
+        )
+        assert parent_db.table_names() == []
+
+        ctx = mp.get_context("fork")
+        queue = ctx.Queue()
+        proc = ctx.Process(target=_remote_fork_child, args=(port, queue))
+        proc.start()
+        proc.join(timeout=15)
+
+        if proc.is_alive():
+            proc.terminate()
+            proc.join(timeout=5)
+            if proc.is_alive():
+                proc.kill()
+                proc.join()
+            pytest.fail("Remote connection hung after fork")
+
+        assert proc.exitcode == 0, f"child exited with code {proc.exitcode}"
+        assert not queue.empty(), "child produced no result"
+        assert queue.get() == []
+
+        # Parent connection must still be usable after the child returned.
+        assert parent_db.table_names() == []
+    finally:
+        server.shutdown()
+        server_thread.join()
--- a/python/python/tests/test_rerankers.py
+++ b/python/python/tests/test_rerankers.py
@@ -26,11 +26,8 @@ from lancedb.rerankers import (
 )
 from lancedb.table import LanceTable

-# Tests rely on FTS index
-pytest.importorskip("lancedb.fts")

-
-def get_test_table(tmp_path, use_tantivy):
+def get_test_table(tmp_path):
    db = lancedb.connect(tmp_path)
    # Create a LanceDB table schema with a vector and a text column
    emb = EmbeddingFunctionRegistry.get_instance().get("test").create()
@@ -98,7 +95,7 @@ def get_test_table(tmp_path, use_tantivy):
    )

    # Create a fts index
-    table.create_fts_index("text", use_tantivy=use_tantivy, replace=True)
+    table.create_fts_index("text", replace=True)

    return table, MyTable

@@ -208,8 +205,8 @@ def _run_test_reranker(reranker, table, query, query_vector, schema):
    assert len(result) == 20 and result == result_arrow


-def _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy):
-    table, schema = get_test_table(tmp_path, use_tantivy)
+def _run_test_hybrid_reranker(reranker, tmp_path):
+    table, schema = get_test_table(tmp_path)
    # The default reranker
    result1 = (
        table.search(
@@ -285,8 +282,7 @@ def _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy):
    )


-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_linear_combination(tmp_path, use_tantivy):
+def test_linear_combination(tmp_path):
    reranker = LinearCombinationReranker()

    vector_results = pa.Table.from_pydict(
@@ -313,22 +309,20 @@ def test_linear_combination(tmp_path, use_tantivy):
    assert "_score" not in combined_results.column_names
    assert "_relevance_score" in combined_results.column_names

-    _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
+    _run_test_hybrid_reranker(reranker, tmp_path)


-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_rrf_reranker(tmp_path, use_tantivy):
+def test_rrf_reranker(tmp_path):
    reranker = RRFReranker()
-    _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
+    _run_test_hybrid_reranker(reranker, tmp_path)


-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_mrr_reranker(tmp_path, use_tantivy):
+def test_mrr_reranker(tmp_path):
    reranker = MRRReranker()
-    _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
+    _run_test_hybrid_reranker(reranker, tmp_path)

    # Test multi-vector part
-    table, schema = get_test_table(tmp_path, use_tantivy)
+    table, schema = get_test_table(tmp_path)
    query = "single player experience"
    rs1 = table.search(query, vector_column_name="vector").limit(10).with_row_id(True)
    rs2 = (
@@ -363,7 +357,7 @@ def test_rrf_reranker_distance():
    table = db.create_table("test", data)

    table.create_index(num_partitions=1, num_sub_vectors=2)
-    table.create_fts_index("text", use_tantivy=False)
+    table.create_fts_index("text")

    reranker = RRFReranker(return_score="all")

@@ -422,35 +416,31 @@ def test_rrf_reranker_distance():
@pytest.mark.skipif(
    os.environ.get("COHERE_API_KEY") is None, reason="COHERE_API_KEY not set"
 )
-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_cohere_reranker(tmp_path, use_tantivy):
+def test_cohere_reranker(tmp_path):
    pytest.importorskip("cohere")
    reranker = CohereReranker()
-    table, schema = get_test_table(tmp_path, use_tantivy)
+    table, schema = get_test_table(tmp_path)
    _run_test_reranker(reranker, table, "single player experience", None, schema)


-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_cross_encoder_reranker(tmp_path, use_tantivy):
+def test_cross_encoder_reranker(tmp_path):
    pytest.importorskip("sentence_transformers")
    reranker = CrossEncoderReranker()
-    table, schema = get_test_table(tmp_path, use_tantivy)
+    table, schema = get_test_table(tmp_path)
    _run_test_reranker(reranker, table, "single player experience", None, schema)


-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_colbert_reranker(tmp_path, use_tantivy):
+def test_colbert_reranker(tmp_path):
    pytest.importorskip("rerankers")
    reranker = ColbertReranker()
-    table, schema = get_test_table(tmp_path, use_tantivy)
+    table, schema = get_test_table(tmp_path)
    _run_test_reranker(reranker, table, "single player experience", None, schema)


-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_answerdotai_reranker(tmp_path, use_tantivy):
+def test_answerdotai_reranker(tmp_path):
    pytest.importorskip("rerankers")
    reranker = AnswerdotaiRerankers()
-    table, schema = get_test_table(tmp_path, use_tantivy)
+    table, schema = get_test_table(tmp_path)
    _run_test_reranker(reranker, table, "single player experience", None, schema)


@@ -459,10 +449,9 @@ def test_answerdotai_reranker(tmp_path, use_tantivy):
    or os.environ.get("OPENAI_BASE_URL") is not None,
    reason="OPENAI_API_KEY not set",
 )
-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_openai_reranker(tmp_path, use_tantivy):
+def test_openai_reranker(tmp_path):
    pytest.importorskip("openai")
-    table, schema = get_test_table(tmp_path, use_tantivy)
+    table, schema = get_test_table(tmp_path)
    reranker = OpenaiReranker()
    _run_test_reranker(reranker, table, "single player experience", None, schema)

@@ -470,10 +459,9 @@ def test_openai_reranker(tmp_path, use_tantivy):
@pytest.mark.skipif(
    os.environ.get("JINA_API_KEY") is None, reason="JINA_API_KEY not set"
 )
-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_jina_reranker(tmp_path, use_tantivy):
+def test_jina_reranker(tmp_path):
    pytest.importorskip("jina")
-    table, schema = get_test_table(tmp_path, use_tantivy)
+    table, schema = get_test_table(tmp_path)
    reranker = JinaReranker()
    _run_test_reranker(reranker, table, "single player experience", None, schema)

@@ -481,11 +469,10 @@ def test_jina_reranker(tmp_path, use_tantivy):
@pytest.mark.skipif(
    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
 )
-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_voyageai_reranker(tmp_path, use_tantivy):
+def test_voyageai_reranker(tmp_path):
    pytest.importorskip("voyageai")
    reranker = VoyageAIReranker(model_name="rerank-2.5")
-    table, schema = get_test_table(tmp_path, use_tantivy)
+    table, schema = get_test_table(tmp_path)
    _run_test_reranker(reranker, table, "single player experience", None, schema)


@@ -504,7 +491,7 @@ def test_empty_result_reranker():

    # Create empty table with schema
    empty_table = db.create_table("empty_table", schema=schema, mode="overwrite")
-    empty_table.create_fts_index("text", use_tantivy=False, replace=True)
+    empty_table.create_fts_index("text", replace=True)
    for reranker in [
        CrossEncoderReranker(),
        # ColbertReranker(),
@@ -603,11 +590,10 @@ def test_empty_hybrid_result_reranker():
    assert "_rowid" in result.column_names


-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_cross_encoder_reranker_return_all(tmp_path, use_tantivy):
+def test_cross_encoder_reranker_return_all(tmp_path):
    pytest.importorskip("sentence_transformers")
    reranker = CrossEncoderReranker(return_score="all")
-    table, schema = get_test_table(tmp_path, use_tantivy)
+    table, schema = get_test_table(tmp_path)
    query = "single player experience"
    result = (
        table.search(query, query_type="hybrid", vector_column_name="vector")
--- a/python/python/tests/test_s3.py
+++ b/python/python/tests/test_s3.py
@@ -242,8 +242,8 @@ def test_s3_dynamodb_sync(s3_bucket: str, commit_table: str, monkeypatch):

    # FTS indices should error since they are not supported yet.
    with pytest.raises(
-        NotImplementedError,
-        match="Full-text search is only supported on the local filesystem",
+        ValueError,
+        match="Tantivy-based FTS has been removed",
    ):
        table.create_fts_index("x", use_tantivy=True)

--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -3,6 +3,7 @@


 import os
+import sys
 from datetime import date, datetime, timedelta
 from time import sleep
 from typing import List
@@ -10,7 +11,7 @@ from unittest.mock import patch

 import lancedb
 from lancedb.dependencies import _PANDAS_AVAILABLE
-from lancedb.index import HnswPq, HnswSq, IvfPq
+from lancedb.index import HnswFlat, HnswPq, HnswSq, IvfPq
 import numpy as np
 import polars as pl
 import pyarrow as pa
@@ -916,6 +917,21 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
        "my_vector", replace=True, config=expected_config, name=None, train=True
    )

+    table.create_index(
+        vector_column_name="my_vector",
+        metric="cosine",
+        index_type="IVF_HNSW_FLAT",
+        sample_rate=0.1,
+        m=29,
+        ef_construction=10,
+    )
+    expected_config = HnswFlat(
+        distance_type="cosine", sample_rate=0.1, m=29, ef_construction=10
+    )
+    mock_create_index.assert_called_with(
+        "my_vector", replace=True, config=expected_config, name=None, train=True
+    )
+

@patch("lancedb.table.AsyncTable.create_index")
 def test_create_index_name_and_train_parameters(
@@ -1049,6 +1065,231 @@ def test_add_with_nans(mem_db: DBConnection):
    assert np.allclose(v, np.array([0.0, 0.0]))


+def test_add_with_empty_fixed_size_list_drops_bad_rows(mem_db: DBConnection):
+    class Schema(LanceModel):
+        text: str
+        embedding: Vector(16)
+
+    table = mem_db.create_table("test_empty_embeddings", schema=Schema)
+    table.add(
+        [
+            {"text": "hello", "embedding": []},
+            {"text": "bar", "embedding": [0.1] * 16},
+        ],
+        on_bad_vectors="drop",
+    )
+
+    data = table.to_arrow()
+    assert data["text"].to_pylist() == ["bar"]
+    assert np.allclose(data["embedding"].to_pylist()[0], np.array([0.1] * 16))
+
+
+def test_add_with_integer_embeddings_preserves_casting(mem_db: DBConnection):
+    class Schema(LanceModel):
+        text: str
+        embedding: Vector(4)
+
+    table = mem_db.create_table("test_integer_embeddings", schema=Schema)
+    table.add(
+        [{"text": "foo", "embedding": [1, 2, 3, 4]}],
+        on_bad_vectors="drop",
+    )
+
+    assert table.to_arrow()["embedding"].to_pylist() == [[1.0, 2.0, 3.0, 4.0]]
+
+
+def test_on_bad_vectors_does_not_handle_non_vector_fixed_size_lists(
+    mem_db: DBConnection,
+):
+    schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32(), 4)),
+            pa.field("bbox", pa.list_(pa.float32(), 4)),
+        ]
+    )
+    table = mem_db.create_table("test_bbox_schema", schema=schema)
+
+    with pytest.raises(RuntimeError, match="FixedSizeListType"):
+        table.add(
+            [{"vector": [1.0, 2.0, 3.0, 4.0], "bbox": [0.0, 1.0]}],
+            on_bad_vectors="drop",
+        )
+
+
+def test_on_bad_vectors_does_not_handle_custom_named_fixed_size_lists(
+    mem_db: DBConnection,
+):
+    schema = pa.schema([pa.field("features", pa.list_(pa.float32(), 16))])
+    table = mem_db.create_table("test_custom_named_fixed_size_vector", schema=schema)
+
+    with pytest.raises(RuntimeError, match="FixedSizeListType"):
+        table.add(
+            [
+                {"features": []},
+                {"features": [0.1] * 16},
+            ],
+            on_bad_vectors="drop",
+        )
+
+
+def test_on_bad_vectors_with_schema_list_vector_still_sanitizes(mem_db: DBConnection):
+    schema = pa.schema([pa.field("vector", pa.list_(pa.float32()))])
+    table = mem_db.create_table("test_schema_list_vector", schema=schema)
+    table.add(
+        [
+            {"vector": [1.0, 2.0]},
+            {"vector": [3.0]},
+            {"vector": [4.0, 5.0]},
+        ],
+        on_bad_vectors="drop",
+    )
+
+    assert table.to_arrow()["vector"].to_pylist() == [[1.0, 2.0], [4.0, 5.0]]
+
+
+def test_on_bad_vectors_handles_typed_custom_fixed_vectors_for_list_schema(
+    mem_db: DBConnection,
+):
+    schema = pa.schema([pa.field("vec", pa.list_(pa.float32()))])
+    table = mem_db.create_table("test_typed_custom_fixed_vector", schema=schema)
+    data = pa.table(
+        {
+            "vec": pa.array(
+                [[float("nan")] * 16, [1.0] * 16],
+                type=pa.list_(pa.float32(), 16),
+            )
+        }
+    )
+
+    table.add(data, on_bad_vectors="drop")
+
+    assert table.to_arrow()["vec"].to_pylist() == [[1.0] * 16]
+
+
+def test_on_bad_vectors_fill_preserves_arrow_nested_vector_type(mem_db: DBConnection):
+    schema = pa.schema([pa.field("vector", pa.list_(pa.float32()))])
+    table = mem_db.create_table("test_fill_arrow_nested_type", schema=schema)
+    data = pa.table(
+        {
+            "vector": pa.array(
+                [[1.0, 2.0], [float("nan"), 3.0]],
+                type=pa.list_(pa.float32(), 2),
+            )
+        }
+    )
+
+    table.add(
+        data,
+        on_bad_vectors="fill",
+        fill_value=0.0,
+    )
+
+    assert table.to_arrow()["vector"].to_pylist() == [[1.0, 2.0], [0.0, 0.0]]
+
+
+@pytest.mark.parametrize(
+    ("table_name", "batch1", "expected"),
+    [
+        (
+            "test_schema_list_vector_empty_prefix",
+            pa.record_batch({"vector": [[], []]}),
+            [[], [], [1.0, 2.0], [3.0, 4.0]],
+        ),
+        (
+            "test_schema_list_vector_all_bad_prefix",
+            pa.record_batch({"vector": [[float("nan")] * 3, [float("nan")] * 3]}),
+            [[1.0, 2.0], [3.0, 4.0]],
+        ),
+    ],
+)
+def test_on_bad_vectors_with_schema_list_vector_ignores_invalid_prefix_batches(
+    mem_db: DBConnection,
+    table_name: str,
+    batch1: pa.RecordBatch,
+    expected: list,
+):
+    schema = pa.schema([pa.field("vector", pa.list_(pa.float32()))])
+    table = mem_db.create_table(table_name, schema=schema)
+    batch2 = pa.record_batch({"vector": [[1.0, 2.0], [3.0, 4.0]]})
+    reader = pa.RecordBatchReader.from_batches(batch1.schema, [batch1, batch2])
+
+    table.add(reader, on_bad_vectors="drop")
+
+    assert table.to_arrow()["vector"].to_pylist() == expected
+
+
+def test_on_bad_vectors_with_multiple_vectors_locks_dim_after_final_drop(
+    mem_db: DBConnection,
+):
+    registry = EmbeddingFunctionRegistry.get_instance()
+    func = MockTextEmbeddingFunction.create()
+    metadata = registry.get_table_metadata(
+        [
+            EmbeddingFunctionConfig(
+                source_column="text1", vector_column="vec1", function=func
+            ),
+            EmbeddingFunctionConfig(
+                source_column="text2", vector_column="vec2", function=func
+            ),
+        ]
+    )
+    schema = pa.schema(
+        [
+            pa.field("vec1", pa.list_(pa.float32())),
+            pa.field("vec2", pa.list_(pa.float32())),
+        ],
+        metadata=metadata,
+    )
+    table = mem_db.create_table("test_multi_vector_dim_lock", schema=schema)
+    batch1 = pa.record_batch(
+        {
+            "vec1": [[1.0, 2.0, 3.0], [10.0, 11.0]],
+            "vec2": [[float("nan"), 0.0], [5.0, 6.0]],
+        }
+    )
+    batch2 = pa.record_batch(
+        {
+            "vec1": [[20.0, 21.0], [30.0, 31.0]],
+            "vec2": [[7.0, 8.0], [9.0, 10.0]],
+        }
+    )
+    reader = pa.RecordBatchReader.from_batches(batch1.schema, [batch1, batch2])
+
+    table.add(reader, on_bad_vectors="drop")
+
+    data = table.to_arrow()
+    assert data["vec1"].to_pylist() == [[10.0, 11.0], [20.0, 21.0], [30.0, 31.0]]
+    assert data["vec2"].to_pylist() == [[5.0, 6.0], [7.0, 8.0], [9.0, 10.0]]
+
+
+def test_on_bad_vectors_does_not_handle_non_vector_list_columns(mem_db: DBConnection):
+    schema = pa.schema([pa.field("embedding_history", pa.list_(pa.float32()))])
+    table = mem_db.create_table("test_non_vector_list_schema", schema=schema)
+    table.add(
+        [
+            {"embedding_history": [1.0, 2.0]},
+            {"embedding_history": [3.0]},
+        ],
+        on_bad_vectors="drop",
+    )
+
+    assert table.to_arrow()["embedding_history"].to_pylist() == [
+        [1.0, 2.0],
+        [3.0],
+    ]
+
+
+def test_on_bad_vectors_all_null_schema_vector_batches_do_not_crash(
+    mem_db: DBConnection,
+):
+    schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2), nullable=True)])
+    table = mem_db.create_table("test_all_null_vector_batch", schema=schema)
+
+    table.add([{"vector": None}], on_bad_vectors="drop")
+
+    assert table.to_arrow()["vector"].to_pylist() == [None]
+
+
 def test_restore(mem_db: DBConnection):
    table = mem_db.create_table(
        "my_table",
@@ -1722,7 +1963,6 @@ def setup_hybrid_search_table(db: DBConnection, embedding_func):

 def test_hybrid_search(tmp_db: DBConnection):
    # This test uses an FTS index
-    pytest.importorskip("lancedb.fts")
    pytest.importorskip("lance")

    table, MyTable, emb = setup_hybrid_search_table(tmp_db, "test")
@@ -1793,7 +2033,6 @@ def test_hybrid_search(tmp_db: DBConnection):

 def test_hybrid_search_metric_type(tmp_db: DBConnection):
    # This test uses an FTS index
-    pytest.importorskip("lancedb.fts")
    pytest.importorskip("lance")

    # Need to use nonnorm as the embedding function so l2 and dot results
@@ -1815,6 +2054,13 @@ def test_hybrid_search_metric_type(tmp_db: DBConnection):
@pytest.mark.parametrize(
    "consistency_interval", [None, timedelta(seconds=0), timedelta(seconds=0.1)]
 )
+@pytest.mark.skipif(
+    sys.platform == "win32",
+    reason=(
+        "TODO: directory namespace is not supported on Windows yet; "
+        "re-enable after that is fixed."
+    ),
+)
 def test_consistency(tmp_path, consistency_interval):
    db = lancedb.connect(tmp_path)
    table = db.create_table("my_table", data=[{"id": 0}])
@@ -1835,7 +2081,6 @@ def test_consistency(tmp_path, consistency_interval):
    elif consistency_interval == timedelta(seconds=0):
        assert table2.version == table.version
    else:
-        # (consistency_interval == timedelta(seconds=0.1)
        assert table2.version == table.version - 1
        sleep(0.1)
        assert table2.version == table.version
@@ -2108,7 +2353,7 @@ def test_stats(mem_db: DBConnection):
    stats = table.stats()
    print(f"{stats=}")
    assert stats == {
-        "total_bytes": 38,
+        "total_bytes": 60,
        "num_rows": 2,
        "num_indices": 0,
        "fragment_stats": {
--- a/python/python/tests/test_torch.py
+++ b/python/python/tests/test_torch.py
@@ -1,14 +1,29 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

+import functools
+import multiprocessing as mp
+import pickle
+import sys
+
+import lancedb
 import pyarrow as pa
 import pytest
+from lancedb.permutation import Permutation, Permutations, permutation_builder
 from lancedb.util import tbl_to_tensor
-from lancedb.permutation import Permutation

 torch = pytest.importorskip("torch")


+def _open_native_table(uri: str, table_name: str):
+    """Top-level connection factory used by the explicit-factory pickle test.
+
+    Defined at module scope so that pickle can resolve it by name in the
+    worker / unpickling process.
+    """
+    return lancedb.connect(uri).open_table(table_name)
+
+
 def test_table_dataloader(mem_db):
    table = mem_db.create_table("test_table", pa.table({"a": range(1000)}))
    dataloader = torch.utils.data.DataLoader(
@@ -40,3 +55,156 @@ def test_permutation_dataloader(mem_db):
    for batch in dataloader:
        assert batch.size(0) == 1
        assert batch.size(1) == 10
+
+
+def test_permutation_is_picklable(tmp_db):
+    """A Permutation must be picklable so it can be used with PyTorch's
+    DataLoader when num_workers > 0 (which uses multiprocessing and pickles
+    the dataset to pass it to worker processes)."""
+    table = tmp_db.create_table("test_table", pa.table({"a": range(1000)}))
+    permutation = Permutation.identity(table)
+
+    pickled = pickle.dumps(permutation)
+    restored = pickle.loads(pickled)
+
+    assert len(restored) == 1000
+    rows = restored.__getitems__([0, 1, 2])
+    assert rows == [{"a": 0}, {"a": 1}, {"a": 2}]
+
+
+def test_permutation_with_memory_base_is_picklable(mem_db):
+    """An in-memory base table is inlined into the pickle as Arrow IPC bytes
+    and rebuilt on the other side as an in-memory LanceTable, so the
+    Permutation round-trips even though the original database can't be
+    reopened across processes."""
+    table = mem_db.create_table("test_table", pa.table({"a": range(50)}))
+    permutation = Permutation.identity(table)
+
+    restored = pickle.loads(pickle.dumps(permutation))
+
+    assert len(restored) == 50
+    assert restored.__getitems__([0, 10, 49]) == [{"a": 0}, {"a": 10}, {"a": 49}]
+
+
+def test_permutation_dataloader_multiprocessing(tmp_db):
+    """Using a Permutation with a PyTorch DataLoader that has num_workers > 0
+    must work end-to-end. Each worker process gets a pickled copy of the
+    dataset and reads batches from it."""
+    table = tmp_db.create_table("test_table", pa.table({"a": range(1000)}))
+    permutation = Permutation.identity(table)
+
+    dataloader = torch.utils.data.DataLoader(
+        permutation,
+        batch_size=10,
+        shuffle=True,
+        num_workers=2,
+        multiprocessing_context="spawn",
+    )
+    seen = 0
+    for batch in dataloader:
+        assert batch["a"].size(0) == 10
+        seen += batch["a"].size(0)
+    assert seen == 1000
+
+
+def test_permutation_pickle_with_connection_factory(tmp_path):
+    """When the user provides a connection_factory, pickling should round-trip
+    through that factory rather than introspecting the connection URI. Useful
+    for remote / cloud connections where the URI alone isn't reopenable."""
+    db = lancedb.connect(tmp_path)
+    db.create_table("test_table", pa.table({"a": range(50)}))
+
+    factory = functools.partial(_open_native_table, str(tmp_path))
+    permutation = Permutation.identity(factory("test_table")).with_connection_factory(
+        factory
+    )
+
+    restored = pickle.loads(pickle.dumps(permutation))
+
+    assert len(restored) == 50
+    # The factory survives pickling and is what powered base-table reopen.
+    assert restored.connection_factory is not None
+    assert restored.connection_factory.func is _open_native_table
+    assert restored.__getitems__([0, 1, 2]) == [{"a": 0}, {"a": 1}, {"a": 2}]
+
+
+def test_permutation_with_builder_is_picklable(tmp_db):
+    """A Permutation built from a non-identity permutation table must round-trip
+    through pickle while preserving the row order defined by the permutation."""
+    table = tmp_db.create_table("test_table", pa.table({"a": range(100)}))
+    perm_tbl = (
+        permutation_builder(table)
+        .split_random(ratios=[0.8, 0.2], seed=42, split_names=["train", "test"])
+        .shuffle(seed=42)
+        .execute()
+    )
+    permutations = Permutations(table, perm_tbl)
+    permutation = permutations["train"]
+
+    indices = list(range(len(permutation)))
+    expected = permutation.__getitems__(indices)
+
+    restored = pickle.loads(pickle.dumps(permutation))
+
+    assert len(restored) == len(permutation)
+    assert restored.__getitems__(indices) == expected
+
+
+def _multiworker_dataloader_target(db_uri: str, result_queue):
+    import lancedb
+    from lancedb.permutation import Permutation
+
+    db = lancedb.connect(db_uri)
+    table = db.open_table("test_table")
+    permutation = Permutation.identity(table)
+
+    dataloader = torch.utils.data.DataLoader(
+        permutation,
+        batch_size=10,
+        num_workers=2,
+        multiprocessing_context="fork",
+    )
+    count = 0
+    for batch in dataloader:
+        assert batch["a"].size(0) == 10
+        count += 1
+    result_queue.put(count)
+
+
+@pytest.mark.skipif(
+    sys.platform != "linux",
+    reason=(
+        "fork() is unavailable on Windows and unsafe on macOS "
+        "(Apple frameworks/TLS are not fork-safe)"
+    ),
+)
+def test_permutation_dataloader_fork_workers(tmp_path):
+    """A Permutation used by a fork-based DataLoader should not hang.
+
+    PyTorch's DataLoader uses fork-based multiprocessing by default on Linux.
+    LanceDB drives async work through a background asyncio thread that does
+    not survive a fork, so any LOOP.run() in a worker blocks forever.
+    """
+    import lancedb
+
+    db_uri = str(tmp_path / "db")
+    db = lancedb.connect(db_uri)
+    db.create_table("test_table", pa.table({"a": list(range(1000))}))
+
+    ctx = mp.get_context("spawn")
+    queue = ctx.Queue()
+    proc = ctx.Process(target=_multiworker_dataloader_target, args=(db_uri, queue))
+    proc.start()
+    proc.join(timeout=30)
+
+    if proc.is_alive():
+        proc.terminate()
+        proc.join(timeout=5)
+        if proc.is_alive():
+            proc.kill()
+            proc.join()
+        pytest.fail("Permutation hung when iterated in a fork-based DataLoader worker")
+
+    assert proc.exitcode == 0, f"child exited with code {proc.exitcode}"
+    assert not queue.empty(), "child produced no batches"
+    assert queue.get() == 100
--- a/python/python/tests/test_util.py
+++ b/python/python/tests/test_util.py
@@ -15,8 +15,10 @@ from lancedb.table import (
    _cast_to_target_schema,
    _handle_bad_vectors,
    _into_pyarrow_reader,
-    _sanitize_data,
    _infer_target_schema,
+    _merge_metadata,
+    _sanitize_data,
+    sanitize_create_table,
 )
 import pyarrow as pa
 import pandas as pd
@@ -304,6 +306,117 @@ def test_handle_bad_vectors_noop():
    assert output["vector"] == vector


+def test_handle_bad_vectors_updates_reader_schema_for_target_schema():
+    data = pa.table({"vector": [[1, 2, 3, 4]]})
+    target_schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 4))])
+
+    output = _handle_bad_vectors(
+        data.to_reader(),
+        on_bad_vectors="drop",
+        target_schema=target_schema,
+    )
+
+    assert output.schema == pa.schema([pa.field("vector", pa.list_(pa.float32()))])
+    assert output.read_all()["vector"].to_pylist() == [[1.0, 2.0, 3.0, 4.0]]
+
+
+def test_sanitize_data_keeps_target_field_metadata():
+    source_field = pa.field(
+        "vector",
+        pa.list_(pa.float32(), 2),
+        metadata={b"source": b"drop-me"},
+    )
+    target_field = pa.field(
+        "vector",
+        pa.list_(pa.float32(), 2),
+        metadata={b"target": b"keep-me"},
+    )
+    data = pa.table(
+        {"vector": pa.array([[1.0, 2.0]], type=pa.list_(pa.float32(), 2))},
+        schema=pa.schema([source_field]),
+    )
+
+    output = _sanitize_data(
+        data,
+        target_schema=pa.schema([target_field]),
+        on_bad_vectors="drop",
+    ).read_all()
+
+    assert output.schema.field("vector").metadata == {b"target": b"keep-me"}
+
+
+def test_sanitize_data_uses_separate_embedding_metadata_for_bad_vectors():
+    registry = EmbeddingFunctionRegistry.get_instance()
+    conf = EmbeddingFunctionConfig(
+        source_column="text",
+        vector_column="custom_vector",
+        function=MockTextEmbeddingFunction.create(),
+    )
+    metadata = registry.get_table_metadata([conf])
+    schema = pa.schema(
+        {
+            "text": pa.string(),
+            "custom_vector": pa.list_(pa.float32(), 10),
+        },
+        metadata={b"note": b"keep-me"},
+    )
+    data = pa.table(
+        {
+            "text": ["bad", "good"],
+            "custom_vector": [[1.0] * 9, [2.0] * 10],
+        }
+    )
+
+    output = _sanitize_data(
+        data,
+        target_schema=schema,
+        metadata=metadata,
+        on_bad_vectors="drop",
+    ).read_all()
+
+    assert output["text"].to_pylist() == ["good"]
+    assert output.schema.metadata[b"note"] == b"keep-me"
+    assert b"embedding_functions" in output.schema.metadata
+
+
+def test_sanitize_create_table_merges_and_overrides_embedding_metadata():
+    registry = EmbeddingFunctionRegistry.get_instance()
+    old_conf = EmbeddingFunctionConfig(
+        source_column="text",
+        vector_column="old_vector",
+        function=MockTextEmbeddingFunction.create(),
+    )
+    new_conf = EmbeddingFunctionConfig(
+        source_column="text",
+        vector_column="custom_vector",
+        function=MockTextEmbeddingFunction.create(),
+    )
+    metadata = registry.get_table_metadata([new_conf])
+    schema = pa.schema(
+        {
+            "text": pa.string(),
+            "custom_vector": pa.list_(pa.float32(), 10),
+        },
+        metadata=_merge_metadata(
+            {b"note": b"keep-me"},
+            registry.get_table_metadata([old_conf]),
+        ),
+    )
+
+    data, schema = sanitize_create_table(
+        pa.table({"text": ["good"]}),
+        schema,
+        metadata=metadata,
+        on_bad_vectors="drop",
+    )
+
+    assert schema.metadata[b"note"] == b"keep-me"
+    assert b"embedding_functions" in schema.metadata
+    assert data.schema.metadata[b"note"] == b"keep-me"
+    funcs = EmbeddingFunctionRegistry.get_instance().parse_functions(schema.metadata)
+    assert set(funcs.keys()) == {"custom_vector"}
+
+
 class TestModel(lancedb.pydantic.LanceModel):
    a: Optional[int]
    b: Optional[int]
--- a/python/src/arrow.rs
+++ b/python/src/arrow.rs
@@ -3,6 +3,8 @@

 use std::sync::Arc;

+use crate::error::PythonErrorExt;
+use crate::runtime::future_into_py;
 use arrow::{
    datatypes::SchemaRef,
    pyarrow::{IntoPyArrow, ToPyArrow},
@@ -12,9 +14,6 @@ use lancedb::arrow::SendableRecordBatchStream;
 use pyo3::{
    Bound, Py, PyAny, PyRef, PyResult, Python, exceptions::PyStopAsyncIteration, pyclass, pymethods,
 };
-use pyo3_async_runtimes::tokio::future_into_py;
-
-use crate::error::PythonErrorExt;

 #[pyclass]
 pub struct RecordBatchStream {
--- a/python/src/connection.rs
+++ b/python/src/connection.rs
@@ -1,11 +1,23 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use std::{collections::HashMap, sync::Arc, time::Duration};
+use std::{
+    collections::{HashMap, HashSet},
+    sync::Arc,
+    time::Duration,
+};

+use crate::{
+    error::PythonErrorExt,
+    namespace::{create_namespace_storage_options_provider, extract_namespace_arc},
+    runtime::future_into_py,
+    table::Table,
+};
 use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::FromPyArrow};
 use lancedb::{
    connection::Connection as LanceConnection,
+    connection::NamespaceClientPushdownOperation,
+    database::namespace::LanceNamespaceDatabase,
    database::{CreateTableMode, Database, ReadConsistency},
 };
 use pyo3::{
@@ -14,12 +26,6 @@ use pyo3::{
    pyclass, pyfunction, pymethods,
    types::{PyDict, PyDictMethods},
 };
-use pyo3_async_runtimes::tokio::future_into_py;
-
-use crate::{
-    error::PythonErrorExt, namespace::extract_namespace_arc,
-    storage_options::py_object_to_storage_options_provider, table::Table,
-};

 #[pyclass]
 pub struct Connection {
@@ -38,6 +44,29 @@ impl Connection {
    }
 }

+fn parse_namespace_client_pushdown_operations(
+    operations: Option<Vec<String>>,
+) -> PyResult<HashSet<NamespaceClientPushdownOperation>> {
+    let mut parsed = HashSet::new();
+    for operation in operations.unwrap_or_default() {
+        match operation.as_str() {
+            "QueryTable" => {
+                parsed.insert(NamespaceClientPushdownOperation::QueryTable);
+            }
+            "CreateTable" => {
+                parsed.insert(NamespaceClientPushdownOperation::CreateTable);
+            }
+            _ => {
+                return Err(PyValueError::new_err(format!(
+                    "Invalid pushdown operation: {}",
+                    operation
+                )));
+            }
+        }
+    }
+    Ok(parsed)
+}
+
 impl Connection {
    fn parse_create_mode_str(mode: &str) -> PyResult<CreateTableMode> {
        match mode {
@@ -87,16 +116,16 @@ impl Connection {
        })
    }

-    #[pyo3(signature = (namespace=vec![], start_after=None, limit=None))]
+    #[pyo3(signature = (namespace_path=None, start_after=None, limit=None))]
    pub fn table_names(
        self_: PyRef<'_, Self>,
-        namespace: Vec<String>,
+        namespace_path: Option<Vec<String>>,
        start_after: Option<String>,
        limit: Option<u32>,
    ) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.get_inner()?.clone();
        let mut op = inner.table_names();
-        op = op.namespace(namespace);
+        op = op.namespace(namespace_path.unwrap_or_default());
        if let Some(start_after) = start_after {
            op = op.start_after(start_after);
        }
@@ -107,34 +136,43 @@ impl Connection {
    }

    #[allow(clippy::too_many_arguments)]
-    #[pyo3(signature = (name, mode, data, namespace=vec![], storage_options=None, storage_options_provider=None, location=None))]
+    #[pyo3(signature = (name, mode, data, namespace_path=None, storage_options=None, location=None, namespace_client=None))]
    pub fn create_table<'a>(
        self_: PyRef<'a, Self>,
        name: String,
        mode: &str,
        data: Bound<'_, PyAny>,
-        namespace: Vec<String>,
+        namespace_path: Option<Vec<String>>,
        storage_options: Option<HashMap<String, String>>,
-        storage_options_provider: Option<Py<PyAny>>,
        location: Option<String>,
+        namespace_client: Option<Py<PyAny>>,
    ) -> PyResult<Bound<'a, PyAny>> {
        let inner = self_.get_inner()?.clone();
+        let py = self_.py();

        let mode = Self::parse_create_mode_str(mode)?;

        let batches: Box<dyn arrow::array::RecordBatchReader + Send> =
            Box::new(ArrowArrayStreamReader::from_pyarrow_bound(&data)?);

-        let mut builder = inner.create_table(name, batches).mode(mode);
+        let ns_path = namespace_path.clone().unwrap_or_default();
+        let mut builder = inner.create_table(name.clone(), batches).mode(mode);

-        builder = builder.namespace(namespace);
+        builder = builder.namespace(ns_path.clone());
        if let Some(storage_options) = storage_options {
            builder = builder.storage_options(storage_options);
        }
-        if let Some(provider_obj) = storage_options_provider {
-            let provider = py_object_to_storage_options_provider(provider_obj)?;
+
+        // Auto-create storage options provider from namespace_client
+        if let Some(ns_obj) = namespace_client {
+            let ns_client = extract_namespace_arc(py, ns_obj)?;
+            // Create table_id by combining namespace_path with table name
+            let mut table_id = ns_path;
+            table_id.push(name);
+            let provider = create_namespace_storage_options_provider(ns_client, table_id);
            builder = builder.storage_options_provider(provider);
        }
+
        if let Some(location) = location {
            builder = builder.location(location);
        }
@@ -146,33 +184,44 @@ impl Connection {
    }

    #[allow(clippy::too_many_arguments)]
-    #[pyo3(signature = (name, mode, schema, namespace=vec![], storage_options=None, storage_options_provider=None, location=None))]
+    #[pyo3(signature = (name, mode, schema, namespace_path=None, storage_options=None, location=None, namespace_client=None))]
    pub fn create_empty_table<'a>(
        self_: PyRef<'a, Self>,
        name: String,
        mode: &str,
        schema: Bound<'_, PyAny>,
-        namespace: Vec<String>,
+        namespace_path: Option<Vec<String>>,
        storage_options: Option<HashMap<String, String>>,
-        storage_options_provider: Option<Py<PyAny>>,
        location: Option<String>,
+        namespace_client: Option<Py<PyAny>>,
    ) -> PyResult<Bound<'a, PyAny>> {
        let inner = self_.get_inner()?.clone();
+        let py = self_.py();

        let mode = Self::parse_create_mode_str(mode)?;

        let schema = Schema::from_pyarrow_bound(&schema)?;

-        let mut builder = inner.create_empty_table(name, Arc::new(schema)).mode(mode);
+        let ns_path = namespace_path.clone().unwrap_or_default();
+        let mut builder = inner
+            .create_empty_table(name.clone(), Arc::new(schema))
+            .mode(mode);

-        builder = builder.namespace(namespace);
+        builder = builder.namespace(ns_path.clone());
        if let Some(storage_options) = storage_options {
            builder = builder.storage_options(storage_options);
        }
-        if let Some(provider_obj) = storage_options_provider {
-            let provider = py_object_to_storage_options_provider(provider_obj)?;
+
+        // Auto-create storage options provider from namespace_client
+        if let Some(ns_obj) = namespace_client {
+            let ns_client = extract_namespace_arc(py, ns_obj)?;
+            // Create table_id by combining namespace_path with table name
+            let mut table_id = ns_path;
+            table_id.push(name);
+            let provider = create_namespace_storage_options_provider(ns_client, table_id);
            builder = builder.storage_options_provider(provider);
        }
+
        if let Some(location) = location {
            builder = builder.location(location);
        }
@@ -184,45 +233,44 @@ impl Connection {
    }

    #[allow(clippy::too_many_arguments)]
-    #[pyo3(signature = (name, namespace=vec![], storage_options = None, storage_options_provider=None, index_cache_size = None, location=None, namespace_client=None, managed_versioning=None))]
+    #[pyo3(signature = (name, namespace_path=None, storage_options=None, index_cache_size=None, location=None, namespace_client=None, managed_versioning=None))]
    pub fn open_table(
        self_: PyRef<'_, Self>,
        name: String,
-        namespace: Vec<String>,
+        namespace_path: Option<Vec<String>>,
        storage_options: Option<HashMap<String, String>>,
-        storage_options_provider: Option<Py<PyAny>>,
        index_cache_size: Option<u32>,
        location: Option<String>,
        namespace_client: Option<Py<PyAny>>,
        managed_versioning: Option<bool>,
    ) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.get_inner()?.clone();
+        let py = self_.py();

-        let mut builder = inner.open_table(name);
-        builder = builder.namespace(namespace.clone());
+        let ns_path = namespace_path.clone().unwrap_or_default();
+        let mut builder = inner.open_table(name.clone());
+        builder = builder.namespace(ns_path.clone());
        if let Some(storage_options) = storage_options {
            builder = builder.storage_options(storage_options);
        }
-        if let Some(provider_obj) = storage_options_provider {
-            let provider = py_object_to_storage_options_provider(provider_obj)?;
+
+        // Auto-create storage options provider from namespace_client
+        if let Some(ns_obj) = namespace_client {
+            let ns_client = extract_namespace_arc(py, ns_obj)?;
+            // Create table_id by combining namespace_path with table name
+            let mut table_id = ns_path;
+            table_id.push(name);
+            let provider = create_namespace_storage_options_provider(ns_client.clone(), table_id);
            builder = builder.storage_options_provider(provider);
+            builder = builder.namespace_client(ns_client);
        }
+
        if let Some(index_cache_size) = index_cache_size {
            builder = builder.index_cache_size(index_cache_size);
        }
        if let Some(location) = location {
            builder = builder.location(location);
        }
-        // Extract namespace client from Python object if provided
-        let ns_client = if let Some(ns_obj) = namespace_client {
-            let py = self_.py();
-            Some(extract_namespace_arc(py, ns_obj)?)
-        } else {
-            None
-        };
-        if let Some(ns_client) = ns_client {
-            builder = builder.namespace_client(ns_client);
-        }
        // Pass managed_versioning if provided to avoid redundant describe_table call
        if let Some(enabled) = managed_versioning {
            builder = builder.managed_versioning(enabled);
@@ -234,12 +282,12 @@ impl Connection {
        })
    }

-    #[pyo3(signature = (target_table_name, source_uri, target_namespace=vec![], source_version=None, source_tag=None, is_shallow=true))]
+    #[pyo3(signature = (target_table_name, source_uri, target_namespace_path=None, source_version=None, source_tag=None, is_shallow=true))]
    pub fn clone_table(
        self_: PyRef<'_, Self>,
        target_table_name: String,
        source_uri: String,
-        target_namespace: Vec<String>,
+        target_namespace_path: Option<Vec<String>>,
        source_version: Option<u64>,
        source_tag: Option<String>,
        is_shallow: bool,
@@ -247,7 +295,7 @@ impl Connection {
        let inner = self_.get_inner()?.clone();

        let mut builder = inner.clone_table(target_table_name, source_uri);
-        builder = builder.target_namespace(target_namespace);
+        builder = builder.target_namespace(target_namespace_path.unwrap_or_default());
        if let Some(version) = source_version {
            builder = builder.source_version(version);
        }
@@ -262,52 +310,56 @@ impl Connection {
        })
    }

-    #[pyo3(signature = (cur_name, new_name, cur_namespace=vec![], new_namespace=vec![]))]
+    #[pyo3(signature = (cur_name, new_name, cur_namespace_path=None, new_namespace_path=None))]
    pub fn rename_table(
        self_: PyRef<'_, Self>,
        cur_name: String,
        new_name: String,
-        cur_namespace: Vec<String>,
-        new_namespace: Vec<String>,
+        cur_namespace_path: Option<Vec<String>>,
+        new_namespace_path: Option<Vec<String>>,
    ) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.get_inner()?.clone();
+        let cur_ns_path = cur_namespace_path.unwrap_or_default();
+        let new_ns_path = new_namespace_path.unwrap_or_default();
        future_into_py(self_.py(), async move {
            inner
-                .rename_table(cur_name, new_name, &cur_namespace, &new_namespace)
+                .rename_table(cur_name, new_name, &cur_ns_path, &new_ns_path)
                .await
                .infer_error()
        })
    }

-    #[pyo3(signature = (name, namespace=vec![]))]
+    #[pyo3(signature = (name, namespace_path=None))]
    pub fn drop_table(
        self_: PyRef<'_, Self>,
        name: String,
-        namespace: Vec<String>,
+        namespace_path: Option<Vec<String>>,
    ) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.get_inner()?.clone();
+        let ns_path = namespace_path.unwrap_or_default();
        future_into_py(self_.py(), async move {
-            inner.drop_table(name, &namespace).await.infer_error()
+            inner.drop_table(name, &ns_path).await.infer_error()
        })
    }

-    #[pyo3(signature = (namespace=vec![],))]
+    #[pyo3(signature = (namespace_path=None,))]
    pub fn drop_all_tables(
        self_: PyRef<'_, Self>,
-        namespace: Vec<String>,
+        namespace_path: Option<Vec<String>>,
    ) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.get_inner()?.clone();
+        let ns_path = namespace_path.unwrap_or_default();
        future_into_py(self_.py(), async move {
-            inner.drop_all_tables(&namespace).await.infer_error()
+            inner.drop_all_tables(&ns_path).await.infer_error()
        })
    }

    // Namespace management methods

-    #[pyo3(signature = (namespace=vec![], page_token=None, limit=None))]
+    #[pyo3(signature = (namespace_path=None, page_token=None, limit=None))]
    pub fn list_namespaces(
        self_: PyRef<'_, Self>,
-        namespace: Vec<String>,
+        namespace_path: Option<Vec<String>>,
        page_token: Option<String>,
        limit: Option<u32>,
    ) -> PyResult<Bound<'_, PyAny>> {
@@ -316,11 +368,7 @@ impl Connection {
        future_into_py(py, async move {
            use lance_namespace::models::ListNamespacesRequest;
            let request = ListNamespacesRequest {
-                id: if namespace.is_empty() {
-                    None
-                } else {
-                    Some(namespace)
-                },
+                id: namespace_path,
                page_token,
                limit: limit.map(|l| l as i32),
                ..Default::default()
@@ -335,10 +383,10 @@ impl Connection {
        })
    }

-    #[pyo3(signature = (namespace, mode=None, properties=None))]
+    #[pyo3(signature = (namespace_path, mode=None, properties=None))]
    pub fn create_namespace(
        self_: PyRef<'_, Self>,
-        namespace: Vec<String>,
+        namespace_path: Vec<String>,
        mode: Option<String>,
        properties: Option<std::collections::HashMap<String, String>>,
    ) -> PyResult<Bound<'_, PyAny>> {
@@ -354,11 +402,7 @@ impl Connection {
                _ => None,
            });
            let request = CreateNamespaceRequest {
-                id: if namespace.is_empty() {
-                    None
-                } else {
-                    Some(namespace)
-                },
+                id: Some(namespace_path),
                mode: mode_str,
                properties,
                ..Default::default()
@@ -372,10 +416,10 @@ impl Connection {
        })
    }

-    #[pyo3(signature = (namespace, mode=None, behavior=None))]
+    #[pyo3(signature = (namespace_path, mode=None, behavior=None))]
    pub fn drop_namespace(
        self_: PyRef<'_, Self>,
-        namespace: Vec<String>,
+        namespace_path: Vec<String>,
        mode: Option<String>,
        behavior: Option<String>,
    ) -> PyResult<Bound<'_, PyAny>> {
@@ -395,11 +439,7 @@ impl Connection {
                _ => None,
            });
            let request = DropNamespaceRequest {
-                id: if namespace.is_empty() {
-                    None
-                } else {
-                    Some(namespace)
-                },
+                id: Some(namespace_path),
                mode: mode_str,
                behavior: behavior_str,
                ..Default::default()
@@ -414,21 +454,17 @@ impl Connection {
        })
    }

-    #[pyo3(signature = (namespace,))]
+    #[pyo3(signature = (namespace_path,))]
    pub fn describe_namespace(
        self_: PyRef<'_, Self>,
-        namespace: Vec<String>,
+        namespace_path: Vec<String>,
    ) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.get_inner()?.clone();
        let py = self_.py();
        future_into_py(py, async move {
            use lance_namespace::models::DescribeNamespaceRequest;
            let request = DescribeNamespaceRequest {
-                id: if namespace.is_empty() {
-                    None
-                } else {
-                    Some(namespace)
-                },
+                id: Some(namespace_path),
                ..Default::default()
            };
            let response = inner.describe_namespace(request).await.infer_error()?;
@@ -440,10 +476,10 @@ impl Connection {
        })
    }

-    #[pyo3(signature = (namespace=vec![], page_token=None, limit=None))]
+    #[pyo3(signature = (namespace_path=None, page_token=None, limit=None))]
    pub fn list_tables(
        self_: PyRef<'_, Self>,
-        namespace: Vec<String>,
+        namespace_path: Option<Vec<String>>,
        page_token: Option<String>,
        limit: Option<u32>,
    ) -> PyResult<Bound<'_, PyAny>> {
@@ -452,11 +488,7 @@ impl Connection {
        future_into_py(py, async move {
            use lance_namespace::models::ListTablesRequest;
            let request = ListTablesRequest {
-                id: if namespace.is_empty() {
-                    None
-                } else {
-                    Some(namespace)
-                },
+                id: namespace_path,
                page_token,
                limit: limit.map(|l| l as i32),
                ..Default::default()
@@ -470,10 +502,29 @@ impl Connection {
            })
        })
    }
+
+    /// Get the configuration for constructing an equivalent namespace client.
+    /// Returns a dict with:
+    /// - "impl": "dir" for DirectoryNamespace, "rest" for RestNamespace
+    /// - "properties": configuration properties for the namespace
+    #[pyo3(signature = ())]
+    pub fn namespace_client_config(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.get_inner()?.clone();
+        let py = self_.py();
+        future_into_py(py, async move {
+            let (impl_type, properties) = inner.namespace_client_config().await.infer_error()?;
+            Python::attach(|py| -> PyResult<Py<PyDict>> {
+                let dict = PyDict::new(py);
+                dict.set_item("impl", impl_type)?;
+                dict.set_item("properties", properties)?;
+                Ok(dict.unbind())
+            })
+        })
+    }
 }

 #[pyfunction]
-#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
+#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None, manifest_enabled=false, namespace_client_properties=None))]
 #[allow(clippy::too_many_arguments)]
 pub fn connect(
    py: Python<'_>,
@@ -485,6 +536,8 @@ pub fn connect(
    client_config: Option<PyClientConfig>,
    storage_options: Option<HashMap<String, String>>,
    session: Option<crate::session::Session>,
+    manifest_enabled: bool,
+    namespace_client_properties: Option<HashMap<String, String>>,
 ) -> PyResult<Bound<'_, PyAny>> {
    future_into_py(py, async move {
        let mut builder = lancedb::connect(&uri);
@@ -504,6 +557,12 @@ pub fn connect(
        if let Some(storage_options) = storage_options {
            builder = builder.storage_options(storage_options);
        }
+        if manifest_enabled {
+            builder = builder.manifest_enabled(true);
+        }
+        if let Some(namespace_client_properties) = namespace_client_properties {
+            builder = builder.namespace_client_properties(namespace_client_properties);
+        }
        #[cfg(feature = "remote")]
        if let Some(client_config) = client_config {
            builder = builder.client_config(client_config.into());
@@ -515,6 +574,52 @@ pub fn connect(
    })
 }

+#[pyfunction]
+#[pyo3(signature = (
+    namespace_client,
+    read_consistency_interval=None,
+    storage_options=None,
+    session=None,
+    namespace_client_pushdown_operations=None,
+    namespace_client_impl=None,
+    namespace_client_properties=None,
+))]
+#[allow(clippy::too_many_arguments)]
+pub fn connect_namespace_client(
+    py: Python<'_>,
+    namespace_client: Py<PyAny>,
+    read_consistency_interval: Option<f64>,
+    storage_options: Option<HashMap<String, String>>,
+    session: Option<crate::session::Session>,
+    namespace_client_pushdown_operations: Option<Vec<String>>,
+    namespace_client_impl: Option<String>,
+    namespace_client_properties: Option<HashMap<String, String>>,
+) -> PyResult<Connection> {
+    let namespace_client = extract_namespace_arc(py, namespace_client)?;
+    let read_consistency_interval = read_consistency_interval.map(Duration::from_secs_f64);
+    let namespace_client_pushdown_operations =
+        parse_namespace_client_pushdown_operations(namespace_client_pushdown_operations)?;
+    let ns_impl = namespace_client_impl.unwrap_or_else(|| "python".to_string());
+    let ns_properties = namespace_client_properties.unwrap_or_default();
+    let storage_options = storage_options.unwrap_or_default();
+    let session = session.map(|s| s.inner.clone());
+
+    let database = LanceNamespaceDatabase::from_namespace_client(
+        namespace_client,
+        ns_impl,
+        ns_properties,
+        storage_options,
+        read_consistency_interval,
+        session,
+        namespace_client_pushdown_operations,
+    );
+
+    Ok(Connection::new(LanceConnection::new(
+        Arc::new(database),
+        Arc::new(lancedb::embeddings::MemoryRegistry::new()),
+    )))
+}
+
 #[derive(FromPyObject)]
 pub struct PyClientConfig {
    user_agent: String,
@@ -524,6 +629,7 @@ pub struct PyClientConfig {
    id_delimiter: Option<String>,
    tls_config: Option<PyClientTlsConfig>,
    header_provider: Option<Py<PyAny>>,
+    user_id: Option<String>,
 }

 #[derive(FromPyObject)]
@@ -608,6 +714,7 @@ impl From<PyClientConfig> for lancedb::remote::ClientConfig {
            id_delimiter: value.id_delimiter,
            tls_config: value.tls_config.map(Into::into),
            header_provider,
+            user_id: value.user_id,
        }
    }
 }
--- a/python/src/expr.rs
+++ b/python/src/expr.rs
@@ -17,7 +17,7 @@ use pyo3::{Bound, PyAny, PyResult, exceptions::PyValueError, prelude::*, pyfunct
 /// [`expr_lit`] and combined with the methods on this struct.  On the Python
 /// side a thin wrapper class (`lancedb.expr.Expr`) delegates to these methods
 /// and adds Python operator overloads.
-#[pyclass(name = "PyExpr")]
+#[pyclass(name = "PyExpr", from_py_object)]
 #[derive(Clone)]
 pub struct PyExpr(pub DfExpr);

--- a/python/src/header.rs
+++ b/python/src/header.rs
@@ -33,7 +33,7 @@ impl PyHeaderProvider {
                Ok(headers_py) => {
                    // Convert Python dict to Rust HashMap
                    let bound_headers = headers_py.bind(py);
-                    let dict: &Bound<PyDict> = bound_headers.downcast().map_err(|e| {
+                    let dict: &Bound<PyDict> = bound_headers.cast().map_err(|e| {
                        format!("HeaderProvider.get_headers must return a dict: {}", e)
                    })?;

--- a/python/src/index.rs
+++ b/python/src/index.rs
@@ -1,11 +1,13 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder};
+use lancedb::index::vector::{
+    IvfFlatIndexBuilder, IvfHnswFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder,
+    IvfPqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder,
+};
 use lancedb::index::{
    Index as LanceDbIndex,
    scalar::{BTreeIndexBuilder, FtsIndexBuilder},
-    vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
 };
 use pyo3::IntoPyObject;
 use pyo3::types::PyStringMethods;
@@ -13,7 +15,7 @@ use pyo3::{
    Bound, FromPyObject, PyAny, PyResult, Python,
    exceptions::{PyKeyError, PyValueError},
    intern, pyclass, pymethods,
-    types::PyAnyMethods,
+    types::{PyAnyMethods, PyString},
 };

 use crate::util::parse_distance_type;
@@ -22,7 +24,7 @@ pub fn class_name(ob: &'_ Bound<'_, PyAny>) -> PyResult<String> {
    let full_name = ob
        .getattr(intern!(ob.py(), "__class__"))?
        .getattr(intern!(ob.py(), "__name__"))?;
-    let full_name = full_name.downcast()?.to_string_lossy();
+    let full_name = full_name.cast::<PyString>()?.to_string_lossy();

    match full_name.rsplit_once('.') {
        Some((_, name)) => Ok(name.to_string()),
@@ -162,8 +164,26 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
                }
                Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
            }
+            "HnswFlat" => {
+                let params = source.extract::<IvfHnswFlatParams>()?;
+                let distance_type = parse_distance_type(params.distance_type)?;
+                let mut hnsw_flat_builder = IvfHnswFlatIndexBuilder::default()
+                    .distance_type(distance_type)
+                    .max_iterations(params.max_iterations)
+                    .sample_rate(params.sample_rate)
+                    .num_edges(params.m)
+                    .ef_construction(params.ef_construction);
+                if let Some(num_partitions) = params.num_partitions {
+                    hnsw_flat_builder = hnsw_flat_builder.num_partitions(num_partitions);
+                }
+                if let Some(target_partition_size) = params.target_partition_size {
+                    hnsw_flat_builder =
+                        hnsw_flat_builder.target_partition_size(target_partition_size);
+                }
+                Ok(LanceDbIndex::IvfHnswFlat(hnsw_flat_builder))
+            }
            not_supported => Err(PyValueError::new_err(format!(
-                "Invalid index type '{}'.  Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, or IvfHnswSq",
+                "Invalid index type '{}'.  Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, IvfHnswSq, or IvfHnswFlat",
                not_supported
            ))),
        }
@@ -250,6 +270,17 @@ struct IvfHnswSqParams {
    target_partition_size: Option<u32>,
 }

+#[derive(FromPyObject)]
+struct IvfHnswFlatParams {
+    distance_type: String,
+    num_partitions: Option<u32>,
+    max_iterations: u32,
+    sample_rate: u32,
+    m: u32,
+    ef_construction: u32,
+    target_partition_size: Option<u32>,
+}
+
 #[pyclass(get_all)]
 /// A description of an index currently configured on a column
 pub struct IndexConfig {
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -2,7 +2,7 @@
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

 use arrow::RecordBatchStream;
-use connection::{Connection, connect};
+use connection::{Connection, connect, connect_namespace_client};
 use env_logger::Env;
 use expr::{PyExpr, expr_col, expr_func, expr_lit};
 use index::IndexConfig;
@@ -28,8 +28,8 @@ pub mod index;
 pub mod namespace;
 pub mod permutation;
 pub mod query;
+pub mod runtime;
 pub mod session;
-pub mod storage_options;
 pub mod table;
 pub mod util;

@@ -59,6 +59,7 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<PyPermutationReader>()?;
    m.add_class::<PyExpr>()?;
    m.add_function(wrap_pyfunction!(connect, m)?)?;
+    m.add_function(wrap_pyfunction!(connect_namespace_client, m)?)?;
    m.add_function(wrap_pyfunction!(permutation::async_permutation_builder, m)?)?;
    m.add_function(wrap_pyfunction!(util::validate_table_name, m)?)?;
    m.add_function(wrap_pyfunction!(query::fts_query_to_json, m)?)?;
--- a/python/src/namespace.rs
+++ b/python/src/namespace.rs
@@ -8,6 +8,7 @@ use std::sync::Arc;

 use async_trait::async_trait;
 use bytes::Bytes;
+use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsProvider};
 use lance_namespace::LanceNamespace as LanceNamespaceTrait;
 use lance_namespace::models::*;
 use pyo3::prelude::*;
@@ -182,7 +183,7 @@ async fn call_py_method_primitive<Req, Resp>(
 ) -> lance_core::Result<Resp>
 where
    Req: serde::Serialize + Send + 'static,
-    Resp: for<'py> pyo3::FromPyObject<'py> + Send + 'static,
+    Resp: for<'a, 'py> pyo3::FromPyObject<'a, 'py> + Send + 'static,
 {
    let request_json = serde_json::to_string(&request).map_err(|e| {
        lance_core::Error::io(format!(
@@ -202,7 +203,7 @@ where

            // Call the Python method
            let result = py_namespace.call_method1(py, method_name, (request_arg,))?;
-            let value: Resp = result.extract(py)?;
+            let value: Resp = result.extract(py).map_err(Into::into)?;
            Ok::<_, PyErr>(value)
        })
    })
@@ -694,3 +695,21 @@ pub fn extract_namespace_arc(
    let ns_ref = ns.bind(py);
    PyLanceNamespace::create_arc(py, ns_ref)
 }
+
+/// Create a LanceNamespaceStorageOptionsProvider from a namespace client and table ID.
+///
+/// This creates a Rust storage options provider that fetches credentials from the
+/// namespace's describe_table() method, enabling automatic credential refresh.
+///
+/// # Arguments
+/// * `namespace_client` - The namespace client (wrapped PyLanceNamespace)
+/// * `table_id` - Full table identifier (namespace_path + table_name)
+pub fn create_namespace_storage_options_provider(
+    namespace_client: Arc<dyn LanceNamespaceTrait>,
+    table_id: Vec<String>,
+) -> Arc<dyn StorageOptionsProvider> {
+    Arc::new(LanceNamespaceStorageOptionsProvider::new(
+        namespace_client,
+        table_id,
+    ))
+}
--- a/python/src/permutation.rs
+++ b/python/src/permutation.rs
@@ -4,7 +4,7 @@
 use std::sync::{Arc, Mutex};

 use crate::{
-    arrow::RecordBatchStream, connection::Connection, error::PythonErrorExt, table::Table,
+    arrow::RecordBatchStream, error::PythonErrorExt, runtime::future_into_py, table::Table,
 };
 use arrow::pyarrow::{PyArrowType, ToPyArrow};
 use lancedb::{
@@ -21,16 +21,15 @@ use pyo3::{
    pyclass, pymethods,
    types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
 };
-use pyo3_async_runtimes::tokio::future_into_py;

 fn table_from_py<'a>(table: Bound<'a, PyAny>) -> PyResult<Bound<'a, Table>> {
    if table.hasattr("_inner")? {
-        Ok(table.getattr("_inner")?.downcast_into::<Table>()?)
+        Ok(table.getattr("_inner")?.cast_into::<Table>()?)
    } else if table.hasattr("_table")? {
        Ok(table
            .getattr("_table")?
            .getattr("_inner")?
-            .downcast_into::<Table>()?)
+            .cast_into::<Table>()?)
    } else {
        Err(PyRuntimeError::new_err(
            "Provided table does not appear to be a Table or RemoteTable instance",
@@ -80,24 +79,6 @@ impl PyAsyncPermutationBuilder {

 #[pymethods]
 impl PyAsyncPermutationBuilder {
-    #[pyo3(signature = (database, table_name))]
-    pub fn persist(
-        slf: PyRefMut<'_, Self>,
-        database: Bound<'_, PyAny>,
-        table_name: String,
-    ) -> PyResult<Self> {
-        let conn = if database.hasattr("_conn")? {
-            database
-                .getattr("_conn")?
-                .getattr("_inner")?
-                .downcast_into::<Connection>()?
-        } else {
-            database.getattr("_inner")?.downcast_into::<Connection>()?
-        };
-        let database = conn.borrow().database()?;
-        slf.modify(|builder| builder.persist(database, table_name))
-    }
-
    #[pyo3(signature = (*, ratios=None, counts=None, fixed=None, seed=None, split_names=None))]
    pub fn split_random(
        slf: PyRefMut<'_, Self>,
@@ -243,7 +224,7 @@ impl PyPermutationReader {
        let Some(selection) = selection else {
            return Ok(Select::All);
        };
-        let selection = selection.downcast_into::<PyDict>()?;
+        let selection = selection.cast_into::<PyDict>()?;
        let selection = selection
            .iter()
            .map(|(key, value)| {
--- a/Show More
+++ b/Show More