Compare commits


26 Commits

Author SHA1 Message Date
Jack Ye
9665f698a3 feat: add native OAuth/OIDC authentication support
Add OAuthConfig and OAuthHeaderProvider to the Rust core with support
for five OAuth flows: ClientCredentials, AuthorizationCodePKCE,
DeviceCode, AzureManagedIdentity, and WorkloadIdentity. Token
acquisition and auto-refresh happen entirely in Rust.

Python and TypeScript expose OAuthConfig as a plain config object that
maps to the Rust header provider via FFI — no dynamic callbacks cross
the language boundary.

ConnectBuilder gains an oauth_config() method that replaces the API key
requirement when OAuth is configured.
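
A rough sketch of the intended Python surface (the parameter name, the
flow identifier, and the `OAuthConfig` fields below are assumptions, not
the final API):

```python
# Hypothetical usage sketch; field and parameter names are assumptions.
import lancedb
from lancedb import OAuthConfig  # assumed export location

db = lancedb.connect(
    "db://my-project",
    oauth_config=OAuthConfig(       # replaces the API key requirement
        flow="client_credentials",  # one of the five supported flows
        token_endpoint="https://auth.example.com/oauth/token",
        client_id="my-client",
        client_secret="...",
    ),
)
# Token acquisition and auto-refresh happen in the Rust core; no callback
# crosses the language boundary.
```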
2026-05-12 13:52:17 -07:00
Shengan Zhang
650f173236 feat(python): add IVF_HNSW_FLAT vector index support (#3366)
## Summary

Wire up `IVF_HNSW_FLAT` in the Rust core and Python SDK. The index was
documented at https://docs.lancedb.com/indexing/vector-index but
`lancedb.Table.create_index(index_type="IVF_HNSW_FLAT")` raised
`ValueError: Unknown index type IVF_HNSW_FLAT` — the underlying
`pylance` already accepted it; only the LanceDB wrapper was missing the
wiring.

**Rust core (`rust/lancedb`):**
- Add `Index::IvfHnswFlat` / `IndexType::IvfHnswFlat` variants and the
`IvfHnswFlatIndexBuilder` (modelled on `IvfHnswSqIndexBuilder`).
- Build Lance params via the existing `VectorIndexParams::ivf_hnsw(...)`
helper, keeping symmetry with the other `IVF_HNSW_*` variants.
- Forward the variant in `RemoteTable::create_index` and add two
parametrised tests (default + customised config) for the JSON
serialisation.
- New `NativeTable` integration test
(`test_create_index_ivf_hnsw_flat`).

**Python binding (`python/`):**
- New `HnswFlat` dataclass + backwards-compat `IvfHnswFlat` alias.
- PyO3 `extract_index_params` recognises the `HnswFlat` config.
- `LanceTable.create_index(index_type="IVF_HNSW_FLAT", …)` and the sync
`RemoteTable.create_index` both dispatch to the new config.
- `IndexStatistics.index_type` `Literal` and `_lancedb.pyi` stubs cover
the new type so `pyright`/`make check` stays clean.
- Async integration tests (`HnswFlat` + `IvfHnswFlat` alias) and a sync
dispatcher test, mirroring the existing `IVF_HNSW_SQ` coverage.
- Existing `test_index_statistics_index_type_lists_all_supported_values`
updated to include `IVF_HNSW_FLAT`.

A matching Node.js / TypeScript binding is in a follow-up PR.

Closes #3331
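
A minimal sketch of the now-working call (`index_type="IVF_HNSW_FLAT"` is
the new piece; the other arguments are shown for context and assume a
table with a `vector` column):

```python
import lancedb

db = lancedb.connect("/tmp/lancedb")
tbl = db.open_table("my_table")

# Previously raised: ValueError: Unknown index type IVF_HNSW_FLAT
tbl.create_index(
    metric="cosine",
    vector_column_name="vector",
    index_type="IVF_HNSW_FLAT",
)
```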

## Test plan

- [ ] `cargo check --quiet --features remote --tests --examples`
- [ ] `cargo test --quiet --features remote -p lancedb` (covers the
new `test_create_index_ivf_hnsw_flat` and the two new parametrised
`RemoteTable::create_index` cases)
- [ ] `cargo fmt --all` / `cargo clippy --quiet --features remote
--tests --examples`
- [ ] `cd python && make develop && make check && make test` (covers
the two new async tests, the alias test, the dispatcher test, and the
updated `test_index_statistics_index_type_lists_all_supported_values`
assertion)
2026-05-11 15:08:32 -07:00
Xuanwo
9b21c136c6 feat(python): support model-backed native FTS tokenizers (#3289)
This wires Lance's existing `jieba/*` and `lindera/*` native FTS
tokenizers through the Python SDK instead of leaving them behind
disabled features and narrow public typing. It also documents the
`LANCE_LANGUAGE_MODEL_HOME` model layout and adds Python coverage for
successful CJK indexing plus missing-model error guidance.

Closes #2168.
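
A hedged sketch of the documented flow (the tokenizer identifier follows
the `jieba/*` pattern but is an assumption):

```python
import os
import lancedb

# Models must be laid out under LANCE_LANGUAGE_MODEL_HOME (see the docs).
os.environ["LANCE_LANGUAGE_MODEL_HOME"] = "/path/to/lance/language_models"

db = lancedb.connect("/tmp/lancedb")
tbl = db.open_table("docs")  # assumes a table with a "text" column
tbl.create_fts_index("text", tokenizer_name="jieba/default")  # assumed id
hits = tbl.search("向量数据库", query_type="fts").limit(5).to_list()
```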
2026-05-08 23:53:14 +08:00
Heng Ge
694aa48e19 fix(database): drop spurious trailing ? from listing-database URIs (#3357)
## Summary

`url::Url::query_pairs_mut()` leaves the URL with `query=Some("")` after
`.clear()` even when the input had no query string. The listing-database
connect path then captured that empty query into
`ListingDatabase::query_string`, and `table_uri()` blindly appended
`?<query>` to every per-table URI — producing URIs like
`s3://bucket/prefix/foo.lance?`.

The trailing `?` is benign for normal table operations, but it breaks
any caller that constructs a sub-path from the table URI. In particular,
MemWAL flushes write to `<table_uri>/_mem_wal/<shard>/<rand>_gen_<n>`,
which `url::Url::parse` then re-parses as `path=<base table>` +
`query=/_mem_wal/...`. `Dataset::write` resolves the base table dataset,
finds it already exists, and fails with `Dataset already exists:
…_gen_1` on the very first MemTable flush (observed deterministically
against S3 across all merge_insert LSM modes; tracked in
[lance-format/lance#6713](https://github.com/lance-format/lance/pull/6715)).
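
The re-parse is easy to see with any URL parser; illustrating with
Python's `urllib` (the shard path is made up):

```python
from urllib.parse import urlparse

base = "s3://bucket/prefix/foo.lance?"  # table URI with the spurious '?'
flush_uri = base + "/_mem_wal/shard0/abc_gen_1"

parsed = urlparse(flush_uri)
print(parsed.path)   # /prefix/foo.lance  (the base table, not the WAL file)
print(parsed.query)  # /_mem_wal/shard0/abc_gen_1
```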

## Fix

Treat `Some("")` query the same as no query when capturing
`query_string`. A real `?foo=bar` query is still propagated unchanged.

Adds a regression test covering both the empty-query and non-empty-query
paths.

## Verification

- `url::Url::parse("s3://bucket/prefix/").query()` → `None`, but after
`query_pairs_mut().clear()` → `Some("")`. Confirmed in a standalone
repro.
- Without this fix, every `table_uri()` for an `s3://`-style connection
ends with `?`, breaking MemWAL and any future sub-path consumer in the
same way.
- New unit test `test_table_uri_url_path_has_no_trailing_question_mark`
exercises both code paths.
2026-05-07 23:29:29 -07:00
LanceDB Robot
455ba5abbf chore: update lance dependency to v7.0.0-beta.7 (#3356)
## Summary
- Update Lance Rust workspace dependencies to `7.0.0-beta.7` using
`ci/set_lance_version.py`.
- Update the Java `lance-core` Maven property to `7.0.0-beta.7`.
- Refresh `Cargo.lock` for the new Lance tag:
https://github.com/lance-format/lance/releases/tag/v7.0.0-beta.7

## Verification
- `cargo clippy --workspace --tests --all-features -- -D warnings`
- `cargo fmt --all`
2026-05-07 16:04:38 -07:00
Octopus
5338aeb006 ci: avoid passing GPG passphrase on command line in Java publish workflow (#3313)
Fixes #3299

## Problem

Two security issues exist in `.github/workflows/java-publish.yml`:

1. **`gpg-passphrase` input is misused**: `actions/setup-java`'s
`gpg-passphrase` input expects the **name** of an environment variable
(default: `GPG_PASSPHRASE`), not the secret value itself. The previous
value `${{ secrets.GPG_PASSPHRASE }}` was setting the env var name to
the actual secret, which is incorrect.

2. **Passphrase visible on the command line**: `-Dgpg.passphrase=${{
secrets.GPG_PASSPHRASE }}` passes the GPG passphrase as a Maven system
property argument, making it visible in process listings and potentially
echoed in debug logs — a supply-chain security risk for release
workflows.

## Solution

- Fix `gpg-passphrase: MAVEN_GPG_PASSPHRASE` — use the correct env var
name so `actions/setup-java` generates a proper Maven `settings.xml`
entry that reads from `MAVEN_GPG_PASSPHRASE`.
- Remove `-Dgpg.passphrase=...` from the Maven CLI invocation.
- Add `MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}` to the
`env:` block of the Publish step, so the passphrase is available as an
environment variable rather than a CLI argument.

## Testing

The Java publish workflow only runs on tag pushes, so this cannot be
exercised in a PR build. The logic change is straightforward:
`actions/setup-java` is documented to write a `settings.xml` that reads
`<gpg.passphrase>` from the named env var, and `maven-gpg-plugin` picks
it up from there without any CLI argument.

Co-authored-by: octo-patch <octo-patch@github.com>
2026-05-07 08:45:27 -07:00
LanceDB Robot
47a34f5cca chore: update lance dependency to v7.0.0-beta.4 (#3348)
## Summary
- Update Lance Rust dependencies to `v7.0.0-beta.4` using
`ci/set_lance_version.py`.
- Update the Java `lance-core` dependency property to `7.0.0-beta.4`.
- Align LanceDB with dependency updates required by Lance 7, including
`object_store` 0.13 API compatibility.

Triggering tag:
https://github.com/lance-format/lance/releases/tag/v7.0.0-beta.4

## Verification
- `cargo clippy --workspace --tests --all-features -- -D warnings`
- `cargo fmt --all`
2026-05-05 18:36:39 -07:00
Weston Pace
a17c241e86 feat(python): make Permutation fork-safe for PyTorch DataLoader workers (#3339)
## Summary

PyTorch's `DataLoader` uses fork-based multiprocessing by default on
Linux, but threads do not survive `fork()`. LanceDB's Python bindings
drive async work through two threaded layers, both of which become inert
in a forked child:

- `BackgroundEventLoop` runs an asyncio loop on a Python
`threading.Thread`.
- `pyo3-async-runtimes::tokio` holds a global multi-threaded tokio
runtime whose worker threads also die on fork — and its runtime lives in
a `OnceLock` that cannot be replaced after first use.

As a result, any `Permutation` (or other async API) used inside a
fork-based `DataLoader` worker hangs indefinitely. This PR makes both
layers fork-safe so `Permutation` works as a `torch.utils.data.Dataset`
with `num_workers > 0`.

## Approach

### Rust — new `python/src/runtime.rs`

Mirrors the pattern used in [Lance's Python
bindings](456198cd6f/python/src/lib.rs (L139)),
adapted for the async-bridge use case.

- `LanceRuntime` implements `pyo3_async_runtimes::generic::Runtime +
ContextExt`, backed by an `AtomicPtr<tokio::runtime::Runtime>` we own
(sidestepping `pyo3-async-runtimes`'s frozen `OnceLock` global).
- A `pthread_atfork(after_in_child)` handler nulls the pointer; the next
`spawn` rebuilds the runtime in the child. The previous runtime is
intentionally **leaked** — calling `Drop` would try to join now-dead
worker threads and hang.
- `runtime::future_into_py` is a drop-in for
`pyo3_async_runtimes::tokio::future_into_py`. All ~80 call sites in
`arrow.rs` / `connection.rs` / `permutation.rs` / `query.rs` /
`table.rs` are updated to route through it.
- `python/Cargo.toml` adds `libc = "0.2"` and the tokio
`rt-multi-thread` feature.

### Python — `lancedb/background_loop.py`

- Refactors `BackgroundEventLoop.__init__` to a reusable `_start()`
method.
- An `os.register_at_fork(after_in_child=…)` hook calls `LOOP._start()`
to give the singleton a fresh asyncio loop and thread **in place**. This
matters because the rest of the codebase imports `LOOP` via `from
.background_loop import LOOP` — rebinding the module attribute would
leave those references holding the dead loop.

### Python — `lancedb/__init__.py`

Removes the `__warn_on_fork` pre-fork warning (and the now-unused
`import warnings`). Fork is supported.
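
A hedged sketch of the pattern this enables; the permutation-construction
call is an assumption, while the `DataLoader` arguments mirror the new
test:

```python
import lancedb
import torch.utils.data

db = lancedb.connect("/tmp/lancedb")
tbl = db.open_table("train")
perm = tbl.permutation()  # hypothetical constructor for a Permutation

loader = torch.utils.data.DataLoader(
    perm,
    num_workers=2,                   # fork-based workers (Linux default)
    multiprocessing_context="fork",  # previously hung; passes post-fix
)
for batch in loader:
    ...
```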

## Test plan

- [x] New `test_permutation_dataloader_fork_workers` in
`python/tests/test_torch.py`: runs a `Permutation` through
`torch.utils.data.DataLoader(num_workers=2,
multiprocessing_context="fork")` inside a spawn-isolated child with a
30s hang detector. **Pre-fix**: timed out at 36s. **Post-fix**: passes
in ~3.6s.
- [x] New `test_remote_connection_after_fork` in
`python/tests/test_remote_db.py`: forks a child that creates a fresh
`lancedb.connect(...)` against a mock HTTP server and calls
`table_names()`; passes in <1s, validates the runtime reset is
sufficient for fresh remote clients.
- [x] All 62 tests in `test_torch.py` + `test_permutation.py` pass.
- [x] All 35 tests in `test_remote_db.py` pass.
- [x] `test_table.py` (87) + `test_db.py` + `test_query.py` (157, minus
one unrelated `sentence_transformers` import skip) — 244 passing.
- [x] `cargo clippy -p lancedb-python --tests` clean.
- [x] `cargo fmt`, `ruff check`, `ruff format` all clean.

## Known limitation (follow-up)

This PR makes a **freshly-built** `lancedb.connect(...)` work in a
forked child. An **inherited** `Connection` from the parent still
carries an inherited `reqwest::Client` whose hyper connection pool
references socket FDs and TCP/TLS state shared with the parent — using
it from the child after fork is unsafe (especially with HTTP/1.1
keep-alive). The recommended pattern for fork-based `DataLoader` workers
that hit a remote DB is to construct a new connection inside the worker.
Auto-clearing inherited HTTP client pools on fork would require tracking
live `Connection` instances in `lancedb` core and is left for a
follow-up PR.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 13:44:10 -07:00
Weston Pace
1fc23e5473 fix(python): make Permutation picklable for PyTorch multiprocessing (#3335)
## Summary

When PyTorch is used with multiprocessing in spawn mode, the
`Permutation` needs to be pickled. It could not be pickled because
`Table` and `Connection` are not serializable. This PR adds pickle
support to `Permutation` without adding general pickle support to
`Table` or `Connection`. To add general support we would probably need
to start by adding serialization in the namespace client.

In the meantime, this PR enables pickling by adding special cases for:

 * In-memory tables (just serialize as Arrow IPC)
 * Native tables (serialize the URI)

If a user is not using one of the above cases (e.g. they are using a
remote connection), they will need to provide a connection factory that
can be pickled.
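
A minimal sketch of what spawn-mode workers now rely on (the constructor
name is an assumption):

```python
import pickle

perm = tbl.permutation()  # hypothetical constructor on an open Table
payload = pickle.dumps(perm)  # in-memory table -> Arrow IPC; native -> URI
restored = pickle.loads(payload)
```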

## Breaking change

`PermutationBuilder.persist(...)` is removed from the Python bindings;
the permutation table is now always in-memory. The underlying Rust
`PermutationBuilder::persist` API is untouched and can be re-exposed
later if needed. It probably won't make sense to do that until we have a
way to serialize `Table` and `Connection`.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 21:37:58 -07:00
qingfeng-occ
87b831bcae fix(node): remove redundant postbuild:release script to fix build failure (#3285)
The `build:release` command already outputs the `*.node` files directly
to the `dist/` directory via the `--output-dir dist` flag.

Therefore, the `postbuild:release` script, which attempts to copy
`*.node` files from the `lancedb/` source directory, fails with a "no
such file or directory" error because the source files do not exist
there.

This commit removes the redundant `postbuild:release` script to resolve
the build failure.

fix #3284

Signed-off-by: qingfeng-occ <qing.feng@zte.com.cn>
2026-05-04 09:37:18 -07:00
Nitesh Yadav
59db036118 fix(python): add missing space in hybrid query error message (#3340)
Hi, the hybrid query error message looks like it could use a space, so I
just added it.

```python
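# Note: the adjacent string literals concatenate with no separator, so the
# message renders as "...search() methodor set `vector()` and `text()`
# explicitly for hybrid search.But not both."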
def _validate_query(self, query, vector=None, text=None):
    if query is not None and (vector is not None or text is not None):
        raise ValueError(
            "You can either provide a string query in search() method"
            "or set `vector()` and `text()` explicitly for hybrid search."
            "But not both."
        )
```
2026-05-02 15:51:00 -07:00
Lance Release
c091243d5b Bump version: 0.28.0-beta.10 → 0.28.0-beta.11 2026-04-29 17:53:49 +00:00
Lance Release
a2aea7b4e5 Bump version: 0.31.0-beta.10 → 0.31.0-beta.11 2026-04-29 17:53:22 +00:00
LanceDB Robot
4a5341edb1 chore: update lance dependency to v6.0.0-beta.7 (#3334)
## Summary
- Update Lance Rust dependencies to `6.0.0-beta.7` using
`ci/set_lance_version.py`.
- Update Java `lance-core.version` to `6.0.0-beta.7`.
- Align Arrow/DataFusion/PyO3 dependency versions and apply required
compatibility fixes for the Lance upgrade.

Triggering tag:
[v6.0.0-beta.7](https://github.com/lance-format/lance/releases/tag/v6.0.0-beta.7)

## Verification
- `cargo clippy --workspace --tests --all-features -- -D warnings`
- `cargo fmt --all`
2026-04-29 10:52:25 -07:00
Jack Ye
25dfe2cfd4 feat: add manifest-enabled directory namespace mode (#3332)
Adds `manifest_enabled` for local/native connections so directory
namespace manifests can be the source of truth, including migration from
directory listing and wiring for the Azure credential vending feature.
Also exposes the option through the Rust, Python, and Node bindings with
focused validation.
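
A hypothetical sketch of the exposed option in Python (the kwarg name
mirrors the commit title and is an assumption):

```python
import lancedb

# Directory namespace manifests become the source of truth for this
# connection.
db = lancedb.connect("/data/lancedb", manifest_enabled=True)  # assumed kwarg
```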
2026-04-29 09:22:06 -07:00
Lance Release
4dcd7f4314 Bump version: 0.28.0-beta.9 → 0.28.0-beta.10 2026-04-28 13:29:26 +00:00
Lance Release
2e36cd9dad Bump version: 0.31.0-beta.9 → 0.31.0-beta.10 2026-04-28 13:29:00 +00:00
Weston Pace
f31e27768a fix: address RUSTSEC-2026-0104 cargo-deny advisory (#3326)
## Summary

- Update `rustls-webpki` 0.103.10 → 0.103.13 to fix RUSTSEC-2026-0104
(reachable panic in CRL parsing)
- Add advisory ignore for the legacy `rustls-webpki` 0.101.7 copy pinned
to the aws-smithy/rustls 0.21 chain (same chain already exempted for
RUSTSEC-2026-0098/0099)

Fixes the `deny` CI job failure seen in #3325.

## Test plan

- [x] `cargo deny check advisories` passes locally

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 17:56:10 -07:00
LanceDB Robot
b84150a53e chore: update lance dependency to v6.0.0-beta.4 (#3325)
## Summary

- Updates Lance Rust dependencies to `6.0.0-beta.4` using
`ci/set_lance_version.py`.
- Updates the Java `lance-core.version` property to `6.0.0-beta.4`.
- Triggering Lance tag:
https://github.com/lance-format/lance/releases/tag/v6.0.0-beta.4

## Verification

- `cargo clippy --workspace --tests --all-features -- -D warnings`
- `cargo fmt --all`
2026-04-27 15:13:07 -07:00
Will Jones
d135c18db6 ci: add cargo-deny configuration and CI check (#3307)
Adds a `deny.toml` at the workspace root and a `deny` CI job that runs
`cargo deny check` on every PR. Catches yanked crates, license drift,
banned or wildcard dependencies, unapproved sources, and new RUSTSEC
advisories.

As part of wiring this up:

- Updated `aws-lc-rs` 1.13.0 → 1.16.3 / `aws-lc-sys` 0.28.0 → 0.40.0 to
  clear four 2026 AWS-LC advisories (timing side-channel, PKCS7 bypass,
  CRL scope). Removed the `=0.28.0` workaround pin; the original build
  failure no longer reproduces.
- Updated `bytes`, `zlib-rs`, `rand`, `rustls-webpki`, `lz4_flex` to
  clear their current advisories.
- Marked `lancedb-nodejs` and `lancedb-python` as `publish = false` and
  pinned `lzma-sys` from `*` to `0.1` so `bans.wildcards = "deny"` can
  be enforced.

10 remaining advisories have no safe upgrade available (transitive via
opendal, lance, datafusion, async-openai, aws-sdk on the legacy rustls
0.21 chain). Each is ignored in `deny.toml` with a per-entry rationale
and a link to the RUSTSEC advisory. New advisories still fail CI.

Fixes #3297

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 20:53:15 -07:00
Will Jones
ef399de092 ci: switch PyPI publish to OIDC trusted publishing (#3302)
## Summary

- Replaces `LANCEDB_PYPI_API_TOKEN` (long-lived token) with OIDC trusted
publishing via `pypa/gh-action-pypi-publish`
- Adds `id-token: write` permission to linux/mac/windows jobs
- Removes `twine`-based upload and the `pypi_token` input from
`upload_wheel` composite action
- Enables PEP 740 Sigstore attestations on published wheels as a bonus

After merging, rotate/revoke the `LANCEDB_PYPI_API_TOKEN` secret.

Closes #3294

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-24 20:53:06 -07:00
Will Jones
0d767abd0e ci: add Dependabot config for shipped Rust binaries (#3300)
Adds `.github/dependabot.yml` enabling weekly cargo update PRs for the
root workspace, which produces the Rust binaries we ship: the Node.js
and Python native extensions. The `rust/lancedb` library crate shares
the same lockfile — its consumers pick versions themselves, but bumping
transitive deps here keeps the shipped binaries current.

Also removes the misleading `exclude = ["python"]` line from the root
`Cargo.toml`: `python` is listed in `members`, and `cargo metadata`
confirms it's a workspace member, so the exclude was dead code that
implied the opposite.

Minor/patch updates are grouped to reduce PR noise.

Part of #3292. Only covers the cargo ecosystem; pip, npm, and
github-actions can follow.
2026-04-24 20:52:54 -07:00
Jack Ye
a92ae0ded5 fix: enable hostname verification by default (#3304)
## Summary

- make `TlsConfig::default()` enable hostname verification by default
- align the Rust default with the documented Python and Node behavior
- update the Rust unit test to lock in the safe default
2026-04-21 08:39:03 -07:00
Xuanwo
c54888a83a refactor(python): remove legacy tantivy FTS support (#3282)
This follows the Rust-side Tantivy removal by deleting the remaining
Python Tantivy runtime, tests, and packaging references.

It also turns the legacy Python-only Tantivy parameters into explicit
errors and stops reading legacy `_indices/fts` directories so Python FTS
is fully native-only.
2026-04-20 09:28:45 +08:00
Will Jones
ba6c44abc9 ci: add top-level permissions to GHA workflows (#3255)
Adds `permissions: contents: read` to the 10 workflows that had no
top-level permissions block. Workflows that already declared
permissions, or individual jobs that need elevated permissions (`issues:
write`, `pull-requests: write`, `contents: write`), are left unchanged.

Affected workflows: `dev.yml`, `java-publish.yml`, `java.yml`,
`license-header-check.yml`, `nodejs.yml`, `pypi-publish.yml`,
`python.yml`, `rust.yml`, `update_package_lock_run.yml`,
`update_package_lock_run_nodejs.yml`
2026-04-20 09:22:27 +08:00
Lance Release
75b0a8e0a3 Bump version: 0.28.0-beta.8 → 0.28.0-beta.9 2026-04-19 20:39:29 +00:00
140 changed files with 6763 additions and 2147 deletions


@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.28.0-beta.9"
current_version = "0.28.0-beta.11"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

.github/dependabot.yml (new file, 18 lines)

@@ -0,0 +1,18 @@
version: 2
# Scope: the root Cargo workspace, which produces the Rust binaries we
# ship to users (the Node.js and Python native extensions). The
# `rust/lancedb` library crate shares the same lockfile; its consumers
# pick their own dependency versions, but bumping transitive deps here
# keeps the binaries we ship current.
updates:
- package-ecosystem: cargo
directory: /
schedule:
interval: weekly
open-pull-requests-limit: 10
groups:
rust-minor-patch:
update-types:
- minor
- patch


@@ -8,6 +8,9 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
permissions:
contents: read
jobs:
labeler:
permissions:


@@ -19,6 +19,9 @@ on:
paths:
- .github/workflows/java-publish.yml
permissions:
contents: read
jobs:
publish:
name: Build and Publish
@@ -40,7 +43,7 @@ jobs:
server-username: SONATYPE_USER
server-password: SONATYPE_TOKEN
gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
gpg-passphrase: ${{ secrets.GPG_PASSPHRASE }}
gpg-passphrase: MAVEN_GPG_PASSPHRASE
- name: Set git config
run: |
git config --global user.email "dev+gha@lancedb.com"
@@ -55,10 +58,11 @@ jobs:
echo "use-agent" >> ~/.gnupg/gpg.conf
echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf
export GPG_TTY=$(tty)
./mvnw --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -pl lancedb-core -am -P deploy-to-ossrh
./mvnw --batch-mode -DskipTests -DpushChanges=false deploy -pl lancedb-core -am -P deploy-to-ossrh
env:
SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
report-failure:
name: Report Workflow Failure


@@ -24,6 +24,9 @@ on:
- java/**
- .github/workflows/java.yml
permissions:
contents: read
jobs:
build-java:
runs-on: ubuntu-24.04


@@ -10,6 +10,10 @@ on:
- nodejs/**
- java/**
- .github/workflows/license-header-check.yml
permissions:
contents: read
jobs:
check-licenses:
runs-on: ubuntu-latest


@@ -15,6 +15,9 @@ on:
- .github/workflows/nodejs.yml
- docker-compose.yml
permissions:
contents: read
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true


@@ -14,10 +14,16 @@ on:
env:
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
permissions:
contents: read
jobs:
linux:
name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }}
timeout-minutes: 60
permissions:
id-token: write
contents: read
strategy:
matrix:
config:
@@ -57,10 +63,12 @@ jobs:
- uses: ./.github/workflows/upload_wheel
if: startsWith(github.ref, 'refs/tags/python-v')
with:
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
fury_token: ${{ secrets.FURY_TOKEN }}
mac:
timeout-minutes: 90
permissions:
id-token: write
contents: read
runs-on: ${{ matrix.config.runner }}
strategy:
matrix:
@@ -85,10 +93,12 @@ jobs:
- uses: ./.github/workflows/upload_wheel
if: startsWith(github.ref, 'refs/tags/python-v')
with:
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
fury_token: ${{ secrets.FURY_TOKEN }}
windows:
timeout-minutes: 60
permissions:
id-token: write
contents: read
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
@@ -107,7 +117,6 @@ jobs:
- uses: ./.github/workflows/upload_wheel
if: startsWith(github.ref, 'refs/tags/python-v')
with:
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
fury_token: ${{ secrets.FURY_TOKEN }}
gh-release:
if: startsWith(github.ref, 'refs/tags/python-v')


@@ -17,6 +17,9 @@ on:
- .github/workflows/build_windows_wheel/**
- .github/workflows/run_tests/**
permissions:
contents: read
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
@@ -108,7 +111,6 @@ jobs:
- name: Install
run: |
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
pip install tantivy
pip install mlx
- name: Doctest
run: pytest --doctest-modules python/lancedb
@@ -227,6 +229,5 @@ jobs:
pip install "pydantic<2"
pip install pyarrow==16
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
pip install tantivy
- name: Run tests
run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests


@@ -9,9 +9,15 @@ on:
- Cargo.toml
- Cargo.lock
- rust-toolchain.toml
- deny.toml
- rust/**
- nodejs/Cargo.toml
- python/Cargo.toml
- .github/workflows/rust.yml
permissions:
contents: read
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
@@ -53,6 +59,17 @@ jobs:
- name: Run clippy (without remote feature)
run: cargo clippy --profile ci --workspace --tests -- -D warnings
deny:
# Supply-chain checks: advisories, licenses, banned crates, and source
# restrictions. Configuration lives in `deny.toml` at the workspace root.
timeout-minutes: 10
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
- uses: EmbarkStudios/cargo-deny-action@v2
with:
command: check advisories bans licenses sources
build-no-lock:
runs-on: ubuntu-24.04
timeout-minutes: 30


@@ -3,6 +3,9 @@ name: Update package-lock.json
on:
workflow_dispatch:
permissions:
contents: read
jobs:
publish:
runs-on: ubuntu-latest


@@ -3,6 +3,9 @@ name: Update NodeJs package-lock.json
on:
workflow_dispatch:
permissions:
contents: read
jobs:
publish:
runs-on: ubuntu-latest


@@ -2,9 +2,6 @@ name: upload-wheel
description: "Upload wheels to Pypi"
inputs:
pypi_token:
required: true
description: "release token for the repo"
fury_token:
required: true
description: "release token for the fury repo"
@@ -12,12 +9,6 @@ inputs:
runs:
using: "composite"
steps:
- name: Install dependencies
shell: bash
run: |
python -m pip install --upgrade pip
pip install twine
python3 -m pip install --upgrade pkginfo
- name: Choose repo
shell: bash
id: choose_repo
@@ -27,19 +18,17 @@ runs:
else
echo "repo=pypi" >> $GITHUB_OUTPUT
fi
- name: Publish to PyPI
- name: Publish to Fury
if: steps.choose_repo.outputs.repo == 'fury'
shell: bash
env:
FURY_TOKEN: ${{ inputs.fury_token }}
PYPI_TOKEN: ${{ inputs.pypi_token }}
run: |
if [[ ${{ steps.choose_repo.outputs.repo }} == fury ]]; then
WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
echo "Uploading $WHEEL to Fury"
curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
else
twine upload --repository ${{ steps.choose_repo.outputs.repo }} \
--username __token__ \
--password $PYPI_TOKEN \
target/wheels/lancedb-*.whl
fi
WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
echo "Uploading $WHEEL to Fury"
curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
- name: Publish to PyPI
if: steps.choose_repo.outputs.repo == 'pypi'
uses: pypa/gh-action-pypi-publish@release/v1
with:
packages-dir: target/wheels/

Cargo.lock (generated, 3081 lines changed)

File diff suppressed because it is too large


@@ -1,7 +1,5 @@
[workspace]
members = ["rust/lancedb", "nodejs", "python"]
# Python package needs to be built by maturin.
exclude = ["python"]
resolver = "2"
[workspace.package]
@@ -15,40 +13,40 @@ categories = ["database-implementations"]
rust-version = "1.91.0"
[workspace.dependencies]
lance = { "version" = "=6.0.0-beta.1", default-features = false, "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=6.0.0-beta.1", default-features = false, "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=6.0.0-beta.1", default-features = false, "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=6.0.0-beta.1", "tag" = "v6.0.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }
arrow-array = "57.2"
arrow-data = "57.2"
arrow-ipc = "57.2"
arrow-ord = "57.2"
arrow-schema = "57.2"
arrow-select = "57.2"
arrow-cast = "57.2"
arrow = { version = "58.0.0", optional = false }
arrow-array = "58.0.0"
arrow-data = "58.0.0"
arrow-ipc = "58.0.0"
arrow-ord = "58.0.0"
arrow-schema = "58.0.0"
arrow-select = "58.0.0"
arrow-cast = "58.0.0"
async-trait = "0"
datafusion = { version = "52.1", default-features = false }
datafusion-catalog = "52.1"
datafusion-common = { version = "52.1", default-features = false }
datafusion-execution = "52.1"
datafusion-expr = "52.1"
datafusion-functions = "52.1"
datafusion-physical-plan = "52.1"
datafusion-physical-expr = "52.1"
datafusion-sql = "52.1"
datafusion = { version = "53.0.0", default-features = false }
datafusion-catalog = "53.0.0"
datafusion-common = { version = "53.0.0", default-features = false }
datafusion-execution = "53.0.0"
datafusion-expr = "53.0.0"
datafusion-functions = "53.0.0"
datafusion-physical-plan = "53.0.0"
datafusion-physical-expr = "53.0.0"
datafusion-sql = "53.0.0"
env_logger = "0.11"
half = { "version" = "2.7.1", default-features = false, features = [
"num-traits",
@@ -56,7 +54,7 @@ half = { "version" = "2.7.1", default-features = false, features = [
futures = "0"
log = "0.4"
moka = { version = "0.12", features = ["future"] }
object_store = "0.12.0"
object_store = "0.13.2"
pin-project = "1.0.7"
rand = "0.9"
snafu = "0.8"

deny.toml (new file, 196 lines)

@@ -0,0 +1,196 @@
# cargo-deny configuration for LanceDB.
#
# Run locally with `cargo deny check`. See
# https://embarkstudios.github.io/cargo-deny/ for the full reference.
# The set of target triples we care about. cargo-deny will only consider
# dependencies that are used on at least one of these targets. Keeping this
# explicit avoids noise from platform-specific crates (e.g. wasm, android,
# ios) that we never actually ship.
[graph]
targets = [
"x86_64-unknown-linux-gnu",
"aarch64-unknown-linux-gnu",
"x86_64-apple-darwin",
"aarch64-apple-darwin",
"x86_64-pc-windows-msvc",
"aarch64-pc-windows-msvc",
]
all-features = true
[output]
feature-depth = 1
# ---------------------------------------------------------------------------
# Advisories: security vulnerabilities and yanked crates.
# ---------------------------------------------------------------------------
[advisories]
version = 2
# Fail the check if any crate in the lockfile has been yanked from crates.io.
# Yanked crates are a signal the author retracted the release (often due to
# bugs or security issues) and should not be depended on.
yanked = "deny"
# Advisory IDs we have explicitly reviewed and chosen to accept. Every
# entry must include a rationale and, where possible, an upstream issue
# pointing to a fix. Revisit this list whenever dependencies are updated.
ignore = [
# rsa: Marvin Attack timing side-channel in PKCS#1 v1.5 decryption.
# Reached only through opendal → reqsign → rsa. We do not use RSA
# decryption in LanceDB ourselves; this is dormant in the signing path.
# No fixed release exists upstream as of this writing.
# https://rustsec.org/advisories/RUSTSEC-2023-0071
{ id = "RUSTSEC-2023-0071", reason = "rsa crate via opendal/reqsign; no fixed upstream release" },
# instant: unmaintained. Pulled in via backoff → instant. Upstream
# recommends switching to `web-time`; fix has to come from backoff.
# https://rustsec.org/advisories/RUSTSEC-2024-0384
{ id = "RUSTSEC-2024-0384", reason = "transitive via backoff; waiting on backoff replacement" },
# paste: unmaintained (author archived the repo). Used transitively by
# datafusion and the arrow ecosystem; widespread, no drop-in replacement.
# https://rustsec.org/advisories/RUSTSEC-2024-0436
{ id = "RUSTSEC-2024-0436", reason = "transitive via datafusion; awaiting ecosystem migration" },
# encoding: unmaintained. Reached through lindera-dictionary, which is
# required by the native Lindera tokenizer path. Lindera has not migrated
# off this crate yet.
# https://rustsec.org/advisories/RUSTSEC-2021-0153
{ id = "RUSTSEC-2021-0153", reason = "transitive via lindera-dictionary for native Lindera tokenizer" },
# fast-float: unsound and unmaintained. Reached only through polars-arrow
# from the optional Polars integration; replacement requires a Polars
# dependency upgrade.
# https://rustsec.org/advisories/RUSTSEC-2024-0379
{ id = "RUSTSEC-2024-0379", reason = "transitive via polars-arrow; waiting on Polars migration" },
# tantivy: segfault on malformed input due to missing bounds check.
# Pulled in via lance for full-text search. We only feed tantivy
# documents we construct ourselves, not attacker-controlled bytes.
# Tracked for a lance dependency bump.
# https://rustsec.org/advisories/RUSTSEC-2025-0003
{ id = "RUSTSEC-2025-0003", reason = "tantivy via lance; inputs are internally produced, not user-supplied bytes" },
# backoff: unmaintained. Reached only via async-openai. Replacement
# requires async-openai to migrate (or us to drop async-openai).
# https://rustsec.org/advisories/RUSTSEC-2025-0012
{ id = "RUSTSEC-2025-0012", reason = "transitive via async-openai; waiting on upstream migration" },
# number_prefix: unmaintained. Transitive via indicatif → hf-hub.
# No security impact, just maintenance status.
# https://rustsec.org/advisories/RUSTSEC-2025-0119
{ id = "RUSTSEC-2025-0119", reason = "transitive via hf-hub/indicatif; cosmetic formatting crate" },
# bincode: unmaintained. Reached through lindera and lindera-dictionary,
# which are required by the native Lindera tokenizer path. Lindera has not
# migrated to another serialization format yet.
# https://rustsec.org/advisories/RUSTSEC-2025-0141
{ id = "RUSTSEC-2025-0141", reason = "transitive via lindera/lindera-dictionary for native Lindera tokenizer" },
# lru: soundness issue in IterMut. Reached only through aws-sdk-s3 in
# LanceDB's dev-dependency graph; LanceDB does not use that iterator
# directly. Clearing this requires the AWS SDK chain to update lru.
# https://rustsec.org/advisories/RUSTSEC-2026-0002
{ id = "RUSTSEC-2026-0002", reason = "transitive via aws-sdk-s3 dev-dependency; waiting on AWS SDK lru upgrade" },
# rustls-webpki 0.101.7 (old major line): name-constraint checks for
# URI / wildcard names. Pulled in only via the legacy rustls 0.21 chain
# from aws-smithy-http-client. The 0.103 line we actively use is patched.
# Clearing the 0.101 copy requires the aws-sdk chain to migrate off
# rustls 0.21.
# https://rustsec.org/advisories/RUSTSEC-2026-0098
# https://rustsec.org/advisories/RUSTSEC-2026-0099
{ id = "RUSTSEC-2026-0098", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
{ id = "RUSTSEC-2026-0099", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
# rustls-webpki 0.101.7: reachable panic in CRL parsing. Same legacy
# rustls 0.21 chain from aws-smithy-http-client as above. The 0.103 line
# we actively use is upgraded to 0.103.13 which contains the fix.
# https://rustsec.org/advisories/RUSTSEC-2026-0104
{ id = "RUSTSEC-2026-0104", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
# rand 0.8.5: soundness issue only when ThreadRng reseeds inside a custom
# logger. Reached through several transitive chains. LanceDB does not use
# rand from a custom logger; upgrade once all pinned chains accept 0.8.6+.
# https://rustsec.org/advisories/RUSTSEC-2026-0097
{ id = "RUSTSEC-2026-0097", reason = "transitive rand 0.8.5; LanceDB does not call ThreadRng from custom logging" },
]
# ---------------------------------------------------------------------------
# Licenses: only allow licenses we've reviewed as compatible with Apache-2.0.
# ---------------------------------------------------------------------------
[licenses]
version = 2
# SPDX identifiers for licenses that are compatible with our Apache-2.0
# distribution. Additions require legal review.
allow = [
"Apache-2.0",
"Apache-2.0 WITH LLVM-exception",
"MIT",
"BSD-2-Clause",
"BSD-3-Clause",
"ISC",
"Unicode-3.0",
"Unicode-DFS-2016",
"Zlib",
"CC0-1.0",
"MPL-2.0",
"BSL-1.0",
"OpenSSL",
# 0BSD ("BSD Zero Clause") is effectively public domain — no attribution
# required. Pulled in by `mock_instant`.
"0BSD",
# bzip2-1.0.6 is the permissive upstream bzip2 license (BSD-like). Pulled
# in by `libbz2-rs-sys`, the pure-Rust bzip2 implementation.
"bzip2-1.0.6",
# CDLA-Permissive-2.0 is a permissive data license used by `webpki-roots`
# for the Mozilla CA root bundle. Data-only, distribution-compatible.
"CDLA-Permissive-2.0",
]
confidence-threshold = 0.8
# Crates whose license cannot be determined from Cargo metadata but whose
# license we've manually confirmed from upstream. Keep this list minimal.
[[licenses.clarify]]
# polars-arrow-format omits the `license` field in its Cargo.toml, but the
# upstream repo (pola-rs/polars-arrow-format) is dual-licensed Apache-2.0 OR
# MIT. See https://github.com/pola-rs/polars-arrow-format/blob/main/LICENSE
crate = "polars-arrow-format"
expression = "Apache-2.0 OR MIT"
license-files = []
# ---------------------------------------------------------------------------
# Bans: disallow specific crates and flag dependency hygiene issues.
# ---------------------------------------------------------------------------
[bans]
# Warn (not deny) on duplicate versions of the same crate. In a large
# workspace like this one, duplicates are common and often unavoidable
# transitively. We surface them to discourage growth, but don't fail CI.
multiple-versions = "warn"
# Wildcard version requirements (`foo = "*"`) are a footgun — they let any
# future release in without review. Ban them outright.
wildcards = "deny"
# Internal workspace crates reference each other via `path = "..."`, which
# cargo-deny sees as a wildcard version. That's fine for private workspace
# members (not published to crates.io), so allow it specifically for paths.
allow-wildcard-paths = true
# Features that, if enabled, should cause the check to fail.
deny = []
# Crates to skip when checking for duplicate versions.
skip = []
# Similar to `skip`, but also skips the entire transitive subtree.
skip-tree = []
# ---------------------------------------------------------------------------
# Sources: restrict where crates can come from.
# ---------------------------------------------------------------------------
[sources]
# Deny any registry other than the ones explicitly listed below.
unknown-registry = "deny"
# Deny any git dependency whose host isn't in the allow-list below. This
# prevents accidental pulls from arbitrary forks.
unknown-git = "deny"
allow-registry = ["https://github.com/rust-lang/crates.io-index"]
# Lance is developed in a sibling repo and pulled as a git dependency until
# releases are cut to crates.io. Allow that specific host.
allow-git = [
"https://github.com/lance-format/lance",
]


@@ -24,4 +24,4 @@ RUN python --version && \
rustc --version && \
protoc --version
RUN pip install --no-cache-dir tantivy lancedb
RUN pip install --no-cache-dir lancedb


@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId>
<version>0.28.0-beta.9</version>
<version>0.28.0-beta.11</version>
</dependency>
```


@@ -25,8 +25,7 @@ new BooleanQuery(queries): BooleanQuery
Creates an instance of BooleanQuery.
#### Parameters
* **queries**: [[`Occur`](../enumerations/Occur.md), [`FullTextQuery`](../interfaces/FullTextQuery.md)][]
* **queries**: [[`Occur`](../enumerations/Occur.md), [`FullTextQuery`](../interfaces/FullTextQuery.md)][]
An array of (Occur, FullTextQuery objects) to combine.
Occur specifies whether the query must match, or should match.


@@ -31,18 +31,14 @@ but penalizes those that match the negative query.
the penalty is controlled by the `negativeBoost` parameter.
#### Parameters
* **positive**: [`FullTextQuery`](../interfaces/FullTextQuery.md)
* **positive**: [`FullTextQuery`](../interfaces/FullTextQuery.md)
The positive query that boosts the relevance score.
* **negative**: [`FullTextQuery`](../interfaces/FullTextQuery.md)
* **negative**: [`FullTextQuery`](../interfaces/FullTextQuery.md)
The negative query that reduces the relevance score.
* **options?**
* **options?**
Optional parameters for the boost query.
- `negativeBoost`: The boost factor for the negative query (default is 0.0).
* **options.negativeBoost?**: `number`
* **options.negativeBoost?**: `number`
#### Returns


@@ -42,26 +42,19 @@ both the source and cloned tables to evolve independently while initially
sharing the same data, deletion, and index files.
#### Parameters
* **targetTableName**: `string`
* **targetTableName**: `string`
The name of the target table to create.
* **sourceUri**: `string`
* **sourceUri**: `string`
The URI of the source table to clone from.
* **options?**
* **options?**
Clone options.
* **options.isShallow?**: `boolean`
* **options.isShallow?**: `boolean`
Whether to perform a shallow clone (defaults to true).
* **options.sourceTag?**: `string`
* **options.sourceTag?**: `string`
The tag of the source table to clone.
* **options.sourceVersion?**: `number`
* **options.sourceVersion?**: `number`
The version of the source table to clone.
* **options.targetNamespacePath?**: `string`[]
* **options.targetNamespacePath?**: `string`[]
The namespace path for the target table (defaults to root namespace).
#### Returns
@@ -102,14 +95,11 @@ abstract createEmptyTable(
Creates a new empty Table
##### Parameters
* **name**: `string`
* **name**: `string`
The name of the table.
* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
The schema of the table
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
Additional options (backwards compatibility)
##### Returns
@@ -129,17 +119,13 @@ abstract createEmptyTable(
Creates a new empty Table
##### Parameters
* **name**: `string`
* **name**: `string`
The name of the table.
* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
The schema of the table
* **namespacePath?**: `string`[]
* **namespacePath?**: `string`[]
The namespace path to create the table in (defaults to root namespace)
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
Additional options
##### Returns
@@ -159,11 +145,9 @@ abstract createTable(options, namespacePath?): Promise<Table>
Creates a new Table and initialize it with new data.
##### Parameters
* **options**: `object` & `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
* **options**: `object` & `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
The options object.
* **namespacePath?**: `string`[]
* **namespacePath?**: `string`[]
The namespace path to create the table in (defaults to root namespace)
##### Returns
@@ -182,15 +166,12 @@ abstract createTable(
Creates a new Table and initialize it with new data.
##### Parameters
* **name**: `string`
* **name**: `string`
The name of the table.
* **data**: [`TableLike`](../type-aliases/TableLike.md) \| `Record`&lt;`string`, `unknown`&gt;[]
* **data**: [`TableLike`](../type-aliases/TableLike.md) \| `Record`&lt;`string`, `unknown`&gt;[]
Non-empty Array of Records
to be inserted into the table
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
Additional options (backwards compatibility)
##### Returns
@@ -210,18 +191,14 @@ abstract createTable(
Creates a new Table and initialize it with new data.
##### Parameters
* **name**: `string`
* **name**: `string`
The name of the table.
* **data**: [`TableLike`](../type-aliases/TableLike.md) \| `Record`&lt;`string`, `unknown`&gt;[]
* **data**: [`TableLike`](../type-aliases/TableLike.md) \| `Record`&lt;`string`, `unknown`&gt;[]
Non-empty Array of Records
to be inserted into the table
* **namespacePath?**: `string`[]
* **namespacePath?**: `string`[]
The namespace path to create the table in (defaults to root namespace)
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
Additional options
##### Returns
@@ -253,8 +230,7 @@ abstract dropAllTables(namespacePath?): Promise<void>
Drop all tables in the database.
#### Parameters
* **namespacePath?**: `string`[]
* **namespacePath?**: `string`[]
The namespace path to drop tables from (defaults to root namespace).
#### Returns
@@ -272,11 +248,9 @@ abstract dropTable(name, namespacePath?): Promise<void>
Drop an existing table.
#### Parameters
* **name**: `string`
* **name**: `string`
The name of the table to drop.
* **namespacePath?**: `string`[]
* **namespacePath?**: `string`[]
The namespace path of the table (defaults to root namespace).
#### Returns
@@ -311,14 +285,11 @@ abstract openTable(
Open a table in the database.
#### Parameters
* **name**: `string`
* **name**: `string`
The name of the table
* **namespacePath?**: `string`[]
* **namespacePath?**: `string`[]
The namespace path of the table (defaults to root namespace)
* **options?**: `Partial`&lt;[`OpenTableOptions`](../interfaces/OpenTableOptions.md)&gt;
* **options?**: `Partial`&lt;[`OpenTableOptions`](../interfaces/OpenTableOptions.md)&gt;
Additional options
#### Returns
@@ -340,8 +311,7 @@ List all the table names in this database.
Tables will be returned in lexicographical order.
##### Parameters
* **options?**: `Partial`&lt;[`TableNamesOptions`](../interfaces/TableNamesOptions.md)&gt;
* **options?**: `Partial`&lt;[`TableNamesOptions`](../interfaces/TableNamesOptions.md)&gt;
options to control the
paging / start point (backwards compatibility)
@@ -360,11 +330,9 @@ List all the table names in this database.
Tables will be returned in lexicographical order.
##### Parameters
* **namespacePath?**: `string`[]
* **namespacePath?**: `string`[]
The namespace path to list tables from (defaults to root namespace)
* **options?**: `Partial`&lt;[`TableNamesOptions`](../interfaces/TableNamesOptions.md)&gt;
* **options?**: `Partial`&lt;[`TableNamesOptions`](../interfaces/TableNamesOptions.md)&gt;
options to control the
paging / start point


@@ -73,8 +73,7 @@ The results of a full text search are ordered by relevance measured by BM25.
You can combine filters with full text search.
#### Parameters
* **options?**: `Partial`&lt;[`FtsOptions`](../interfaces/FtsOptions.md)&gt;
* **options?**: `Partial`&lt;[`FtsOptions`](../interfaces/FtsOptions.md)&gt;
#### Returns
@@ -95,8 +94,7 @@ It is a variant of the HNSW algorithm that uses product quantization to compress
the vectors.
#### Parameters
* **options?**: `Partial`&lt;[`HnswPqOptions`](../interfaces/HnswPqOptions.md)&gt;
* **options?**: `Partial`&lt;[`HnswPqOptions`](../interfaces/HnswPqOptions.md)&gt;
#### Returns
@@ -117,8 +115,7 @@ It is a variant of the HNSW algorithm that uses scalar quantization to compress
the vectors.
#### Parameters
* **options?**: `Partial`&lt;[`HnswSqOptions`](../interfaces/HnswSqOptions.md)&gt;
* **options?**: `Partial`&lt;[`HnswSqOptions`](../interfaces/HnswSqOptions.md)&gt;
#### Returns
@@ -148,8 +145,7 @@ Note that training an IVF FLAT index on a large dataset is a slow operation and
currently is also a memory intensive operation.
#### Parameters
* **options?**: `Partial`&lt;[`IvfFlatOptions`](../interfaces/IvfFlatOptions.md)&gt;
* **options?**: `Partial`&lt;[`IvfFlatOptions`](../interfaces/IvfFlatOptions.md)&gt;
#### Returns
@@ -185,8 +181,7 @@ Note that training an IVF PQ index on a large dataset is a slow operation and
currently is also a memory intensive operation.
#### Parameters
* **options?**: `Partial`&lt;[`IvfPqOptions`](../interfaces/IvfPqOptions.md)&gt;
* **options?**: `Partial`&lt;[`IvfPqOptions`](../interfaces/IvfPqOptions.md)&gt;
#### Returns
@@ -216,8 +211,7 @@ Note that training an IVF RQ index on a large dataset is a slow operation and
currently is also a memory intensive operation.
#### Parameters
* **options?**: `Partial`&lt;[`IvfRqOptions`](../interfaces/IvfRqOptions.md)&gt;
* **options?**: `Partial`&lt;[`IvfRqOptions`](../interfaces/IvfRqOptions.md)&gt;
#### Returns


@@ -17,8 +17,7 @@ new MakeArrowTableOptions(values?): MakeArrowTableOptions
```
#### Parameters
* **values?**: `Partial`&lt;[`MakeArrowTableOptions`](MakeArrowTableOptions.md)&gt;
* **values?**: `Partial`&lt;[`MakeArrowTableOptions`](MakeArrowTableOptions.md)&gt;
#### Returns


@@ -28,30 +28,22 @@ new MatchQuery(
Creates an instance of MatchQuery.
#### Parameters
* **query**: `string`
* **query**: `string`
The text query to search for.
* **column**: `string`
* **column**: `string`
The name of the column to search within.
* **options?**
* **options?**
Optional parameters for the match query.
- `boost`: The boost factor for the query (default is 1.0).
- `fuzziness`: The fuzziness level for the query (default is 0).
- `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
- `operator`: The logical operator to use for combining terms in the query (default is "OR").
- `prefixLength`: The number of beginning characters being unchanged for fuzzy matching.
* **options.boost?**: `number`
* **options.fuzziness?**: `number`
* **options.maxExpansions?**: `number`
* **options.operator?**: [`Operator`](../enumerations/Operator.md)
* **options.prefixLength?**: `number`
* **options.boost?**: `number`
* **options.fuzziness?**: `number`
* **options.maxExpansions?**: `number`
* **options.operator?**: [`Operator`](../enumerations/Operator.md)
* **options.prefixLength?**: `number`
#### Returns


@@ -19,10 +19,8 @@ new MergeInsertBuilder(native, schema): MergeInsertBuilder
Construct a MergeInsertBuilder. __Internal use only.__
#### Parameters
* **native**: `NativeMergeInsertBuilder`
* **schema**: `Schema`&lt;`any`&gt; \| `Promise`&lt;`Schema`&lt;`any`&gt;&gt;
* **native**: `NativeMergeInsertBuilder`
* **schema**: `Schema`&lt;`any`&gt; \| `Promise`&lt;`Schema`&lt;`any`&gt;&gt;
#### Returns
@@ -39,10 +37,8 @@ execute(data, execOptions?): Promise<MergeResult>
Executes the merge insert operation
#### Parameters
* **data**: [`Data`](../type-aliases/Data.md)
* **execOptions?**: `Partial`&lt;[`WriteExecutionOptions`](../interfaces/WriteExecutionOptions.md)&gt;
* **data**: [`Data`](../type-aliases/Data.md)
* **execOptions?**: `Partial`&lt;[`WriteExecutionOptions`](../interfaces/WriteExecutionOptions.md)&gt;
#### Returns
@@ -66,8 +62,7 @@ table scan even if an index exists. This can be useful for benchmarking or when
the query optimizer chooses a suboptimal path.
#### Parameters
* **useIndex**: `boolean`
* **useIndex**: `boolean`
Whether to use indices for the merge operation. Defaults to `true`.
#### Returns
@@ -104,10 +99,8 @@ table (new data).
For example, "target.last_update < source.last_update"
#### Parameters
* **options?**
* **options.where?**: `string`
* **options?**
* **options.where?**: `string`
#### Returns
@@ -126,10 +119,8 @@ deleted. An optional condition can be provided to limit what
data is deleted.
#### Parameters
* **options?**
* **options.where?**: `string`
* **options?**
* **options.where?**: `string`
An optional condition to limit what data is deleted
#### Returns


@@ -28,21 +28,16 @@ new MultiMatchQuery(
Creates an instance of MultiMatchQuery.
#### Parameters
* **query**: `string`
* **query**: `string`
The text query to search for across multiple columns.
* **columns**: `string`[]
* **columns**: `string`[]
An array of column names to search within.
* **options?**
* **options?**
Optional parameters for the multi-match query.
- `boosts`: An array of boost factors for each column (default is 1.0 for all).
- `operator`: The logical operator to use for combining terms in the query (default is "OR").
* **options.boosts?**: `number`[]
* **options.operator?**: [`Operator`](../enumerations/Operator.md)
#### Returns

View File

@@ -21,8 +21,7 @@ new NativeJsHeaderProvider(getHeadersCallback): NativeJsHeaderProvider
Create a new JsHeaderProvider from a JavaScript callback
#### Parameters
* **getHeadersCallback**
#### Returns

View File

@@ -51,11 +51,9 @@ new OAuthHeaderProvider(tokenFetcher, refreshBufferSeconds): OAuthHeaderProvider
Initialize the OAuth provider.
#### Parameters
* **tokenFetcher**
Function to fetch new tokens. Should return an object with 'accessToken' and optionally 'expiresIn'.
* **refreshBufferSeconds**: `number` = `300`
Seconds before expiry to refresh token. Default 300 (5 minutes).
#### Returns

View File

@@ -46,8 +46,7 @@ filter(filter): PermutationBuilder
Configure filtering for the permutation.
#### Parameters
* **filter**: `string`
SQL filter expression
#### Returns
@@ -73,11 +72,9 @@ persist(connection, tableName): PermutationBuilder
Configure the permutation to be persisted.
#### Parameters
* **connection**: [`Connection`](Connection.md)
The connection to persist the permutation to
* **tableName**: `string`
The name of the table to create
#### Returns
@@ -103,8 +100,7 @@ shuffle(options): PermutationBuilder
Configure shuffling for the permutation.
#### Parameters
* **options**: [`ShuffleOptions`](../interfaces/ShuffleOptions.md)
Configuration for shuffling
#### Returns
@@ -134,8 +130,7 @@ splitCalculated(options): PermutationBuilder
Configure calculated splits for the permutation.
#### Parameters
* **options**: [`SplitCalculatedOptions`](../interfaces/SplitCalculatedOptions.md)
Configuration for calculated splitting
#### Returns
@@ -161,8 +156,7 @@ splitHash(options): PermutationBuilder
Configure hash-based splits for the permutation.
#### Parameters
* **options**: [`SplitHashOptions`](../interfaces/SplitHashOptions.md)
Configuration for hash-based splitting
#### Returns
@@ -192,8 +186,7 @@ splitRandom(options): PermutationBuilder
Configure random splits for the permutation.
#### Parameters
* **options**: [`SplitRandomOptions`](../interfaces/SplitRandomOptions.md)
Configuration for random splitting
#### Returns
@@ -226,8 +219,7 @@ splitSequential(options): PermutationBuilder
Configure sequential splits for the permutation.
#### Parameters
* **options**: [`SplitSequentialOptions`](../interfaces/SplitSequentialOptions.md)
Configuration for sequential splitting
#### Returns

View File

@@ -28,18 +28,14 @@ new PhraseQuery(
Creates an instance of `PhraseQuery`.
#### Parameters
* **query**: `string`
The phrase to search for in the specified column.
* **column**: `string`
The name of the column to search within.
* **options?**
Optional parameters for the phrase query.
- `slop`: The maximum number of intervening unmatched positions allowed between words in the phrase (default is 0).
* **options.slop?**: `number`
#### Returns

View File

@@ -86,8 +86,7 @@ protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
Execute the query and return the results as an async generator of record batches.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns
@@ -120,8 +119,7 @@ explainPlan(verbose): Promise<string>
Generates an explanation of the query execution plan.
#### Parameters
* **verbose**: `boolean` = `false`
If true, provides a more detailed explanation. Defaults to false.
#### Returns
@@ -177,8 +175,7 @@ filter(predicate): this
A filter statement to be applied to this query.
#### Parameters
* **predicate**: `string`
#### Returns
@@ -205,10 +202,8 @@ fullTextSearch(query, options?): this
```
#### Parameters
* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
* **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;
#### Returns
@@ -232,8 +227,7 @@ By default, a plain search has no limit. If this method is not
called then every valid row from the table will be returned.
#### Parameters
* **limit**: `number`
#### Returns
@@ -268,8 +262,7 @@ fixed size list of floats) then the column does not need to be specified.
If there is more than one vector column you must use `column` to specify which column to search.
#### Parameters
* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)
#### Returns
@@ -308,10 +301,8 @@ nearestToText(query, columns?): Query
```
#### Parameters
* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
* **columns?**: `string`[]
#### Returns
@@ -330,8 +321,7 @@ Set the number of rows to skip before returning results.
This is useful for pagination.
#### Parameters
* **offset**: `number`
#### Returns
@@ -393,8 +383,7 @@ For example, an SQL query might state `SELECT a + b AS combined, c`. The equiva
input to this method would be:
#### Parameters
* **columns**: `string` \| `string`[] \| `Record`&lt;`string`, `string`&gt; \| `Map`&lt;`string`, `string`&gt;
#### Returns
@@ -428,8 +417,7 @@ toArray(options?): Promise<any[]>
Collect the results as an array of objects.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns
@@ -450,8 +438,7 @@ toArrow(options?): Promise<Table<any>>
Collect the results as an Arrow Table.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns
@@ -478,8 +465,7 @@ A filter statement to be applied to this query.
The filter should be supplied as an SQL query string. For example:
#### Parameters
* **predicate**: `string`
#### Returns

View File

@@ -87,8 +87,7 @@ protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
Execute the query and return the results as an async generator of record batches.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns
@@ -117,8 +116,7 @@ explainPlan(verbose): Promise<string>
Generates an explanation of the query execution plan.
#### Parameters
* **verbose**: `boolean` = `false`
If true, provides a more detailed explanation. Defaults to false.
#### Returns
@@ -186,8 +184,7 @@ For example, an SQL query might state `SELECT a + b AS combined, c`. The equiva
input to this method would be:
#### Parameters
* **columns**: `string` \| `string`[] \| `Record`&lt;`string`, `string`&gt; \| `Map`&lt;`string`, `string`&gt;
#### Returns
@@ -217,8 +214,7 @@ toArray(options?): Promise<any[]>
Collect the results as an array of objects.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns
@@ -235,8 +231,7 @@ toArrow(options?): Promise<Table<any>>
Collect the results as an Arrow Table.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns

View File

@@ -33,10 +33,8 @@ Create a new session with custom cache sizes.
Defaults to 1GB if not specified.
#### Parameters
* **indexCacheSizeBytes?**: `null` \| `bigint`
* **metadataCacheSizeBytes?**: `null` \| `bigint`
#### Returns

View File

@@ -37,8 +37,7 @@ new StaticHeaderProvider(headers): StaticHeaderProvider
Initialize with static headers.
#### Parameters
* **headers**: `Record`&lt;`string`, `string`&gt;
Headers to return for every request.
#### Returns

View File

@@ -46,11 +46,9 @@ abstract add(data, options?): Promise<AddResult>
Insert records into this Table.
#### Parameters
* **data**: [`Data`](../type-aliases/Data.md)
Records to be inserted into the Table
* **options?**: `Partial`&lt;[`AddDataOptions`](../interfaces/AddDataOptions.md)&gt;
#### Returns
@@ -70,8 +68,7 @@ abstract addColumns(newColumnTransforms): Promise<AddColumnsResult>
Add new columns with defined values.
#### Parameters
* **newColumnTransforms**: `Field`&lt;`any`&gt; \| `Field`&lt;`any`&gt;[] \| `Schema`&lt;`any`&gt; \| [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
Either:
- An array of objects with column names and SQL expressions to calculate values
- A single Arrow Field defining one column with its data type (column will be initialized with null values)
@@ -96,8 +93,7 @@ abstract alterColumns(columnAlterations): Promise<AlterColumnsResult>
Alter the name or nullability of columns.
#### Parameters
* **columnAlterations**: [`ColumnAlteration`](../interfaces/ColumnAlteration.md)[]
One or more alterations to apply to columns.
@@ -126,8 +122,7 @@ Calling this method will set the table into time-travel mode. If you
wish to return to standard mode, call `checkoutLatest`.
#### Parameters
* **version**: `string` \| `number`
The version to checkout, could be version number or tag
#### Returns
@@ -196,8 +191,7 @@ abstract countRows(filter?): Promise<number>
Count the total number of rows in the dataset.
#### Parameters
* **filter?**: `string`
#### Returns
@@ -222,10 +216,8 @@ We currently don't support custom named indexes.
The index name will always be `${column}_idx`.
#### Parameters
* **column**: `string`
* **options?**: `Partial`&lt;[`IndexOptions`](../interfaces/IndexOptions.md)&gt;
#### Returns
@@ -268,8 +260,7 @@ abstract delete(predicate): Promise<DeleteResult>
Delete the rows that satisfy the predicate.
#### Parameters
* **predicate**: `string`
#### Returns
@@ -308,8 +299,7 @@ call ``compact_files`` to rewrite the data without the removed columns and
then call ``cleanup_files`` to remove the old files.
#### Parameters
* **columnNames**: `string`[]
The names of the columns to drop. These can
be nested column references (e.g. "a.b.c") or top-level column names
(e.g. "a").
@@ -332,8 +322,7 @@ abstract dropIndex(name): Promise<void>
Drop an index from the table.
#### Parameters
* **name**: `string`
The name of the index.
This does not delete the index from disk, it just removes it from the table.
To delete the index, run [Table#optimize](Table.md#optimize) after dropping the index.
@@ -354,8 +343,7 @@ abstract indexStats(name): Promise<undefined | IndexStatistics>
List all the stats of a specified index
#### Parameters
* **name**: `string`
The name of the index.
#### Returns
@@ -460,8 +448,7 @@ abstract mergeInsert(on): MergeInsertBuilder
```
#### Parameters
* **on**: `string` \| `string`[]
#### Returns
@@ -492,8 +479,7 @@ Modeled after ``VACUUM`` in PostgreSQL.
modification operations.
#### Parameters
* **options?**: `Partial`&lt;[`OptimizeOptions`](../interfaces/OptimizeOptions.md)&gt;
#### Returns
@@ -510,8 +496,7 @@ abstract prewarmIndex(name): Promise<void>
Prewarm an index in the table.
#### Parameters
* **name**: `string`
The name of the index.
This will load the index into memory. This may reduce the cold-start time for
future queries. If the index does not fit in the cache then this call may be
@@ -643,14 +628,11 @@ Create a search query to find the nearest neighbors
of the given query
#### Parameters
* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
the query, a vector or string
* **queryType?**: `string`
the type of the query, "vector", "fts", or "auto"
* **ftsColumns?**: `string` \| `string`[]
the columns to search in for full text search
for now, only one column can be searched at a time.
when "auto" is used, if the query is a string and an embedding function is defined, it will be treated as a vector query
@@ -715,8 +697,7 @@ abstract takeOffsets(offsets): TakeQuery
Create a query that returns a subset of the rows in the table.
#### Parameters
* **offsets**: `number`[]
The offsets of the rows to return.
#### Returns
@@ -736,8 +717,7 @@ abstract takeRowIds(rowIds): TakeQuery
Create a query that returns a subset of the rows in the table.
#### Parameters
* **rowIds**: readonly (`number` \| `bigint`)[]
The row ids of the rows to return.
Row ids returned by `withRowId()` are `bigint`, so `bigint[]` is supported.
For convenience / backwards compatibility, `number[]` is also accepted (for
@@ -776,8 +756,7 @@ abstract update(opts): Promise<UpdateResult>
Update existing records in the Table
##### Parameters
* **opts**: `object` & `Partial`&lt;[`UpdateOptions`](../interfaces/UpdateOptions.md)&gt;
##### Returns
@@ -801,8 +780,7 @@ abstract update(opts): Promise<UpdateResult>
Update existing records in the Table
##### Parameters
* **opts**: `object` & `Partial`&lt;[`UpdateOptions`](../interfaces/UpdateOptions.md)&gt;
##### Returns
@@ -839,12 +817,10 @@ better performance with a single [`merge_insert`] call instead of
repeatedly calling this method.
##### Parameters
* **updates**: `Record`&lt;`string`, `string`&gt; \| `Map`&lt;`string`, `string`&gt;
the columns to update
* **options?**: `Partial`&lt;[`UpdateOptions`](../interfaces/UpdateOptions.md)&gt;
additional options to control the update behavior
@@ -875,8 +851,7 @@ is the same thing as calling `nearestTo` on the builder returned
by `query`.
#### Parameters
* **vector**: [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md)
#### Returns
@@ -911,11 +886,9 @@ abstract waitForIndex(indexNames, timeoutSeconds): Promise<void>
Waits for asynchronous indexing to complete on the table.
#### Parameters
* **indexNames**: `string`[]
The name of the indices to wait for
* **timeoutSeconds**: `number`
The number of seconds to wait before timing out
This will raise an error if the indices are not created and fully indexed within the timeout.

View File

@@ -27,10 +27,8 @@ create(tag, version): Promise<void>
```
#### Parameters
* **tag**: `string`
* **version**: `number`
#### Returns
@@ -45,8 +43,7 @@ delete(tag): Promise<void>
```
#### Parameters
* **tag**: `string`
#### Returns
@@ -61,8 +58,7 @@ getVersion(tag): Promise<number>
```
#### Parameters
* **tag**: `string`
#### Returns
@@ -89,10 +85,8 @@ update(tag, version): Promise<void>
```
#### Parameters
* **tag**: `string`
* **version**: `number`
#### Returns

View File

@@ -82,8 +82,7 @@ protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
Execute the query and return the results as an async generator of record batches.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns
@@ -116,8 +115,7 @@ explainPlan(verbose): Promise<string>
Generates an explanation of the query execution plan.
#### Parameters
* **verbose**: `boolean` = `false`
If true, provides a more detailed explanation. Defaults to false.
#### Returns
@@ -193,8 +191,7 @@ For example, an SQL query might state `SELECT a + b AS combined, c`. The equiva
input to this method would be:
#### Parameters
* **columns**: `string` \| `string`[] \| `Record`&lt;`string`, `string`&gt; \| `Map`&lt;`string`, `string`&gt;
#### Returns
@@ -228,8 +225,7 @@ toArray(options?): Promise<any[]>
Collect the results as an array of objects.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns
@@ -250,8 +246,7 @@ toArrow(options?): Promise<Table<any>>
Collect the results as an Arrow Table.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns

View File

@@ -15,8 +15,7 @@ new VectorColumnOptions(values?): VectorColumnOptions
```
#### Parameters
* **values?**: `Partial`&lt;[`VectorColumnOptions`](VectorColumnOptions.md)&gt;
#### Returns

View File

@@ -39,8 +39,7 @@ addQueryVector(vector): VectorQuery
```
#### Parameters
* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)
#### Returns
@@ -127,8 +126,7 @@ This controls which column is compared to the query vector supplied in
the call to `nearestTo`.
#### Parameters
* **column**: `string`
#### Returns
@@ -150,10 +148,8 @@ distanceRange(lowerBound?, upperBound?): VectorQuery
```
#### Parameters
* **lowerBound?**: `number`
* **upperBound?**: `number`
#### Returns
@@ -174,8 +170,7 @@ to some kind of distance metric. This parameter controls which distance metric
use. See the available distance types for details.
#### Parameters
* **distanceType**: `"l2"` \| `"cosine"` \| `"dot"`
#### Returns
@@ -209,8 +204,7 @@ Increasing this value will increase the recall of your query but will
also increase the latency of your query. The default value is 1.5*limit.
#### Parameters
* **ef**: `number`
#### Returns
@@ -227,8 +221,7 @@ protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
Execute the query and return the results as an async generator of record batches.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns
@@ -261,8 +254,7 @@ explainPlan(verbose): Promise<string>
Generates an explanation of the query execution plan.
#### Parameters
* **verbose**: `boolean` = `false`
If true, provides a more detailed explanation. Defaults to false.
#### Returns
@@ -318,8 +310,7 @@ filter(predicate): this
A filter statement to be applied to this query.
#### Parameters
* **predicate**: `string`
#### Returns
@@ -346,10 +337,8 @@ fullTextSearch(query, options?): this
```
#### Parameters
* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
* **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;
#### Returns
@@ -373,8 +362,7 @@ By default, a plain search has no limit. If this method is not
called then every valid row from the table will be returned.
#### Parameters
* **limit**: `number`
#### Returns
@@ -401,8 +389,7 @@ a narrow filter to allow these queries to spend more time searching and avoid
potential false negatives.
#### Parameters
* **maximumNprobes**: `number`
#### Returns
@@ -424,8 +411,7 @@ filter. See `nprobes` for more details. Higher values will increase recall
but will also increase latency.
#### Parameters
* **minimumNprobes**: `number`
#### Returns
@@ -465,8 +451,7 @@ you can use `minimumNprobes` and `maximumNprobes`. This method sets both
the minimum and maximum to the same value.
#### Parameters
* **nprobes**: `number`
#### Returns
@@ -485,8 +470,7 @@ Set the number of rows to skip before returning results.
This is useful for pagination.
#### Parameters
* **offset**: `number`
#### Returns
@@ -590,8 +574,7 @@ and the quantized result vectors. This can be considerably different than the t
distance between the query vector and the actual uncompressed vector.
#### Parameters
* **refineFactor**: `number`
#### Returns
@@ -606,8 +589,7 @@ rerank(reranker): VectorQuery
```
#### Parameters
* **reranker**: [`Reranker`](../namespaces/rerankers/interfaces/Reranker.md)
#### Returns
@@ -642,8 +624,7 @@ For example, an SQL query might state `SELECT a + b AS combined, c`. The equiva
input to this method would be:
#### Parameters
* **columns**: `string` \| `string`[] \| `Record`&lt;`string`, `string`&gt; \| `Map`&lt;`string`, `string`&gt;
#### Returns
@@ -677,8 +658,7 @@ toArray(options?): Promise<any[]>
Collect the results as an array of objects.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns
@@ -699,8 +679,7 @@ toArrow(options?): Promise<Table<any>>
Collect the results as an Arrow Table.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns
@@ -727,8 +706,7 @@ A filter statement to be applied to this query.
The filter should be supplied as an SQL query string. For example:
#### Parameters
* **predicate**: `string`
#### Returns

View File

@@ -0,0 +1,59 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / OAuthFlowType
# Enumeration: OAuthFlowType
OAuth authentication flow types.
## Enumeration Members
### AuthorizationCodePKCE
```ts
AuthorizationCodePKCE: "authorization_code_pkce";
```
Authorization Code with PKCE (interactive browser-based auth).
***
### AzureManagedIdentity
```ts
AzureManagedIdentity: "azure_managed_identity";
```
Azure Managed Identity via IMDS.
***
### ClientCredentials
```ts
ClientCredentials: "client_credentials";
```
Client Credentials grant (service-to-service / M2M).
***
### DeviceCode
```ts
DeviceCode: "device_code";
```
Device Code grant (CLI / headless environments).
***
### WorkloadIdentity
```ts
WorkloadIdentity: "workload_identity";
```
Workload Identity Federation (K8s, GitHub Actions).
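As a quick illustration (not part of the generated reference), here is a minimal sketch of selecting one of these flows on the public `OAuthConfig` documented later in this diff; the issuer, client ID, and scopes are placeholder values:
```typescript
import { OAuthFlowType, type OAuthConfig } from "@lancedb/lancedb";

// Device Code suits CLIs and headless environments: the user completes
// sign-in on a second device while the client polls for the token.
const config: OAuthConfig = {
  issuerUrl: "https://login.microsoftonline.com/{tenant}/v2.0",
  clientId: "app-id",
  scopes: ["api://lancedb-api/.default"],
  flow: OAuthFlowType.DeviceCode,
};
```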

View File

@@ -11,8 +11,7 @@ function RecordBatchIterator(promisedInner): AsyncGenerator<RecordBatch<any>, vo
```
## Parameters
* **promisedInner**: `Promise`&lt;`RecordBatchIterator`&gt;
## Returns

View File

@@ -25,17 +25,13 @@ Accepted formats:
- `db://host:port` - remote database (LanceDB cloud)
### Parameters
* **uri**: `string`
The uri of the database. If the database uri starts
with `db://` then it connects to a remote database.
* **options?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
The options to use when connecting to the database
* **session?**: [`Session`](../classes/Session.md)
* **headerProvider?**: [`HeaderProvider`](../classes/HeaderProvider.md) \| () => `Record`&lt;`string`, `string`&gt; \| () => `Promise`&lt;`Record`&lt;`string`, `string`&gt;&gt;
### Returns
@@ -85,8 +81,7 @@ Accepted formats:
- `db://host:port` - remote database (LanceDB cloud)
### Parameters
* **options**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt; & `object`
The options to use when connecting to the database
### Returns

View File

@@ -46,12 +46,9 @@ rules are as follows:
- Array<any> => List
## Parameters
* **data**: `Record`&lt;`string`, `unknown`&gt;[]
* **options?**: `Partial`&lt;[`MakeArrowTableOptions`](../classes/MakeArrowTableOptions.md)&gt;
* **metadata?**: `Map`&lt;`string`, `string`&gt;
## Returns

View File

@@ -11,8 +11,7 @@ function packBits(data): number[]
```
## Parameters
* **data**: `number`[]
## Returns

View File

@@ -13,8 +13,7 @@ function permutationBuilder(table): PermutationBuilder
Create a permutation builder for the given table.
## Parameters
* **table**: [`Table`](../classes/Table.md)
The source table to create a permutation from
## Returns

View File

@@ -12,6 +12,7 @@
## Enumerations
- [FullTextQueryType](enumerations/FullTextQueryType.md)
- [OAuthFlowType](enumerations/OAuthFlowType.md)
- [Occur](enumerations/Occur.md)
- [Operator](enumerations/Operator.md)
@@ -70,6 +71,8 @@
- [IvfPqOptions](interfaces/IvfPqOptions.md)
- [IvfRqOptions](interfaces/IvfRqOptions.md)
- [MergeResult](interfaces/MergeResult.md)
- [NativeOAuthConfig](interfaces/NativeOAuthConfig.md)
- [OAuthConfig](interfaces/OAuthConfig.md)
- [OpenTableOptions](interfaces/OpenTableOptions.md)
- [OptimizeOptions](interfaces/OptimizeOptions.md)
- [OptimizeStats](interfaces/OptimizeStats.md)

View File

@@ -41,6 +41,41 @@ for testing purposes.
***
### manifestEnabled?
```ts
optional manifestEnabled: boolean;
```
(For LanceDB OSS only): use directory namespace manifests as the source
of truth for table metadata. Existing directory-listed root tables are
migrated into the manifest on access.
***
### namespaceClientProperties?
```ts
optional namespaceClientProperties: Record<string, string>;
```
(For LanceDB OSS only): extra properties for the backing namespace
client used by manifest-enabled native connections.
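A hedged sketch of how these two OSS-only options might be passed together from TypeScript (assuming the public `connect` forwards them through unchanged; the property key is hypothetical):
```typescript
import * as lancedb from "@lancedb/lancedb";

// Local/native connection using namespace manifests as the source of
// truth for table metadata; existing tables migrate on access.
const db = await lancedb.connect("/data/my-db", {
  manifestEnabled: true,
  namespaceClientProperties: { "client.retry-count": "3" }, // hypothetical key
});
```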
***
### oauthConfig?
```ts
optional oauthConfig: NativeOAuthConfig;
```
(For LanceDB cloud only): OAuth configuration for IdP-based
authentication (e.g., Azure Entra ID). When set, token acquisition
and refresh are handled entirely in Rust.
***
### readConsistencyInterval?
```ts

View File

@@ -0,0 +1,112 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / NativeOAuthConfig
# Interface: NativeOAuthConfig
OAuth configuration for LanceDB authentication.
All token acquisition and refresh is handled in the Rust layer.
## Properties
### callbackPort?
```ts
optional callbackPort: number;
```
Port for local callback server (authorization_code_pkce, default: 8400).
***
### clientId
```ts
clientId: string;
```
Application / Client ID.
***
### clientSecret?
```ts
optional clientSecret: string;
```
Client secret (required for client_credentials).
***
### flow?
```ts
optional flow: string;
```
Authentication flow: "client_credentials", "authorization_code_pkce",
"device_code", "azure_managed_identity", "workload_identity"
***
### issuerUrl
```ts
issuerUrl: string;
```
OIDC issuer URL or OAuth authority URL.
For Azure: `https://login.microsoftonline.com/{tenant_id}/v2.0`
***
### managedIdentityClientId?
```ts
optional managedIdentityClientId: string;
```
Client ID for user-assigned managed identity (azure_managed_identity).
***
### redirectUri?
```ts
optional redirectUri: string;
```
Redirect URI (authorization_code_pkce flow).
***
### refreshBufferSecs?
```ts
optional refreshBufferSecs: number;
```
Seconds before expiry to trigger proactive refresh (default: 300).
***
### scopes
```ts
scopes: string[];
```
OAuth scopes to request. For Azure: `["api://{app_id}/.default"]`
***
### tokenFile?
```ts
optional tokenFile: string;
```
Path to federated token file (workload_identity).
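To tie the PKCE-specific fields together, a brief sketch of a native config using the string flow names this interface expects; the redirect URI and port are illustrative:
```typescript
import type { NativeOAuthConfig } from "@lancedb/lancedb";

// Interactive browser sign-in; a local server on callbackPort receives
// the authorization code, and the PKCE exchange happens in Rust.
const config: NativeOAuthConfig = {
  issuerUrl: "https://login.microsoftonline.com/{tenant}/v2.0",
  clientId: "app-id",
  scopes: ["api://lancedb-api/.default"],
  flow: "authorization_code_pkce",
  redirectUri: "http://localhost:8400/callback", // illustrative
  callbackPort: 8400,
};
```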

View File

@@ -0,0 +1,134 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / OAuthConfig
# Interface: OAuthConfig
OAuth configuration for LanceDB authentication.
All token acquisition and refresh is handled in the Rust layer.
This config is passed through to Rust via napi-rs.
## Examples
```typescript
const config: OAuthConfig = {
issuerUrl: "https://login.microsoftonline.com/{tenant}/v2.0",
clientId: "app-id",
clientSecret: "secret",
scopes: ["api://lancedb-api/.default"],
};
```
```typescript
const config: OAuthConfig = {
issuerUrl: "https://login.microsoftonline.com/{tenant}/v2.0",
clientId: "app-id",
scopes: ["api://lancedb-api/.default"],
flow: OAuthFlowType.AzureManagedIdentity,
};
```
## Properties
### callbackPort?
```ts
optional callbackPort: number;
```
Port for local callback server (AuthorizationCodePKCE, default: 8400).
***
### clientId
```ts
clientId: string;
```
Application / Client ID.
***
### clientSecret?
```ts
optional clientSecret: string;
```
Client secret (required for ClientCredentials).
***
### flow?
```ts
optional flow: OAuthFlowType;
```
Authentication flow (default: ClientCredentials).
***
### issuerUrl
```ts
issuerUrl: string;
```
OIDC issuer URL or OAuth authority URL.
For Azure: `https://login.microsoftonline.com/{tenant_id}/v2.0`
***
### managedIdentityClientId?
```ts
optional managedIdentityClientId: string;
```
Client ID for user-assigned managed identity (AzureManagedIdentity).
***
### redirectUri?
```ts
optional redirectUri: string;
```
Redirect URI (AuthorizationCodePKCE flow).
***
### refreshBufferSecs?
```ts
optional refreshBufferSecs: number;
```
Seconds before expiry to trigger proactive refresh (default: 300).
***
### scopes
```ts
scopes: string[];
```
OAuth scopes to request.
For Azure: `["api://{app_id}/.default"]`
***
### tokenFile?
```ts
optional tokenFile: string;
```
Path to federated token file (WorkloadIdentity).

View File

@@ -58,8 +58,7 @@ computeQueryEmbeddings(data): Promise<number[] | Uint8Array | Float32Array | Flo
Compute the embeddings for a single query
#### Parameters
* **data**: `T`
#### Returns
@@ -76,8 +75,7 @@ abstract computeSourceEmbeddings(data): Promise<number[][] | Float32Array[] | Fl
Creates a vector representation for the given values.
#### Parameters
* **data**: `T`[]
#### Returns
@@ -155,8 +153,7 @@ protected resolveVariables(config): Partial<M>
Apply variables to the config.
#### Parameters
* **config**: `Partial`&lt;`M`&gt;
#### Returns
@@ -173,8 +170,7 @@ sourceField(optionsOrDatatype): [DataType<Type, any>, Map<string, EmbeddingFunct
sourceField is used in combination with `LanceSchema` to provide a declarative data model
#### Parameters
* **optionsOrDatatype**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;[`FieldOptions`](../interfaces/FieldOptions.md)&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
The options for the field or the datatype
#### Returns
@@ -211,8 +207,7 @@ vectorField(optionsOrDatatype?): [DataType<Type, any>, Map<string, EmbeddingFunc
vectorField is used in combination with `LanceSchema` to provide a declarative data model
#### Parameters
* **optionsOrDatatype?**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;[`FieldOptions`](../interfaces/FieldOptions.md)&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
The options for the field
#### Returns

View File

@@ -32,8 +32,7 @@ functionToMetadata(conf): Record<string, any>
```
#### Parameters
* **conf**: [`EmbeddingFunctionConfig`](../interfaces/EmbeddingFunctionConfig.md)
#### Returns
@@ -54,8 +53,7 @@ Fetch an embedding function by name
**T** *extends* [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`unknown`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;
#### Parameters
* **name**: `string`
The name of the function
#### Returns
@@ -71,8 +69,7 @@ getTableMetadata(functions): Map<string, string>
```
#### Parameters
* **functions**: [`EmbeddingFunctionConfig`](../interfaces/EmbeddingFunctionConfig.md)[]
#### Returns
@@ -89,8 +86,7 @@ getVar(name): undefined | string
Get a variable.
#### Parameters
* **name**: `string`
#### Returns
@@ -129,18 +125,15 @@ Register an embedding function
**T** *extends* [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)&lt;[`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt; = [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)&lt;[`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;
#### Parameters
* **this**: [`EmbeddingFunctionRegistry`](EmbeddingFunctionRegistry.md)
* **alias?**: `string`
#### Returns
`Function`
##### Parameters
* **ctor**: `T`
##### Returns
@@ -161,8 +154,7 @@ reset(this): void
reset the registry to the initial state
#### Parameters
* **this**: [`EmbeddingFunctionRegistry`](EmbeddingFunctionRegistry.md)
#### Returns
@@ -187,10 +179,8 @@ whether to use a GPU for inference.
The name must not contain colons. The default value can contain colons.
#### Parameters
* **name**: `string`
* **value**: `string`
#### Returns

View File

@@ -43,8 +43,7 @@ computeQueryEmbeddings(data): Promise<number[] | Uint8Array | Float32Array | Flo
Compute the embeddings for a single query
#### Parameters
* **data**: `string`
#### Returns
@@ -65,8 +64,7 @@ computeSourceEmbeddings(data): Promise<number[][] | Float32Array[] | Float64Arra
Creates a vector representation for the given values.
#### Parameters
* **data**: `string`[]
#### Returns
@@ -103,10 +101,8 @@ abstract generateEmbeddings(texts, ...args): Promise<number[][] | Float32Array[]
```
#### Parameters
* **texts**: `string`[]
* ...**args**: `any`[]
#### Returns
@@ -182,8 +178,7 @@ protected resolveVariables(config): Partial<M>
Apply variables to the config.
#### Parameters
* **config**: `Partial`&lt;`M`&gt;
#### Returns
@@ -245,8 +240,7 @@ vectorField(optionsOrDatatype?): [DataType<Type, any>, Map<string, EmbeddingFunc
vectorField is used in combination with `LanceSchema` to provide a declarative data model
#### Parameters
* **optionsOrDatatype?**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;[`FieldOptions`](../interfaces/FieldOptions.md)&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
The options for the field
#### Returns

View File

@@ -13,8 +13,7 @@ function LanceSchema(fields): Schema
Create a schema with embedding functions.
## Parameters
* **fields**: `Record`&lt;`string`, `object` \| [`object`, `Map`&lt;`string`, [`EmbeddingFunction`](../classes/EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;]&gt;
## Returns

View File

@@ -11,16 +11,14 @@ function register(name?): (ctor) => any
```
## Parameters
* **name?**: `string`
## Returns
`Function`
### Parameters
* **ctor**: [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)&lt;[`EmbeddingFunction`](../classes/EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;
### Returns

View File

@@ -19,8 +19,7 @@ new EmbeddingFunctionConstructor(modelOptions?): T
```
#### Parameters
* **modelOptions?**: `T`\[`"TOptions"`\]
#### Returns

View File

@@ -19,8 +19,7 @@ create(options?): CreateReturnType<T>
```
#### Parameters
* **options?**: `T`\[`"TOptions"`\]
#### Returns

View File

@@ -20,12 +20,9 @@ rerankHybrid(
```
#### Parameters
* **query**: `string`
* **vecResults**: `RecordBatch`&lt;`any`&gt;
* **ftsResults**: `RecordBatch`&lt;`any`&gt;
#### Returns
@@ -40,8 +37,7 @@ static create(k): Promise<RRFReranker>
```
#### Parameters
* **k**: `number` = `60`
#### Returns

View File

@@ -18,12 +18,9 @@ rerankHybrid(
```
#### Parameters
* **query**: `string`
* **vecResults**: `RecordBatch`&lt;`any`&gt;
* **ftsResults**: `RecordBatch`&lt;`any`&gt;
#### Returns

View File

@@ -94,11 +94,11 @@ of raw SQL strings with [where][lancedb.query.LanceQueryBuilder.where] and
## Full text search
::: lancedb.fts.create_index
Use [lancedb.table.Table.create_fts_index][] for the synchronous API or
[lancedb.table.AsyncTable.create_index][] with [lancedb.index.FTS][] for the
asynchronous API.
::: lancedb.fts.populate_index
::: lancedb.fts.search_index
::: lancedb.index.FTS
## Utilities

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.28.0-beta.9</version>
<version>0.28.0-beta.11</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.28.0-beta.9</version>
<version>0.28.0-beta.11</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>
@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>6.0.0-beta.1</lance-core.version>
<lance-core.version>7.0.0-beta.7</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>

View File

@@ -1,7 +1,8 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.28.0-beta.9"
version = "0.28.0-beta.11"
publish = false
license.workspace = true
description.workspace = true
repository.workspace = true
@@ -15,7 +16,7 @@ crate-type = ["cdylib"]
async-trait.workspace = true
arrow-ipc.workspace = true
arrow-array.workspace = true
arrow-buffer = "57.2"
arrow-buffer = "58.0.0"
half.workspace = true
arrow-schema.workspace = true
env_logger.workspace = true
@@ -31,8 +32,8 @@ lzma-sys = { version = "0.1", features = ["static"] }
log.workspace = true
# Pin to resolve build failures; update periodically for security patches.
aws-lc-sys = "=0.38.0"
aws-lc-rs = "=1.16.1"
aws-lc-sys = "=0.40.0"
aws-lc-rs = "=1.16.3"
[build-dependencies]
napi-build = "2.3.1"

View File

@@ -48,6 +48,7 @@ export {
SplitHashOptions,
SplitSequentialOptions,
ShuffleOptions,
OAuthConfig as NativeOAuthConfig,
} from "./native.js";
export {
@@ -113,6 +114,8 @@ export {
TokenResponse,
} from "./header";
export { OAuthConfig, OAuthFlowType } from "./oauth";
export { MergeInsertBuilder, WriteExecutionOptions } from "./merge";
export * as embedding from "./embedding";

nodejs/lancedb/oauth.ts Normal file
View File

@@ -0,0 +1,82 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
/**
* OAuth authentication flow types.
*/
export enum OAuthFlowType {
/** Client Credentials grant (service-to-service / M2M). */
ClientCredentials = "client_credentials",
/** Authorization Code with PKCE (interactive browser-based auth). */
AuthorizationCodePKCE = "authorization_code_pkce",
/** Device Code grant (CLI / headless environments). */
DeviceCode = "device_code",
/** Azure Managed Identity via IMDS. */
AzureManagedIdentity = "azure_managed_identity",
/** Workload Identity Federation (K8s, GitHub Actions). */
WorkloadIdentity = "workload_identity",
}
/**
* OAuth configuration for LanceDB authentication.
*
* All token acquisition and refresh is handled in the Rust layer.
* This config is passed through to Rust via napi-rs.
*
* @example Client Credentials (service-to-service):
* ```typescript
* const config: OAuthConfig = {
* issuerUrl: "https://login.microsoftonline.com/{tenant}/v2.0",
* clientId: "app-id",
* clientSecret: "secret",
* scopes: ["api://lancedb-api/.default"],
* };
* ```
*
* @example Azure Managed Identity:
* ```typescript
* const config: OAuthConfig = {
* issuerUrl: "https://login.microsoftonline.com/{tenant}/v2.0",
* clientId: "app-id",
* scopes: ["api://lancedb-api/.default"],
* flow: OAuthFlowType.AzureManagedIdentity,
* };
* ```
*/
export interface OAuthConfig {
/**
* OIDC issuer URL or OAuth authority URL.
* For Azure: `https://login.microsoftonline.com/{tenant_id}/v2.0`
*/
issuerUrl: string;
/** Application / Client ID. */
clientId: string;
/**
* OAuth scopes to request.
* For Azure: `["api://{app_id}/.default"]`
*/
scopes: string[];
/** Authentication flow (default: ClientCredentials). */
flow?: OAuthFlowType;
/** Client secret (required for ClientCredentials). */
clientSecret?: string;
/** Redirect URI (AuthorizationCodePKCE flow). */
redirectUri?: string;
/** Port for local callback server (AuthorizationCodePKCE, default: 8400). */
callbackPort?: number;
/** Client ID for user-assigned managed identity (AzureManagedIdentity). */
managedIdentityClientId?: string;
/** Path to federated token file (WorkloadIdentity). */
tokenFile?: string;
/** Seconds before expiry to trigger proactive refresh (default: 300). */
refreshBufferSecs?: number;
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.28.0-beta.9",
"version": "0.28.0-beta.11",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.28.0-beta.9",
"version": "0.28.0-beta.11",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.28.0-beta.9",
"version": "0.28.0-beta.11",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.28.0-beta.9",
"version": "0.28.0-beta.11",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.28.0-beta.9",
"version": "0.28.0-beta.11",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.28.0-beta.9",
"version": "0.28.0-beta.11",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.28.0-beta.9",
"version": "0.28.0-beta.11",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.28.0-beta.8",
"version": "0.28.0-beta.11",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.28.0-beta.8",
"version": "0.28.0-beta.11",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.28.0-beta.9",
"version": "0.28.0-beta.11",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",
@@ -75,7 +75,6 @@
"build:debug": "napi build --platform --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir lancedb",
"postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
"build:release": "napi build --platform --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir dist",
"postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
"build": "npm run build:debug && npm run tsc",
"build-release": "npm run build:release && npm run tsc",
"tsc": "tsc -b",

View File

@@ -67,6 +67,12 @@ impl Connection {
builder = builder.storage_option(key, value);
}
}
if let Some(manifest_enabled) = options.manifest_enabled {
builder = builder.manifest_enabled(manifest_enabled);
}
if let Some(namespace_client_properties) = options.namespace_client_properties {
builder = builder.namespace_client_properties(namespace_client_properties);
}
// Create client config, optionally with header provider
let client_config = options.client_config.unwrap_or_default();
@@ -79,6 +85,11 @@ impl Connection {
builder = builder.client_config(rust_config);
if let Some(oauth_config) = options.oauth_config {
let config: lancedb::remote::oauth::OAuthConfig = oauth_config.into();
builder = builder.oauth_config(config);
}
if let Some(api_key) = options.api_key {
builder = builder.api_key(&api_key);
}

View File

@@ -37,6 +37,13 @@ pub struct ConnectionOptions {
///
/// The available options are described at https://docs.lancedb.com/storage/
pub storage_options: Option<HashMap<String, String>>,
/// (For LanceDB OSS only): use directory namespace manifests as the source
/// of truth for table metadata. Existing directory-listed root tables are
/// migrated into the manifest on access.
pub manifest_enabled: Option<bool>,
/// (For LanceDB OSS only): extra properties for the backing namespace
/// client used by manifest-enabled native connections.
pub namespace_client_properties: Option<HashMap<String, String>>,
/// (For LanceDB OSS only): the session to use for this connection. Holds
/// shared caches and other session-specific state.
pub session: Option<session::Session>,
@@ -53,6 +60,10 @@ pub struct ConnectionOptions {
/// (For LanceDB cloud only): the host to use for LanceDB cloud. Used
/// for testing purposes.
pub host_override: Option<String>,
/// (For LanceDB cloud only): OAuth configuration for IdP-based
/// authentication (e.g., Azure Entra ID). When set, token acquisition
/// and refresh are handled entirely in Rust.
pub oauth_config: Option<remote::OAuthConfig>,
}
#[napi(object)]

View File

@@ -140,6 +140,67 @@ impl From<TlsConfig> for lancedb::remote::TlsConfig {
}
}
/// OAuth configuration for LanceDB authentication.
/// All token acquisition and refresh is handled in the Rust layer.
#[napi(object)]
#[derive(Debug, Clone)]
pub struct OAuthConfig {
/// OIDC issuer URL or OAuth authority URL.
/// For Azure: `https://login.microsoftonline.com/{tenant_id}/v2.0`
pub issuer_url: String,
/// Application / Client ID.
pub client_id: String,
/// OAuth scopes to request. For Azure: `["api://{app_id}/.default"]`
pub scopes: Vec<String>,
/// Authentication flow: "client_credentials", "authorization_code_pkce",
/// "device_code", "azure_managed_identity", "workload_identity"
pub flow: Option<String>,
/// Client secret (required for client_credentials).
pub client_secret: Option<String>,
/// Redirect URI (authorization_code_pkce flow).
pub redirect_uri: Option<String>,
/// Port for local callback server (authorization_code_pkce, default: 8400).
pub callback_port: Option<u16>,
/// Client ID for user-assigned managed identity (azure_managed_identity).
pub managed_identity_client_id: Option<String>,
/// Path to federated token file (workload_identity).
pub token_file: Option<String>,
/// Seconds before expiry to trigger proactive refresh (default: 300).
pub refresh_buffer_secs: Option<u32>,
}
impl From<OAuthConfig> for lancedb::remote::oauth::OAuthConfig {
fn from(config: OAuthConfig) -> Self {
use lancedb::remote::oauth::OAuthFlow;
let flow = match config.flow.as_deref().unwrap_or("client_credentials") {
// Arm assumed from context: without it the "client_credentials"
// default would fall through to the panic below.
"client_credentials" => OAuthFlow::ClientCredentials,
"authorization_code_pkce" => OAuthFlow::AuthorizationCodePKCE {
redirect_uri: config.redirect_uri,
callback_port: config.callback_port,
},
"device_code" => OAuthFlow::DeviceCode,
"azure_managed_identity" => OAuthFlow::AzureManagedIdentity {
client_id: config.managed_identity_client_id,
},
"workload_identity" => OAuthFlow::WorkloadIdentity {
token_file: config
.token_file
.expect("tokenFile is required for workload_identity flow"),
},
other => panic!("Unknown OAuth flow type: {other}"),
};
Self {
issuer_url: config.issuer_url,
client_id: config.client_id,
client_secret: config.client_secret,
scopes: config.scopes,
flow,
refresh_buffer_secs: config.refresh_buffer_secs.map(|v| v as u64),
}
}
}
impl From<ClientConfig> for lancedb::remote::ClientConfig {
fn from(config: ClientConfig) -> Self {
Self {

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.31.0-beta.9"
current_version = "0.31.0-beta.11"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,7 @@
[package]
name = "lancedb-python"
version = "0.31.0-beta.9"
version = "0.31.0-beta.11"
publish = false
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true
@@ -14,7 +15,7 @@ name = "_lancedb"
crate-type = ["cdylib"]
[dependencies]
arrow = { version = "57.2", features = ["pyarrow"] }
arrow = { version = "58.0.0", features = ["pyarrow"] }
async-trait = "0.1"
bytes = "1"
lancedb = { path = "../rust/lancedb", default-features = false }
@@ -24,8 +25,8 @@ lance-namespace-impls.workspace = true
lance-io.workspace = true
env_logger.workspace = true
log.workspace = true
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.26", features = [
pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.28", features = [
"attributes",
"tokio-runtime",
] }
@@ -34,10 +35,11 @@ futures.workspace = true
serde = "1"
serde_json = "1"
snafu.workspace = true
tokio = { version = "1.40", features = ["sync"] }
tokio = { version = "1.40", features = ["sync", "rt-multi-thread"] }
libc = "0.2"
[build-dependencies]
pyo3-build-config = { version = "0.26", features = [
pyo3-build-config = { version = "0.28", features = [
"extension-module",
"abi3-py39",
] }

View File

@@ -183,7 +183,6 @@
| stack-data | 0.6.3 | MIT License | http://github.com/alexmojaki/stack_data |
| sympy | 1.14.0 | BSD License | https://sympy.org |
| tabulate | 0.9.0 | MIT License | https://github.com/astanin/python-tabulate |
| tantivy | 0.25.1 | UNKNOWN | UNKNOWN |
| threadpoolctl | 3.6.0 | BSD License | https://github.com/joblib/threadpoolctl |
| timm | 1.0.24 | Apache Software License | https://github.com/huggingface/pytorch-image-models |
| tinycss2 | 1.4.0 | BSD License | https://www.courtbouillon.org/tinycss2 |

View File

@@ -57,7 +57,6 @@ tests = [
"duckdb>=0.9.0",
"pytz>=2023.3",
"polars>=0.19, <=1.3.0",
"tantivy>=0.20.0",
"pyarrow-stubs>=16.0",
"pylance>=5.0.0b5",
"requests>=2.31.0",

View File

@@ -7,7 +7,6 @@ import os
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from typing import Dict, Optional, Union, Any, List
import warnings
__version__ = importlib.metadata.version("lancedb")
@@ -73,6 +72,7 @@ def connect(
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
manifest_enabled: bool = False,
namespace_client_impl: Optional[str] = None,
namespace_client_properties: Optional[Dict[str, str]] = None,
namespace_client_pushdown_operations: Optional[List[str]] = None,
@@ -111,6 +111,10 @@ def connect(
storage_options: dict, optional
Additional options for the storage backend. See available options at
<https://docs.lancedb.com/storage/>
manifest_enabled : bool, default False
When true for local/native connections, use directory namespace
manifests as the source of truth for table metadata. Existing
directory-listed root tables are migrated into the manifest on access.
session: Session, optional
(For LanceDB OSS only)
A session to use for this connection. Sessions allow you to configure
@@ -158,11 +162,11 @@ def connect(
conn : DBConnection
A connection to a LanceDB database.
"""
if namespace_client_impl is not None or namespace_client_properties is not None:
if namespace_client_impl is None or namespace_client_properties is None:
if namespace_client_impl is not None:
if namespace_client_properties is None:
raise ValueError(
"Both namespace_client_impl and "
"namespace_client_properties must be provided"
"namespace_client_properties must be provided when "
"namespace_client_impl is set"
)
if kwargs:
raise ValueError(f"Unknown keyword arguments: {kwargs}")
@@ -175,6 +179,12 @@ def connect(
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
)
if namespace_client_properties is not None and not manifest_enabled:
raise ValueError(
"namespace_client_impl must be provided when using "
"namespace_client_properties unless manifest_enabled=True"
)
if namespace_client_pushdown_operations is not None:
raise ValueError(
"namespace_client_pushdown_operations is only valid when "
@@ -212,6 +222,8 @@ def connect(
read_consistency_interval=read_consistency_interval,
storage_options=storage_options,
session=session,
manifest_enabled=manifest_enabled,
namespace_client_properties=namespace_client_properties,
)
@@ -289,6 +301,8 @@ def deserialize_conn(
parsed["uri"],
read_consistency_interval=rci,
storage_options=storage_options,
manifest_enabled=parsed.get("manifest_enabled", False),
namespace_client_properties=parsed.get("namespace_client_properties"),
)
else:
raise ValueError(f"Unknown connection_type: {connection_type}")
@@ -304,6 +318,9 @@ async def connect_async(
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
manifest_enabled: bool = False,
namespace_client_properties: Optional[Dict[str, str]] = None,
oauth_config=None,
) -> AsyncConnection:
"""Connect to a LanceDB database.
@@ -343,6 +360,13 @@ async def connect_async(
cache sizes for index and metadata caches, which can significantly
impact memory use and performance. They can also be re-used across
multiple connections to share the same cache state.
manifest_enabled : bool, default False
When true for local/native connections, use directory namespace
manifests as the source of truth for table metadata. Existing
directory-listed root tables are migrated into the manifest on access.
namespace_client_properties : dict, optional
Additional directory namespace client properties to use with
``manifest_enabled=True``.
Examples
--------
@@ -385,6 +409,9 @@ async def connect_async(
client_config,
storage_options,
session,
manifest_enabled,
namespace_client_properties,
oauth_config,
)
)
@@ -412,13 +439,3 @@ __all__ = [
"Table",
"__version__",
]
def __warn_on_fork():
warnings.warn(
"lance is not fork-safe. If you are using multiprocessing, use spawn instead.",
)
if hasattr(os, "register_at_fork"):
os.register_at_fork(before=__warn_on_fork) # type: ignore[attr-defined]
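The new `manifest_enabled` / namespace arguments compose as follows; a minimal sketch under the validation rules above (the path and the `"root"` property key are illustrative, not prescriptive):

```python
# Hedged sketch of the new connect() flags; path and property key are
# illustrative.
import lancedb

# Manifest-backed local connection: table metadata comes from directory
# namespace manifests rather than plain directory listings.
db = lancedb.connect("/data/lance_db", manifest_enabled=True)

# namespace_client_properties without namespace_client_impl is only
# accepted when manifest_enabled=True, per the checks above.
db = lancedb.connect(
    "/data/lance_db",
    manifest_enabled=True,
    namespace_client_properties={"root": "/data/lance_db"},
)
```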

View File

@@ -12,6 +12,7 @@ from .index import (
LabelList,
HnswPq,
HnswSq,
HnswFlat,
FTS,
)
from lance_namespace import (
@@ -25,6 +26,7 @@ from .remote import ClientConfig
IvfHnswPq: type[HnswPq] = HnswPq
IvfHnswSq: type[HnswSq] = HnswSq
IvfHnswFlat: type[HnswFlat] = HnswFlat
class PyExpr:
"""A type-safe DataFusion expression node (Rust-side handle)."""
@@ -180,6 +182,7 @@ class Table:
IvfPq,
HnswPq,
HnswSq,
HnswFlat,
BTree,
Bitmap,
LabelList,
@@ -242,6 +245,9 @@ async def connect(
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
storage_options: Optional[Dict[str, str]],
session: Optional[Session],
manifest_enabled: bool = False,
namespace_client_properties: Optional[Dict[str, str]] = None,
oauth_config: Optional[Any] = None,
) -> Connection: ...
class RecordBatchStream:
@@ -440,7 +446,7 @@ class AsyncPermutationBuilder:
async def execute(self) -> Table: ...
def async_permutation_builder(
table: Table, dest_table_name: str
table: Table,
) -> AsyncPermutationBuilder: ...
def fts_query_to_json(query: Any) -> str: ...

View File

@@ -2,7 +2,9 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import asyncio
import os
import threading
import warnings
class BackgroundEventLoop:
@@ -13,6 +15,9 @@ class BackgroundEventLoop:
"""
def __init__(self):
self._start()
def _start(self):
self.loop = asyncio.new_event_loop()
self.thread = threading.Thread(
target=self.loop.run_forever,
@@ -31,3 +36,30 @@ class BackgroundEventLoop:
LOOP = BackgroundEventLoop()
_FORK_WARNED = False
def _reset_after_fork():
# Threads do not survive fork(), so the asyncio loop in LOOP.thread is
# dead in the child. Re-initialize the singleton in place so existing
# `from .background_loop import LOOP` references in other modules see
# the new state. The Rust-side tokio runtime is reset analogously by a
# pthread_atfork hook installed in the _lancedb extension.
LOOP._start()
global _FORK_WARNED
if not _FORK_WARNED:
_FORK_WARNED = True
warnings.warn(
"lancedb fork support is experimental: the internal async "
"runtime has been reset in the forked child, but a small chance "
"of deadlock remains if other state was mid-operation at fork "
"time. The 'forkserver' or 'spawn' multiprocessing start method "
"is likely a safer alternative.",
RuntimeWarning,
stacklevel=2,
)
if hasattr(os, "register_at_fork"):
os.register_at_fork(after_in_child=_reset_after_fork)
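A minimal sketch of what the hook above enables; POSIX-only (`fork` is unavailable on Windows), with an illustrative database path and table name:

```python
# Hedged sketch: with the "fork" start method, the child inherits the
# parent's connection; _reset_after_fork() rebuilds the background loop,
# so the query below warns (RuntimeWarning) but still works.
import multiprocessing as mp
import lancedb

db = lancedb.connect("/tmp/db")   # created in the parent, before fork
table = db.open_table("t")        # table name is illustrative

def child_query() -> None:
    print(table.count_rows())

if __name__ == "__main__":
    ctx = mp.get_context("fork")  # "spawn"/"forkserver" remain the safer choice
    p = ctx.Process(target=child_query)
    p.start()
    p.join()
```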

View File

@@ -590,8 +590,13 @@ class LanceDBConnection(DBConnection):
read_consistency_interval: Optional[timedelta] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
manifest_enabled: bool = False,
namespace_client_properties: Optional[Dict[str, str]] = None,
_inner: Optional[LanceDbConnection] = None,
):
self.storage_options = storage_options
self._manifest_enabled = manifest_enabled
self._namespace_client_properties = namespace_client_properties
if _inner is not None:
self._conn = _inner
self._cached_namespace_client = None
@@ -633,6 +638,8 @@ class LanceDBConnection(DBConnection):
None,
storage_options,
session,
manifest_enabled,
namespace_client_properties,
)
# TODO: It would be nice if we didn't store self.storage_options but it is
@@ -640,7 +647,6 @@ class LanceDBConnection(DBConnection):
# work because some paths like LanceDBConnection.from_inner will lose the
# storage_options. Also, this class really shouldn't be holding any state
# beyond _conn.
self.storage_options = storage_options
self._conn = AsyncConnection(LOOP.run(do_connect()))
self._cached_namespace_client: Optional[LanceNamespace] = None
@@ -677,6 +683,8 @@ class LanceDBConnection(DBConnection):
"connection_type": "local",
"uri": self.uri,
"storage_options": self.storage_options,
"manifest_enabled": self._manifest_enabled,
"namespace_client_properties": self._namespace_client_properties,
"read_consistency_interval_seconds": (
rci.total_seconds() if rci else None
),

View File

@@ -1,201 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
"""Full text search index using tantivy-py"""
import os
from typing import List, Tuple, Optional
import pyarrow as pa
try:
import tantivy
except ImportError:
raise ImportError(
"Please install tantivy-py `pip install tantivy` to use the full text search feature." # noqa: E501
)
from .table import LanceTable
def create_index(
index_path: str,
text_fields: List[str],
ordering_fields: Optional[List[str]] = None,
tokenizer_name: str = "default",
) -> tantivy.Index:
"""
Create a new Index (not populated)
Parameters
----------
index_path : str
Path to the index directory
text_fields : List[str]
List of text fields to index
ordering_fields: List[str]
List of unsigned type fields to order by at search time
tokenizer_name : str, default "default"
The tokenizer to use
Returns
-------
index : tantivy.Index
The index object (not yet populated)
"""
if ordering_fields is None:
ordering_fields = []
# Declaring our schema.
schema_builder = tantivy.SchemaBuilder()
# special field that we'll populate with row_id
schema_builder.add_integer_field("doc_id", stored=True)
# data fields
for name in text_fields:
schema_builder.add_text_field(name, stored=True, tokenizer_name=tokenizer_name)
if ordering_fields:
for name in ordering_fields:
schema_builder.add_unsigned_field(name, fast=True)
schema = schema_builder.build()
os.makedirs(index_path, exist_ok=True)
index = tantivy.Index(schema, path=index_path)
return index
def populate_index(
index: tantivy.Index,
table: LanceTable,
fields: List[str],
writer_heap_size: Optional[int] = None,
ordering_fields: Optional[List[str]] = None,
) -> int:
"""
Populate an index with data from a LanceTable
Parameters
----------
index : tantivy.Index
The index object
table : LanceTable
The table to index
fields : List[str]
List of fields to index
writer_heap_size : int
The writer heap size in bytes, defaults to 1GB
Returns
-------
int
The number of rows indexed
"""
if ordering_fields is None:
ordering_fields = []
writer_heap_size = writer_heap_size or 1024 * 1024 * 1024
# first check the fields exist and are string or large string type
nested = []
for name in fields:
try:
f = table.schema.field(name) # raises KeyError if not found
except KeyError:
f = resolve_path(table.schema, name)
nested.append(name)
if not pa.types.is_string(f.type) and not pa.types.is_large_string(f.type):
raise TypeError(f"Field {name} is not a string type")
# create a tantivy writer
writer = index.writer(heap_size=writer_heap_size)
# write data into index
dataset = table.to_lance()
row_id = 0
max_nested_level = 0
if len(nested) > 0:
max_nested_level = max([len(name.split(".")) for name in nested])
for b in dataset.to_batches(columns=fields + ordering_fields):
if max_nested_level > 0:
b = pa.Table.from_batches([b])
for _ in range(max_nested_level - 1):
b = b.flatten()
for i in range(b.num_rows):
doc = tantivy.Document()
for name in fields:
value = b[name][i].as_py()
if value is not None:
doc.add_text(name, value)
for name in ordering_fields:
value = b[name][i].as_py()
if value is not None:
doc.add_unsigned(name, value)
if not doc.is_empty:
doc.add_integer("doc_id", row_id)
writer.add_document(doc)
row_id += 1
# commit changes
writer.commit()
return row_id
def resolve_path(schema, field_name: str) -> pa.Field:
"""
Resolve a nested field path to a list of field names
Parameters
----------
field_name : str
The field name to resolve
Returns
-------
List[str]
The resolved path
"""
path = field_name.split(".")
field = schema.field(path.pop(0))
for segment in path:
if pa.types.is_struct(field.type):
field = field.type.field(segment)
else:
raise KeyError(f"field {field_name} not found in schema {schema}")
return field
def search_index(
index: tantivy.Index, query: str, limit: int = 10, ordering_field=None
) -> Tuple[Tuple[int], Tuple[float]]:
"""
Search an index for a query
Parameters
----------
index : tantivy.Index
The index object
query : str
The query string
limit : int
The maximum number of results to return
Returns
-------
ids_and_score: list[tuple[int], tuple[float]]
A tuple of two tuples, the first containing the document ids
and the second containing the scores
"""
searcher = index.searcher()
query = index.parse_query(query)
# get top results
if ordering_field:
results = searcher.search(query, limit, order_by_field=ordering_field)
else:
results = searcher.search(query, limit)
if results.count == 0:
return tuple(), tuple()
return tuple(
zip(
*[
(searcher.doc(doc_address)["doc_id"][0], score)
for score, doc_address in results.hits
]
)
)

View File

@@ -7,6 +7,7 @@ from typing import Literal, Optional
from ._lancedb import (
IndexConfig,
)
from .types import BaseTokenizerType
lang_mapping = {
"ar": "Arabic",
@@ -111,8 +112,12 @@ class FTS:
- "simple": Splits text by whitespace and punctuation.
- "whitespace": Split text by whitespace, but not punctuation.
- "raw": No tokenization. The entire text is treated as a single token.
- "ngram": N-gram tokenizer for substring-style matching.
- "jieba/*": Jieba tokenizer loaded from Lance's language model home.
- "lindera/*": Lindera tokenizer loaded from Lance's language model home.
language : str, default "English"
The language to use for tokenization.
The language to use for stemming and stop-word removal. For CJK text, use a
model-backed base tokenizer such as ``jieba/*`` or ``lindera/*`` instead of
this parameter.
max_token_length : int, default 40
The maximum token length to index. Tokens longer than this length will be
ignored.
@@ -127,10 +132,17 @@ class FTS:
ascii_folding : bool, default True
Whether to fold ASCII characters. This converts accented characters to
their ASCII equivalent. For example, "café" would be converted to "cafe".
Notes
-----
Model-backed tokenizers such as ``jieba/default`` and ``lindera/ipadic``
require tokenizer models in Lance's language model home. Set
``LANCE_LANGUAGE_MODEL_HOME`` to override the default platform data
directory under ``lance/language_models``.
"""
with_position: bool = False
base_tokenizer: Literal["simple", "raw", "whitespace"] = "simple"
base_tokenizer: BaseTokenizerType = "simple"
language: str = "English"
max_token_length: Optional[int] = 40
lower_case: bool = True
@@ -376,9 +388,98 @@ class HnswSq:
target_partition_size: Optional[int] = None
@dataclass
class HnswFlat:
"""Describe a HNSW-FLAT index configuration.
HNSW-FLAT is Hierarchical Navigable Small World (HNSW) without quantization.
It stores raw vectors in the HNSW graph, providing the highest recall among
the IVF_HNSW family at the cost of more memory and disk space compared to
:class:`HnswSq` or :class:`HnswPq`.
Parameters
----------
distance_type: str, default "l2"
The distance metric used to train the index.
The following distance types are available:
"l2" - Euclidean distance. This is a very common distance metric that
accounts for both magnitude and direction when determining the distance
between vectors. l2 distance has a range of [0, ∞).
"cosine" - Cosine distance. Cosine distance is a distance metric
calculated from the cosine similarity between two vectors. Cosine
similarity is a measure of similarity between two non-zero vectors of an
inner product space. It is defined to equal the cosine of the angle
between them. Unlike l2, the cosine distance is not affected by the
magnitude of the vectors. Cosine distance has a range of [0, 2].
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
l2 norm is 1), then dot distance is equivalent to the cosine distance.
num_partitions, default sqrt(num_rows)
The number of IVF partitions to create.
For HNSW, we recommend a small number of partitions. Setting this to 1
works well for most tables. For very large tables, training just one HNSW
graph will require too much memory. Each partition becomes its own HNSW
graph, so setting this value higher reduces the peak memory use of
training.
max_iterations, default 50
Max iterations to train kmeans.
When training an IVF index we use kmeans to calculate the partitions.
This parameter controls how many iterations of kmeans to run.
sample_rate, default 256
The rate used to calculate the number of training vectors for kmeans.
m, default 20
The number of neighbors to select for each vector in the HNSW graph.
This value controls the tradeoff between search speed and accuracy.
The higher the value the more accurate the search but the slower it
will be.
ef_construction, default 300
The number of candidates to evaluate during the construction of the HNSW
graph.
This value controls the tradeoff between build speed and accuracy.
The higher the value, the more accurate the build, but the slower it will
be. 150 to 300 is the typical range; 100 is the minimum for good quality
search results, and in most cases there is no benefit beyond 500. This
value should not be lower than the `ef` used in the search phase.
target_partition_size, default 1,048,576
The target size of each partition.
"""
distance_type: Literal["l2", "cosine", "dot"] = "l2"
num_partitions: Optional[int] = None
max_iterations: int = 50
sample_rate: int = 256
m: int = 20
ef_construction: int = 300
target_partition_size: Optional[int] = None
# Backwards-compatible aliases
IvfHnswPq = HnswPq
IvfHnswSq = HnswSq
IvfHnswFlat = HnswFlat
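Both dispatch paths for the new index type, as wired up in this PR; a hedged sketch with illustrative table and column names:

```python
import lancedb
from lancedb.index import HnswFlat

db = lancedb.connect("/tmp/db")
table = db.open_table("vectors")

# String dispatch (sync API), mirroring the IVF_HNSW_SQ call shape:
table.create_index(
    metric="cosine",
    index_type="IVF_HNSW_FLAT",
    num_partitions=1,  # a single HNSW graph works well for most tables
)

# Config-object dispatch (async API):
#   await async_table.create_index(
#       "vector", config=HnswFlat(distance_type="cosine", num_partitions=1)
#   )
```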
@dataclass
@@ -698,11 +799,13 @@ __all__ = [
"IvfPq",
"IvfHnswPq",
"IvfHnswSq",
"IvfHnswFlat",
"IvfSq",
"IvfRq",
"IvfFlat",
"HnswPq",
"HnswSq",
"HnswFlat",
"IndexConfig",
"FTS",
"Bitmap",

View File

@@ -1,11 +1,12 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
from deprecation import deprecated
from lancedb import AsyncConnection, DBConnection
import pyarrow as pa
import copy
import json
from deprecation import deprecated
import pyarrow as pa
from ._lancedb import async_permutation_builder, PermutationReader
from .table import LanceTable
from .background_loop import LOOP
@@ -36,10 +37,7 @@ class PermutationBuilder:
be referenced by name in the future. If names are not provided then they can only
be referenced by their ordinal index. There is no requirement to name every split.
By default, the permutation will be stored in memory and will be lost when the
program exits. To persist the permutation (for very large datasets or to share
the permutation across multiple workers) use the [persist](#persist) method to
create a permanent table.
The permutation is stored in memory and will be lost when the program exits.
"""
def __init__(self, table: LanceTable):
@@ -51,15 +49,6 @@ class PermutationBuilder:
"""
self._async = async_permutation_builder(table)
def persist(
self, database: Union[DBConnection, AsyncConnection], table_name: str
) -> "PermutationBuilder":
"""
Persist the permutation to the given database.
"""
self._async.persist(database, table_name)
return self
def split_random(
self,
*,
@@ -380,20 +369,44 @@ class Permutation:
def __init__(
self,
reader: PermutationReader,
base_table: LanceTable,
permutation_table: Optional[LanceTable],
split: int,
selection: dict[str, str],
batch_size: int,
transform_fn: Callable[[pa.RecordBatch], Any],
offset: Optional[int] = None,
limit: Optional[int] = None,
connection_factory: Optional[Callable[[str], LanceTable]] = None,
_reader: Optional[PermutationReader] = None,
):
"""
Internal constructor. Use [from_tables](#from_tables) instead.
"""
assert reader is not None, "reader is required"
assert base_table is not None, "base_table is required"
assert selection is not None, "selection is required"
self.reader = reader
self.base_table = base_table
self.permutation_table = permutation_table
self.split = split
self.selection = selection
self.transform_fn = transform_fn
self.batch_size = batch_size
self.offset = offset
self.limit = limit
self.connection_factory = connection_factory
if _reader is None:
_reader = LOOP.run(self._build_reader())
self.reader: PermutationReader = _reader
async def _build_reader(self) -> PermutationReader:
reader = await PermutationReader.from_tables(
self.base_table, self.permutation_table, self.split
)
if self.offset is not None:
reader = await reader.with_offset(self.offset)
if self.limit is not None:
reader = await reader.with_limit(self.limit)
return reader
def _with_selection(self, selection: dict[str, str]) -> "Permutation":
"""
@@ -402,21 +415,97 @@ class Permutation:
Does no validation of the selection; it replaces it entirely. This is not
intended for public use.
"""
return Permutation(self.reader, selection, self.batch_size, self.transform_fn)
def _with_reader(self, reader: PermutationReader) -> "Permutation":
"""
Creates a new permutation with the given reader
This is an internal method and should not be used directly.
"""
return Permutation(reader, self.selection, self.batch_size, self.transform_fn)
new = copy.copy(self)
new.selection = selection
return new
def with_batch_size(self, batch_size: int) -> "Permutation":
"""
Creates a new permutation with the given batch size
"""
return Permutation(self.reader, self.selection, batch_size, self.transform_fn)
new = copy.copy(self)
new.batch_size = batch_size
return new
def with_connection_factory(
self, connection_factory: Callable[[str], LanceTable]
) -> "Permutation":
"""
Creates a new permutation that will use ``connection_factory`` to reopen
the base table when this permutation is unpickled in a worker process.
The factory is a callable that takes a single argument (the base table
name) and returns a [LanceTable]. It must be picklable; the worker
pickles it via standard ``pickle`` and calls it to recover the base
table. In practice, picklable callables are top-level (module-level)
functions, ``functools.partial`` of such functions, or instances of
picklable classes implementing ``__call__``. Lambdas and closures over
local variables don't pickle with the default protocol.
Setting a factory is necessary when the URI alone is not enough to
re-open the connection — most importantly for LanceDB Cloud (``db://``)
connections, where ``api_key`` and ``region`` aren't recoverable from
the connection object after construction.
For local file or cloud-storage paths the factory is optional: if not
set, ``__getstate__`` falls back to capturing
``(uri, storage_options, namespace_path)`` and re-opening via
``lancedb.connect(uri, storage_options=...)``.
Examples
--------
Basic native (file-system path), parameterized via ``functools.partial``::
import functools, lancedb
from lancedb.permutation import Permutation
def open_native_table(uri: str, table_name: str):
return lancedb.connect(uri).open_table(table_name)
factory = functools.partial(open_native_table, "/data/lance_db")
permutation = Permutation.identity(
factory("training")
).with_connection_factory(factory)
Native via :func:`lancedb.connect_namespace` (e.g. a directory- or
REST-backed namespace client). The factory takes the
implementation name and properties dict as partial-bound args so
the worker can rebuild the same namespace connection::
def open_via_namespace(
impl: str, properties: dict[str, str], table_name: str,
):
return lancedb.connect_namespace(impl, properties).open_table(
table_name,
)
factory = functools.partial(
open_via_namespace,
"dir",
{"root": "/data/lance_db"},
)
LanceDB Cloud, reading credentials from env vars at worker startup
so secrets aren't pickled into the dataset::
import os, lancedb
def open_remote_table(table_name: str):
db = lancedb.connect(
"db://my-database",
api_key=os.environ["LANCEDB_API_KEY"],
region=os.environ.get("LANCEDB_REGION", "us-east-1"),
)
return db.open_table(table_name)
permutation = Permutation.identity(
open_remote_table("training")
).with_connection_factory(open_remote_table)
"""
assert connection_factory is not None, "connection_factory is required"
new = copy.copy(self)
new.connection_factory = connection_factory
return new
@classmethod
def identity(cls, table: LanceTable) -> "Permutation":
@@ -489,11 +578,126 @@ class Permutation:
schema = await reader.output_schema(None)
initial_selection = {name: name for name in schema.names}
return cls(
reader, initial_selection, DEFAULT_BATCH_SIZE, Transforms.arrow2python
base_table,
permutation_table,
split,
initial_selection,
DEFAULT_BATCH_SIZE,
Transforms.arrow2python,
_reader=reader,
)
return LOOP.run(do_from_tables())
def __getstate__(self) -> dict[str, Any]:
"""Build a picklable state dict for this permutation.
The base table is captured either via a user-supplied
``connection_factory`` (see [with_connection_factory]) or, as a
fallback, by introspecting ``(uri, storage_options, namespace_path)``
on the connection. The permutation table — always an in-memory
LanceDB table — is captured as a pyarrow Table (which pickles via
Arrow IPC natively). The reader is dropped from the wire format;
``__setstate__`` rebuilds it from the restored tables.
"""
permutation_data: Optional[pa.Table] = None
if self.permutation_table is not None:
permutation_data = self.permutation_table.to_arrow()
common = {
"base_table_name": self.base_table.name,
"permutation_data": permutation_data,
"split": self.split,
"selection": self.selection,
"batch_size": self.batch_size,
"transform_fn": self.transform_fn,
"offset": self.offset,
"limit": self.limit,
"connection_factory": self.connection_factory,
}
if self.connection_factory is not None:
# The factory carries enough state to recover the base table on
# its own; we don't need to capture the URI / storage options /
# namespace from the existing connection.
return common
# URI-introspection fallback: only viable for native (OSS) connections
# where (uri, storage_options) is enough to reopen. Remote / cloud
# connections don't expose recoverable api_key / region — those users
# must call with_connection_factory().
try:
base_uri = self.base_table._conn.uri
storage_options = self.base_table._conn.storage_options
except AttributeError as e:
raise ValueError(
"Cannot pickle this Permutation: the base table's connection "
"does not expose a uri/storage_options, which usually means it "
"is a remote (LanceDB Cloud) connection. Call "
"Permutation.with_connection_factory(...) first to provide a "
"picklable callable that re-opens the base table from a worker "
"process."
) from e
if base_uri.startswith("memory://"):
# In-memory base tables don't exist in any worker process by
# default, so dump the entire base table into the pickle. This
# can be expensive for large datasets — users with large
# in-memory base tables should either persist them or set a
# connection_factory.
return {
**common,
"base_table_data": self.base_table.to_arrow(),
}
return {
**common,
"base_table_uri": base_uri,
"base_table_namespace": self.base_table._namespace_path,
"base_table_storage_options": storage_options,
}
def __setstate__(self, state: dict[str, Any]) -> None:
from . import connect
connection_factory = state["connection_factory"]
if connection_factory is not None:
base_table = connection_factory(state["base_table_name"])
elif "base_table_data" in state:
# In-memory base table inlined into the pickle; rebuild the same
# way we rebuild the in-memory permutation table.
mem_db = connect("memory://")
base_table = mem_db.create_table(
state["base_table_name"], state["base_table_data"]
)
else:
base_db = connect(
state["base_table_uri"],
storage_options=state["base_table_storage_options"],
)
base_table = base_db.open_table(
state["base_table_name"],
namespace_path=state["base_table_namespace"] or None,
)
permutation_table: Optional[LanceTable] = None
if state["permutation_data"] is not None:
mem_db = connect("memory://")
permutation_table = mem_db.create_table(
"permutation", state["permutation_data"]
)
self.base_table = base_table
self.permutation_table = permutation_table
self.split = state["split"]
self.selection = state["selection"]
self.batch_size = state["batch_size"]
self.transform_fn = state["transform_fn"]
self.offset = state["offset"]
self.limit = state["limit"]
self.connection_factory = connection_factory
self.reader = LOOP.run(self._build_reader())
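Together these hooks give `Permutation` a plain pickle round-trip; a hedged sketch assuming a local base table that is reopenable by URI (names and paths illustrative):

```python
import pickle
import lancedb
from lancedb.permutation import Permutation

db = lancedb.connect("/data/lance_db")
perm = Permutation.identity(db.open_table("training")).with_batch_size(64)

payload = pickle.dumps(perm)      # __getstate__ drops the reader, keeps URIs
restored = pickle.loads(payload)  # __setstate__ reopens tables, rebuilds reader
```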
@property
def schema(self) -> pa.Schema:
async def do_output_schema():
@@ -760,7 +964,9 @@ class Permutation:
for expensive operations such as image decoding.
"""
assert transform is not None, "transform is required"
return Permutation(self.reader, self.selection, self.batch_size, transform)
new = copy.copy(self)
new.transform_fn = transform
return new
def __getitem__(self, index: int) -> Any:
"""
@@ -795,12 +1001,10 @@ class Permutation:
"""
Skip the first `skip` rows of the permutation
"""
async def do_with_skip():
reader = await self.reader.with_offset(skip)
return self._with_reader(reader)
return LOOP.run(do_with_skip())
new = copy.copy(self)
new.offset = skip
new.reader = LOOP.run(new._build_reader())
return new
@deprecated(details="Use with_take instead")
def take(self, limit: int) -> "Permutation":
@@ -818,12 +1022,10 @@ class Permutation:
"""
Limit the permutation to `limit` rows (following any `skip`)
"""
async def do_with_take():
reader = await self.reader.with_limit(limit)
return self._with_reader(reader)
return LOOP.run(do_with_take())
new = copy.copy(self)
new.limit = limit
new.reader = LOOP.run(new._build_reader())
return new
@deprecated(details="Use with_repeat instead")
def repeat(self, times: int) -> "Permutation":

View File

@@ -25,7 +25,6 @@ import deprecation
import numpy as np
import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.fs as pa_fs
import pydantic
from lancedb.pydantic import PYDANTIC_VERSION
@@ -1526,9 +1525,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
return self._table._output_schema(self.to_query_object())
def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
path, fs, exist = self._table._get_fts_index_path()
if exist:
return self.tantivy_to_arrow()
self._table._ensure_no_legacy_fts_index()
query = self._query
if self._phrase_query:
@@ -1552,90 +1549,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
):
raise NotImplementedError("to_batches on an FTS query")
def tantivy_to_arrow(self) -> pa.Table:
try:
import tantivy
except ImportError:
raise ImportError(
"Please install tantivy-py `pip install tantivy` to use the full text search feature." # noqa: E501
)
from .fts import search_index
# get the index path
path, fs, exist = self._table._get_fts_index_path()
# check if the index exist
if not exist:
raise FileNotFoundError(
"Fts index does not exist. "
"Please first call table.create_fts_index(['<field_names>']) to "
"create the fts index."
)
# Check that we are on local filesystem
if not isinstance(fs, pa_fs.LocalFileSystem):
raise NotImplementedError(
"Tantivy-based full text search "
"is only supported on the local filesystem"
)
# open the index
index = tantivy.Index.open(path)
# get the scores and doc ids
query = self._query
if self._phrase_query:
query = query.replace('"', "'")
query = f'"{query}"'
limit = self._limit if self._limit is not None else 10
row_ids, scores = search_index(
index, query, limit, ordering_field=self.ordering_field_name
)
if len(row_ids) == 0:
empty_schema = pa.schema([pa.field("_score", pa.float32())])
return pa.Table.from_batches([], schema=empty_schema)
scores = pa.array(scores)
output_tbl = self._table.to_lance().take(row_ids, columns=self._columns)
output_tbl = output_tbl.append_column("_score", scores)
# this needs to match vector search results which are uint64
row_ids = pa.array(row_ids, type=pa.uint64())
if self._where is not None:
tmp_name = "__lancedb__duckdb__indexer__"
output_tbl = output_tbl.append_column(
tmp_name, pa.array(range(len(output_tbl)))
)
try:
# TODO would be great to have Substrait generate pyarrow compute
# expressions or conversely have pyarrow support SQL expressions
# using Substrait
import duckdb
indexer = duckdb.sql(
f"SELECT {tmp_name} FROM output_tbl WHERE {self._where}"
).to_arrow_table()[tmp_name]
output_tbl = output_tbl.take(indexer).drop([tmp_name])
row_ids = row_ids.take(indexer)
except ImportError:
import tempfile
import lance
# TODO Use "memory://" instead once that's supported
with tempfile.TemporaryDirectory() as tmp:
ds = lance.write_dataset(output_tbl, tmp)
output_tbl = ds.to_table(filter=self._where)
indexer = output_tbl[tmp_name]
row_ids = row_ids.take(indexer)
output_tbl = output_tbl.drop([tmp_name])
if self._with_row_id:
output_tbl = output_tbl.append_column("_rowid", row_ids)
if self._reranker is not None:
output_tbl = self._reranker.rerank_fts(self._query, output_tbl)
return output_tbl
def rerank(self, reranker: Reranker) -> LanceFtsQueryBuilder:
"""Rerank the results using the specified reranker.
@@ -1730,7 +1643,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
def _validate_query(self, query, vector=None, text=None):
if query is not None and (vector is not None or text is not None):
raise ValueError(
"You can either provide a string query in search() method"
"You can either provide a string query in search() method "
"or set `vector()` and `text()` explicitly for hybrid search."
"But not both."
)

View File

@@ -9,6 +9,7 @@ from typing import List, Optional
from lancedb import __version__
from .header import HeaderProvider
from .oauth import OAuthConfig, OAuthFlowType
__all__ = [
"TimeoutConfig",
@@ -16,6 +17,8 @@ __all__ = [
"TlsConfig",
"ClientConfig",
"HeaderProvider",
"OAuthConfig",
"OAuthFlowType",
]

View File

@@ -0,0 +1,90 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional
class OAuthFlowType(str, Enum):
"""OAuth authentication flow types."""
CLIENT_CREDENTIALS = "client_credentials"
"""Client Credentials grant (service-to-service / M2M)."""
AUTHORIZATION_CODE_PKCE = "authorization_code_pkce"
"""Authorization Code with PKCE (interactive browser-based auth)."""
DEVICE_CODE = "device_code"
"""Device Code grant (CLI / headless environments)."""
AZURE_MANAGED_IDENTITY = "azure_managed_identity"
"""Azure Managed Identity via IMDS."""
WORKLOAD_IDENTITY = "workload_identity"
"""Workload Identity Federation (K8s, GitHub Actions)."""
@dataclass
class OAuthConfig:
"""OAuth configuration for LanceDB authentication.
All token acquisition and refresh is handled in the Rust layer.
This config is passed through to Rust via PyO3.
Parameters
----------
issuer_url : str
OIDC issuer URL or OAuth authority URL.
For Azure: ``https://login.microsoftonline.com/{tenant_id}/v2.0``
client_id : str
Application / Client ID.
scopes : List[str]
OAuth scopes to request.
For Azure: ``["api://{app_id}/.default"]``
flow : OAuthFlowType
Authentication flow to use. Default: CLIENT_CREDENTIALS.
client_secret : Optional[str]
Client secret (required for CLIENT_CREDENTIALS).
redirect_uri : Optional[str]
Redirect URI for AUTHORIZATION_CODE_PKCE flow.
callback_port : Optional[int]
Port for local HTTP callback server (AUTHORIZATION_CODE_PKCE, default: 8400).
managed_identity_client_id : Optional[str]
Client ID for user-assigned managed identity (AZURE_MANAGED_IDENTITY).
token_file : Optional[str]
Path to federated token file (WORKLOAD_IDENTITY).
refresh_buffer_secs : Optional[int]
Seconds before expiry to trigger proactive refresh (default: 300).
Examples
--------
Client Credentials (service-to-service):
>>> config = OAuthConfig(
... issuer_url="https://login.microsoftonline.com/{tenant}/v2.0",
... client_id="app-id",
... client_secret="secret",
... scopes=["api://lancedb-api/.default"],
... )
Azure Managed Identity:
>>> config = OAuthConfig(
... issuer_url="https://login.microsoftonline.com/{tenant}/v2.0",
... client_id="app-id",
... scopes=["api://lancedb-api/.default"],
... flow=OAuthFlowType.AZURE_MANAGED_IDENTITY,
... )
"""
issuer_url: str
client_id: str
scopes: List[str]
flow: OAuthFlowType = OAuthFlowType.CLIENT_CREDENTIALS
client_secret: Optional[str] = None
redirect_uri: Optional[str] = None
callback_port: Optional[int] = None
managed_identity_client_id: Optional[str] = None
token_file: Optional[str] = None
refresh_buffer_secs: Optional[int] = None
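A sketch of wiring this config into a connection through the new `connect_async` parameter; the `db://` URI and credential values are placeholders:

```python
import asyncio
import lancedb
from lancedb.remote import OAuthConfig

async def main() -> None:
    config = OAuthConfig(
        issuer_url="https://login.microsoftonline.com/my-tenant/v2.0",
        client_id="app-id",
        client_secret="secret",
        scopes=["api://lancedb-api/.default"],
    )
    db = await lancedb.connect_async("db://my-database", oauth_config=config)
    print(await db.table_names())

asyncio.run(main())
```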

View File

@@ -22,6 +22,7 @@ from lancedb.index import (
FTS,
BTree,
Bitmap,
HnswFlat,
HnswSq,
IvfFlat,
IvfPq,
@@ -39,6 +40,7 @@ from lancedb.table import _normalize_progress
from ..query import LanceVectorQueryBuilder, LanceQueryBuilder, LanceTakeQueryBuilder
from ..table import AsyncTable, IndexStatistics, Query, Table, Tags
from ..types import BaseTokenizerType
class RemoteTable(Table):
@@ -167,7 +169,7 @@ class RemoteTable(Table):
wait_timeout: Optional[timedelta] = None,
with_position: bool = False,
# tokenizer configs:
base_tokenizer: str = "simple",
base_tokenizer: BaseTokenizerType = "simple",
language: str = "English",
max_token_length: Optional[int] = 40,
lower_case: bool = True,
@@ -284,13 +286,15 @@ class RemoteTable(Table):
)
elif index_type == "IVF_HNSW_SQ":
config = HnswSq(distance_type=metric, num_partitions=num_partitions)
elif index_type == "IVF_HNSW_FLAT":
config = HnswFlat(distance_type=metric, num_partitions=num_partitions)
elif index_type == "IVF_FLAT":
config = IvfFlat(distance_type=metric, num_partitions=num_partitions)
else:
raise ValueError(
f"Unknown vector index type: {index_type}. Valid options are"
" 'IVF_FLAT', 'IVF_PQ', 'IVF_RQ', 'IVF_SQ',"
" 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
" 'IVF_HNSW_PQ', 'IVF_HNSW_SQ', 'IVF_HNSW_FLAT'"
)
LOOP.run(

View File

@@ -57,6 +57,7 @@ from .index import (
LabelList,
HnswPq,
HnswSq,
HnswFlat,
FTS,
)
from .merge import LanceMergeInsertBuilder
@@ -86,6 +87,59 @@ from .util import (
)
from .index import lang_mapping
_MODEL_BACKED_TOKENIZER_PREFIXES = ("jieba", "lindera")
_MODEL_BACKED_TOKENIZER_ERRORS = (
"unknown base tokenizer",
"Invalid directory path:",
"Failed to load Jieba",
"Failed to load tokenizer config",
"Failed to initialize default tokenizer",
)
def _add_unique_note(exception: BaseException, note: str) -> None:
existing_notes = getattr(exception, "__notes__", ()) or ()
message = (
exception.args[0]
if exception.args and isinstance(exception.args[0], str)
else ""
)
if note not in existing_notes and note not in message:
add_note(exception, note)
def _is_model_backed_tokenizer(base_tokenizer: str) -> bool:
return any(
base_tokenizer == prefix or base_tokenizer.startswith(f"{prefix}/")
for prefix in _MODEL_BACKED_TOKENIZER_PREFIXES
)
def _maybe_add_fts_error_note(
exception: BaseException, *, base_tokenizer: str, language: Optional[str] = None
) -> None:
message = str(exception)
if language is not None and "not support the requested language" in message:
supported_langs = ", ".join(lang_mapping.values())
_add_unique_note(exception, f"Supported languages: {supported_langs}")
return
if not _is_model_backed_tokenizer(base_tokenizer):
return
if not any(marker in message for marker in _MODEL_BACKED_TOKENIZER_ERRORS):
return
_add_unique_note(
exception,
"Model-backed tokenizers such as 'jieba/default' and 'lindera/ipadic' "
"require tokenizer models in Lance's language model home. Set "
"LANCE_LANGUAGE_MODEL_HOME to override the default platform data "
"directory under 'lance/language_models'. Expected layouts include "
"'<model-home>/jieba/default/...' and "
"'<model-home>/lindera/ipadic/...'.",
)
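For reference, a sketch of the happy path these notes guard; it assumes jieba model files are already installed under the language model home described in the note text:

```python
import os
import lancedb

# Must point at a directory with the expected layout, e.g.
#   /data/lance/language_models/jieba/default/...
os.environ["LANCE_LANGUAGE_MODEL_HOME"] = "/data/lance/language_models"

table = lancedb.connect("/tmp/db").open_table("docs")  # names illustrative
table.create_fts_index("text", base_tokenizer="jieba/default")
table.search("光明", query_type="fts").limit(5).to_list()
```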
if TYPE_CHECKING:
from .db import LanceDBConnection
@@ -943,29 +997,29 @@ class Table(ABC):
Parameters
----------
field_names: str or list of str
The name(s) of the field to index.
If ``use_tantivy`` is False (default), only a single field name
(str) is supported. To index multiple fields, create a separate
FTS index for each field.
The name of the field to index. Native FTS indexes can only be
created on a single field at a time. To search over multiple text
fields, create a separate FTS index for each field.
replace: bool, default False
If True, replace the existing index if it exists. Note that this is
not yet an atomic operation; the index will be temporarily
unavailable while the new index is being created.
writer_heap_size: int, default 1GB
Only available with use_tantivy=True
Deprecated legacy Tantivy parameter. Any value other than the
default raises an error.
ordering_field_names:
A list of unsigned type fields to index to optionally order
results on at search time.
only available with use_tantivy=True
Deprecated legacy Tantivy parameter. Setting this raises an error.
tokenizer_name: str, default "default"
The tokenizer to use for the index. Can be "raw", "default" or the 2 letter
language code followed by "_stem". So for english it would be "en_stem".
For available languages see: https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html
A compatibility alias for native tokenizer configs. Can be "raw",
"default", or a two-letter language code followed by "_stem" (for
English: "en_stem"). For new native FTS indexes, prefer
``base_tokenizer``; this legacy alias does not expose model-backed
tokenizer names such as ``jieba/default`` or ``lindera/ipadic``.
use_tantivy: bool, default False
If True, use the legacy full-text search implementation based on tantivy.
If False, use the new full-text search implementation based on lance-index.
Deprecated legacy Tantivy parameter. Setting this to True raises an
error.
with_position: bool, default False
Only available with use_tantivy=False
If False, do not store the positions of the terms in the text.
This can reduce the size of the index and improve indexing speed.
But it will raise an exception for phrase queries.
@@ -975,8 +1029,11 @@ class Table(ABC):
- "whitespace": Split text by whitespace, but not punctuation.
- "raw": No tokenization. The entire text is treated as a single token.
- "ngram": N-Gram tokenizer.
- "jieba/*": Jieba tokenizer loaded from Lance's language model home.
- "lindera/*": Lindera tokenizer loaded from Lance's language model home.
language : str, default "English"
The language to use for tokenization.
The language to use for stemming and stop-word removal. For CJK text,
use a model-backed base tokenizer such as ``jieba/*`` or ``lindera/*``
instead of this parameter.
max_token_length : int, default 40
The maximum token length to index. Tokens longer than this length will be
ignored.
@@ -1002,6 +1059,13 @@ class Table(ABC):
The timeout to wait if indexing is asynchronous.
name: str, optional
The name of the index. If not provided, a default name will be generated.
Notes
-----
Model-backed tokenizers such as ``jieba/default`` and ``lindera/ipadic``
require tokenizer models in Lance's language model home. Set
``LANCE_LANGUAGE_MODEL_HOME`` to override the default platform data
directory under ``lance/language_models``.
"""
raise NotImplementedError
@@ -1746,6 +1810,16 @@ class Table(ABC):
index_exists = fs.get_file_info(path).type != pa_fs.FileType.NotFound
return (path, fs, index_exists)
def _ensure_no_legacy_fts_index(self):
path, _, exists = self._get_fts_index_path()
if exists:
raise ValueError(
"Legacy Tantivy FTS index detected at "
f"{path}. Tantivy-based FTS has been removed. "
"Delete the legacy index and recreate it with "
"table.create_fts_index(...)."
)
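When this error fires, the cleanup is a one-off; a hedged sketch using the internal helper defined above (`create_fts_index(..., replace=True)` performs the same cleanup itself, as shown later in this diff):

```python
import lancedb

table = lancedb.connect("/data/lance_db").open_table("docs")  # illustrative
path, fs, exists = table._get_fts_index_path()  # internal helper, see above
if exists:
    fs.delete_dir(path)          # drop the legacy Tantivy index directory
table.create_fts_index("text")   # rebuild with the native FTS implementation
```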
@abstractmethod
def uses_v2_manifest_paths(self) -> bool:
"""
@@ -2163,7 +2237,13 @@ class LanceTable(Table):
index_cache_size: Optional[int] = None,
num_bits: int = 8,
index_type: Literal[
"IVF_FLAT", "IVF_SQ", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
"IVF_FLAT",
"IVF_SQ",
"IVF_PQ",
"IVF_RQ",
"IVF_HNSW_SQ",
"IVF_HNSW_PQ",
"IVF_HNSW_FLAT",
] = "IVF_PQ",
max_iterations: int = 50,
sample_rate: int = 256,
@@ -2250,6 +2330,16 @@ class LanceTable(Table):
ef_construction=ef_construction,
target_partition_size=target_partition_size,
)
elif index_type == "IVF_HNSW_FLAT":
config = HnswFlat(
distance_type=metric,
num_partitions=num_partitions,
max_iterations=max_iterations,
sample_rate=sample_rate,
m=m,
ef_construction=ef_construction,
target_partition_size=target_partition_size,
)
else:
raise ValueError(f"Unknown index type {index_type}")
@@ -2405,41 +2495,57 @@ class LanceTable(Table):
prefix_only: bool = False,
name: Optional[str] = None,
):
if not use_tantivy:
if not isinstance(field_names, str):
raise ValueError(
"Native FTS indexes can only be created on a single field "
"at a time. To search over multiple text fields, create a "
"separate FTS index for each field."
)
self._ensure_no_legacy_fts_index()
if tokenizer_name is None:
tokenizer_configs = {
"base_tokenizer": base_tokenizer,
"language": language,
"with_position": with_position,
"max_token_length": max_token_length,
"lower_case": lower_case,
"stem": stem,
"remove_stop_words": remove_stop_words,
"ascii_folding": ascii_folding,
"ngram_min_length": ngram_min_length,
"ngram_max_length": ngram_max_length,
"prefix_only": prefix_only,
}
else:
tokenizer_configs = self.infer_tokenizer_configs(tokenizer_name)
config = FTS(
**tokenizer_configs,
if use_tantivy:
raise ValueError(
"Tantivy-based FTS has been removed. "
"Remove use_tantivy and recreate the index with native FTS."
)
if ordering_field_names is not None:
raise ValueError(
"ordering_field_names was only supported by the removed "
"Tantivy-based FTS implementation."
)
if writer_heap_size != 1024 * 1024 * 1024:
raise ValueError(
"writer_heap_size was only supported by the removed "
"Tantivy-based FTS implementation."
)
if not isinstance(field_names, str):
raise ValueError(
"Native FTS indexes can only be created on a single field "
"at a time. To search over multiple text fields, create a "
"separate FTS index for each field."
)
if "." in field_names:
raise ValueError(
"Native FTS indexes can only be created on top-level fields. "
f"Received nested field path: {field_names!r}."
)
# delete the existing legacy index if it exists
if replace:
path, fs, exist = self._get_fts_index_path()
if exist:
fs.delete_dir(path)
if tokenizer_name is None:
tokenizer_configs = {
"base_tokenizer": base_tokenizer,
"language": language,
"with_position": with_position,
"max_token_length": max_token_length,
"lower_case": lower_case,
"stem": stem,
"remove_stop_words": remove_stop_words,
"ascii_folding": ascii_folding,
"ngram_min_length": ngram_min_length,
"ngram_max_length": ngram_max_length,
"prefix_only": prefix_only,
}
else:
tokenizer_configs = self.infer_tokenizer_configs(tokenizer_name)
config = FTS(
**tokenizer_configs,
)
try:
LOOP.run(
self._table.create_index(
field_names,
@@ -2448,42 +2554,13 @@ class LanceTable(Table):
name=name,
)
)
return
from .fts import create_index, populate_index
if isinstance(field_names, str):
field_names = [field_names]
if isinstance(ordering_field_names, str):
ordering_field_names = [ordering_field_names]
path, fs, exist = self._get_fts_index_path()
if exist:
if not replace:
raise ValueError("Index already exists. Use replace=True to overwrite.")
fs.delete_dir(path)
if not isinstance(fs, pa_fs.LocalFileSystem):
raise NotImplementedError(
"Full-text search is only supported on the local filesystem"
except (ValueError, RuntimeError) as e:
_maybe_add_fts_error_note(
e,
base_tokenizer=config.base_tokenizer,
language=config.language,
)
if tokenizer_name is None:
tokenizer_name = "default"
index = create_index(
path,
field_names,
ordering_fields=ordering_field_names,
tokenizer_name=tokenizer_name,
)
populate_index(
index,
self,
field_names,
ordering_fields=ordering_field_names,
writer_heap_size=writer_heap_size,
)
raise e
@staticmethod
def infer_tokenizer_configs(tokenizer_name: str) -> dict:
@@ -3813,7 +3890,18 @@ class AsyncTable:
*,
replace: Optional[bool] = None,
config: Optional[
Union[IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
Union[
IvfFlat,
IvfPq,
IvfRq,
HnswPq,
HnswSq,
HnswFlat,
BTree,
Bitmap,
LabelList,
FTS,
]
] = None,
wait_timeout: Optional[timedelta] = None,
name: Optional[str] = None,
@@ -3860,6 +3948,7 @@ class AsyncTable:
IvfRq,
HnswPq,
HnswSq,
HnswFlat,
BTree,
Bitmap,
LabelList,
@@ -3879,11 +3968,13 @@ class AsyncTable:
name=name,
train=train,
)
except ValueError as e:
if "not support the requested language" in str(e):
supported_langs = ", ".join(lang_mapping.values())
help_msg = f"Supported languages: {supported_langs}"
add_note(e, help_msg)
except (ValueError, RuntimeError) as e:
if isinstance(config, FTS):
_maybe_add_fts_error_note(
e,
base_tokenizer=config.base_tokenizer,
language=config.language,
)
raise e
async def drop_index(self, name: str) -> None:
@@ -5028,6 +5119,7 @@ class IndexStatistics:
"IVF_RQ",
"IVF_HNSW_SQ",
"IVF_HNSW_PQ",
"IVF_HNSW_FLAT",
"FTS",
"BTREE",
"BITMAP",

View File

@@ -24,6 +24,7 @@ VectorIndexType = Literal[
"IVF_PQ",
"IVF_HNSW_SQ",
"IVF_HNSW_PQ",
"IVF_HNSW_FLAT",
"IVF_RQ",
]
ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"]
@@ -31,6 +32,7 @@ IndexType = Literal[
"IVF_PQ",
"IVF_HNSW_PQ",
"IVF_HNSW_SQ",
"IVF_HNSW_FLAT",
"IVF_SQ",
"FTS",
"BTREE",
@@ -40,4 +42,5 @@ IndexType = Literal[
]
# Tokenizer literals
BaseTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
BuiltinTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
# Union[...] rather than "|" so the alias also evaluates on Python 3.9
# (assumes Union is imported from typing alongside Literal).
BaseTokenizerType = Union[BuiltinTokenizerType, str]

View File

@@ -180,7 +180,7 @@ def test_fts_fuzzy_query():
),
mode="overwrite",
)
table.create_fts_index("text", use_tantivy=False, replace=True)
table.create_fts_index("text", replace=True)
results = table.search(MatchQuery("foo", "text", fuzziness=1)).to_pandas()
assert len(results) == 4
@@ -230,7 +230,7 @@ def test_fts_boost_query():
),
mode="overwrite",
)
table.create_fts_index("desc", use_tantivy=False, replace=True)
table.create_fts_index("desc", replace=True)
results = table.search(
BoostQuery(
@@ -265,7 +265,7 @@ def test_fts_boolean_query(tmp_path):
],
mode="overwrite",
)
table.create_fts_index("text", use_tantivy=False, replace=True)
table.create_fts_index("text", replace=True)
# SHOULD
results = table.search(
@@ -319,9 +319,7 @@ def test_fts_native():
],
)
# passing `use_tantivy=False` to use lance FTS index
# `use_tantivy=True` by default
table.create_fts_index("text", use_tantivy=False)
table.create_fts_index("text")
table.search("puppy").limit(10).select(["text"]).to_list()
# [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]
# ...
@@ -332,7 +330,6 @@ def test_fts_native():
# --8<-- [start:fts_config_folding]
table.create_fts_index(
"text",
use_tantivy=False,
language="French",
stem=True,
ascii_folding=True,
@@ -346,7 +343,7 @@ def test_fts_native():
table.search("puppy").limit(10).where("text='foo'", prefilter=False).to_list()
# --8<-- [end:fts_postfiltering]
# --8<-- [start:fts_with_position]
table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True)
table.create_fts_index("text", with_position=True, replace=True)
# --8<-- [end:fts_with_position]
# --8<-- [start:fts_incremental_index]
table.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}])

View File

@@ -0,0 +1,8 @@
我们 98740 r
都 202780 d
有 423765 v
光明 1219 n
的 318825 uj
前途 1263 n
前 62779 f
途 857 n

View File

@@ -0,0 +1,4 @@
segmenter:
mode: "normal"
dictionary:
path: "./python/tests/models/lindera/ipadic/main"

Binary file not shown.

View File

@@ -15,8 +15,7 @@ import pytest
from lancedb.pydantic import LanceModel, Vector
@pytest.mark.parametrize("use_tantivy", [True, False])
def test_basic(tmp_path, use_tantivy):
def test_basic(tmp_path):
db = lancedb.connect(tmp_path)
assert db.uri == str(tmp_path)
@@ -49,7 +48,7 @@ def test_basic(tmp_path, use_tantivy):
assert len(rs) == 1
assert rs["item"].iloc[0] == "foo"
table.create_fts_index("item", use_tantivy=use_tantivy)
table.create_fts_index("item")
rs = table.search("bar", query_type="fts").to_pandas()
assert len(rs) == 1
assert rs["item"].iloc[0] == "bar"

View File

@@ -15,7 +15,10 @@
# limitations under the License.
import os
import random
import shutil
from unittest import mock
from pathlib import Path
import zipfile
import lancedb as ldb
from lancedb.db import DBConnection
@@ -36,8 +39,7 @@ import pytest
import pytest_asyncio
from utils import exception_output
pytest.importorskip("lancedb.fts")
tantivy = pytest.importorskip("tantivy")
TEST_LANGUAGE_MODEL_HOME = Path(__file__).parent / "models"
@pytest.fixture
@@ -92,6 +94,40 @@ def table(tmp_path) -> ldb.table.LanceTable:
return table
@pytest.fixture
def language_model_home(monkeypatch, tmp_path):
model_home = tmp_path / "language-models"
shutil.copytree(TEST_LANGUAGE_MODEL_HOME, model_home)
monkeypatch.setenv("LANCE_LANGUAGE_MODEL_HOME", str(model_home))
return model_home
@pytest.fixture
def lindera_ipadic(language_model_home):
model_path = language_model_home / "lindera" / "ipadic"
extracted_model = model_path / "main"
config_path = model_path / "config.yml"
if extracted_model.exists():
shutil.rmtree(extracted_model)
with zipfile.ZipFile(model_path / "main.zip", "r") as zip_ref:
zip_ref.extractall(model_path)
config_path.write_text(
"segmenter:\n"
' mode: "normal"\n'
" dictionary:\n"
f' path: "{extracted_model.resolve().as_posix()}"\n',
encoding="utf-8",
)
try:
yield
finally:
if extracted_model.exists():
shutil.rmtree(extracted_model)
@pytest_asyncio.fixture
async def async_table(tmp_path) -> ldb.table.AsyncTable:
# Use local random state to avoid affecting other tests
@@ -144,58 +180,53 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
return table
def test_create_index(tmp_path):
index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
assert isinstance(index, tantivy.Index)
assert os.path.exists(str(tmp_path / "index"))
@pytest.mark.parametrize(
("kwargs", "match"),
[
(
{"use_tantivy": True},
"Tantivy-based FTS has been removed",
),
(
{"ordering_field_names": ["count"]},
"ordering_field_names was only supported",
),
(
{"writer_heap_size": 128},
"writer_heap_size was only supported",
),
],
)
def test_reject_removed_tantivy_parameters(table, kwargs, match):
with pytest.raises(ValueError, match=match):
table.create_fts_index("text", **kwargs)
def test_create_index_with_stemming(tmp_path, table):
index = ldb.fts.create_index(
str(tmp_path / "index"), ["text"], tokenizer_name="en_stem"
)
assert isinstance(index, tantivy.Index)
assert os.path.exists(str(tmp_path / "index"))
def test_reject_legacy_tantivy_index(table):
path, _, _ = table._get_fts_index_path()
os.makedirs(path, exist_ok=True)
# Check stemming by running tokenizer on non empty table
table.create_fts_index("text", tokenizer_name="en_stem", use_tantivy=True)
with pytest.raises(ValueError, match="Legacy Tantivy FTS index detected"):
table.search("puppy").limit(5).to_list()
with pytest.raises(ValueError, match="Legacy Tantivy FTS index detected"):
table.create_fts_index("text")
@pytest.mark.parametrize("use_tantivy", [True, False])
@pytest.mark.parametrize("with_position", [True, False])
def test_create_inverted_index(table, use_tantivy, with_position):
if use_tantivy and not with_position:
pytest.skip("we don't support building a tantivy index without position")
def test_create_inverted_index(table, with_position):
table.create_fts_index(
"text",
use_tantivy=use_tantivy,
with_position=with_position,
name="custom_fts_index",
)
if not use_tantivy:
indices = table.list_indices()
fts_indices = [i for i in indices if i.index_type == "FTS"]
assert any(i.name == "custom_fts_index" for i in fts_indices)
indices = table.list_indices()
fts_indices = [i for i in indices if i.index_type == "FTS"]
assert any(i.name == "custom_fts_index" for i in fts_indices)
def test_populate_index(tmp_path, table):
index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
assert ldb.fts.populate_index(index, table, ["text"]) == len(table)
def test_search_index(tmp_path, table):
index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
ldb.fts.populate_index(index, table, ["text"])
index.reload()
results = ldb.fts.search_index(index, query="puppy", limit=5)
assert len(results) == 2
assert len(results[0]) == 5 # row_ids
assert len(results[1]) == 5 # _score
@pytest.mark.parametrize("use_tantivy", [True, False])
def test_search_fts(table, use_tantivy):
table.create_fts_index("text", use_tantivy=use_tantivy)
def test_search_fts(table):
table.create_fts_index("text")
results = table.search("puppy").select(["id", "text"]).limit(5).to_list()
assert len(results) == 5
assert len(results[0]) == 3 # id, text, _score
@@ -204,53 +235,52 @@ def test_search_fts(table, use_tantivy):
results = table.search("puppy").select(["id", "text"]).to_list()
assert len(results) == 10
if not use_tantivy:
# Test with a query
results = (
table.search(MatchQuery("puppy", "text"))
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
# Test with a query
results = (
table.search(MatchQuery("puppy", "text"))
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
# Test boost query
results = (
table.search(
BoostQuery(
MatchQuery("puppy", "text"),
MatchQuery("runs", "text"),
)
# Test boost query
results = (
table.search(
BoostQuery(
MatchQuery("puppy", "text"),
MatchQuery("runs", "text"),
)
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
# Test multi match query
table.create_fts_index("text2", use_tantivy=use_tantivy)
results = (
table.search(MultiMatchQuery("puppy", ["text", "text2"]))
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
assert len(results[0]) == 3 # id, text, _score
# Test multi match query
table.create_fts_index("text2")
results = (
table.search(MultiMatchQuery("puppy", ["text", "text2"]))
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
assert len(results[0]) == 3 # id, text, _score
# Test boolean query
results = (
table.search(MatchQuery("puppy", "text") & MatchQuery("runs", "text"))
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
assert len(results[0]) == 3 # id, text, _score
for r in results:
assert "puppy" in r["text"]
assert "runs" in r["text"]
# Test boolean query
results = (
table.search(MatchQuery("puppy", "text") & MatchQuery("runs", "text"))
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
assert len(results[0]) == 3 # id, text, _score
for r in results:
assert "puppy" in r["text"]
assert "runs" in r["text"]
@pytest.mark.asyncio
@@ -318,13 +348,13 @@ async def test_fts_select_async(async_table):
def test_search_fts_phrase_query(table):
table.create_fts_index("text", use_tantivy=False, with_position=False)
table.create_fts_index("text", with_position=False)
try:
phrase_results = table.search('"puppy runs"').limit(100).to_list()
assert False
except Exception:
pass
table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True)
table.create_fts_index("text", with_position=True, replace=True)
results = table.search("puppy").limit(100).to_list()
# Test with quotation marks
@@ -375,8 +405,8 @@ async def test_search_fts_phrase_query_async(async_table):
def test_search_fts_specify_column(table):
table.create_fts_index("text", use_tantivy=False)
table.create_fts_index("text2", use_tantivy=False)
table.create_fts_index("text")
table.create_fts_index("text2")
results = table.search("puppy", fts_columns="text").limit(5).to_list()
assert len(results) == 5
@@ -470,42 +500,8 @@ async def test_search_fts_specify_column_async(async_table):
        pass


-def test_search_ordering_field_index_table(tmp_path, table):
-    table.create_fts_index("text", ordering_field_names=["count"], use_tantivy=True)
-    rows = (
-        table.search("puppy", ordering_field_name="count")
-        .limit(20)
-        .select(["text", "count"])
-        .to_list()
-    )
-    for r in rows:
-        assert "puppy" in r["text"]
-    assert sorted(rows, key=lambda x: x["count"], reverse=True) == rows
-
-
-def test_search_ordering_field_index(tmp_path, table):
-    index = ldb.fts.create_index(
-        str(tmp_path / "index"), ["text"], ordering_fields=["count"]
-    )
-    ldb.fts.populate_index(index, table, ["text"], ordering_fields=["count"])
-    index.reload()
-    results = ldb.fts.search_index(
-        index, query="puppy", limit=5, ordering_field="count"
-    )
-    assert len(results) == 2
-    assert len(results[0]) == 5  # row_ids
-    assert len(results[1]) == 5  # _distance
-    rows = table.to_lance().take(results[0]).to_pylist()
-    for r in rows:
-        assert "puppy" in r["text"]
-    assert sorted(rows, key=lambda x: x["count"], reverse=True) == rows
-
-
-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_create_index_from_table(tmp_path, table, use_tantivy):
-    table.create_fts_index("text", use_tantivy=use_tantivy)
+def test_create_index_from_table(tmp_path, table):
+    table.create_fts_index("text")
    df = table.search("puppy").limit(5).select(["text"]).to_pandas()
    assert len(df) <= 5
    assert "text" in df.columns
@@ -525,36 +521,24 @@ def test_create_index_from_table(tmp_path, table, use_tantivy):
    )
    with pytest.raises(Exception, match="already exists"):
-        table.create_fts_index("text", use_tantivy=use_tantivy)
+        table.create_fts_index("text")
-    table.create_fts_index("text", replace=True, use_tantivy=use_tantivy)
+    table.create_fts_index("text", replace=True)
    assert len(table.search("gorilla").limit(1).to_pandas()) == 1


def test_create_index_multiple_columns(tmp_path, table):
-    table.create_fts_index(["text", "text2"], use_tantivy=True)
-    df = table.search("puppy").limit(5).to_pandas()
-    assert len(df) == 5
-    assert "text" in df.columns
-    assert "text2" in df.columns
-
-
-def test_empty_rs(tmp_path, table, mocker):
-    table.create_fts_index(["text", "text2"], use_tantivy=True)
-    mocker.patch("lancedb.fts.search_index", return_value=([], []))
-    df = table.search("puppy").limit(5).to_pandas()
-    assert len(df) == 0
+    with pytest.raises(ValueError, match="Native FTS indexes can only be created"):
+        table.create_fts_index(["text", "text2"])


def test_nested_schema(tmp_path, table):
-    table.create_fts_index("nested.text", use_tantivy=True)
-    rs = table.search("puppy").limit(5).to_list()
-    assert len(rs) == 5
+    with pytest.raises(ValueError, match="top-level fields"):
+        table.create_fts_index("nested.text")


-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_search_index_with_filter(table, use_tantivy):
-    table.create_fts_index("text", use_tantivy=use_tantivy)
+def test_search_index_with_filter(table):
+    table.create_fts_index("text")
    orig_import = __import__

    def import_mock(name, *args):
@@ -584,8 +568,7 @@ def test_search_index_with_filter(table, use_tantivy):
assert r["_rowid"] is not None
@pytest.mark.parametrize("use_tantivy", [True, False])
def test_null_input(table, use_tantivy):
def test_null_input(table):
table.add(
[
{
@@ -598,14 +581,13 @@ def test_null_input(table, use_tantivy):
            }
        ]
    )
-    table.create_fts_index("text", use_tantivy=use_tantivy)
+    table.create_fts_index("text")


def test_syntax(table):
    # https://github.com/lancedb/lancedb/issues/769
-    table.create_fts_index("text", use_tantivy=True)
-    with pytest.raises(ValueError, match="Syntax Error"):
-        table.search("they could have been dogs OR").limit(10).to_list()
+    table.create_fts_index("text")
+    table.search("they could have been dogs OR").limit(10).to_list()

    # these should work
@@ -616,6 +598,7 @@ def test_syntax(table):
    ).to_list()

    # phrase queries
+    table.create_fts_index("text", with_position=True, replace=True)
    table.search("they could have been dogs OR cats").phrase_query().limit(10).to_list()
    table.search('"they could have been dogs OR cats"').limit(10).to_list()
    table.search('''"the cats OR dogs were not really 'pets' at all"''').limit(
@@ -639,7 +622,7 @@ def test_language(mem_db: DBConnection):
    table = mem_db.create_table("test", data=data)
    with pytest.raises(ValueError) as e:
-        table.create_fts_index("text", use_tantivy=False, language="klingon")
+        table.create_fts_index("text", language="klingon")
    assert exception_output(e) == (
        "ValueError: LanceDB does not support the requested language: 'klingon'\n"
@@ -650,7 +633,6 @@ def test_language(mem_db: DBConnection):
    table.create_fts_index(
        "text",
-        use_tantivy=False,
        language="French",
        stem=True,
        ascii_folding=True,
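For context, the language-related knobs that survive the signature cleanup, mirroring the French configuration above; the parameter values are the test's, and `replace=True` is added here only so the sketch can run against an already-indexed table:

```python
# Stemming and ASCII folding for a supported language.
table.create_fts_index(
    "text",
    language="French",
    stem=True,            # index word stems, e.g. "chiens" -> "chien"
    ascii_folding=True,   # fold accented characters, e.g. "é" -> "e"
    replace=True,
)

# Unsupported languages fail fast, as the klingon case above asserts:
# table.create_fts_index("text", language="klingon")  # -> ValueError
```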
@@ -690,7 +672,7 @@ def test_fts_on_list(mem_db: DBConnection):
        }
    )
    table = mem_db.create_table("test", data=data)
-    table.create_fts_index("text", use_tantivy=False, with_position=True)
+    table.create_fts_index("text", with_position=True)
    res = table.search("lance").limit(5).to_list()
    assert len(res) == 3
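`test_fts_on_list` indexes a list-of-strings column; only the tail of its setup is visible in this hunk, so the sample data below is an illustrative reconstruction rather than the fixture's, and `mem_db` stands for the in-memory connection fixture:

```python
import pyarrow as pa

data = pa.table(
    {
        "text": pa.array(
            [["lance database"], ["lance", "fast"], ["lance is cool"], ["other"]],
            type=pa.list_(pa.string()),  # each row holds a list of strings
        )
    }
)
table = mem_db.create_table("list_demo", data=data)
table.create_fts_index("text", with_position=True)
res = table.search("lance").limit(5).to_list()  # hits the three "lance" rows
```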
@@ -702,7 +684,7 @@ def test_fts_on_list(mem_db: DBConnection):
def test_fts_ngram(mem_db: DBConnection):
    data = pa.table({"text": ["hello world", "lance database", "lance is cool"]})
    table = mem_db.create_table("test", data=data)
-    table.create_fts_index("text", use_tantivy=False, base_tokenizer="ngram")
+    table.create_fts_index("text", base_tokenizer="ngram")
    results = table.search("lan", query_type="fts").limit(10).to_list()
    assert len(results) == 2
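The ngram tokenizer is what lets the bare prefix "lan" match two of the three rows without any wildcard syntax. A sketch of both configurations in this test; `ngram_min_length` and `prefix_only` are the knobs the follow-up hunk's comment names:

```python
# Default ngram setup: substrings of "lance", such as "lan", become terms.
table.create_fts_index("text", base_tokenizer="ngram")
table.search("lan", query_type="fts").limit(10).to_list()

# Tightened setup: grams of length >= 2, emitted from token prefixes only.
table.create_fts_index(
    "text",
    base_tokenizer="ngram",
    replace=True,
    ngram_min_length=2,
    prefix_only=True,
)
```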
@@ -721,7 +703,6 @@ def test_fts_ngram(mem_db: DBConnection):
    # test setting min_ngram_length and prefix_only
    table.create_fts_index(
        "text",
-        use_tantivy=False,
        base_tokenizer="ngram",
        replace=True,
        ngram_min_length=2,
@@ -742,6 +723,90 @@ def test_fts_ngram(mem_db: DBConnection):
assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
def test_fts_jieba_tokenizer(mem_db: DBConnection, language_model_home):
data = pa.table({"text": ["我们都有光明的前途", "光明的前途"]})
table = mem_db.create_table("test_jieba", data=data)
table.create_fts_index(
"text",
base_tokenizer="jieba/default",
stem=False,
remove_stop_words=False,
ascii_folding=False,
)
results = table.search("我们", query_type="fts").limit(10).to_list()
assert [row["text"] for row in results] == ["我们都有光明的前途"]
def test_fts_jieba_missing_language_model_note(
mem_db: DBConnection, monkeypatch, tmp_path
):
missing_root = tmp_path / "missing-language-models"
monkeypatch.setenv("LANCE_LANGUAGE_MODEL_HOME", str(missing_root))
table = mem_db.create_table(
"test_missing_jieba_model",
data=pa.table({"text": ["我们都有光明的前途"]}),
)
with pytest.raises((ValueError, RuntimeError)) as e:
table.create_fts_index(
"text",
base_tokenizer="jieba/default",
stem=False,
remove_stop_words=False,
ascii_folding=False,
)
output = exception_output(e)
assert "Invalid directory path:" in output
assert "LANCE_LANGUAGE_MODEL_HOME" in output
assert "jieba/default" in output
@pytest.mark.asyncio
async def test_fts_jieba_missing_language_model_note_async(monkeypatch, tmp_path):
missing_root = tmp_path / "missing-language-models"
monkeypatch.setenv("LANCE_LANGUAGE_MODEL_HOME", str(missing_root))
db = await ldb.connect_async(tmp_path / "async-db")
table = await db.create_table(
"test_missing_jieba_model_async",
data=pa.table({"text": ["我们都有光明的前途"]}),
)
with pytest.raises((ValueError, RuntimeError)) as e:
await table.create_index(
"text",
config=FTS(
base_tokenizer="jieba/default",
stem=False,
remove_stop_words=False,
ascii_folding=False,
),
)
output = exception_output(e)
assert "Invalid directory path:" in output
assert "LANCE_LANGUAGE_MODEL_HOME" in output
assert "jieba/default" in output
def test_fts_lindera_tokenizer(
mem_db: DBConnection, language_model_home, lindera_ipadic
):
data = pa.table({"text": ["成田国際空港", "東京国際空港", "羽田空港"]})
table = mem_db.create_table("test_lindera", data=data)
table.create_fts_index(
"text",
base_tokenizer="lindera/ipadic",
stem=False,
remove_stop_words=False,
ascii_folding=False,
)
results = table.search("成田", query_type="fts").limit(10).to_list()
assert [row["text"] for row in results] == ["成田国際空港"]
def test_fts_query_to_json():
"""Test that FTS query to_json() produces valid JSON strings with exact format."""
@@ -886,7 +951,7 @@ def test_fts_query_to_json():
def test_fts_fast_search(table):
-    table.create_fts_index("text", use_tantivy=False)
+    table.create_fts_index("text")
    # Insert some unindexed data
    table.add(

@@ -28,7 +28,7 @@ def sync_table(tmpdir_factory) -> Table:
        }
    )
    table = db.create_table("test", data)
-    table.create_fts_index("text", with_position=False, use_tantivy=False)
+    table.create_fts_index("text", with_position=False)
    return table
@@ -192,7 +192,7 @@ def table_with_id(tmpdir_factory) -> Table:
        }
    )
    table = db.create_table("test_with_id", data)
-    table.create_fts_index("text", with_position=False, use_tantivy=False)
+    table.create_fts_index("text", with_position=False)
    return table
